Mirror of https://github.com/hwchase17/langchain.git, synced 2025-04-28 11:55:21 +00:00

Compare commits: langchain-... → master (60 commits)

Commits (SHA1):
3fb0a55122, 5fb8fd863a, 79a537d308, ba2518995d, 04a899ebe3, a82d987f09, a60fd06784, 629b7a5a43, ab871a7b39, d30c56a8c1,
09c1991e96, a7903280dd, d0f0d1f966, 403fae8eec, d6b50ad3f6, 10a9c24dae, 8fc7a723b9, f4863f82e2, ae4b6380d9, ffbc64c72a,
6b0b317cb5, 21962e2201, 1eb0bdadfa, 7ecdac5240, faef3e5d50, d4fc734250, 4bc70766b5, e4877e5ef1, 8c5ae108dd, eedda164c6,
4be55f7c89, 577cb53a00, a7c1bccd6a, 25d77aa8b4, 59fd4cb4c0, b8c454b42b, a43df006de, 0f6fa34372, e8a84b05a4, 8574442c57,
920d504e47, 1f3054502e, 589bc19890, 27296bdb0c, 0e9d0dbc10, de56c31672, 335f089d6a, 9418c0d8a5, 23f701b08e, b344f34635,
017c8079e1, d0cd115356, 34ddfba76b, 5ffcd01c41, 096f0e5966, 46de0866db, d624a475e4, dbf9986d44, 0c723af4b0, f14bcee525
4  .github/scripts/prep_api_docs_build.py (vendored)

@@ -69,7 +69,7 @@ def main():
         clean_target_directories([
             p
             for p in package_yaml["packages"]
-            if p["repo"].startswith("langchain-ai/")
+            if (p["repo"].startswith("langchain-ai/") or p.get("include_in_api_ref"))
             and p["repo"] != "langchain-ai/langchain"
         ])

@@ -78,7 +78,7 @@ def main():
             p
             for p in package_yaml["packages"]
             if not p.get("disabled", False)
-            and p["repo"].startswith("langchain-ai/")
+            and (p["repo"].startswith("langchain-ai/") or p.get("include_in_api_ref"))
             and p["repo"] != "langchain-ai/langchain"
         ])
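The effect of this change is that a package hosted outside the langchain-ai org can now opt into the API reference via an `include_in_api_ref` flag, while the monorepo itself stays excluded. A small sketch of the same filter, using made-up package entries:

```python
# Sketch of the updated selection logic with made-up package entries.
packages = [
    {"name": "pkg-in-monorepo", "repo": "langchain-ai/langchain"},
    {"name": "pkg-in-org-repo", "repo": "langchain-ai/langchain-foo"},
    {"name": "pkg-external-opted-in", "repo": "acme/langchain-acme", "include_in_api_ref": True},
    {"name": "pkg-external", "repo": "acme/other"},
]

selected = [
    p
    for p in packages
    if (p["repo"].startswith("langchain-ai/") or p.get("include_in_api_ref"))
    and p["repo"] != "langchain-ai/langchain"
]

print([p["name"] for p in selected])
# ['pkg-in-org-repo', 'pkg-external-opted-in']
```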
23  .github/workflows/api_doc_build.yml (vendored)

@@ -26,7 +26,20 @@ jobs:
         id: get-unsorted-repos
         uses: mikefarah/yq@master
         with:
-          cmd: yq '.packages[].repo' langchain/libs/packages.yml
+          cmd: |
+            yq '
+              .packages[]
+              | select(
+                  (
+                    (.repo | test("^langchain-ai/"))
+                    and
+                    (.repo != "langchain-ai/langchain")
+                  )
+                  or
+                  (.include_in_api_ref // false)
+                )
+              | .repo
+            ' langchain/libs/packages.yml

       - name: Parse YAML and checkout repos
         env:
@@ -38,11 +51,9 @@ jobs:

           # Checkout each unique repository that is in langchain-ai org
           for repo in $REPOS; do
-            if [[ "$repo" != "langchain-ai/langchain" && "$repo" == langchain-ai/* ]]; then
-              REPO_NAME=$(echo $repo | cut -d'/' -f2)
-              echo "Checking out $repo to $REPO_NAME"
-              git clone --depth 1 https://github.com/$repo.git $REPO_NAME
-            fi
+            REPO_NAME=$(echo $repo | cut -d'/' -f2)
+            echo "Checking out $repo to $REPO_NAME"
+            git clone --depth 1 https://github.com/$repo.git $REPO_NAME
           done

       - name: Setup python ${{ env.PYTHON_VERSION }}
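The yq expression applies the same rule in the workflow: select repos under langchain-ai/ (excluding the monorepo) plus any package with `include_in_api_ref` set, which is why the checkout loop no longer needs its own org check. A rough Python equivalent, with hypothetical packages.yml contents, for readers less familiar with yq:

```python
# Rough Python equivalent of the new yq selection, run against hypothetical
# packages.yml contents (requires PyYAML).
import yaml

packages_yml = """
packages:
  - name: pkg-in-org-repo
    repo: langchain-ai/langchain-foo
  - name: monorepo-package
    repo: langchain-ai/langchain
  - name: partner-package
    repo: acme/langchain-acme
    include_in_api_ref: true
"""

data = yaml.safe_load(packages_yml)
repos = [
    p["repo"]
    for p in data["packages"]
    if (p["repo"].startswith("langchain-ai/") and p["repo"] != "langchain-ai/langchain")
    or p.get("include_in_api_ref", False)
]

# Because the filtering now happens here, the shell loop that clones each repo
# no longer needs its own "is this in the langchain-ai org?" guard.
print(repos)  # ['langchain-ai/langchain-foo', 'acme/langchain-acme']
```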
1  .github/workflows/run_notebooks.yml (vendored)

@@ -61,6 +61,7 @@ jobs:
         env:
           ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
           FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
+          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
           GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
           MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
@@ -11,6 +11,7 @@
 import json
 import os
 import sys
+from datetime import datetime
 from pathlib import Path
 
 import toml
@@ -104,7 +105,7 @@ def skip_private_members(app, what, name, obj, skip, options):
 # -- Project information -----------------------------------------------------
 
 project = "🦜🔗 LangChain"
-copyright = "2023, LangChain Inc"
+copyright = f"{datetime.now().year}, LangChain Inc"
 author = "LangChain, Inc"
 
 html_favicon = "_static/img/brand/favicon.png"
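The practical effect is that the copyright year now tracks the build date instead of being pinned to 2023; a minimal illustration:

```python
from datetime import datetime

# The copyright string is computed at build time rather than hard-coded.
print(f"{datetime.now().year}, LangChain Inc")  # e.g. "2025, LangChain Inc"
```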
File diffs for several additional files are suppressed because one or more lines are too long.
@@ -15,7 +15,10 @@
 * [Messages](/docs/concepts/messages)
 :::
 
-Multimodal support is still relatively new and less common, model providers have not yet standardized on the "best" way to define the API. As such, LangChain's multimodal abstractions are lightweight and flexible, designed to accommodate different model providers' APIs and interaction patterns, but are **not** standardized across models.
+LangChain supports multimodal data as input to chat models:
+
+1. Following provider-specific formats
+2. Adhering to a cross-provider standard (see [how-to guides](/docs/how_to/#multimodal) for detail)
 
 ### How to use multimodal models
 
@@ -26,38 +29,85 @@
 
 #### Inputs
 
-Some models can accept multimodal inputs, such as images, audio, video, or files. The types of multimodal inputs supported depend on the model provider. For instance, [Google's Gemini](/docs/integrations/chat/google_generative_ai/) supports documents like PDFs as inputs.
+Some models can accept multimodal inputs, such as images, audio, video, or files.
+The types of multimodal inputs supported depend on the model provider. For instance,
+[OpenAI](/docs/integrations/chat/openai/),
+[Anthropic](/docs/integrations/chat/anthropic/), and
+[Google Gemini](/docs/integrations/chat/google_generative_ai/)
+support documents like PDFs as inputs.
 
-Most chat models that support **multimodal inputs** also accept those values in OpenAI's content blocks format. So far this is restricted to image inputs. For models like Gemini which support video and other bytes input, the APIs also support the native, model-specific representations.
-
-The gist of passing multimodal inputs to a chat model is to use content blocks that specify a type and corresponding data. For example, to pass an image to a chat model:
+The gist of passing multimodal inputs to a chat model is to use content blocks that
+specify a type and corresponding data. For example, to pass an image to a chat model
+as URL:
 
 ```python
 from langchain_core.messages import HumanMessage
 
 message = HumanMessage(
     content=[
-        {"type": "text", "text": "describe the weather in this image"},
+        {"type": "text", "text": "Describe the weather in this image:"},
+        {
+            "type": "image",
+            "source_type": "url",
+            "url": "https://...",
+        },
     ],
 )
 response = model.invoke([message])
 ```
 
+We can also pass the image as in-line data:
+
+```python
+from langchain_core.messages import HumanMessage
+
+message = HumanMessage(
+    content=[
+        {"type": "text", "text": "Describe the weather in this image:"},
+        {
+            "type": "image",
+            "source_type": "base64",
+            "data": "<base64 string>",
+            "mime_type": "image/jpeg",
+        },
+    ],
+)
+response = model.invoke([message])
+```
+
+To pass a PDF file as in-line data (or URL, as supported by providers such as
+Anthropic), just change `"type"` to `"file"` and `"mime_type"` to `"application/pdf"`.
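For illustration, an in-line PDF block follows the same shape as the image blocks above (a sketch; `model` is assumed to be a chat model whose provider accepts PDF inputs):

```python
from langchain_core.messages import HumanMessage

# Same pattern as the image examples above, switched to a PDF.
# "<base64 PDF data>" stands in for a base64-encoded document string.
message = HumanMessage(
    content=[
        {"type": "text", "text": "Describe this document:"},
        {
            "type": "file",
            "source_type": "base64",
            "data": "<base64 PDF data>",
            "mime_type": "application/pdf",
        },
    ],
)
response = model.invoke([message])
```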
 
 See the [how-to guides](/docs/how_to/#multimodal) for more detail.
 
+Most chat models that support multimodal **image** inputs also accept those values in
+OpenAI's [Chat Completions format](https://platform.openai.com/docs/guides/images?api-mode=chat):
 
 ```python
 from langchain_core.messages import HumanMessage
 
 message = HumanMessage(
     content=[
         {"type": "text", "text": "Describe the weather in this image:"},
         {"type": "image_url", "image_url": {"url": image_url}},
     ],
 )
 response = model.invoke([message])
 ```
 
-:::caution
-The exact format of the content blocks may vary depending on the model provider. Please refer to the chat model's
-integration documentation for the correct format. Find the integration in the [chat model integration table](/docs/integrations/chat/).
-:::
+Otherwise, chat models will typically accept the native, provider-specific content
+block format. See [chat model integrations](/docs/integrations/chat/) for detail
+on specific providers.
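As one example of such a native format, Anthropic's Messages API nests image data under a `source` key rather than using the standard fields shown above; a sketch for comparison (based on Anthropic's documented format):

```python
# Anthropic-native image content block (provider-specific format), shown only
# for comparison with the standard block format above.
anthropic_image_block = {
    "type": "image",
    "source": {
        "type": "base64",
        "media_type": "image/jpeg",
        "data": "<base64 string>",
    },
}
```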
 
 #### Outputs
 
-Virtually no popular chat models support multimodal outputs at the time of writing (October 2024).
+Some chat models support multimodal outputs, such as images and audio. Multimodal
+outputs will appear as part of the [AIMessage](/docs/concepts/messages/#aimessage)
+response object. See for example:
 
-The only exception is OpenAI's chat model ([gpt-4o-audio-preview](/docs/integrations/chat/openai/)), which can generate audio outputs.
-
-Multimodal outputs will appear as part of the [AIMessage](/docs/concepts/messages/#aimessage) response object.
-
-Please see the [ChatOpenAI](/docs/integrations/chat/openai/) for more information on how to use multimodal outputs.
+- Generating [audio outputs](/docs/integrations/chat/openai/#audio-generation-preview) with OpenAI;
+- Generating [image outputs](/docs/integrations/chat/google_generative_ai/#multimodal-usage) with Google Gemini.
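For instance, audio generation with OpenAI's audio-preview model might look like the sketch below; the exact `modalities`/`audio` parameters follow OpenAI's API and should be checked against the [ChatOpenAI](/docs/integrations/chat/openai/) integration page:

```python
from langchain_openai import ChatOpenAI

# Sketch: generate an audio reply and read it back off the AIMessage.
# Treat the model name and kwargs as assumptions to verify against the docs.
llm = ChatOpenAI(
    model="gpt-4o-audio-preview",
    model_kwargs={
        "modalities": ["text", "audio"],
        "audio": {"voice": "alloy", "format": "wav"},
    },
)
response = llm.invoke("Say hello in one short sentence.")
audio_b64 = response.additional_kwargs["audio"]["data"]  # base64-encoded WAV
```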
 
 #### Tools
 
@@ -50,6 +50,7 @@ See [supported integrations](/docs/integrations/chat/) for details on getting st
 - [How to: force a specific tool call](/docs/how_to/tool_choice)
 - [How to: work with local models](/docs/how_to/local_llms)
 - [How to: init any model in one line](/docs/how_to/chat_models_universal_init/)
+- [How to: pass multimodal data directly to models](/docs/how_to/multimodal_inputs/)
 
 ### Messages
 
@@ -67,6 +68,7 @@ See [supported integrations](/docs/integrations/chat/) for details on getting st
 - [How to: use few shot examples in chat models](/docs/how_to/few_shot_examples_chat/)
 - [How to: partially format prompt templates](/docs/how_to/prompts_partial)
 - [How to: compose prompts together](/docs/how_to/prompts_composition)
+- [How to: use multimodal prompts](/docs/how_to/multimodal_prompts/)
 
 ### Example selectors
 
@@ -170,7 +172,7 @@ Indexing is the process of keeping your vectorstore in-sync with the underlying
 
 ### Tools
 
-LangChain [Tools](/docs/concepts/tools) contain a description of the tool (to pass to the language model) as well as the implementation of the function to call. Refer [here](/docs/integrations/tools/) for a list of pre-buit tools.
+LangChain [Tools](/docs/concepts/tools) contain a description of the tool (to pass to the language model) as well as the implementation of the function to call. Refer [here](/docs/integrations/tools/) for a list of pre-built tools.
 
 - [How to: create tools](/docs/how_to/custom_tools)
 - [How to: use built-in tools and toolkits](/docs/how_to/tools_builtin)
@@ -5,120 +5,165 @@
 [markdown cell]
-# How to pass multimodal data directly to models
+# How to pass multimodal data to models
 
-Here we demonstrate how to pass [multimodal](/docs/concepts/multimodality/) input directly to models.
-We currently expect all input to be passed in the same format as [OpenAI expects](https://platform.openai.com/docs/guides/vision).
-For other model providers that support multimodal input, we have added logic inside the class to convert to the expected format.
-
-In this example we will ask a [model](/docs/concepts/chat_models/#multimodality) to describe an image.
+Here we demonstrate how to pass [multimodal](/docs/concepts/multimodality/) input directly to models.
+
+LangChain supports multimodal data as input to chat models:
+
+1. Following provider-specific formats
+2. Adhering to a cross-provider standard
+
+Below, we demonstrate the cross-provider standard. See [chat model integrations](/docs/integrations/chat/) for detail
+on native formats for specific providers.
+
+:::note
+
+Most chat models that support multimodal **image** inputs also accept those values in
+OpenAI's [Chat Completions format](https://platform.openai.com/docs/guides/images?api-mode=chat):
+
+```python
+{
+    "type": "image_url",
+    "image_url": {"url": image_url},
+}
+```
+:::
 
+[markdown cell]
+## Images
+
+Many providers will accept images passed in-line as base64 data. Some will additionally accept an image from a URL directly.
+
+### Images from base64 data
+
+To pass images in-line, format them as content blocks of the following form:
+
+```python
+{
+    "type": "image",
+    "source_type": "base64",
+    "mime_type": "image/jpeg",  # or image/png, etc.
+    "data": "<base64 data string>",
+}
+```
+
+Example:
 
 [code cell]
-image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+import base64
+
+import httpx
+from langchain.chat_models import init_chat_model
+
+# Fetch image data
+image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
+
+
+# Pass to LLM
+llm = init_chat_model("anthropic:claude-3-5-sonnet-latest")
+
+message = {
+    "role": "user",
+    "content": [
+        {
+            "type": "text",
+            "text": "Describe the weather in this image:",
+        },
+        # highlight-start
+        {
+            "type": "image",
+            "source_type": "base64",
+            "data": image_data,
+            "mime_type": "image/jpeg",
+        },
+        # highlight-end
+    ],
+}
+response = llm.invoke([message])
+print(response.text())
 [output]
+The image shows a beautiful clear day with bright blue skies and wispy cirrus clouds stretching across the horizon. The clouds are thin and streaky, creating elegant patterns against the blue backdrop. The lighting suggests it's during the day, possibly late afternoon given the warm, golden quality of the light on the grass. The weather appears calm with no signs of wind (the grass looks relatively still) and no indication of rain. It's the kind of perfect, mild weather that's ideal for walking along the wooden boardwalk through the marsh grass.
 
+[markdown cell]
+See [LangSmith trace](https://smith.langchain.com/public/eab05a31-54e8-4fc9-911f-56805da67bef/r) for more detail.
+
+### Images from a URL
+
+Some providers (including [OpenAI](/docs/integrations/chat/openai/),
+[Anthropic](/docs/integrations/chat/anthropic/), and
+[Google Gemini](/docs/integrations/chat/google_generative_ai/)) will also accept images from URLs directly.
+
+To pass images as URLs, format them as content blocks of the following form:
+
+```python
+{
+    "type": "image",
+    "source_type": "url",
+    "url": "https://...",
+}
+```
+
+Example:
 
 [code cell]
-from langchain_core.messages import HumanMessage
-from langchain_openai import ChatOpenAI
-
-model = ChatOpenAI(model="gpt-4o")
 
 [markdown cell]
-The most commonly supported way to pass in images is to pass it in as a byte string.
-This should work for most model integrations.
 
 [code cell]
-import base64
-
-import httpx
-
-image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
 
 [code cell]
-message = HumanMessage(
-    content=[
-        {"type": "text", "text": "describe the weather in this image"},
-        {
-            "type": "image_url",
-            "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
-        },
-    ],
-)
-response = model.invoke([message])
-print(response.content)
+message = {
+    "role": "user",
+    "content": [
+        {
+            "type": "text",
+            "text": "Describe the weather in this image:",
+        },
+        {
+            "type": "image",
+            # highlight-start
+            "source_type": "url",
+            "url": image_url,
+            # highlight-end
+        },
+    ],
+}
+response = llm.invoke([message])
+print(response.text())
 [output]
-The weather in the image appears to be clear and pleasant. The sky is mostly blue with scattered, light clouds, suggesting a sunny day with minimal cloud cover. There is no indication of rain or strong winds, and the overall scene looks bright and calm. The lush green grass and clear visibility further indicate good weather conditions.
+The weather in this image appears to be pleasant and clear. The sky is mostly blue with a few scattered, light clouds, and there is bright sunlight illuminating the green grass and plants. There are no signs of rain or stormy conditions, suggesting it is a calm, likely warm day—typical of spring or summer.
 
 [markdown cell]
-We can feed the image URL directly in a content block of type "image_url". Note that only some model providers support this.
 
 [code cell]
-message = HumanMessage(
-    content=[
-        {"type": "text", "text": "describe the weather in this image"},
-        {"type": "image_url", "image_url": {"url": image_url}},
-    ],
-)
-response = model.invoke([message])
-print(response.content)
 [output]
-The weather in the image appears to be clear and sunny. The sky is mostly blue with a few scattered clouds, suggesting good visibility and a likely pleasant temperature. The bright sunlight is casting distinct shadows on the grass and vegetation, indicating it is likely daytime, possibly late morning or early afternoon. The overall ambiance suggests a warm and inviting day, suitable for outdoor activities.
@@ -126,12 +171,12 @@
 [markdown cell]
-We can also pass in multiple images.
+We can also pass in multiple images:
@@ -139,20 +184,460 @@
 [code cell]
-message = HumanMessage(
-    content=[
-        {"type": "text", "text": "are these two images the same?"},
-        {"type": "image_url", "image_url": {"url": image_url}},
-        {"type": "image_url", "image_url": {"url": image_url}},
-    ],
-)
-response = model.invoke([message])
-print(response.content)
+message = {
+    "role": "user",
+    "content": [
+        {"type": "text", "text": "Are these two images the same?"},
+        {"type": "image", "source_type": "url", "url": image_url},
+        {"type": "image", "source_type": "url", "url": image_url},
+    ],
+}
+response = llm.invoke([message])
+print(response.text())
 [output]
-Yes, the two images are the same. They both depict a wooden boardwalk extending through a grassy field under a blue sky with light clouds. The scenery, lighting, and composition are identical.
+Yes, these two images are the same. They depict a wooden boardwalk going through a grassy field under a blue sky with some clouds. The colors, composition, and elements in both images are identical.
 
+[markdown cell]
+## Documents (PDF)
+
+Some providers (including [OpenAI](/docs/integrations/chat/openai/),
+[Anthropic](/docs/integrations/chat/anthropic/), and
+[Google Gemini](/docs/integrations/chat/google_generative_ai/)) will accept PDF documents.
+
+### Documents from base64 data
+
+To pass documents in-line, format them as content blocks of the following form:
+
+```python
+{
+    "type": "file",
+    "source_type": "base64",
+    "mime_type": "application/pdf",
+    "data": "<base64 data string>",
+}
+```
+
+Example:
+
+[code cell]
+import base64
+
+import httpx
+from langchain.chat_models import init_chat_model
+
+# Fetch PDF data
+pdf_url = "https://pdfobject.com/pdf/sample.pdf"
+pdf_data = base64.b64encode(httpx.get(pdf_url).content).decode("utf-8")
+
+
+# Pass to LLM
+llm = init_chat_model("anthropic:claude-3-5-sonnet-latest")
+
+message = {
+    "role": "user",
+    "content": [
+        {
+            "type": "text",
+            "text": "Describe the document:",
+        },
+        # highlight-start
+        {
+            "type": "file",
+            "source_type": "base64",
+            "data": pdf_data,
+            "mime_type": "application/pdf",
+        },
+        # highlight-end
+    ],
+}
+response = llm.invoke([message])
+print(response.text())
 [output]
+This document appears to be a sample PDF file that contains Lorem ipsum placeholder text. It begins with a title "Sample PDF" followed by the subtitle "This is a simple PDF file. Fun fun fun."
+
+The rest of the document consists of several paragraphs of Lorem ipsum text, which is a commonly used placeholder text in design and publishing. The text is formatted in a clean, readable layout with consistent paragraph spacing. The document appears to be a single page containing four main paragraphs of this placeholder text.
+
+The Lorem ipsum text, while appearing to be Latin, is actually scrambled Latin-like text that is used primarily to demonstrate the visual form of a document or typeface without the distraction of meaningful content. It's commonly used in publishing and graphic design when the actual content is not yet available but the layout needs to be demonstrated.
+
+The document has a professional, simple layout with generous margins and clear paragraph separation, making it an effective example of basic PDF formatting and structure.
 
+[markdown cell]
+### Documents from a URL
+
+Some providers (specifically [Anthropic](/docs/integrations/chat/anthropic/))
+will also accept documents from URLs directly.
+
+To pass documents as URLs, format them as content blocks of the following form:
+
+```python
+{
+    "type": "file",
+    "source_type": "url",
+    "url": "https://...",
+}
+```
+
+Example:
+
+[code cell]
+message = {
+    "role": "user",
+    "content": [
+        {
+            "type": "text",
+            "text": "Describe the document:",
+        },
+        {
+            "type": "file",
+            # highlight-start
+            "source_type": "url",
+            "url": pdf_url,
+            # highlight-end
+        },
+    ],
+}
+response = llm.invoke([message])
+print(response.text())
 [output]
+This document appears to be a sample PDF file with both text and an image. It begins with a title "Sample PDF" followed by the text "This is a simple PDF file. Fun fun fun." The rest of the document contains Lorem ipsum placeholder text arranged in several paragraphs. The content is shown both as text and as an image of the formatted PDF, with the same content displayed in a clean, formatted layout with consistent spacing and typography. The document consists of a single page containing this sample text.
 
+[markdown cell]
+## Audio
+
+Some providers (including [OpenAI](/docs/integrations/chat/openai/) and
+[Google Gemini](/docs/integrations/chat/google_generative_ai/)) will accept audio inputs.
+
+### Audio from base64 data
+
+To pass audio in-line, format them as content blocks of the following form:
+
+```python
+{
+    "type": "audio",
+    "source_type": "base64",
+    "mime_type": "audio/wav",  # or appropriate mime-type
+    "data": "<base64 data string>",
+}
+```
+
+Example:
+
+[code cell]
+import base64
+
+import httpx
+from langchain.chat_models import init_chat_model
+
+# Fetch audio data
+audio_url = "https://upload.wikimedia.org/wikipedia/commons/3/3d/Alcal%C3%A1_de_Henares_%28RPS_13-04-2024%29_canto_de_ruise%C3%B1or_%28Luscinia_megarhynchos%29_en_el_Soto_del_Henares.wav"
+audio_data = base64.b64encode(httpx.get(audio_url).content).decode("utf-8")
+
+
+# Pass to LLM
+llm = init_chat_model("google_genai:gemini-2.0-flash-001")
+
+message = {
+    "role": "user",
+    "content": [
+        {
+            "type": "text",
+            "text": "Describe this audio:",
+        },
+        # highlight-start
+        {
+            "type": "audio",
+            "source_type": "base64",
+            "data": audio_data,
+            "mime_type": "audio/wav",
+        },
+        # highlight-end
+    ],
+}
+response = llm.invoke([message])
+print(response.text())
 [output]
+The audio appears to consist primarily of bird sounds, specifically bird vocalizations like chirping and possibly other bird songs.
 
+[markdown cell]
+## Provider-specific parameters
+
+Some providers will support or require additional fields on content blocks containing multimodal data.
+For example, Anthropic lets you specify [caching](/docs/integrations/chat/anthropic/#prompt-caching) of
+specific content to reduce token consumption.
+
+To use these fields, you can:
+
+1. Store them on directly on the content block; or
+2. Use the native format supported by each provider (see [chat model integrations](/docs/integrations/chat/) for detail).
+
+We show three examples below.
+
+### Example: Anthropic prompt caching
+
+[code cell]
+llm = init_chat_model("anthropic:claude-3-5-sonnet-latest")
+
+message = {
+    "role": "user",
+    "content": [
+        {
+            "type": "text",
+            "text": "Describe the weather in this image:",
+        },
+        {
+            "type": "image",
+            "source_type": "url",
+            "url": image_url,
+            # highlight-next-line
+            "cache_control": {"type": "ephemeral"},
+        },
+    ],
+}
+response = llm.invoke([message])
+print(response.text())
+response.usage_metadata
 [output]
+The image shows a beautiful, clear day with partly cloudy skies. The sky is a vibrant blue with wispy, white cirrus clouds stretching across it. The lighting suggests it's during daylight hours, possibly late afternoon or early evening given the warm, golden quality of the light on the grass. The weather appears calm with no signs of wind (the grass looks relatively still) and no threatening weather conditions. It's the kind of perfect weather you'd want for a walk along this wooden boardwalk through the marshland or grassland area.
+
+{'input_tokens': 1586,
+ 'output_tokens': 117,
+ 'total_tokens': 1703,
+ 'input_token_details': {'cache_read': 0, 'cache_creation': 1582}}
 
+[code cell]
+next_message = {
+    "role": "user",
+    "content": [
+        {
+            "type": "text",
+            "text": "Summarize that in 5 words.",
+        }
+    ],
+}
+response = llm.invoke([message, response, next_message])
+print(response.text())
+response.usage_metadata
 [output]
+Clear blue skies, wispy clouds.
+
+{'input_tokens': 1716,
+ 'output_tokens': 12,
+ 'total_tokens': 1728,
+ 'input_token_details': {'cache_read': 1582, 'cache_creation': 0}}
 
+[markdown cell]
+### Example: Anthropic citations
+
+[code cell]
+message = {
+    "role": "user",
+    "content": [
+        {
+            "type": "text",
+            "text": "Generate a 5 word summary of this document.",
+        },
+        {
+            "type": "file",
+            "source_type": "base64",
+            "data": pdf_data,
+            "mime_type": "application/pdf",
+            # highlight-next-line
+            "citations": {"enabled": True},
+        },
+    ],
+}
+response = llm.invoke([message])
+response.content
 [output]
+[{'citations': [{'cited_text': 'Sample PDF\r\nThis is a simple PDF file. Fun fun fun.\r\n',
+    'document_index': 0,
+    'document_title': None,
+    'end_page_number': 2,
+    'start_page_number': 1,
+    'type': 'page_location'}],
+  'text': 'Simple PDF file: fun fun',
+  'type': 'text'}]
 
+[markdown cell]
+### Example: OpenAI file names
+
+OpenAI requires that PDF documents be associated with file names:
+
+[code cell]
+llm = init_chat_model("openai:gpt-4.1")
+
+message = {
+    "role": "user",
+    "content": [
+        {
+            "type": "text",
+            "text": "Describe the document:",
+        },
+        {
+            "type": "file",
+            "source_type": "base64",
+            "data": pdf_data,
+            "mime_type": "application/pdf",
+            # highlight-next-line
+            "filename": "my-file",
+        },
+    ],
+}
+response = llm.invoke([message])
+print(response.text())
 [output]
+The document is a sample PDF file containing placeholder text. It consists of one page, titled "Sample PDF". The content is a mixture of English and the commonly used filler text "Lorem ipsum dolor sit amet..." and its extensions, which are often used in publishing and web design as generic text to demonstrate font, layout, and other visual elements.
+
+**Key points about the document:**
+- Length: 1 page
+- Purpose: Demonstrative/sample content
+- Content: No substantive or meaningful information, just demonstration text in paragraph form
+- Language: English (with the Latin-like "Lorem Ipsum" text used for layout purposes)
+
+There are no charts, tables, diagrams, or images on the page—only plain text. The document serves as an example of what a PDF file looks like rather than providing actual, useful content.
@ -167,16 +652,22 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "cd22ea82-2f93-46f9-9f7a-6aaf479fcaa9",
|
||||
"execution_count": 4,
|
||||
"id": "0f68cce7-350b-4cde-bc40-d3a169551fc3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[{'name': 'weather_tool', 'args': {'weather': 'sunny'}, 'id': 'call_BSX4oq4SKnLlp2WlzDhToHBr'}]\n"
|
||||
]
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'name': 'weather_tool',\n",
|
||||
" 'args': {'weather': 'sunny'},\n",
|
||||
" 'id': 'toolu_01G6JgdkhwggKcQKfhXZQPjf',\n",
|
||||
" 'type': 'tool_call'}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@ -191,16 +682,17 @@
|
||||
" pass\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"model_with_tools = model.bind_tools([weather_tool])\n",
|
||||
"llm_with_tools = llm.bind_tools([weather_tool])\n",
|
||||
"\n",
|
||||
"message = HumanMessage(\n",
|
||||
" content=[\n",
|
||||
" {\"type\": \"text\", \"text\": \"describe the weather in this image\"},\n",
|
||||
" {\"type\": \"image_url\", \"image_url\": {\"url\": image_url}},\n",
|
||||
"message = {\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": [\n",
|
||||
" {\"type\": \"text\", \"text\": \"Describe the weather in this image:\"},\n",
|
||||
" {\"type\": \"image\", \"source_type\": \"url\", \"url\": image_url},\n",
|
||||
" ],\n",
|
||||
")\n",
|
||||
"response = model_with_tools.invoke([message])\n",
|
||||
"print(response.tool_calls)"
|
||||
"}\n",
|
||||
"response = llm_with_tools.invoke([message])\n",
|
||||
"response.tool_calls"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -220,7 +712,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -9,157 +9,148 @@
|
||||
"\n",
|
||||
"Here we demonstrate how to use prompt templates to format [multimodal](/docs/concepts/multimodality/) inputs to models. \n",
|
||||
"\n",
|
||||
"In this example we will ask a [model](/docs/concepts/chat_models/#multimodality) to describe an image."
|
||||
"To use prompt templates in the context of multimodal data, we can templatize elements of the corresponding content block.\n",
|
||||
"For example, below we define a prompt that takes a URL for an image as a parameter:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "0d9fd81a-b7f0-445a-8e3d-cfc2d31fdd59",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import base64\n",
|
||||
"\n",
|
||||
"import httpx\n",
|
||||
"\n",
|
||||
"image_url = \"https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg\"\n",
|
||||
"image_data = base64.b64encode(httpx.get(image_url).content).decode(\"utf-8\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 1,
|
||||
"id": "2671f995",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"model = ChatOpenAI(model=\"gpt-4o\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "4ee35e4f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
"# Define prompt\n",
|
||||
"prompt = ChatPromptTemplate(\n",
|
||||
" [\n",
|
||||
" (\"system\", \"Describe the image provided\"),\n",
|
||||
" (\n",
|
||||
" \"user\",\n",
|
||||
" [\n",
|
||||
" {\n",
|
||||
" \"role\": \"system\",\n",
|
||||
" \"content\": \"Describe the image provided.\",\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": [\n",
|
||||
" {\n",
|
||||
" \"type\": \"image_url\",\n",
|
||||
" \"image_url\": {\"url\": \"data:image/jpeg;base64,{image_data}\"},\n",
|
||||
" }\n",
|
||||
" \"type\": \"image\",\n",
|
||||
" \"source_type\": \"url\",\n",
|
||||
" # highlight-next-line\n",
|
||||
" \"url\": \"{image_url}\",\n",
|
||||
" },\n",
|
||||
" ],\n",
|
||||
" ),\n",
|
||||
" },\n",
|
||||
" ]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "089f75c2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = prompt | model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "02744b06",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The image depicts a sunny day with a beautiful blue sky filled with scattered white clouds. The sky has varying shades of blue, ranging from a deeper hue near the horizon to a lighter, almost pale blue higher up. The white clouds are fluffy and scattered across the expanse of the sky, creating a peaceful and serene atmosphere. The lighting and cloud patterns suggest pleasant weather conditions, likely during the daytime hours on a mild, sunny day in an outdoor natural setting.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"response = chain.invoke({\"image_data\": image_data})\n",
|
||||
"print(response.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e9b9ebf6",
|
||||
"id": "f75d2e26-5b9a-4d5f-94a7-7f98f5666f6d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can also pass in multiple images."
|
||||
"Let's use this prompt to pass an image to a [chat model](/docs/concepts/chat_models/#multimodality):"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "02190ee3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"system\", \"compare the two pictures provided\"),\n",
|
||||
" (\n",
|
||||
" \"user\",\n",
|
||||
" [\n",
|
||||
" {\n",
|
||||
" \"type\": \"image_url\",\n",
|
||||
" \"image_url\": {\"url\": \"data:image/jpeg;base64,{image_data1}\"},\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"type\": \"image_url\",\n",
|
||||
" \"image_url\": {\"url\": \"data:image/jpeg;base64,{image_data2}\"},\n",
|
||||
" },\n",
|
||||
" ],\n",
|
||||
" ),\n",
|
||||
" ]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "42af057b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = prompt | model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "513abe00",
|
||||
"execution_count": 2,
|
||||
"id": "5df2e558-321d-4cf7-994e-2815ac37e704",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The two images provided are identical. Both images feature a wooden boardwalk path extending through a lush green field under a bright blue sky with some clouds. The perspective, colors, and elements in both images are exactly the same.\n"
|
||||
"This image shows a beautiful wooden boardwalk cutting through a lush green wetland or marsh area. The boardwalk extends straight ahead toward the horizon, creating a strong leading line through the composition. On either side, tall green grasses sway in what appears to be a summer or late spring setting. The sky is particularly striking, with wispy cirrus clouds streaking across a vibrant blue background. In the distance, you can see a tree line bordering the wetland area. The lighting suggests this may be during \"golden hour\" - either early morning or late afternoon - as there's a warm, gentle quality to the light that's illuminating the scene. The wooden planks of the boardwalk appear well-maintained and provide safe passage through what would otherwise be difficult terrain to traverse. It's the kind of scene you might find in a nature preserve or wildlife refuge designed to give visitors access to observe wetland ecosystems while protecting the natural environment.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"response = chain.invoke({\"image_data1\": image_data, \"image_data2\": image_data})\n",
|
||||
"print(response.content)"
|
||||
"from langchain.chat_models import init_chat_model\n",
|
||||
"\n",
|
||||
"llm = init_chat_model(\"anthropic:claude-3-5-sonnet-latest\")\n",
|
||||
"\n",
|
||||
"url = \"https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg\"\n",
|
||||
"\n",
|
||||
"chain = prompt | llm\n",
|
||||
"response = chain.invoke({\"image_url\": url})\n",
|
||||
"print(response.text())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f4cfdc50-4a9f-4888-93b4-af697366b0f3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Note that we can templatize arbitrary elements of the content block:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "53c88ebb-dd57-40c8-8542-b2c916706653",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"prompt = ChatPromptTemplate(\n",
|
||||
" [\n",
|
||||
" {\n",
|
||||
" \"role\": \"system\",\n",
|
||||
" \"content\": \"Describe the image provided.\",\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": [\n",
|
||||
" {\n",
|
||||
" \"type\": \"image\",\n",
|
||||
" \"source_type\": \"base64\",\n",
|
||||
" \"mime_type\": \"{image_mime_type}\",\n",
|
||||
" \"data\": \"{image_data}\",\n",
|
||||
" \"cache_control\": {\"type\": \"{cache_type}\"},\n",
|
||||
" },\n",
|
||||
" ],\n",
|
||||
" },\n",
|
||||
" ]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "25e4829e-0073-49a8-9669-9f43e5778383",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"This image shows a beautiful wooden boardwalk cutting through a lush green marsh or wetland area. The boardwalk extends straight ahead toward the horizon, creating a strong leading line in the composition. The surrounding vegetation consists of tall grass and reeds in vibrant green hues, with some bushes and trees visible in the background. The sky is particularly striking, featuring a bright blue color with wispy white clouds streaked across it. The lighting suggests this photo was taken during the \"golden hour\" - either early morning or late afternoon - giving the scene a warm, peaceful quality. The raised wooden path provides accessible access through what would otherwise be difficult terrain to traverse, allowing visitors to experience and appreciate this natural environment.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import base64\n",
|
||||
"\n",
|
||||
"import httpx\n",
|
||||
"\n",
|
||||
"image_data = base64.b64encode(httpx.get(url).content).decode(\"utf-8\")\n",
|
||||
"\n",
|
||||
"chain = prompt | llm\n",
|
||||
"response = chain.invoke(\n",
|
||||
" {\n",
|
||||
" \"image_data\": image_data,\n",
|
||||
" \"image_mime_type\": \"image/jpeg\",\n",
|
||||
" \"cache_type\": \"ephemeral\",\n",
|
||||
" }\n",
|
||||
")\n",
|
||||
"print(response.text())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ea8152c3",
|
||||
"id": "424defe8-d85c-4e45-a88d-bf6f910d5ebb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
@ -181,7 +172,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
118
docs/docs/integrations/caches/singlestore_semantic_cache.ipynb
Normal file
@ -0,0 +1,118 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e49f1e0d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# SingleStoreSemanticCache\n",
|
||||
"\n",
|
||||
"This example demonstrates how to get started with the SingleStore semantic cache.\n",
|
||||
"\n",
|
||||
"### Integration Overview\n",
|
||||
"\n",
|
||||
"`SingleStoreSemanticCache` leverages `SingleStoreVectorStore` to cache LLM responses directly in a SingleStore database, enabling efficient semantic retrieval and reuse of results.\n",
|
||||
"\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"| Class | Package | JS support |\n",
|
||||
"| :--- | :--- | :---: |\n",
|
||||
"| SingleStoreSemanticCache | langchain_singlestore | ❌ | "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0730d6a1-c893-4840-9817-5e5251676d5d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Installation\n",
|
||||
"\n",
|
||||
"This cache lives in the `langchain-singlestore` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "652d6238-1f87-422a-b135-f5abbb8652fc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-singlestore"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5c5f2839-4020-424e-9fc9-07777eede442",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "51a60dbe-9f2e-4e04-bb62-23968f17164a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.globals import set_llm_cache\n",
|
||||
"from langchain_singlestore import SingleStoreSemanticCache\n",
|
||||
"\n",
|
||||
"set_llm_cache(\n",
|
||||
" SingleStoreSemanticCache(\n",
|
||||
" embedding=YourEmbeddings(),\n",
|
||||
" host=\"root:pass@localhost:3306/db\",\n",
|
||||
" )\n",
|
||||
")"
|
||||
]
|
||||
},
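The usage cell above leaves two things open: `YourEmbeddings()` is a placeholder, and the timed cells below call `llm.invoke` without defining `llm`. A minimal sketch of that missing setup, assuming OpenAI models purely for illustration (any LangChain embeddings and chat model work the same way):

```python
# Hypothetical setup for the cells below; the model names are placeholders.
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-3-small")  # pass as embedding=... above
llm = ChatOpenAI(model="gpt-4o-mini")  # the `llm` invoked in the %%time cells
```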
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cddda8ef",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"# The first time, it is not yet in cache, so it should take longer\n",
|
||||
"llm.invoke(\"Tell me a joke\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c474168f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"# The second time, while not a direct hit, the question is semantically similar to the original question,\n",
|
||||
"# so it uses the cached result!\n",
|
||||
"llm.invoke(\"Tell me one joke\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "langchain-singlestore-BD1RbQ07-py3.11",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
@ -1,35 +1,26 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "afaf8039",
|
||||
"cell_type": "markdown",
|
||||
"id": "d982c99f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Google AI\n",
|
||||
"sidebar_label: Google Gemini\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e49f1e0d",
|
||||
"id": "56a6d990",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ChatGoogleGenerativeAI\n",
|
||||
"\n",
|
||||
"This docs will help you get started with Google AI [chat models](/docs/concepts/chat_models). For detailed documentation of all ChatGoogleGenerativeAI features and configurations head to the [API reference](https://python.langchain.com/api_reference/google_genai/chat_models/langchain_google_genai.chat_models.ChatGoogleGenerativeAI.html).\n",
|
||||
"Access Google's Generative AI models, including the Gemini family, directly via the Gemini API or experiment rapidly using Google AI Studio. The `langchain-google-genai` package provides the LangChain integration for these models. This is often the best starting point for individual developers.\n",
|
||||
"\n",
|
||||
"Google AI offers a number of different chat models. For information on the latest models, their features, context windows, etc. head to the [Google AI docs](https://ai.google.dev/gemini-api/docs/models/gemini).\n",
|
||||
"For information on the latest models, their features, context windows, etc. head to the [Google AI docs](https://ai.google.dev/gemini-api/docs/models/gemini). All examples use the `gemini-2.0-flash` model. Gemini 2.5 Pro and 2.5 Flash can be used via `gemini-2.5-pro-preview-03-25` and `gemini-2.5-flash-preview-04-17`. All model ids can be found in the [Gemini API docs](https://ai.google.dev/gemini-api/docs/models).\n",
|
||||
"\n",
|
||||
":::info Google AI vs Google Cloud Vertex AI\n",
|
||||
"\n",
|
||||
"Google's Gemini models are accessible through Google AI and through Google Cloud Vertex AI. Using Google AI just requires a Google account and an API key. Using Google Cloud Vertex AI requires a Google Cloud account (with term agreements and billing) but offers enterprise features like customer encryption key, virtual private cloud, and more.\n",
|
||||
"\n",
|
||||
"To learn more about the key features of the two APIs see the [Google docs](https://cloud.google.com/vertex-ai/generative-ai/docs/migrate/migrate-google-ai#google-ai).\n",
|
||||
"\n",
|
||||
":::\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/docs/integrations/chat/google_generativeai) | Package downloads | Package latest |\n",
|
||||
@ -37,23 +28,46 @@
|
||||
"| [ChatGoogleGenerativeAI](https://python.langchain.com/api_reference/google_genai/chat_models/langchain_google_genai.chat_models.ChatGoogleGenerativeAI.html) | [langchain-google-genai](https://python.langchain.com/api_reference/google_genai/index.html) | ❌ | beta | ✅ |  |  |\n",
|
||||
"\n",
|
||||
"### Model features\n",
|
||||
"\n",
|
||||
"| [Tool calling](/docs/how_to/tool_calling) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
|
||||
"| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"### Setup\n",
|
||||
"\n",
|
||||
"To access Google AI models you'll need to create a Google Acount account, get a Google AI API key, and install the `langchain-google-genai` integration package.\n",
|
||||
"To access Google AI models you'll need to create a Google Account, get a Google AI API key, and install the `langchain-google-genai` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Head to https://ai.google.dev/gemini-api/docs/api-key to generate a Google AI API key. Once you've done this set the GOOGLE_API_KEY environment variable:"
|
||||
"**1. Installation:**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "433e8d2b-9519-4b49-b2c4-7ab65b046c94",
|
||||
"id": "8d12ce35",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -U langchain-google-genai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "60be0b38",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**2. Credentials:**\n",
|
||||
"\n",
|
||||
"Head to [https://ai.google.dev/gemini-api/docs/api-key](https://ai.google.dev/gemini-api/docs/api-key) (or via Google AI Studio) to generate a Google AI API key.\n",
|
||||
"\n",
|
||||
"### Chat Models\n",
|
||||
"\n",
|
||||
"Use the `ChatGoogleGenerativeAI` class to interact with Google's chat models. See the [API reference](https://python.langchain.com/api_reference/google_genai/chat_models/langchain_google_genai.chat_models.ChatGoogleGenerativeAI.html) for full details.\n"
|
||||
]
|
||||
},
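The collapsed code cell that follows presumably reads the key at runtime; a minimal sketch of that step, assuming the key is stored in the `GOOGLE_API_KEY` environment variable:

```python
import getpass
import os

# Prompt for the Google AI API key only if it is not already set in the environment.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google AI API key: ")
```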
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "fb18c875",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -66,7 +80,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "72ee0c4b-9764-423a-9dbf-95129e185210",
|
||||
"id": "f050e8db",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
|
||||
@ -75,7 +89,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a15d341e-3e26-4ca3-830b-5aab30ed66de",
|
||||
"id": "82cb346f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -85,27 +99,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0730d6a1-c893-4840-9817-5e5251676d5d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain Google AI integration lives in the `langchain-google-genai` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "652d6238-1f87-422a-b135-f5abbb8652fc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-google-genai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a38cde65-254d-4219-a441-068766c0d4b5",
|
||||
"id": "273cefa0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
@ -115,15 +109,15 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "cb09c344-1836-4e0c-acf8-11d13ac1dbae",
|
||||
"execution_count": 4,
|
||||
"id": "7d3dc0b3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_genai import ChatGoogleGenerativeAI\n",
|
||||
"\n",
|
||||
"llm = ChatGoogleGenerativeAI(\n",
|
||||
" model=\"gemini-2.0-flash-001\",\n",
|
||||
" model=\"gemini-2.0-flash\",\n",
|
||||
" temperature=0,\n",
|
||||
" max_tokens=None,\n",
|
||||
" timeout=None,\n",
|
||||
@ -134,7 +128,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2b4f3e15",
|
||||
"id": "343a8c13",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Invocation"
|
||||
@ -142,19 +136,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "62e0dbc3",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"execution_count": 5,
|
||||
"id": "82c5708c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"J'adore la programmation.\", additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash-001', 'safety_ratings': []}, id='run-61cff164-40be-4f88-a2df-cca58297502f-0', usage_metadata={'input_tokens': 20, 'output_tokens': 7, 'total_tokens': 27, 'input_token_details': {'cache_read': 0}})"
|
||||
"AIMessage(content=\"J'adore la programmation.\", additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': []}, id='run-3b28d4b8-8a62-4e6c-ad4e-b53e6e825749-0', usage_metadata={'input_tokens': 20, 'output_tokens': 7, 'total_tokens': 27, 'input_token_details': {'cache_read': 0}})"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -173,8 +165,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "d86145b3-bfef-46e8-b227-4dda5c9c2705",
|
||||
"execution_count": 6,
|
||||
"id": "49d2d0c2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -191,7 +183,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "18e2bfc0-7e78-4528-a73f-499ac150dca8",
|
||||
"id": "ee3f6e1d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Chaining\n",
|
||||
@ -201,17 +193,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "e197d1d7-a070-4c96-9f8a-a0e86d046e0b",
|
||||
"execution_count": 7,
|
||||
"id": "3c8407ee",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='Ich liebe Programmieren.', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash-001', 'safety_ratings': []}, id='run-dd2f8fb9-62d9-4b84-9c97-ed9c34cda313-0', usage_metadata={'input_tokens': 15, 'output_tokens': 7, 'total_tokens': 22, 'input_token_details': {'cache_read': 0}})"
|
||||
"AIMessage(content='Ich liebe Programmieren.', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': []}, id='run-e5561c6b-2beb-4411-9210-4796b576a7cd-0', usage_metadata={'input_tokens': 15, 'output_tokens': 7, 'total_tokens': 22, 'input_token_details': {'cache_read': 0}})"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -241,22 +233,164 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "41c2ff10-a3ba-4f40-b3aa-7a395854849e",
|
||||
"id": "bdae9742",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Image generation\n",
|
||||
"## Multimodal Usage\n",
|
||||
"\n",
|
||||
"Some Gemini models (specifically `gemini-2.0-flash-exp`) support image generation capabilities.\n",
|
||||
"Gemini models can accept multimodal inputs (text, images, audio, video) and, for some models, generate multimodal outputs.\n",
|
||||
"\n",
|
||||
"### Text to image\n",
|
||||
"### Image Input\n",
|
||||
"\n",
|
||||
"See a simple usage example below:"
|
||||
"Provide image inputs along with text using a `HumanMessage` with a list content format. The `gemini-2.0-flash` model can handle images."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "7589e14d-8d1b-4c82-965f-5558d80cb677",
|
||||
"execution_count": null,
|
||||
"id": "6833fe5d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import base64\n",
|
||||
"\n",
|
||||
"from langchain_core.messages import HumanMessage\n",
|
||||
"from langchain_google_genai import ChatGoogleGenerativeAI\n",
|
||||
"\n",
|
||||
"# Example using a public URL (remains the same)\n",
|
||||
"message_url = HumanMessage(\n",
|
||||
" content=[\n",
|
||||
" {\n",
|
||||
" \"type\": \"text\",\n",
|
||||
" \"text\": \"Describe the image at the URL.\",\n",
|
||||
" },\n",
|
||||
" {\"type\": \"image_url\", \"image_url\": \"https://picsum.photos/seed/picsum/200/300\"},\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"result_url = llm.invoke([message_url])\n",
|
||||
"print(f\"Response for URL image: {result_url.content}\")\n",
|
||||
"\n",
|
||||
"# Example using a local image file encoded in base64\n",
|
||||
"image_file_path = \"/Users/philschmid/projects/google-gemini/langchain/docs/static/img/agents_vs_chains.png\"\n",
|
||||
"\n",
|
||||
"with open(image_file_path, \"rb\") as image_file:\n",
|
||||
" encoded_image = base64.b64encode(image_file.read()).decode(\"utf-8\")\n",
|
||||
"\n",
|
||||
"message_local = HumanMessage(\n",
|
||||
" content=[\n",
|
||||
" {\"type\": \"text\", \"text\": \"Describe the local image.\"},\n",
|
||||
" {\"type\": \"image_url\", \"image_url\": f\"data:image/png;base64,{encoded_image}\"},\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"result_local = llm.invoke([message_local])\n",
|
||||
"print(f\"Response for local image: {result_local.content}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b422382",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Other supported `image_url` formats:\n",
|
||||
"- A Google Cloud Storage URI (`gs://...`). Ensure the service account has access.\n",
|
||||
"- A PIL Image object (the library handles encoding).\n",
|
||||
"\n",
|
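A short sketch of those two variants, following the note above; the bucket object and local file name are placeholders:

```python
from PIL import Image
from langchain_core.messages import HumanMessage

# 1. Google Cloud Storage URI (the service account needs read access to the object)
gcs_message = HumanMessage(
    content=[
        {"type": "text", "text": "Describe this image."},
        {"type": "image_url", "image_url": "gs://my-bucket/photo.png"},
    ]
)

# 2. PIL Image object; the integration handles the encoding
pil_message = HumanMessage(
    content=[
        {"type": "text", "text": "Describe this image."},
        {"type": "image_url", "image_url": Image.open("photo.png")},
    ]
)

# llm.invoke([gcs_message]) / llm.invoke([pil_message])
```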
||||
"### Audio Input\n",
|
||||
"\n",
|
||||
"Provide audio file inputs along with text. Use a model like `gemini-2.0-flash`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a3461836",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import base64\n",
|
||||
"\n",
|
||||
"from langchain_core.messages import HumanMessage\n",
|
||||
"\n",
|
||||
"# Ensure you have an audio file named 'example_audio.mp3' or provide the correct path.\n",
|
||||
"audio_file_path = \"example_audio.mp3\"\n",
|
||||
"audio_mime_type = \"audio/mpeg\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"with open(audio_file_path, \"rb\") as audio_file:\n",
|
||||
" encoded_audio = base64.b64encode(audio_file.read()).decode(\"utf-8\")\n",
|
||||
"\n",
|
||||
"message = HumanMessage(\n",
|
||||
" content=[\n",
|
||||
" {\"type\": \"text\", \"text\": \"Transcribe the audio.\"},\n",
|
||||
" {\n",
|
||||
" \"type\": \"media\",\n",
|
||||
" \"data\": encoded_audio, # Use base64 string directly\n",
|
||||
" \"mime_type\": audio_mime_type,\n",
|
||||
" },\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"response = llm.invoke([message]) # Uncomment to run\n",
|
||||
"print(f\"Response for audio: {response.content}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0d898e27",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Video Input\n",
|
||||
"\n",
|
||||
"Provide video file inputs along with text. Use a model like `gemini-2.0-flash`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3046e74b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import base64\n",
|
||||
"\n",
|
||||
"from langchain_core.messages import HumanMessage\n",
|
||||
"from langchain_google_genai import ChatGoogleGenerativeAI\n",
|
||||
"\n",
|
||||
"# Ensure you have a video file named 'example_video.mp4' or provide the correct path.\n",
|
||||
"video_file_path = \"example_video.mp4\"\n",
|
||||
"video_mime_type = \"video/mp4\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"with open(video_file_path, \"rb\") as video_file:\n",
|
||||
" encoded_video = base64.b64encode(video_file.read()).decode(\"utf-8\")\n",
|
||||
"\n",
|
||||
"message = HumanMessage(\n",
|
||||
" content=[\n",
|
||||
" {\"type\": \"text\", \"text\": \"Describe the first few frames of the video.\"},\n",
|
||||
" {\n",
|
||||
" \"type\": \"media\",\n",
|
||||
" \"data\": encoded_video, # Use base64 string directly\n",
|
||||
" \"mime_type\": video_mime_type,\n",
|
||||
" },\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"response = llm.invoke([message]) # Uncomment to run\n",
|
||||
"print(f\"Response for video: {response.content}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2df11d89",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Image Generation (Multimodal Output)\n",
|
||||
"\n",
|
||||
"The `gemini-2.0-flash` model can generate text and images inline (image generation is experimental). You need to specify the desired `response_modalities`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c0b7180f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -266,17 +400,12 @@
|
||||
"<IPython.core.display.Image object>"
|
||||
]
|
||||
},
|
||||
"metadata": {
|
||||
"image/png": {
|
||||
"width": 300
|
||||
}
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import base64\n",
|
||||
"from io import BytesIO\n",
|
||||
"\n",
|
||||
"from IPython.display import Image, display\n",
|
||||
"from langchain_google_genai import ChatGoogleGenerativeAI\n",
|
||||
@ -301,7 +430,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b14c0d87-cf7e-4d88-bda1-2ab40ec0350a",
|
||||
"id": "14bf00f1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Image and text to image\n",
|
||||
@ -311,8 +440,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "0f4ed7a5-980c-4b54-b743-0b988909744c",
|
||||
"execution_count": null,
|
||||
"id": "d65e195c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -322,11 +451,7 @@
|
||||
"<IPython.core.display.Image object>"
|
||||
]
|
||||
},
|
||||
"metadata": {
|
||||
"image/png": {
|
||||
"width": 300
|
||||
}
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
@ -349,7 +474,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a62669d8-becd-495f-8f4a-82d7c5d87969",
|
||||
"id": "43b54d3f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can also represent an input image and query in a single message by encoding the base64 data in the [data URI scheme](https://en.wikipedia.org/wiki/Data_URI_scheme):"
|
||||
@ -357,8 +482,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "6241da43-e210-43bc-89af-b3c480ea06e9",
|
||||
"execution_count": null,
|
||||
"id": "0dfc7e1e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -368,11 +493,7 @@
|
||||
"<IPython.core.display.Image object>"
|
||||
]
|
||||
},
|
||||
"metadata": {
|
||||
"image/png": {
|
||||
"width": 300
|
||||
}
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
@ -403,7 +524,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cfe228d3-6773-4283-9788-87bdf6912b1c",
|
||||
"id": "789818d7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can also use LangGraph to manage the conversation history for you as in [this tutorial](/docs/tutorials/chatbot/)."
|
||||
@ -411,7 +532,313 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d1ee55bc-ffc8-4cfa-801c-993953a08cfd",
|
||||
"id": "b037e2dc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tool Calling\n",
|
||||
"\n",
|
||||
"You can equip the model with tools to call."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b0d759f9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[{'name': 'get_weather', 'args': {'location': 'San Francisco'}, 'id': 'a6248087-74c5-4b7c-9250-f335e642927c', 'type': 'tool_call'}]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"OK. It's sunny in San Francisco.\", additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': []}, id='run-ac5bb52c-e244-4c72-9fbc-fb2a9cd7a72e-0', usage_metadata={'input_tokens': 29, 'output_tokens': 11, 'total_tokens': 40, 'input_token_details': {'cache_read': 0}})"
|
||||
]
|
||||
},
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.tools import tool\n",
|
||||
"from langchain_google_genai import ChatGoogleGenerativeAI\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Define the tool\n",
|
||||
"@tool(description=\"Get the current weather in a given location\")\n",
|
||||
"def get_weather(location: str) -> str:\n",
|
||||
" return \"It's sunny.\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Initialize the model and bind the tool\n",
|
||||
"llm = ChatGoogleGenerativeAI(model=\"gemini-2.0-flash\")\n",
|
||||
"llm_with_tools = llm.bind_tools([get_weather])\n",
|
||||
"\n",
|
||||
"# Invoke the model with a query that should trigger the tool\n",
|
||||
"query = \"What's the weather in San Francisco?\"\n",
|
||||
"ai_msg = llm_with_tools.invoke(query)\n",
|
||||
"\n",
|
||||
"# Check the tool calls in the response\n",
|
||||
"print(ai_msg.tool_calls)\n",
|
||||
"\n",
|
||||
"# Example tool call message would be needed here if you were actually running the tool\n",
|
||||
"from langchain_core.messages import ToolMessage\n",
|
||||
"\n",
|
||||
"tool_message = ToolMessage(\n",
|
||||
" content=get_weather(*ai_msg.tool_calls[0][\"args\"]),\n",
|
||||
" tool_call_id=ai_msg.tool_calls[0][\"id\"],\n",
|
||||
")\n",
|
||||
"llm_with_tools.invoke([ai_msg, tool_message]) # Example of passing tool result back"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "91d42b86",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Structured Output\n",
|
||||
"\n",
|
||||
"Force the model to respond with a specific structure using Pydantic models."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "7457dbe4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"name='Abraham Lincoln' height_m=1.93\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
|
||||
"from langchain_google_genai import ChatGoogleGenerativeAI\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Define the desired structure\n",
|
||||
"class Person(BaseModel):\n",
|
||||
" \"\"\"Information about a person.\"\"\"\n",
|
||||
"\n",
|
||||
" name: str = Field(..., description=\"The person's name\")\n",
|
||||
" height_m: float = Field(..., description=\"The person's height in meters\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Initialize the model\n",
|
||||
"llm = ChatGoogleGenerativeAI(model=\"gemini-2.0-flash\", temperature=0)\n",
|
||||
"structured_llm = llm.with_structured_output(Person)\n",
|
||||
"\n",
|
||||
"# Invoke the model with a query asking for structured information\n",
|
||||
"result = structured_llm.invoke(\n",
|
||||
" \"Who was the 16th president of the USA, and how tall was he in meters?\"\n",
|
||||
")\n",
|
||||
"print(result)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "90d4725e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
"\n",
|
||||
"## Token Usage Tracking\n",
|
||||
"\n",
|
||||
"Access token usage information from the response metadata."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "edcc003e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Prompt engineering is the art and science of crafting effective text prompts to elicit desired and accurate responses from large language models.\n",
|
||||
"\n",
|
||||
"Usage Metadata:\n",
|
||||
"{'input_tokens': 10, 'output_tokens': 24, 'total_tokens': 34, 'input_token_details': {'cache_read': 0}}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_google_genai import ChatGoogleGenerativeAI\n",
|
||||
"\n",
|
||||
"llm = ChatGoogleGenerativeAI(model=\"gemini-2.0-flash\")\n",
|
||||
"\n",
|
||||
"result = llm.invoke(\"Explain the concept of prompt engineering in one sentence.\")\n",
|
||||
"\n",
|
||||
"print(result.content)\n",
|
||||
"print(\"\\nUsage Metadata:\")\n",
|
||||
"print(result.usage_metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "28950dbc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Built-in tools\n",
|
||||
"\n",
|
||||
"Google Gemini supports a variety of built-in tools ([google search](https://ai.google.dev/gemini-api/docs/grounding/search-suggestions), [code execution](https://ai.google.dev/gemini-api/docs/code-execution?lang=python)), which can be bound to the model in the usual way."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dd074816",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The next total solar eclipse visible in the United States will occur on August 23, 2044. However, the path of totality will only pass through Montana, North Dakota, and South Dakota.\n",
|
||||
"\n",
|
||||
"For a total solar eclipse that crosses a significant portion of the continental U.S., you'll have to wait until August 12, 2045. This eclipse will start in California and end in Florida.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from google.ai.generativelanguage_v1beta.types import Tool as GenAITool\n",
|
||||
"\n",
|
||||
"resp = llm.invoke(\n",
|
||||
" \"When is the next total solar eclipse in US?\",\n",
|
||||
" tools=[GenAITool(google_search={})],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(resp.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"id": "6964be2d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Executable code: print(2*2)\n",
|
||||
"\n",
|
||||
"Code execution result: 4\n",
|
||||
"\n",
|
||||
"2*2 is 4.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/philschmid/projects/google-gemini/langchain/.venv/lib/python3.9/site-packages/langchain_google_genai/chat_models.py:580: UserWarning: \n",
|
||||
" ⚠️ Warning: Output may vary each run. \n",
|
||||
" - 'executable_code': Always present. \n",
|
||||
" - 'execution_result' & 'image_url': May be absent for some queries. \n",
|
||||
"\n",
|
||||
" Validate before using in production.\n",
|
||||
"\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from google.ai.generativelanguage_v1beta.types import Tool as GenAITool\n",
|
||||
"\n",
|
||||
"resp = llm.invoke(\n",
|
||||
" \"What is 2*2, use python\",\n",
|
||||
" tools=[GenAITool(code_execution={})],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"for c in resp.content:\n",
|
||||
" if isinstance(c, dict):\n",
|
||||
" if c[\"type\"] == \"code_execution_result\":\n",
|
||||
" print(f\"Code execution result: {c['code_execution_result']}\")\n",
|
||||
" elif c[\"type\"] == \"executable_code\":\n",
|
||||
" print(f\"Executable code: {c['executable_code']}\")\n",
|
||||
" else:\n",
|
||||
" print(c)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a27e6ff4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Native Async\n",
|
||||
"\n",
|
||||
"Use asynchronous methods for non-blocking calls."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "c6803e57",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Async Invoke Result: The sky is blue due to a phenomenon called **Rayle...\n",
|
||||
"\n",
|
||||
"Async Stream Result:\n",
|
||||
"The thread is free, it does not wait,\n",
|
||||
"For answers slow, or tasks of fate.\n",
|
||||
"A promise made, a future bright,\n",
|
||||
"It moves ahead, with all its might.\n",
|
||||
"\n",
|
||||
"A callback waits, a signal sent,\n",
|
||||
"When data's read, or job is spent.\n",
|
||||
"Non-blocking code, a graceful dance,\n",
|
||||
"Responsive apps, a fleeting glance.\n",
|
||||
"\n",
|
||||
"Async Batch Results: ['1 + 1 = 2', '2 + 2 = 4']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_google_genai import ChatGoogleGenerativeAI\n",
|
||||
"\n",
|
||||
"llm = ChatGoogleGenerativeAI(model=\"gemini-2.0-flash\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"async def run_async_calls():\n",
|
||||
" # Async invoke\n",
|
||||
" result_ainvoke = await llm.ainvoke(\"Why is the sky blue?\")\n",
|
||||
" print(\"Async Invoke Result:\", result_ainvoke.content[:50] + \"...\")\n",
|
||||
"\n",
|
||||
" # Async stream\n",
|
||||
" print(\"\\nAsync Stream Result:\")\n",
|
||||
" async for chunk in llm.astream(\n",
|
||||
" \"Write a short poem about asynchronous programming.\"\n",
|
||||
" ):\n",
|
||||
" print(chunk.content, end=\"\", flush=True)\n",
|
||||
" print(\"\\n\")\n",
|
||||
"\n",
|
||||
" # Async batch\n",
|
||||
" results_abatch = await llm.abatch([\"What is 1+1?\", \"What is 2+2?\"])\n",
|
||||
" print(\"Async Batch Results:\", [res.content for res in results_abatch])\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"await run_async_calls()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "99204b32",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Safety Settings\n",
|
||||
@ -421,8 +848,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "238b2f96-e573-4fac-bbf2-7e52ad926833",
|
||||
"execution_count": null,
|
||||
"id": "d4c14039",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -442,7 +869,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5805d40c-deb8-4924-8e72-a294a0482fc9",
|
||||
"id": "dea38fb1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For an enumeration of the categories and thresholds available, see Google's [safety setting types](https://ai.google.dev/api/python/google/generativeai/types/SafetySettingDict)."
|
||||
@ -450,7 +877,7 @@
|
||||
},
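A minimal sketch of such a configuration, using the enums re-exported by `langchain_google_genai` (the collapsed cells above likely show an equivalent setup; the category and threshold here are only examples):

```python
from langchain_google_genai import (
    ChatGoogleGenerativeAI,
    HarmBlockThreshold,
    HarmCategory,
)

# Relax or tighten blocking per harm category; unspecified categories keep their defaults.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    safety_settings={
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
    },
)
```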
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3",
|
||||
"id": "d6d0e853",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
@ -461,7 +888,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@ -475,7 +902,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -17,21 +17,21 @@
|
||||
"source": [
|
||||
"# ChatClovaX\n",
|
||||
"\n",
|
||||
"This notebook provides a quick overview for getting started with Naver’s HyperCLOVA X [chat models](https://python.langchain.com/docs/concepts/chat_models) via CLOVA Studio. For detailed documentation of all ChatClovaX features and configurations head to the [API reference](https://python.langchain.com/api_reference/community/chat_models/langchain_community.chat_models.naver.ChatClovaX.html).\n",
|
||||
"This notebook provides a quick overview for getting started with Naver’s HyperCLOVA X [chat models](https://python.langchain.com/docs/concepts/chat_models) via CLOVA Studio. For detailed documentation of all ChatClovaX features and configurations head to the [API reference](https://guide.ncloud-docs.com/docs/clovastudio-dev-langchain).\n",
|
||||
"\n",
|
||||
"[CLOVA Studio](http://clovastudio.ncloud.com/) has several chat models. You can find information about latest models and their costs, context windows, and supported input types in the CLOVA Studio API Guide [documentation](https://api.ncloud-docs.com/docs/clovastudio-chatcompletions).\n",
|
||||
"[CLOVA Studio](http://clovastudio.ncloud.com/) has several chat models. You can find information about latest models and their costs, context windows, and supported input types in the CLOVA Studio Guide [documentation](https://guide.ncloud-docs.com/docs/clovastudio-model).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | JS support | Package downloads | Package latest |\n",
|
||||
"| :--- | :--- |:-----:| :---: |:------------------------------------------------------------------------:| :---: | :---: |\n",
|
||||
"| [ChatClovaX](https://python.langchain.com/api_reference/community/chat_models/langchain_community.chat_models.naver.ChatClovaX.html) | [langchain-community](https://python.langchain.com/api_reference/community/index.html) | ❌ | ❌ | ❌ |  |  |\n",
|
||||
"| [ChatClovaX](https://guide.ncloud-docs.com/docs/clovastudio-dev-langchain#HyperCLOVAX%EB%AA%A8%EB%8D%B8%EC%9D%B4%EC%9A%A9) | [langchain-naver](https://pypi.org/project/langchain-naver/) | ❌ | ❌ | ❌ |  |  |\n",
|
||||
"\n",
|
||||
"### Model features\n",
|
||||
"| [Tool calling](/docs/how_to/tool_calling/) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
|
||||
"|:------------------------------------------:| :---: | :---: | :---: | :---: | :---: |:-----------------------------------------------------:| :---: |:------------------------------------------------------:|:----------------------------------:|\n",
|
||||
"|❌| ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ |\n",
|
||||
"|✅| ❌ | ❌ | ✅ | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ |\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
@ -39,26 +39,23 @@
|
||||
"\n",
|
||||
"1. Creating [NAVER Cloud Platform](https://www.ncloud.com/) account\n",
|
||||
"2. Apply to use [CLOVA Studio](https://www.ncloud.com/product/aiService/clovaStudio)\n",
|
||||
"3. Create a CLOVA Studio Test App or Service App of a model to use (See [here](https://guide.ncloud-docs.com/docs/en/clovastudio-playground01#테스트앱생성).)\n",
|
||||
"3. Create a CLOVA Studio Test App or Service App of a model to use (See [here](https://guide.ncloud-docs.com/docs/clovastudio-playground-testapp).)\n",
|
||||
"4. Issue a Test or Service API key (See [here](https://api.ncloud-docs.com/docs/ai-naver-clovastudio-summary#API%ED%82%A4).)\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Set the `NCP_CLOVASTUDIO_API_KEY` environment variable with your API key.\n",
|
||||
" - Note that if you are using a legacy API Key (that doesn't start with `nv-*` prefix), you might need to get an additional API Key by clicking `App Request Status` > `Service App, Test App List` > `‘Details’ button for each app` in [CLOVA Studio](https://clovastudio.ncloud.com/studio-application/service-app) and set it as `NCP_APIGW_API_KEY`.\n",
|
||||
"Set the `CLOVASTUDIO_API_KEY` environment variable with your API key.\n",
|
||||
"\n",
|
||||
"You can add them to your environment variables as below:\n",
|
||||
"\n",
|
||||
"``` bash\n",
|
||||
"export NCP_CLOVASTUDIO_API_KEY=\"your-api-key-here\"\n",
|
||||
"# Uncomment below to use a legacy API key\n",
|
||||
"# export NCP_APIGW_API_KEY=\"your-api-key-here\"\n",
|
||||
"export CLOVASTUDIO_API_KEY=\"your-api-key-here\"\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 2,
|
||||
"id": "2def81b5-b023-4f40-a97b-b2c5ca59d6a9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -66,22 +63,19 @@
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"NCP_CLOVASTUDIO_API_KEY\"):\n",
|
||||
" os.environ[\"NCP_CLOVASTUDIO_API_KEY\"] = getpass.getpass(\n",
|
||||
" \"Enter your NCP CLOVA Studio API Key: \"\n",
|
||||
" )\n",
|
||||
"# Uncomment below to use a legacy API key\n",
|
||||
"# if not os.getenv(\"NCP_APIGW_API_KEY\"):\n",
|
||||
"# os.environ[\"NCP_APIGW_API_KEY\"] = getpass.getpass(\n",
|
||||
"# \"Enter your NCP API Gateway API key: \"\n",
|
||||
"# )"
|
||||
"if not os.getenv(\"CLOVASTUDIO_API_KEY\"):\n",
|
||||
" os.environ[\"CLOVASTUDIO_API_KEY\"] = getpass.getpass(\n",
|
||||
" \"Enter your CLOVA Studio API Key: \"\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7c695442",
|
||||
"metadata": {},
|
||||
"source": "To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
|
||||
"source": [
|
||||
"To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
@ -101,7 +95,7 @@
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain Naver integration lives in the `langchain-community` package:"
|
||||
"The LangChain Naver integration lives in the `langchain-naver` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -112,7 +106,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# install package\n",
|
||||
"!pip install -qU langchain-community"
|
||||
"%pip install -qU langchain-naver"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -127,21 +121,19 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 3,
|
||||
"id": "cb09c344-1836-4e0c-acf8-11d13ac1dbae",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.chat_models import ChatClovaX\n",
|
||||
"from langchain_naver import ChatClovaX\n",
|
||||
"\n",
|
||||
"chat = ChatClovaX(\n",
|
||||
" model=\"HCX-003\",\n",
|
||||
" max_tokens=100,\n",
|
||||
" model=\"HCX-005\",\n",
|
||||
" temperature=0.5,\n",
|
||||
" # clovastudio_api_key=\"...\" # set if you prefer to pass api key directly instead of using environment variables\n",
|
||||
" # task_id=\"...\" # set if you want to use fine-tuned model\n",
|
||||
" # service_app=False # set True if using Service App. Default value is False (means using Test App)\n",
|
||||
" # include_ai_filters=False # set True if you want to detect inappropriate content. Default value is False\n",
|
||||
" max_tokens=None,\n",
|
||||
" timeout=None,\n",
|
||||
" max_retries=2,\n",
|
||||
" # other params...\n",
|
||||
")"
|
||||
]
|
||||
@ -153,12 +145,12 @@
|
||||
"source": [
|
||||
"## Invocation\n",
|
||||
"\n",
|
||||
"In addition to invoke, we also support batch and stream functionalities."
|
||||
"In addition to invoke, `ChatClovaX` also support batch and stream functionalities."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"id": "62e0dbc3",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@ -167,10 +159,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='저는 네이버 AI를 사용하는 것이 좋아요.', additional_kwargs={}, response_metadata={'stop_reason': 'stop_before', 'input_length': 25, 'output_length': 14, 'seed': 1112164354, 'ai_filter': None}, id='run-b57bc356-1148-4007-837d-cc409dbd57cc-0', usage_metadata={'input_tokens': 25, 'output_tokens': 14, 'total_tokens': 39})"
|
||||
"AIMessage(content='네이버 인공지능을 사용하는 것을 정말 좋아합니다.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 11, 'prompt_tokens': 28, 'total_tokens': 39, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'HCX-005', 'system_fingerprint': None, 'id': 'b70c26671cd247a0864115bacfb5fc12', 'finish_reason': 'stop', 'logprobs': None}, id='run-3faf6a8d-d5da-49ad-9fbb-7b56ed23b484-0', usage_metadata={'input_tokens': 28, 'output_tokens': 11, 'total_tokens': 39, 'input_token_details': {}, 'output_token_details': {}})"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -190,7 +182,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"id": "24e7377f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -198,7 +190,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"저는 네이버 AI를 사용하는 것이 좋아요.\n"
|
||||
"네이버 인공지능을 사용하는 것을 정말 좋아합니다.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -218,17 +210,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"id": "e197d1d7-a070-4c96-9f8a-a0e86d046e0b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='저는 네이버 AI를 사용하는 것이 좋아요.', additional_kwargs={}, response_metadata={'stop_reason': 'stop_before', 'input_length': 25, 'output_length': 14, 'seed': 2575184681, 'ai_filter': None}, id='run-7014b330-eba3-4701-bb62-df73ce39b854-0', usage_metadata={'input_tokens': 25, 'output_tokens': 14, 'total_tokens': 39})"
|
||||
"AIMessage(content='저는 네이버 인공지능을 사용하는 것을 좋아합니다.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 28, 'total_tokens': 38, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'HCX-005', 'system_fingerprint': None, 'id': 'b7a826d17fcf4fee8386fca2ebc63284', 'finish_reason': 'stop', 'logprobs': None}, id='run-35957816-3325-4d9c-9441-e40704912be6-0', usage_metadata={'input_tokens': 28, 'output_tokens': 10, 'total_tokens': 38, 'input_token_details': {}, 'output_token_details': {}})"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -266,7 +258,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"id": "2c07af21-dda5-4514-b4de-1f214c2cebcd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -274,7 +266,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Certainly! In Korean, \"Hi\" is pronounced as \"안녕\" (annyeong). The first syllable, \"안,\" sounds like the \"ahh\" sound in \"apple,\" while the second syllable, \"녕,\" sounds like the \"yuh\" sound in \"you.\" So when you put them together, it's like saying \"ahhyuh-nyuhng.\" Remember to pronounce each syllable clearly and separately for accurate pronunciation."
|
||||
"In Korean, the informal way of saying 'hi' is \"안녕\" (annyeong). If you're addressing someone older or showing more respect, you would use \"안녕하세요\" (annjeonghaseyo). Both phrases are used as greetings similar to 'hello'. Remember, pronunciation is key so make sure to pronounce each syllable clearly: 안-녀-엉 (an-nyeo-eong) and 안-녕-하-세-요 (an-nyeong-ha-se-yo)."
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -298,115 +290,37 @@
|
||||
"\n",
|
||||
"### Using fine-tuned models\n",
|
||||
"\n",
|
||||
"You can call fine-tuned models by passing in your corresponding `task_id` parameter. (You don’t need to specify the `model_name` parameter when calling fine-tuned model.)\n",
|
||||
"You can call fine-tuned models by passing the `task_id` to the `model` parameter as: `ft:{task_id}`.\n",
|
||||
"\n",
|
||||
"You can check `task_id` from corresponding Test App or Service App details."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": null,
|
||||
"id": "cb436788",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='저는 네이버 AI를 사용하는 것이 너무 좋아요.', additional_kwargs={}, response_metadata={'stop_reason': 'stop_before', 'input_length': 25, 'output_length': 15, 'seed': 52559061, 'ai_filter': None}, id='run-5bea8d4a-48f3-4c34-ae70-66e60dca5344-0', usage_metadata={'input_tokens': 25, 'output_tokens': 15, 'total_tokens': 40})"
|
||||
"AIMessage(content='네이버 인공지능을 사용하는 것을 정말 좋아합니다.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 11, 'prompt_tokens': 28, 'total_tokens': 39, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'HCX-005', 'system_fingerprint': None, 'id': '2222d6d411a948c883aac1e03ca6cebe', 'finish_reason': 'stop', 'logprobs': None}, id='run-9696d7e2-7afa-4bb4-9c03-b95fcf678ab8-0', usage_metadata={'input_tokens': 28, 'output_tokens': 11, 'total_tokens': 39, 'input_token_details': {}, 'output_token_details': {}})"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"fine_tuned_model = ChatClovaX(\n",
|
||||
" task_id=\"5s8egt3a\", # set if you want to use fine-tuned model\n",
|
||||
" model=\"ft:a1b2c3d4\", # set as `ft:{task_id}` with your fine-tuned model's task id\n",
|
||||
" # other params...\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"fine_tuned_model.invoke(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f428deaf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Service App\n",
|
||||
"\n",
|
||||
"When going live with production-level application using CLOVA Studio, you should apply for and use Service App. (See [here](https://guide.ncloud-docs.com/docs/en/clovastudio-playground01#서비스앱신청).)\n",
|
||||
"\n",
|
||||
"For a Service App, you should use a corresponding Service API key and can only be called with it."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dcf566df",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Update environment variables\n",
|
||||
"\n",
|
||||
"os.environ[\"NCP_CLOVASTUDIO_API_KEY\"] = getpass.getpass(\n",
|
||||
" \"Enter NCP CLOVA Studio Service API Key: \"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "cebe27ae",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chat = ChatClovaX(\n",
|
||||
" service_app=True, # True if you want to use your service app, default value is False.\n",
|
||||
" # clovastudio_api_key=\"...\" # if you prefer to pass api key in directly instead of using env vars\n",
|
||||
" model=\"HCX-003\",\n",
|
||||
" # other params...\n",
|
||||
")\n",
|
||||
"ai_msg = chat.invoke(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d73e7140",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### AI Filter\n",
|
||||
"\n",
|
||||
"AI Filter detects inappropriate output such as profanity from the test app (or service app included) created in Playground and informs the user. See [here](https://guide.ncloud-docs.com/docs/en/clovastudio-playground01#AIFilter) for details."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "32bfbc93",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chat = ChatClovaX(\n",
|
||||
" model=\"HCX-003\",\n",
|
||||
" include_ai_filters=True, # True if you want to enable ai filter\n",
|
||||
" # other params...\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"ai_msg = chat.invoke(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7bd9e179",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(ai_msg.response_metadata[\"ai_filter\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3",
|
||||
@ -414,13 +328,13 @@
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all ChatNaver features and configurations head to the API reference: https://python.langchain.com/api_reference/community/chat_models/langchain_community.chat_models.naver.ChatClovaX.html"
|
||||
"For detailed documentation of all ChatClovaX features and configurations head to the [API reference](https://guide.ncloud-docs.com/docs/clovastudio-dev-langchain)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@ -434,7 +348,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.3"
|
||||
"version": "3.12.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
File diff suppressed because one or more lines are too long
@ -1413,6 +1413,23 @@
|
||||
"second_output_message = llm.invoke(history)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "90c18d18-b25c-4509-a639-bd652b92f518",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Flex processing\n",
|
||||
"\n",
|
||||
"OpenAI offers a variety of [service tiers](https://platform.openai.com/docs/guides/flex-processing). The \"flex\" tier offers cheaper pricing for requests, with the trade-off that responses may take longer and resources might not always be available. This approach is best suited for non-critical tasks, including model testing, data enhancement, or jobs that can be run asynchronously.\n",
|
||||
"\n",
|
||||
"To use it, initialize the model with `service_tier=\"flex\"`:\n",
|
||||
"```python\n",
|
||||
"llm = ChatOpenAI(model=\"o4-mini\", service_tier=\"flex\")\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Note that this is a beta feature that is only available for a subset of models. See OpenAI [docs](https://platform.openai.com/docs/guides/flex-processing) for more detail."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a796d728-971b-408b-88d5-440015bbb941",
|
||||
@ -1420,7 +1437,7 @@
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all ChatOpenAI features and configurations head to the API reference: https://python.langchain.com/api_reference/openai/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html"
|
||||
"For detailed documentation of all ChatOpenAI features and configurations head to the [API reference](https://python.langchain.com/api_reference/openai/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html)."
|
||||
]
|
||||
}
|
||||
],
|
||||
|
@ -57,8 +57,8 @@
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-11-08T19:44:51.390231Z",
|
||||
"start_time": "2024-11-08T19:44:51.387945Z"
|
||||
"end_time": "2025-04-21T18:23:30.746350Z",
|
||||
"start_time": "2025-04-21T18:23:30.744744Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
@ -70,7 +70,7 @@
|
||||
],
|
||||
"id": "fa57fba89276da13",
|
||||
"outputs": [],
|
||||
"execution_count": 1
|
||||
"execution_count": 2
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
@ -82,12 +82,25 @@
|
||||
"id": "87dc1742af7b053"
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-21T18:23:33.359278Z",
|
||||
"start_time": "2025-04-21T18:23:32.853207Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": "%pip install -qU langchain-predictionguard",
|
||||
"id": "b816ae8553cba021",
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 3
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
@ -103,13 +116,13 @@
|
||||
"metadata": {
|
||||
"id": "2xe8JEUwA7_y",
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-11-08T19:44:53.950653Z",
|
||||
"start_time": "2024-11-08T19:44:53.488694Z"
|
||||
"end_time": "2025-04-21T18:23:39.812675Z",
|
||||
"start_time": "2025-04-21T18:23:39.666881Z"
|
||||
}
|
||||
},
|
||||
"source": "from langchain_predictionguard import ChatPredictionGuard",
|
||||
"outputs": [],
|
||||
"execution_count": 2
|
||||
"execution_count": 4
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
@ -117,8 +130,8 @@
|
||||
"metadata": {
|
||||
"id": "Ua7Mw1N4HcER",
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-11-08T19:44:54.890695Z",
|
||||
"start_time": "2024-11-08T19:44:54.502846Z"
|
||||
"end_time": "2025-04-21T18:23:41.590296Z",
|
||||
"start_time": "2025-04-21T18:23:41.253237Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
@ -126,7 +139,7 @@
|
||||
"chat = ChatPredictionGuard(model=\"Hermes-3-Llama-3.1-8B\")"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": 3
|
||||
"execution_count": 5
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
@ -221,6 +234,132 @@
|
||||
],
|
||||
"execution_count": 6
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Tool Calling\n",
|
||||
"\n",
|
||||
"Prediction Guard has a tool calling API that lets you describe tools and their arguments, which enables the model return a JSON object with a tool to call and the inputs to that tool. Tool-calling is very useful for building tool-using chains and agents, and for getting structured outputs from models more generally.\n"
|
||||
],
|
||||
"id": "1227780d6e6728ba"
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### ChatPredictionGuard.bind_tools()\n",
|
||||
"\n",
|
||||
"Using `ChatPredictionGuard.bind_tools()`, you can pass in Pydantic classes, dict schemas, and Langchain tools as tools to the model, which are then reformatted to allow for use by the model."
|
||||
],
|
||||
"id": "23446aa52e01d1ba"
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "code",
|
||||
"outputs": [],
|
||||
"execution_count": null,
|
||||
"source": [
|
||||
"from pydantic import BaseModel, Field\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class GetWeather(BaseModel):\n",
|
||||
" \"\"\"Get the current weather in a given location\"\"\"\n",
|
||||
"\n",
|
||||
" location: str = Field(..., description=\"The city and state, e.g. San Francisco, CA\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class GetPopulation(BaseModel):\n",
|
||||
" \"\"\"Get the current population in a given location\"\"\"\n",
|
||||
"\n",
|
||||
" location: str = Field(..., description=\"The city and state, e.g. San Francisco, CA\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"llm_with_tools = chat.bind_tools(\n",
|
||||
" [GetWeather, GetPopulation]\n",
|
||||
" # strict = True # enforce tool args schema is respected\n",
|
||||
")"
|
||||
],
|
||||
"id": "135efb0bfc5916c1"
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-21T18:42:41.834079Z",
|
||||
"start_time": "2025-04-21T18:42:40.289095Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"ai_msg = llm_with_tools.invoke(\n",
|
||||
" \"Which city is hotter today and which is bigger: LA or NY?\"\n",
|
||||
")\n",
|
||||
"ai_msg"
|
||||
],
|
||||
"id": "8136f19a8836cd58",
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'chatcmpl-tool-b1204a3c70b44cd8802579df48df0c8c', 'type': 'function', 'index': 0, 'function': {'name': 'GetWeather', 'arguments': '{\"location\": \"Los Angeles, CA\"}'}}, {'id': 'chatcmpl-tool-e299116c05bf4ce498cd6042928ae080', 'type': 'function', 'index': 0, 'function': {'name': 'GetWeather', 'arguments': '{\"location\": \"New York, NY\"}'}}, {'id': 'chatcmpl-tool-19502a60f30348669ffbac00ff503388', 'type': 'function', 'index': 0, 'function': {'name': 'GetPopulation', 'arguments': '{\"location\": \"Los Angeles, CA\"}'}}, {'id': 'chatcmpl-tool-4b8d56ef067f447795d9146a56e43510', 'type': 'function', 'index': 0, 'function': {'name': 'GetPopulation', 'arguments': '{\"location\": \"New York, NY\"}'}}]}, response_metadata={}, id='run-4630cfa9-4e95-42dd-8e4a-45db78180a10-0', tool_calls=[{'name': 'GetWeather', 'args': {'location': 'Los Angeles, CA'}, 'id': 'chatcmpl-tool-b1204a3c70b44cd8802579df48df0c8c', 'type': 'tool_call'}, {'name': 'GetWeather', 'args': {'location': 'New York, NY'}, 'id': 'chatcmpl-tool-e299116c05bf4ce498cd6042928ae080', 'type': 'tool_call'}, {'name': 'GetPopulation', 'args': {'location': 'Los Angeles, CA'}, 'id': 'chatcmpl-tool-19502a60f30348669ffbac00ff503388', 'type': 'tool_call'}, {'name': 'GetPopulation', 'args': {'location': 'New York, NY'}, 'id': 'chatcmpl-tool-4b8d56ef067f447795d9146a56e43510', 'type': 'tool_call'}])"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"execution_count": 7
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### AIMessage.tool_calls\n",
|
||||
"\n",
|
||||
"Notice that the AIMessage has a tool_calls attribute. This contains in a standardized ToolCall format that is model-provider agnostic."
|
||||
],
|
||||
"id": "84f405c45a35abe5"
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-04-21T18:43:00.429453Z",
|
||||
"start_time": "2025-04-21T18:43:00.426399Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": "ai_msg.tool_calls",
|
||||
"id": "bdcee85475019719",
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'name': 'GetWeather',\n",
|
||||
" 'args': {'location': 'Los Angeles, CA'},\n",
|
||||
" 'id': 'chatcmpl-tool-b1204a3c70b44cd8802579df48df0c8c',\n",
|
||||
" 'type': 'tool_call'},\n",
|
||||
" {'name': 'GetWeather',\n",
|
||||
" 'args': {'location': 'New York, NY'},\n",
|
||||
" 'id': 'chatcmpl-tool-e299116c05bf4ce498cd6042928ae080',\n",
|
||||
" 'type': 'tool_call'},\n",
|
||||
" {'name': 'GetPopulation',\n",
|
||||
" 'args': {'location': 'Los Angeles, CA'},\n",
|
||||
" 'id': 'chatcmpl-tool-19502a60f30348669ffbac00ff503388',\n",
|
||||
" 'type': 'tool_call'},\n",
|
||||
" {'name': 'GetPopulation',\n",
|
||||
" 'args': {'location': 'New York, NY'},\n",
|
||||
" 'id': 'chatcmpl-tool-4b8d56ef067f447795d9146a56e43510',\n",
|
||||
" 'type': 'tool_call'}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"execution_count": 8
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ff1b51a8",
|
||||
|
@ -34,33 +34,46 @@
|
||||
"id": "juAmbgoWD17u"
|
||||
},
|
||||
"source": [
|
||||
"The AstraDB Document Loader returns a list of Langchain Documents from an AstraDB database.\n",
|
||||
"The Astra DB Document Loader returns a list of Langchain `Document` objects read from an Astra DB collection.\n",
|
||||
"\n",
|
||||
"The Loader takes the following parameters:\n",
|
||||
"The loader takes the following parameters:\n",
|
||||
"\n",
|
||||
"* `api_endpoint`: AstraDB API endpoint. Looks like `https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com`\n",
|
||||
"* `token`: AstraDB token. Looks like `AstraCS:6gBhNmsk135....`\n",
|
||||
"* `api_endpoint`: Astra DB API endpoint. Looks like `https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com`\n",
|
||||
"* `token`: Astra DB token. Looks like `AstraCS:aBcD0123...`\n",
|
||||
"* `collection_name` : AstraDB collection name\n",
|
||||
"* `namespace`: (Optional) AstraDB namespace\n",
|
||||
"* `namespace`: (Optional) AstraDB namespace (called _keyspace_ in Astra DB)\n",
|
||||
"* `filter_criteria`: (Optional) Filter used in the find query\n",
|
||||
"* `projection`: (Optional) Projection used in the find query\n",
|
||||
"* `find_options`: (Optional) Options used in the find query\n",
|
||||
"* `nb_prefetched`: (Optional) Number of documents pre-fetched by the loader\n",
|
||||
"* `limit`: (Optional) Maximum number of documents to retrieve\n",
|
||||
"* `extraction_function`: (Optional) A function to convert the AstraDB document to the LangChain `page_content` string. Defaults to `json.dumps`\n",
|
||||
"\n",
|
||||
"The following metadata is set to the LangChain Documents metadata output:\n",
|
||||
"The loader sets the following metadata for the documents it reads:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"{\n",
|
||||
" metadata : {\n",
|
||||
" \"namespace\": \"...\", \n",
|
||||
" \"api_endpoint\": \"...\", \n",
|
||||
" \"collection\": \"...\"\n",
|
||||
" }\n",
|
||||
"metadata={\n",
|
||||
" \"namespace\": \"...\", \n",
|
||||
" \"api_endpoint\": \"...\", \n",
|
||||
" \"collection\": \"...\"\n",
|
||||
"}\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install \"langchain-astradb>=0.6,<0.7\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
@ -71,24 +84,43 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import AstraDBLoader"
|
||||
"from langchain_astradb import AstraDBLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[**API Reference:** `AstraDBLoader`](https://python.langchain.com/api_reference/astradb/document_loaders/langchain_astradb.document_loaders.AstraDBLoader.html#langchain_astradb.document_loaders.AstraDBLoader)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-01-08T12:41:22.643335Z",
|
||||
"start_time": "2024-01-08T12:40:57.759116Z"
|
||||
},
|
||||
"collapsed": false
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"ASTRA_DB_API_ENDPOINT = https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com\n",
|
||||
"ASTRA_DB_APPLICATION_TOKEN = ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
@ -98,7 +130,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-01-08T12:42:25.395162Z",
|
||||
@ -112,19 +144,22 @@
|
||||
" token=ASTRA_DB_APPLICATION_TOKEN,\n",
|
||||
" collection_name=\"movie_reviews\",\n",
|
||||
" projection={\"title\": 1, \"reviewtext\": 1},\n",
|
||||
" find_options={\"limit\": 10},\n",
|
||||
" limit=10,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-01-08T12:42:30.236489Z",
|
||||
"start_time": "2024-01-08T12:42:29.612133Z"
|
||||
},
|
||||
"collapsed": false
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -133,7 +168,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-01-08T12:42:31.369394Z",
|
||||
@ -144,10 +179,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='{\"_id\": \"659bdffa16cbc4586b11a423\", \"title\": \"Dangerous Men\", \"reviewtext\": \"\\\\\"Dangerous Men,\\\\\" the picture\\'s production notes inform, took 26 years to reach the big screen. After having seen it, I wonder: What was the rush?\"}', metadata={'namespace': 'default_keyspace', 'api_endpoint': 'https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com', 'collection': 'movie_reviews'})"
|
||||
"Document(metadata={'namespace': 'default_keyspace', 'api_endpoint': 'https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com', 'collection': 'movie_reviews'}, page_content='{\"_id\": \"659bdffa16cbc4586b11a423\", \"title\": \"Dangerous Men\", \"reviewtext\": \"\\\\\"Dangerous Men,\\\\\" the picture\\'s production notes inform, took 26 years to reach the big screen. After having seen it, I wonder: What was the rush?\"}')"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -179,7 +214,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.18"
|
||||
"version": "3.12.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -49,7 +49,14 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import BrowserbaseLoader"
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from langchain_community.document_loaders import BrowserbaseLoader\n",
|
||||
"\n",
|
||||
"load_dotenv()\n",
|
||||
"\n",
|
||||
"BROWSERBASE_API_KEY = os.getenv(\"BROWSERBASE_API_KEY\")\n",
|
||||
"BROWSERBASE_PROJECT_ID = os.getenv(\"BROWSERBASE_PROJECT_ID\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -59,6 +66,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = BrowserbaseLoader(\n",
|
||||
" api_key=BROWSERBASE_API_KEY,\n",
|
||||
" project_id=BROWSERBASE_PROJECT_ID,\n",
|
||||
" urls=[\n",
|
||||
" \"https://example.com\",\n",
|
||||
" ],\n",
|
||||
@ -78,52 +87,11 @@
|
||||
"\n",
|
||||
"- `urls` Required. A list of URLs to fetch.\n",
|
||||
"- `text_content` Retrieve only text content. Default is `False`.\n",
|
||||
"- `api_key` Optional. Browserbase API key. Default is `BROWSERBASE_API_KEY` env variable.\n",
|
||||
"- `project_id` Optional. Browserbase Project ID. Default is `BROWSERBASE_PROJECT_ID` env variable.\n",
|
||||
"- `api_key` Browserbase API key. Default is `BROWSERBASE_API_KEY` env variable.\n",
|
||||
"- `project_id` Browserbase Project ID. Default is `BROWSERBASE_PROJECT_ID` env variable.\n",
|
||||
"- `session_id` Optional. Provide an existing Session ID.\n",
|
||||
"- `proxy` Optional. Enable/Disable Proxies."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Loading images\n",
|
||||
"\n",
|
||||
"You can also load screenshots of webpages (as bytes) for multi-modal models.\n",
|
||||
"\n",
|
||||
"Full example using GPT-4V:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from browserbase import Browserbase\n",
|
||||
"from browserbase.helpers.gpt4 import GPT4VImage, GPT4VImageDetail\n",
|
||||
"from langchain_core.messages import HumanMessage\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"chat = ChatOpenAI(model=\"gpt-4-vision-preview\", max_tokens=256)\n",
|
||||
"browser = Browserbase()\n",
|
||||
"\n",
|
||||
"screenshot = browser.screenshot(\"https://browserbase.com\")\n",
|
||||
"\n",
|
||||
"result = chat.invoke(\n",
|
||||
" [\n",
|
||||
" HumanMessage(\n",
|
||||
" content=[\n",
|
||||
" {\"type\": \"text\", \"text\": \"What color is the logo?\"},\n",
|
||||
" GPT4VImage(screenshot, GPT4VImageDetail.auto),\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(result.content)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
@ -36,10 +36,7 @@
|
||||
"pip install oracledb"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"is_executing": true
|
||||
}
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
@ -51,10 +48,7 @@
|
||||
"from settings import s"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"is_executing": true
|
||||
}
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
@ -97,16 +91,14 @@
|
||||
"doc_2 = doc_loader_2.load()"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"is_executing": true
|
||||
}
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"With TLS authentication, wallet_location and wallet_password are not required."
|
||||
"With TLS authentication, wallet_location and wallet_password are not required.\n",
|
||||
"Bind variable option is provided by argument \"parameters\"."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
@ -117,6 +109,8 @@
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"SQL_QUERY = \"select channel_id, channel_desc from sh.channels where channel_desc = :1 fetch first 5 rows only\"\n",
|
||||
"\n",
|
||||
"doc_loader_3 = OracleAutonomousDatabaseLoader(\n",
|
||||
" query=SQL_QUERY,\n",
|
||||
" user=s.USERNAME,\n",
|
||||
@ -124,6 +118,7 @@
|
||||
" schema=s.SCHEMA,\n",
|
||||
" config_dir=s.CONFIG_DIR,\n",
|
||||
" tns_name=s.TNS_NAME,\n",
|
||||
" parameters=[\"Direct Sales\"],\n",
|
||||
")\n",
|
||||
"doc_3 = doc_loader_3.load()\n",
|
||||
"\n",
|
||||
@ -133,6 +128,7 @@
|
||||
" password=s.PASSWORD,\n",
|
||||
" schema=s.SCHEMA,\n",
|
||||
" connection_string=s.CONNECTION_STRING,\n",
|
||||
" parameters=[\"Direct Sales\"],\n",
|
||||
")\n",
|
||||
"doc_4 = doc_loader_4.load()"
|
||||
],
|
||||
|
187
docs/docs/integrations/document_loaders/singlestore.ipynb
Normal file
@ -0,0 +1,187 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: SingleStore\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# SingleStoreLoader\n",
|
||||
"\n",
|
||||
"The `SingleStoreLoader` allows you to load documents directly from a SingleStore database table. It is part of the `langchain-singlestore` integration package.\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"### Integration Details\n",
|
||||
"\n",
|
||||
"| Class | Package | JS Support |\n",
|
||||
"| :--- | :--- | :---: |\n",
|
||||
"| `SingleStoreLoader` | `langchain_singlestore` | ❌ |\n",
|
||||
"\n",
|
||||
"### Features\n",
|
||||
"- Load documents lazily to handle large datasets efficiently.\n",
|
||||
"- Supports native asynchronous operations.\n",
|
||||
"- Easily configurable to work with different database schemas.\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To use the `SingleStoreLoader`, you need to install the `langchain-singlestore` package. Follow the installation instructions below."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"Install **langchain_singlestore**."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain_singlestore"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"To initialize `SingleStoreLoader`, you need to provide connection parameters for the SingleStore database and specify the table and fields to load documents from.\n",
|
||||
"\n",
|
||||
"### Required Parameters:\n",
|
||||
"- **host** (`str`): Hostname, IP address, or URL for the database.\n",
|
||||
"- **table_name** (`str`): Name of the table to query. Defaults to `embeddings`.\n",
|
||||
"- **content_field** (`str`): Field containing document content. Defaults to `content`.\n",
|
||||
"- **metadata_field** (`str`): Field containing document metadata. Defaults to `metadata`.\n",
|
||||
"\n",
|
||||
"### Optional Parameters:\n",
|
||||
"- **id_field** (`str`): Field containing document IDs. Defaults to `id`.\n",
|
||||
"\n",
|
||||
"### Connection Pool Parameters:\n",
|
||||
"- **pool_size** (`int`): Number of active connections in the pool. Defaults to `5`.\n",
|
||||
"- **max_overflow** (`int`): Maximum connections beyond `pool_size`. Defaults to `10`.\n",
|
||||
"- **timeout** (`float`): Connection timeout in seconds. Defaults to `30`.\n",
|
||||
"\n",
|
||||
"### Additional Options:\n",
|
||||
"- **pure_python** (`bool`): Enables pure Python mode.\n",
|
||||
"- **local_infile** (`bool`): Allows local file uploads.\n",
|
||||
"- **charset** (`str`): Character set for string values.\n",
|
||||
"- **ssl_key**, **ssl_cert**, **ssl_ca** (`str`): Paths to SSL files.\n",
|
||||
"- **ssl_disabled** (`bool`): Disables SSL.\n",
|
||||
"- **ssl_verify_cert** (`bool`): Verifies server's certificate.\n",
|
||||
"- **ssl_verify_identity** (`bool`): Verifies server's identity.\n",
|
||||
"- **autocommit** (`bool`): Enables autocommits.\n",
|
||||
"- **results_type** (`str`): Structure of query results (e.g., `tuples`, `dicts`)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_singlestore.document_loaders import SingleStoreLoader\n",
|
||||
"\n",
|
||||
"loader = SingleStoreLoader(\n",
|
||||
" host=\"127.0.0.1:3306/db\",\n",
|
||||
" table_name=\"documents\",\n",
|
||||
" content_field=\"content\",\n",
|
||||
" metadata_field=\"metadata\",\n",
|
||||
" id_field=\"id\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(docs[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"page = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" page.append(doc)\n",
|
||||
" if len(page) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(page)\n",
|
||||
"\n",
|
||||
" page = []"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all SingleStore Document Loader features and configurations head to the github page: [https://github.com/singlestore-labs/langchain-singlestore/](https://github.com/singlestore-labs/langchain-singlestore/)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
@ -2525,7 +2525,17 @@
|
||||
"source": [
|
||||
"## `SingleStoreDB` semantic cache\n",
|
||||
"\n",
|
||||
"You can use [SingleStoreDB](https://python.langchain.com/docs/integrations/vectorstores/singlestoredb/) as a semantic cache to cache prompts and responses."
|
||||
"You can use [SingleStore](https://python.langchain.com/docs/integrations/vectorstores/singlestore/) as a semantic cache to cache prompts and responses."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "596e15e8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-singlestore"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -2535,11 +2545,11 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.cache import SingleStoreDBSemanticCache\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"from langchain_singlestore.cache import SingleStoreSemanticCache\n",
|
||||
"\n",
|
||||
"set_llm_cache(\n",
|
||||
" SingleStoreDBSemanticCache(\n",
|
||||
" SingleStoreSemanticCache(\n",
|
||||
" embedding=OpenAIEmbeddings(),\n",
|
||||
" host=\"root:pass@localhost:3306/db\",\n",
|
||||
" )\n",
|
||||
@ -3102,8 +3112,8 @@
|
||||
"|------------|---------|\n",
|
||||
"| langchain_astradb.cache | [AstraDBCache](https://python.langchain.com/api_reference/astradb/cache/langchain_astradb.cache.AstraDBCache.html) |\n",
|
||||
"| langchain_astradb.cache | [AstraDBSemanticCache](https://python.langchain.com/api_reference/astradb/cache/langchain_astradb.cache.AstraDBSemanticCache.html) |\n",
|
||||
"| langchain_community.cache | [AstraDBCache](https://python.langchain.com/api_reference/community/cache/langchain_community.cache.AstraDBCache.html) |\n",
|
||||
"| langchain_community.cache | [AstraDBSemanticCache](https://python.langchain.com/api_reference/community/cache/langchain_community.cache.AstraDBSemanticCache.html) |\n",
|
||||
"| langchain_community.cache | [AstraDBCache](https://python.langchain.com/api_reference/community/cache/langchain_community.cache.AstraDBCache.html) (deprecated since `langchain-community==0.0.28`) |\n",
|
||||
"| langchain_community.cache | [AstraDBSemanticCache](https://python.langchain.com/api_reference/community/cache/langchain_community.cache.AstraDBSemanticCache.html) (deprecated since `langchain-community==0.0.28`) |\n",
|
||||
"| langchain_community.cache | [AzureCosmosDBSemanticCache](https://python.langchain.com/api_reference/community/cache/langchain_community.cache.AzureCosmosDBSemanticCache.html) |\n",
|
||||
"| langchain_community.cache | [CassandraCache](https://python.langchain.com/api_reference/community/cache/langchain_community.cache.CassandraCache.html) |\n",
|
||||
"| langchain_community.cache | [CassandraSemanticCache](https://python.langchain.com/api_reference/community/cache/langchain_community.cache.CassandraSemanticCache.html) |\n",
|
||||
|
@ -90,7 +90,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": null,
|
||||
"id": "d285fd7f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -99,7 +99,7 @@
|
||||
"\n",
|
||||
"# Initialize a Fireworks model\n",
|
||||
"llm = Fireworks(\n",
|
||||
" model=\"accounts/fireworks/models/mixtral-8x7b-instruct\",\n",
|
||||
" model=\"accounts/fireworks/models/llama-v3p1-8b-instruct\",\n",
|
||||
" base_url=\"https://api.fireworks.ai/inference/v1/completions\",\n",
|
||||
")"
|
||||
]
|
||||
@ -176,7 +176,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": null,
|
||||
"id": "b801c20d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -192,7 +192,7 @@
|
||||
"source": [
|
||||
"# Setting additional parameters: temperature, max_tokens, top_p\n",
|
||||
"llm = Fireworks(\n",
|
||||
" model=\"accounts/fireworks/models/mixtral-8x7b-instruct\",\n",
|
||||
" model=\"accounts/fireworks/models/llama-v3p1-8b-instruct\",\n",
|
||||
" temperature=0.7,\n",
|
||||
" max_tokens=15,\n",
|
||||
" top_p=1.0,\n",
|
||||
@ -218,7 +218,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": null,
|
||||
"id": "fd2c6bc1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -235,7 +235,7 @@
|
||||
"from langchain_fireworks import Fireworks\n",
|
||||
"\n",
|
||||
"llm = Fireworks(\n",
|
||||
" model=\"accounts/fireworks/models/mixtral-8x7b-instruct\",\n",
|
||||
" model=\"accounts/fireworks/models/llama-v3p1-8b-instruct\",\n",
|
||||
" temperature=0.7,\n",
|
||||
" max_tokens=15,\n",
|
||||
" top_p=1.0,\n",
|
||||
|
@ -17,22 +17,22 @@
|
||||
"id": "f507f58b-bf22-4a48-8daf-68d869bcd1ba",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setting up\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To run this notebook you need a running Astra DB. Get the connection secrets on your Astra dashboard:\n",
|
||||
"\n",
|
||||
"- the API Endpoint looks like `https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com`;\n",
|
||||
"- the Token looks like `AstraCS:6gBhNmsk135...`."
|
||||
"- the Database Token looks like `AstraCS:aBcD0123...`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"id": "d7092199",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet \"astrapy>=0.7.1 langchain-community\" "
|
||||
"!pip install \"langchain-astradb>=0.6,<0.7\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -45,12 +45,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 2,
|
||||
"id": "163d97f0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"ASTRA_DB_API_ENDPOINT = https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com\n",
|
||||
@ -65,14 +65,6 @@
|
||||
"ASTRA_DB_APPLICATION_TOKEN = getpass.getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "55860b2d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Depending on whether local or cloud-based Astra DB, create the corresponding database connection \"Session\" object."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "36c163e8",
|
||||
@ -83,12 +75,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 3,
|
||||
"id": "d15e3302",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.chat_message_histories import AstraDBChatMessageHistory\n",
|
||||
"from langchain_astradb import AstraDBChatMessageHistory\n",
|
||||
"\n",
|
||||
"message_history = AstraDBChatMessageHistory(\n",
|
||||
" session_id=\"test-session\",\n",
|
||||
@ -98,22 +90,31 @@
|
||||
"\n",
|
||||
"message_history.add_user_message(\"hi!\")\n",
|
||||
"\n",
|
||||
"message_history.add_ai_message(\"whats up?\")"
|
||||
"message_history.add_ai_message(\"hello, how are you?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "53acb4a8-d536-4a58-9fee-7d70033d9c81",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[**API Reference:** `AstraDBChatMessageHistory`](https://python.langchain.com/api_reference/astradb/chat_message_histories/langchain_astradb.chat_message_histories.AstraDBChatMessageHistory.html#langchain_astradb.chat_message_histories.AstraDBChatMessageHistory)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"id": "64fc465e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[HumanMessage(content='hi!'), AIMessage(content='whats up?')]"
|
||||
"[HumanMessage(content='hi!', additional_kwargs={}, response_metadata={}),\n",
|
||||
" AIMessage(content='hello, how are you?', additional_kwargs={}, response_metadata={})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -139,7 +140,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.12.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -10,19 +10,23 @@ Please refer to [NCP User Guide](https://guide.ncloud-docs.com/docs/clovastudio-
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
- Get a CLOVA Studio API Key by [issuing it](https://api.ncloud-docs.com/docs/ai-naver-clovastudio-summary#API%ED%82%A4) and set it as an environment variable (`NCP_CLOVASTUDIO_API_KEY`).
|
||||
- If you are using a legacy API Key (that doesn't start with `nv-*` prefix), you might need to get an additional API Key by [creating your app](https://guide.ncloud-docs.com/docs/en/clovastudio-playground01#create-test-app) and set it as `NCP_APIGW_API_KEY`.
|
||||
- Get a CLOVA Studio API Key by [issuing it](https://api.ncloud-docs.com/docs/ai-naver-clovastudio-summary#API%ED%82%A4) and set it as an environment variable (`CLOVASTUDIO_API_KEY`).
|
||||
|
||||
|
||||
Naver integrations live in two packages:
|
||||
|
||||
- `langchain-naver-community`: a dedicated integration package for Naver. It is a community-maintained package and is not officially maintained by Naver or LangChain.
|
||||
- `langchain-community`: a collection of [third-party integrations](https://python.langchain.com/docs/concepts/architecture/#langchain-community),
|
||||
including Naver. **New features should be implemented in the dedicated `langchain-naver-community` package**.
|
||||
- `langchain-naver`: a dedicated integration package for Naver.
|
||||
- `langchain-naver-community`: a community-maintained package that is not officially maintained by Naver or LangChain.
|
||||
|
||||
```bash
|
||||
pip install -U langchain-community langchain-naver-community
|
||||
pip install -U langchain-naver
|
||||
# pip install -U langchain-naver-community // Install to use Naver Search tool.
|
||||
```
|
||||
|
||||
> **(Note)** Naver integration via `langchain-community`, a collection of [third-party integrations](https://python.langchain.com/docs/concepts/architecture/#langchain-community), is outdated.
|
||||
> - **Use `langchain-naver` instead, as new features are only implemented in this package**.
|
||||
> - If you are using `langchain-community` (outdated) and have a legacy API key (one that doesn't start with the `nv-*` prefix), set it as `NCP_CLOVASTUDIO_API_KEY`. You might also need to get an additional API Gateway API key by [creating your app](https://guide.ncloud-docs.com/docs/en/clovastudio-playground01#create-test-app) and set it as `NCP_APIGW_API_KEY`.
|
||||
|
||||
## Chat models
|
||||
|
||||
### ChatClovaX
|
||||
@ -30,7 +34,7 @@ pip install -U langchain-community langchain-naver-community
|
||||
See a [usage example](/docs/integrations/chat/naver).
|
||||
|
||||
```python
|
||||
from langchain_community.chat_models import ChatClovaX
|
||||
from langchain_naver import ChatClovaX
|
||||
```
|
||||
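A minimal usage sketch (the model name and parameters mirror the ChatClovaX notebook and are illustrative; adjust them to a model available in your CLOVA Studio account):

```python
from langchain_naver import ChatClovaX

# Assumes CLOVASTUDIO_API_KEY is set in the environment
chat = ChatClovaX(
    model="HCX-005",
    temperature=0.5,
)
print(chat.invoke("Hello!").content)
```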
|
||||
## Embedding models
|
||||
@ -40,7 +44,7 @@ from langchain_community.chat_models import ChatClovaX
|
||||
See a [usage example](/docs/integrations/text_embedding/naver).
|
||||
|
||||
```python
|
||||
from langchain_community.embeddings import ClovaXEmbeddings
|
||||
from langchain_naver import ClovaXEmbeddings
|
||||
```
|
||||
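A minimal sketch for the embeddings client, assuming `CLOVASTUDIO_API_KEY` is set; whether the constructor requires a `model` argument (and which model names are valid) depends on the package, so treat the call below as illustrative:

```python
from langchain_naver import ClovaXEmbeddings

embeddings = ClovaXEmbeddings()  # may require model="..." depending on your CLOVA Studio setup
vector = embeddings.embed_query("CLOVA Studio embeddings example")
print(len(vector))  # dimensionality of the returned embedding
```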
|
||||
## Tools
|
||||
|
@ -7,10 +7,10 @@
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
We need to install the `hdbcli` python package.
|
||||
We need to install the `langchain-hana` python package.
|
||||
|
||||
```bash
|
||||
pip install hdbcli
|
||||
pip install langchain-hana
|
||||
```
|
||||
|
||||
## Vectorstore
|
||||
@ -21,5 +21,5 @@ pip install hdbcli
|
||||
See a [usage example](/docs/integrations/vectorstores/sap_hanavector).
|
||||
|
||||
```python
|
||||
from langchain_community.vectorstores.hanavector import HanaDB
|
||||
from langchain_hana import HanaDB
|
||||
```
|
||||
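A rough sketch of wiring up the vector store, assuming the `langchain-hana` constructor mirrors the earlier community version (an `hdbcli` connection plus an embeddings object); the parameter names and the embeddings choice are assumptions, so see the linked usage example for the authoritative API:

```python
from hdbcli import dbapi
from langchain_hana import HanaDB
from langchain_openai import OpenAIEmbeddings

# Connection details are placeholders
connection = dbapi.connect(
    address="<hostname>", port=30015, user="<user>", password="<password>"
)

vector_store = HanaDB(
    connection=connection,
    embedding=OpenAIEmbeddings(),
    table_name="LANGCHAIN_DEMO",
)
```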
|
62
docs/docs/integrations/providers/singlestore.ipynb
Normal file
@ -0,0 +1,62 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# SingleStore Integration\n",
|
||||
"\n",
|
||||
"[SingleStore](https://singlestore.com/) is a high-performance, distributed SQL database designed to excel in both [cloud](https://www.singlestore.com/cloud/) and on-premises environments. It offers a versatile feature set, seamless deployment options, and exceptional performance.\n",
|
||||
"\n",
|
||||
"This integration provides the following components to leverage SingleStore's capabilities:\n",
|
||||
"\n",
|
||||
"- **`SingleStoreLoader`**: Load documents directly from a SingleStore database table.\n",
|
||||
"- **`SingleStoreSemanticCache`**: Use SingleStore as a semantic cache for efficient storage and retrieval of embeddings.\n",
|
||||
"- **`SingleStoreChatMessageHistory`**: Store and retrieve chat message history in SingleStore.\n",
|
||||
"- **`SingleStoreVectorStore`**: Store document embeddings and perform fast vector and full-text searches.\n",
|
||||
"\n",
|
||||
"These components enable efficient document storage, embedding management, and advanced search capabilities, combining full-text and vector-based search for fast and accurate queries."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "y8ku6X96sebl"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_singlestore import (\n",
|
||||
" SingleStoreChatMessageHistory,\n",
|
||||
" SingleStoreLoader,\n",
|
||||
" SingleStoreSemanticCache,\n",
|
||||
" SingleStoreVectorStore,\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
}
|
@ -1,28 +0,0 @@
|
||||
# SingleStoreDB
|
||||
|
||||
>[SingleStoreDB](https://singlestore.com/) is a high-performance distributed SQL database that supports deployment both in the [cloud](https://www.singlestore.com/cloud/) and on-premises. It provides vector storage, and vector functions including [dot_product](https://docs.singlestore.com/managed-service/en/reference/sql-reference/vector-functions/dot_product.html) and [euclidean_distance](https://docs.singlestore.com/managed-service/en/reference/sql-reference/vector-functions/euclidean_distance.html), thereby supporting AI applications that require text similarity matching.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
There are several ways to establish a [connection](https://singlestoredb-python.labs.singlestore.com/generated/singlestoredb.connect.html) to the database. You can either set up environment variables or pass named parameters to the `SingleStoreDB constructor`.
|
||||
Alternatively, you may provide these parameters to the `from_documents` and `from_texts` methods.
|
||||
|
||||
```bash
|
||||
pip install singlestoredb
|
||||
```
|
||||
|
||||
## Vector Store
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/singlestoredb).
|
||||
|
||||
```python
|
||||
from langchain_community.vectorstores import SingleStoreDB
|
||||
```
|
||||
|
||||
## Memory
|
||||
|
||||
See a [usage example](/docs/integrations/memory/singlestoredb_chat_message_history).
|
||||
|
||||
```python
|
||||
from langchain.memory import SingleStoreDBChatMessageHistory
|
||||
```
|
103
docs/docs/integrations/providers/valyu.ipynb
Normal file
@ -0,0 +1,103 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ValyuContext\n",
|
||||
"\n",
|
||||
">[Valyu](https://www.valyu.network/) allows AI applications and agents to search the internet and proprietary data sources for relevant LLM ready information.\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use Valyu in LangChain.\n",
|
||||
"\n",
|
||||
"First, get an Valyu API key and add it as an environment variable. Get $10 free credit by [signing up here](https://exchange.valyu.network/).\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"The integration lives in the `langchain-valyu` package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-valyu"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In order to use the package, you will also need to set the `VALYU_API_KEY` environment variable to your Valyu API key.\n",
|
||||
"\n",
|
||||
"## Context Retriever\n",
|
||||
"\n",
|
||||
"You can use the [`ValyuContextRetriever`](https://pypi.org/project/langchain-valyu/) in a standard retrieval pipeline."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_valyu import ValyuContextRetriever\n",
|
||||
"\n",
|
||||
"valyu_api_key = \"YOUR API KEY\"\n",
|
||||
"\n",
|
||||
"# Create a new instance of the ValyuContextRetriever\n",
|
||||
"valyu_retriever = ValyuContextRetriever(valyu_api_key=valyu_api_key)\n",
|
||||
"\n",
|
||||
"# Search for a query and save the results\n",
|
||||
"docs = valyu_retriever.invoke(\"What are the benefits of renewable energy?\")\n",
|
||||
"\n",
|
||||
"# Print the results\n",
|
||||
"for doc in docs:\n",
|
||||
" print(doc.page_content)\n",
|
||||
" print(doc.metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Context Search Tool\n",
|
||||
"\n",
|
||||
"You can use the `ValyuSearchTool` for advanced search queries."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_valyu import ValyuSearchTool\n",
|
||||
"\n",
|
||||
"# Initialize the ValyuSearchTool\n",
|
||||
"search_tool = ValyuSearchTool(valyu_api_key=\"YOUR API KEY\")\n",
|
||||
"\n",
|
||||
"# Perform a search query\n",
|
||||
"search_results = search_tool._run(\n",
|
||||
" query=\"What are agentic search-enhanced large reasoning models?\",\n",
|
||||
" search_type=\"all\",\n",
|
||||
" max_num_results=5,\n",
|
||||
" similarity_threshold=0.4,\n",
|
||||
" query_rewrite=False,\n",
|
||||
" max_price=20.0,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(\"Search Results:\", search_results)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
@ -4,7 +4,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Astra DB (Cassandra)\n",
|
||||
"# Astra DB\n",
|
||||
"\n",
|
||||
">[DataStax Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on `Cassandra` and made conveniently available through an easy-to-use JSON API.\n",
|
||||
"\n",
|
||||
@ -16,32 +16,46 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Creating an Astra DB vector store\n",
|
||||
"First we'll want to create an Astra DB VectorStore and seed it with some data. We've created a small demo set of documents that contain summaries of movies.\n",
|
||||
"First, create an Astra DB vector store and seed it with some data.\n",
|
||||
"\n",
|
||||
"NOTE: The self-query retriever requires you to have `lark` installed (`pip install lark`). We also need the `astrapy` package."
|
||||
"We've created a small demo set of documents containing movie summaries.\n",
|
||||
"\n",
|
||||
"NOTE: The self-query retriever requires the `lark` package installed (`pip install lark`)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet lark astrapy langchain-openai"
|
||||
"!pip install \"langchain-astradb>=0.6,<0.7\" \\\n",
|
||||
" \"langchain_openai>=0.3,<0.4\" \\\n",
|
||||
" \"lark>=1.2,<2.0\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We want to use `OpenAIEmbeddings` so we have to get the OpenAI API Key."
|
||||
"In this example, you'll use the `OpenAIEmbeddings`. Please enter an OpenAI API Key."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"OpenAI API Key: ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from getpass import getpass\n",
|
||||
@ -69,14 +83,23 @@
|
||||
"Create the Astra DB VectorStore:\n",
|
||||
"\n",
|
||||
"- the API Endpoint looks like `https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com`\n",
|
||||
"- the Token looks like `AstraCS:6gBhNmsk135....`"
|
||||
"- the Token looks like `AstraCS:aBcD0123...`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"ASTRA_DB_API_ENDPOINT = https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com\n",
|
||||
"ASTRA_DB_APPLICATION_TOKEN = ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ASTRA_DB_API_ENDPOINT = input(\"ASTRA_DB_API_ENDPOINT = \")\n",
|
||||
"ASTRA_DB_APPLICATION_TOKEN = getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")"
|
||||
@ -84,11 +107,11 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.vectorstores import AstraDB\n",
|
||||
"from langchain_astradb import AstraDBVectorStore\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"docs = [\n",
|
||||
@ -101,11 +124,13 @@
|
||||
" metadata={\"year\": 2010, \"director\": \"Christopher Nolan\", \"rating\": 8.2},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\",\n",
|
||||
" page_content=\"A psychologist / detective gets lost in a series of dreams within dreams \"\n",
|
||||
" \"within dreams and Inception reused the idea\",\n",
|
||||
" metadata={\"year\": 2006, \"director\": \"Satoshi Kon\", \"rating\": 8.6},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"A bunch of normal-sized women are supremely wholesome and some men pine after them\",\n",
|
||||
" page_content=\"A bunch of normal-sized women are supremely wholesome and some men \"\n",
|
||||
" \"pine after them\",\n",
|
||||
" metadata={\"year\": 2019, \"director\": \"Greta Gerwig\", \"rating\": 8.3},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
@ -123,7 +148,7 @@
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"vectorstore = AstraDB.from_documents(\n",
|
||||
"vectorstore = AstraDBVectorStore.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
" collection_name=\"astra_self_query_demo\",\n",
|
||||
@ -136,13 +161,16 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Creating our self-querying retriever\n",
|
||||
"Now we can instantiate our retriever. To do this we'll need to provide some information upfront about the metadata fields that our documents support and a short description of the document contents."
|
||||
"## Creating a self-querying retriever\n",
|
||||
"\n",
|
||||
"Now you can instantiate the retriever.\n",
|
||||
"\n",
|
||||
"To do this, you need to provide some information upfront about the metadata fields that the documents support, along with a short description of the documents' contents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -174,7 +202,11 @@
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"\n",
|
||||
"retriever = SelfQueryRetriever.from_llm(\n",
|
||||
" llm, vectorstore, document_content_description, metadata_field_info, verbose=True\n",
|
||||
" llm,\n",
|
||||
" vectorstore,\n",
|
||||
" document_content_description,\n",
|
||||
" metadata_field_info,\n",
|
||||
" verbose=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@ -183,14 +215,29 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Testing it out\n",
|
||||
"And now we can try actually using our retriever!"
|
||||
"\n",
|
||||
"Now you can try actually using our retriever:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(id='d7b9ec1edafa467caab524455e8c1f5d', metadata={'year': 1993, 'rating': 7.7, 'genre': 'science fiction'}, page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose'),\n",
|
||||
" Document(id='8ad04ef2a73d4f74897a51e49be1a8d2', metadata={'year': 1995, 'genre': 'animated'}, page_content='Toys come alive and have a blast doing so'),\n",
|
||||
" Document(id='5b07e600d3494506952b60e0a45a0546', metadata={'year': 1979, 'director': 'Andrei Tarkovsky', 'genre': 'science fiction', 'rating': 9.9}, page_content='Three men walk into the Zone, three men walk out of the Zone'),\n",
|
||||
" Document(id='a0cef19e27c341929098ac4793602829', metadata={'year': 2006, 'director': 'Satoshi Kon', 'rating': 8.6}, page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example only specifies a relevant query\n",
|
||||
"retriever.invoke(\"What are some movies about dinosaurs?\")"
|
||||
@ -198,9 +245,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(id='5b07e600d3494506952b60e0a45a0546', metadata={'year': 1979, 'director': 'Andrei Tarkovsky', 'genre': 'science fiction', 'rating': 9.9}, page_content='Three men walk into the Zone, three men walk out of the Zone'),\n",
|
||||
" Document(id='a0cef19e27c341929098ac4793602829', metadata={'year': 2006, 'director': 'Satoshi Kon', 'rating': 8.6}, page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example specifies a filter\n",
|
||||
"retriever.invoke(\"I want to watch a movie rated higher than 8.5\")"
|
||||
@ -208,9 +267,20 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(id='0539843fd203484c9be486c2a0e2454c', metadata={'year': 2019, 'director': 'Greta Gerwig', 'rating': 8.3}, page_content='A bunch of normal-sized women are supremely wholesome and some men pine after them')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example only specifies a query and a filter\n",
|
||||
"retriever.invoke(\"Has Greta Gerwig directed any movies about women\")"
|
||||
@ -218,9 +288,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(id='a0cef19e27c341929098ac4793602829', metadata={'year': 2006, 'director': 'Satoshi Kon', 'rating': 8.6}, page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea'),\n",
|
||||
" Document(id='5b07e600d3494506952b60e0a45a0546', metadata={'year': 1979, 'director': 'Andrei Tarkovsky', 'genre': 'science fiction', 'rating': 9.9}, page_content='Three men walk into the Zone, three men walk out of the Zone')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example specifies a composite filter\n",
|
||||
"retriever.invoke(\"What's a highly rated (above 8.5), science fiction movie ?\")"
|
||||
@ -228,9 +310,20 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(id='8ad04ef2a73d4f74897a51e49be1a8d2', metadata={'year': 1995, 'genre': 'animated'}, page_content='Toys come alive and have a blast doing so')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example specifies a query and composite filter\n",
|
||||
"retriever.invoke(\n",
|
||||
@ -242,20 +335,20 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Filter k\n",
|
||||
"## Set a limit ('k')\n",
|
||||
"\n",
|
||||
"We can also use the self query retriever to specify `k`: the number of documents to fetch.\n",
|
||||
"you can also use the self-query retriever to specify `k`, the number of documents to fetch.\n",
|
||||
"\n",
|
||||
"We can do this by passing `enable_limit=True` to the constructor."
|
||||
"You achieve this by passing `enable_limit=True` to the constructor."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = SelfQueryRetriever.from_llm(\n",
|
||||
"retriever_k = SelfQueryRetriever.from_llm(\n",
|
||||
" llm,\n",
|
||||
" vectorstore,\n",
|
||||
" document_content_description,\n",
|
||||
@ -267,12 +360,24 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(id='d7b9ec1edafa467caab524455e8c1f5d', metadata={'year': 1993, 'rating': 7.7, 'genre': 'science fiction'}, page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose'),\n",
|
||||
" Document(id='8ad04ef2a73d4f74897a51e49be1a8d2', metadata={'year': 1995, 'genre': 'animated'}, page_content='Toys come alive and have a blast doing so')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example only specifies a relevant query\n",
|
||||
"retriever.invoke(\"What are two movies about dinosaurs?\")"
|
||||
"retriever_k.invoke(\"What are two movies about dinosaurs?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -293,7 +398,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 12,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
@ -322,7 +427,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.12.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -1,121 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ab66dd43",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# SingleStoreDB\n",
|
||||
"\n",
|
||||
">[SingleStoreDB](https://singlestore.com/) is a high-performance distributed SQL database that supports deployment both in the [cloud](https://www.singlestore.com/cloud/) and on-premises. It provides vector storage, and vector functions including [dot_product](https://docs.singlestore.com/managed-service/en/reference/sql-reference/vector-functions/dot_product.html) and [euclidean_distance](https://docs.singlestore.com/managed-service/en/reference/sql-reference/vector-functions/euclidean_distance.html), thereby supporting AI applications that require text similarity matching. \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"This notebook shows how to use a retriever that uses `SingleStoreDB`.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "51b49135-a61a-49e8-869d-7c1d76794cd7",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Establishing a connection to the database is facilitated through the singlestoredb Python connector.\n",
|
||||
"# Please ensure that this connector is installed in your working environment.\n",
|
||||
"%pip install --upgrade --quiet singlestoredb"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "aaf80e7f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Retriever from vector store"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bcb3c8c2",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"# We want to use OpenAIEmbeddings so we have to get the OpenAI API Key.\n",
|
||||
"if \"OPENAI_API_KEY\" not in os.environ:\n",
|
||||
" os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
|
||||
"\n",
|
||||
"from langchain_community.document_loaders import TextLoader\n",
|
||||
"from langchain_community.vectorstores import SingleStoreDB\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"from langchain_text_splitters import CharacterTextSplitter\n",
|
||||
"\n",
|
||||
"loader = TextLoader(\"../../how_to/state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"\n",
|
||||
"# Setup connection url as environment variable\n",
|
||||
"os.environ[\"SINGLESTOREDB_URL\"] = \"root:pass@localhost:3306/db\"\n",
|
||||
"\n",
|
||||
"# Load documents to the store\n",
|
||||
"docsearch = SingleStoreDB.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
" table_name=\"notebook\", # use table with a custom name\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# create retriever from the vector store\n",
|
||||
"retriever = docsearch.as_retriever(search_kwargs={\"k\": 2})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fc0915db",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Search with retriever"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "b605284d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"result = retriever.invoke(\"What did the president say about Ketanji Brown Jackson\")\n",
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
178
docs/docs/integrations/retrievers/valyu.ipynb
Normal file
@ -0,0 +1,178 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ValyuContext\n",
|
||||
"\n",
|
||||
">[Valyu](https://www.valyu.network/) allows AI applications and agents to search the internet and proprietary data sources for relevant LLM ready information.\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use Valyu context tool in LangChain.\n",
|
||||
"\n",
|
||||
"First, get an Valyu API key and add it as an environment variable. Get $10 free credit by [signing up here](https://exchange.valyu.network/).\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"The integration lives in the `langchain-valyu` package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-valyu"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In order to use the package, you will also need to set the `VALYU_API_KEY` environment variable to your Valyu API key."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"valyu_api_key = os.environ[\"VALYU_API_KEY\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our retriever:\n",
|
||||
"The `ValyuContextRetriever` can be configured with several parameters:\n",
|
||||
"\n",
|
||||
"- `k: int = 5` \n",
|
||||
" The number of top results to return for each query.\n",
|
||||
"\n",
|
||||
"- `search_type: str = \"all\"` \n",
|
||||
" The type of search to perform. Options may include \"all\", \"web\", \"proprietary\", etc., depending on your use case.\n",
|
||||
"\n",
|
||||
"- `similarity_threshold: float = 0.4` \n",
|
||||
" The minimum similarity score (between 0 and 1) required for a document to be considered relevant.\n",
|
||||
"\n",
|
||||
"- `query_rewrite: bool = False` \n",
|
||||
" Whether to enable automatic rewriting of the query to improve search results.\n",
|
||||
" \n",
|
||||
"- `max_price: float = 20.0`\n",
|
||||
" The maximum price (in USD) you are willing to spend per query.\n",
|
||||
"\n",
|
||||
"- `client: Optional[Valyu] = None` \n",
|
||||
" An optional custom Valyu client instance. If not provided, a new client will be created internally.\n",
|
||||
" \n",
|
||||
"- `valyu_api_key: Optional[str] = None` \n",
|
||||
" Your Valyu API key. If not provided, the retriever will look for the `VALYU_API_KEY` environment variable.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_valyu import ValyuContextRetriever\n",
|
||||
"\n",
|
||||
"retriever = ValyuContextRetriever(\n",
|
||||
" k=5,\n",
|
||||
" search_type=\"all\",\n",
|
||||
" similarity_threshold=0.4,\n",
|
||||
" query_rewrite=False,\n",
|
||||
" max_price=20.0,\n",
|
||||
" client=None,\n",
|
||||
" valyu_api_key=os.environ[\"VALYU_API_KEY\"],\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What are the benefits of renewable energy?\"\n",
|
||||
"docs = retriever.invoke(query)\n",
|
||||
"\n",
|
||||
"for doc in docs:\n",
|
||||
" print(doc.page_content)\n",
|
||||
" print(doc.metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use within a chain\n",
|
||||
"\n",
|
||||
"We can easily combine this retriever in to a chain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.output_parsers import StrOutputParser\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"from langchain_core.runnables import RunnablePassthrough\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_template(\n",
|
||||
" \"\"\"Answer the question based only on the context provided.\n",
|
||||
"\n",
|
||||
"Context: {context}\n",
|
||||
"\n",
|
||||
"Question: {question}\"\"\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-4o-mini\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def format_docs(docs):\n",
|
||||
" return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"chain = (\n",
|
||||
" {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n",
|
||||
" | prompt\n",
|
||||
" | llm\n",
|
||||
" | StrOutputParser()\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all Valyu Context API features and configurations head to the API reference: https://docs.valyu.network/overview"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
@ -1,13 +1,76 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8543d632",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Google Gemini\n",
|
||||
"keywords: [google gemini embeddings]\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "afab8b36-10bb-4795-bc98-75ab2d2081bb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Google Generative AI Embeddings\n",
|
||||
"# Google Generative AI Embeddings (AI Studio & Gemini API)\n",
|
||||
"\n",
|
||||
"Connect to Google's generative AI embeddings service using the `GoogleGenerativeAIEmbeddings` class, found in the [langchain-google-genai](https://pypi.org/project/langchain-google-genai/) package."
|
||||
"Connect to Google's generative AI embeddings service using the `GoogleGenerativeAIEmbeddings` class, found in the [langchain-google-genai](https://pypi.org/project/langchain-google-genai/) package.\n",
|
||||
"\n",
|
||||
"This will help you get started with Google's Generative AI embedding models (like Gemini) using LangChain. For detailed documentation on `GoogleGenerativeAIEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/v0.2/api_reference/google_genai/embeddings/langchain_google_genai.embeddings.GoogleGenerativeAIEmbeddings.html).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"import { ItemTable } from \"@theme/FeatureTables\";\n",
|
||||
"\n",
|
||||
"<ItemTable category=\"text_embedding\" item=\"Google Gemini\" />\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access Google Generative AI embedding models you'll need to create a Google Cloud project, enable the Generative Language API, get an API key, and install the `langchain-google-genai` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"To use Google Generative AI models, you must have an API key. You can create one in Google AI Studio. See the [Google documentation](https://ai.google.dev/gemini-api/docs/api-key) for instructions.\n",
|
||||
"\n",
|
||||
"Once you have a key, set it as an environment variable `GOOGLE_API_KEY`:\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "47652620",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"GOOGLE_API_KEY\"):\n",
|
||||
" os.environ[\"GOOGLE_API_KEY\"] = getpass.getpass(\"Enter your Google API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "67283790",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To enable automated tracing of your model calls, set your [LangSmith](https://docs.smith.langchain.com/) API key:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "eccf1968",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -28,28 +91,6 @@
|
||||
"%pip install --upgrade --quiet langchain-google-genai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "25f3f88e-164e-400d-b371-9fa488baba19",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Credentials"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ec89153f-8999-4aab-a21b-0bfba1cc3893",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if \"GOOGLE_API_KEY\" not in os.environ:\n",
|
||||
" os.environ[\"GOOGLE_API_KEY\"] = getpass.getpass(\"Provide your Google API key here\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f2437b22-e364-418a-8c13-490a026cb7b5",
|
||||
@ -60,17 +101,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 20,
|
||||
"id": "eedc551e-a1f3-4fd8-8d65-4e0784c4441b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[0.05636945, 0.0048285457, -0.0762591, -0.023642512, 0.05329321]"
|
||||
"[-0.024917153641581535,\n",
|
||||
" 0.012005362659692764,\n",
|
||||
" -0.003886754624545574,\n",
|
||||
" -0.05774897709488869,\n",
|
||||
" 0.0020742062479257584]"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -78,7 +123,7 @@
|
||||
"source": [
|
||||
"from langchain_google_genai import GoogleGenerativeAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = GoogleGenerativeAIEmbeddings(model=\"models/text-embedding-004\")\n",
|
||||
"embeddings = GoogleGenerativeAIEmbeddings(model=\"models/gemini-embedding-exp-03-07\")\n",
|
||||
"vector = embeddings.embed_query(\"hello, world!\")\n",
|
||||
"vector[:5]"
|
||||
]
|
||||
@ -95,17 +140,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 5,
|
||||
"id": "6ec53aba-404f-4778-acd9-5d6664e79ed2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(3, 768)"
|
||||
"(3, 3072)"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -121,6 +166,56 @@
|
||||
"len(vectors), len(vectors[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c362bfbf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Indexing and Retrieval\n",
|
||||
"\n",
|
||||
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
|
||||
"\n",
|
||||
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "606a7f65",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'LangChain is the framework for building context-aware reasoning applications'"
|
||||
]
|
||||
},
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a vector store with a sample text\n",
|
||||
"from langchain_core.vectorstores import InMemoryVectorStore\n",
|
||||
"\n",
|
||||
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
|
||||
"\n",
|
||||
"vectorstore = InMemoryVectorStore.from_texts(\n",
|
||||
" [text],\n",
|
||||
" embedding=embeddings,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Use the vectorstore as a retriever\n",
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"\n",
|
||||
"# Retrieve the most similar text\n",
|
||||
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
|
||||
"\n",
|
||||
"# show the retrieved document's content\n",
|
||||
"retrieved_documents[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1482486f-5617-498a-8a44-1974d3212dda",
|
||||
@ -129,70 +224,74 @@
|
||||
"## Task type\n",
|
||||
"`GoogleGenerativeAIEmbeddings` optionally support a `task_type`, which currently must be one of:\n",
|
||||
"\n",
|
||||
"- task_type_unspecified\n",
|
||||
"- retrieval_query\n",
|
||||
"- retrieval_document\n",
|
||||
"- semantic_similarity\n",
|
||||
"- classification\n",
|
||||
"- clustering\n",
|
||||
"- `SEMANTIC_SIMILARITY`: Used to generate embeddings that are optimized to assess text similarity.\n",
|
||||
"- `CLASSIFICATION`: Used to generate embeddings that are optimized to classify texts according to preset labels.\n",
|
||||
"- `CLUSTERING`: Used to generate embeddings that are optimized to cluster texts based on their similarities.\n",
|
||||
"- `RETRIEVAL_DOCUMENT`, `RETRIEVAL_QUERY`, `QUESTION_ANSWERING`, and `FACT_VERIFICATION`: Used to generate embeddings that are optimized for document search or information retrieval.\n",
|
||||
"- `CODE_RETRIEVAL_QUERY`: Used to retrieve a code block based on a natural language query, such as sort an array or reverse a linked list. Embeddings of the code blocks are computed using `RETRIEVAL_DOCUMENT`.\n",
|
||||
"\n",
|
||||
"By default, we use `retrieval_document` in the `embed_documents` method and `retrieval_query` in the `embed_query` method. If you provide a task type, we will use that for all methods."
|
||||
"By default, we use `RETRIEVAL_DOCUMENT` in the `embed_documents` method and `RETRIEVAL_QUERY` in the `embed_query` method. If you provide a task type, we will use that for all methods."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "a223bb25-2b1b-418e-a570-2f543083132e",
|
||||
"execution_count": null,
|
||||
"id": "b7acc5c2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet matplotlib scikit-learn"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"execution_count": 19,
|
||||
"id": "f1f077db-8eb4-49f7-8866-471a8528dcdb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Document 1\n",
|
||||
"Cosine similarity with query: 0.7892893360164779\n",
|
||||
"---\n",
|
||||
"Document 2\n",
|
||||
"Cosine similarity with query: 0.5438283285204146\n",
|
||||
"---\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_google_genai import GoogleGenerativeAIEmbeddings\n",
|
||||
"from sklearn.metrics.pairwise import cosine_similarity\n",
|
||||
"\n",
|
||||
"query_embeddings = GoogleGenerativeAIEmbeddings(\n",
|
||||
" model=\"models/embedding-001\", task_type=\"retrieval_query\"\n",
|
||||
" model=\"models/gemini-embedding-exp-03-07\", task_type=\"RETRIEVAL_QUERY\"\n",
|
||||
")\n",
|
||||
"doc_embeddings = GoogleGenerativeAIEmbeddings(\n",
|
||||
" model=\"models/embedding-001\", task_type=\"retrieval_document\"\n",
|
||||
")"
|
||||
" model=\"models/gemini-embedding-exp-03-07\", task_type=\"RETRIEVAL_DOCUMENT\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"q_embed = query_embeddings.embed_query(\"What is the capital of France?\")\n",
|
||||
"d_embed = doc_embeddings.embed_documents(\n",
|
||||
" [\"The capital of France is Paris.\", \"Philipp is likes to eat pizza.\"]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"for i, d in enumerate(d_embed):\n",
|
||||
" print(f\"Document {i+1}:\")\n",
|
||||
" print(f\"Cosine similarity with query: {cosine_similarity([q_embed], [d])[0][0]}\")\n",
|
||||
" print(\"---\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "79bd4a5e-75ba-413c-befa-86167c938caf",
|
||||
"id": "f45ea7b1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"All of these will be embedded with the 'retrieval_query' task set\n",
|
||||
"```python\n",
|
||||
"query_vecs = [query_embeddings.embed_query(q) for q in [query, query_2, answer_1]]\n",
|
||||
"```\n",
|
||||
"All of these will be embedded with the 'retrieval_document' task set\n",
|
||||
"```python\n",
|
||||
"doc_vecs = [doc_embeddings.embed_query(q) for q in [query, query_2, answer_1]]\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9e1fae5e-0f84-4812-89f5-7d4d71affbc1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In retrieval, relative distance matters. In the image above, you can see the difference in similarity scores between the \"relevant doc\" and \"simil stronger delta between the similar query and relevant doc on the latter case."
|
||||
"## API Reference\n",
|
||||
"\n",
|
||||
"For detailed documentation on `GoogleGenerativeAIEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/google_genai/embeddings/langchain_google_genai.embeddings.GoogleGenerativeAIEmbeddings.html).\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -211,7 +310,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@ -225,7 +324,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -17,14 +17,14 @@
|
||||
"source": [
|
||||
"# ClovaXEmbeddings\n",
|
||||
"\n",
|
||||
"This notebook covers how to get started with embedding models provided by CLOVA Studio. For detailed documentation on `ClovaXEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/community/embeddings/langchain_community.embeddings.naver.ClovaXEmbeddings.html).\n",
|
||||
"This notebook covers how to get started with embedding models provided by CLOVA Studio. For detailed documentation on `ClovaXEmbeddings` features and configuration options, please refer to the [API reference](https://guide.ncloud-docs.com/docs/clovastudio-dev-langchain#%EC%9E%84%EB%B2%A0%EB%94%A9%EB%8F%84%EA%B5%AC%EC%9D%B4%EC%9A%A9).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Provider | Package |\n",
|
||||
"|:--------:|:-------:|\n",
|
||||
"| [Naver](/docs/integrations/providers/naver.mdx) | [langchain-community](https://python.langchain.com/api_reference/community/embeddings/langchain_community.embeddings.naver.ClovaXEmbeddings.html) |\n",
|
||||
"| [Naver](/docs/integrations/providers/naver.mdx) | [langchain-naver](https://pypi.org/project/langchain-naver/) |\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
@ -33,12 +33,11 @@
|
||||
"1. Creating [NAVER Cloud Platform](https://www.ncloud.com/) account \n",
|
||||
"2. Apply to use [CLOVA Studio](https://www.ncloud.com/product/aiService/clovaStudio)\n",
|
||||
"3. Create a CLOVA Studio Test App or Service App of a model to use (See [here](https://guide.ncloud-docs.com/docs/clovastudio-explorer03#%ED%85%8C%EC%8A%A4%ED%8A%B8%EC%95%B1%EC%83%9D%EC%84%B1).)\n",
|
||||
"4. Issue a Test or Service API key (See [here](https://api.ncloud-docs.com/docs/ai-naver-clovastudio-summary#API%ED%82%A4).)\n",
|
||||
"4. Issue a Test or Service API key (See [here](https://guide.ncloud-docs.com/docs/clovastudio-explorer-testapp).)\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Set the `NCP_CLOVASTUDIO_API_KEY` environment variable with your API key.\n",
|
||||
" - Note that if you are using a legacy API Key (that doesn't start with `nv-*` prefix), you might need two additional keys to be set as environment variables (`NCP_APIGW_API_KEY` and `NCP_CLOVASTUDIO_APP_ID`. They could be found by clicking `App Request Status` > `Service App, Test App List` > `Details` button for each app in [CLOVA Studio](https://clovastudio.ncloud.com/studio-application/service-app)."
|
||||
"Set the `CLOVASTUDIO_API_KEY` environment variable with your API key."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -51,30 +50,8 @@
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"NCP_CLOVASTUDIO_API_KEY\"):\n",
|
||||
" os.environ[\"NCP_CLOVASTUDIO_API_KEY\"] = getpass.getpass(\n",
|
||||
" \"Enter NCP CLOVA Studio API Key: \"\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b31fc062",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Uncomment below to use a legacy API key:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "83520d8e-ecf8-4e47-b3bc-1ac205b3a2ab",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# if not os.getenv(\"NCP_APIGW_API_KEY\"):\n",
|
||||
"# os.environ[\"NCP_APIGW_API_KEY\"] = getpass.getpass(\"Enter NCP API Gateway API Key: \")\n",
|
||||
"# os.environ[\"NCP_CLOVASTUDIO_APP_ID\"] = input(\"Enter NCP CLOVA Studio App ID: \")"
|
||||
"if not os.getenv(\"CLOVASTUDIO_API_KEY\"):\n",
|
||||
" os.environ[\"CLOVASTUDIO_API_KEY\"] = getpass.getpass(\"Enter CLOVA Studio API Key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -84,7 +61,7 @@
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"ClovaXEmbeddings integration lives in the `langchain_community` package:"
|
||||
"ClovaXEmbeddings integration lives in the `langchain_naver` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -95,7 +72,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# install package\n",
|
||||
"!pip install -U langchain-community"
|
||||
"%pip install -qU langchain-naver"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -113,7 +90,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": null,
|
||||
"id": "62e0dbc3",
|
||||
"metadata": {
|
||||
"scrolled": true,
|
||||
@ -121,10 +98,10 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.embeddings import ClovaXEmbeddings\n",
|
||||
"from langchain_naver import ClovaXEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = ClovaXEmbeddings(\n",
|
||||
" model=\"clir-emb-dolphin\" # set with the model name of corresponding app id. Default is `clir-emb-dolphin`\n",
|
||||
" model=\"clir-emb-dolphin\" # set with the model name of corresponding test/service app. Default is `clir-emb-dolphin`\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@ -225,7 +202,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": null,
|
||||
"id": "1f2e6104",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -239,55 +216,12 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"text2 = \"LangChain is the framework for building context-aware reasoning applications\"\n",
|
||||
"text2 = \"LangChain is a framework for building context-aware reasoning applications\"\n",
|
||||
"two_vectors = embeddings.embed_documents([text, text2])\n",
|
||||
"for vector in two_vectors:\n",
|
||||
" print(str(vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "eee40d32367cc5c4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Additional functionalities\n",
|
||||
"\n",
|
||||
"### Service App\n",
|
||||
"\n",
|
||||
"When going live with production-level application using CLOVA Studio, you should apply for and use Service App. (See [here](https://guide.ncloud-docs.com/docs/en/clovastudio-playground01#서비스앱신청).)\n",
|
||||
"\n",
|
||||
"For a Service App, you should use a corresponding Service API key and can only be called with it."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "08f9f44e-c6a4-4163-8caf-27a0cda345b7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Update environment variables\n",
|
||||
"\n",
|
||||
"os.environ[\"NCP_CLOVASTUDIO_API_KEY\"] = getpass.getpass(\n",
|
||||
" \"Enter NCP CLOVA Studio API Key for Service App: \"\n",
|
||||
")\n",
|
||||
"# Uncomment below to use a legacy API key:\n",
|
||||
"os.environ[\"NCP_CLOVASTUDIO_APP_ID\"] = input(\"Enter NCP CLOVA Studio Service App ID: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "86f59698-b3f4-4b19-a9d4-4facfcea304b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embeddings = ClovaXEmbeddings(\n",
|
||||
" service_app=True,\n",
|
||||
" model=\"clir-emb-dolphin\", # set with the model name of corresponding app id of your Service App\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1ddeaee9",
|
||||
@ -295,7 +229,7 @@
|
||||
"source": [
|
||||
"## API Reference\n",
|
||||
"\n",
|
||||
"For detailed documentation on `ClovaXEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/latest/api_reference/community/embeddings/langchain_community.embeddings.naver.ClovaXEmbeddings.html)."
|
||||
"For detailed documentation on `ClovaXEmbeddings` features and configuration options, please refer to the [API reference](https://guide.ncloud-docs.com/docs/clovastudio-dev-langchain#%EC%9E%84%EB%B2%A0%EB%94%A9%EB%8F%84%EA%B5%AC%EC%9D%B4%EC%9A%A9)."
|
||||
]
|
||||
}
|
||||
],
|
||||
|
@ -183,7 +183,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Initalize simple_datasource_qa for querying Tableau Datasources through VDS\n",
|
||||
"# Initialize simple_datasource_qa for querying Tableau Datasources through VDS\n",
|
||||
"analyze_datasource = initialize_simple_datasource_qa(\n",
|
||||
" domain=tableau_server,\n",
|
||||
" site=tableau_site,\n",
|
||||
|
194
docs/docs/integrations/tools/valyu_context.ipynb
Normal file
@ -0,0 +1,194 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ValyuContext\n",
|
||||
"\n",
|
||||
">[Valyu](https://www.valyu.network/) allows AI applications and agents to search the internet and proprietary data sources for relevant LLM ready information.\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use Valyu context tool in LangChain.\n",
|
||||
"\n",
|
||||
"First, get an Valyu API key and add it as an environment variable. Get $10 free credit by [signing up here](https://exchange.valyu.network/).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"### Integration details\n",
|
||||
"| Class | Package | Serializable | JS support | Package latest |\n",
|
||||
"|:--------------------------------------------------------------|:---------------------------------------------------------------| :---: | :---: | :---: |\n",
|
||||
"| [Valyu Search](https://github.com/valyu-network/langchain-valyu) | [langchain-valyu](https://pypi.org/project/langchain-valyu/) | ✅ | ❌ |  |\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"The integration lives in the `langchain-valyu` package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-valyu"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In order to use the package, you will also need to set the `VALYU_API_KEY` environment variable to your Valyu API key."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.environ.get(\"VALYU_API_KEY\"):\n",
|
||||
" os.environ[\"VALYU_API_KEY\"] = getpass.getpass(\"Valyu API key:\\n\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Here we show how to instantiate an instance of the Valyu search tool. This tool allows you to complete search queries using Valyu's Context API endpoint.\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_valyu import ValyuSearchTool\n",
|
||||
"\n",
|
||||
"tool = ValyuSearchTool()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Invocation\n",
|
||||
"\n",
|
||||
"### Invoke directly with args\n",
|
||||
"\n",
|
||||
"The Valyu search tool accepts the following arguments during invocation:\n",
|
||||
"- `query` (required): A natural language search query\n",
|
||||
"- `search_type` (optional): Type of search, e.g., \"all\"\n",
|
||||
"- `max_num_results` (optional): Maximum number of results to return\n",
|
||||
"- `similarity_threshold` (optional): Similarity threshold for results\n",
|
||||
"- `query_rewrite` (optional): Whether to rewrite the query\n",
|
||||
"- `max_price` (optional): Maximum price for the search\n",
|
||||
"\n",
|
||||
"For reliability and performance reasons, certain parameters may be required or restricted. See the [Valyu API documentation](https://docs.valyu.network/overview) for details."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"search_results = tool._run(\n",
|
||||
" query=\"What are agentic search-enhanced large reasoning models?\",\n",
|
||||
" search_type=\"all\",\n",
|
||||
" max_num_results=5,\n",
|
||||
" similarity_threshold=0.4,\n",
|
||||
" query_rewrite=False,\n",
|
||||
" max_price=20.0,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(\"Search Results:\", search_results)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use within an agent\n",
|
||||
"\n",
|
||||
"We can use our tools directly with an agent executor by binding the tool to the agent. This gives the agent the ability to dynamically set the available arguments to the Valyu search tool."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"if not os.environ.get(\"OPENAI_API_KEY\"):\n",
|
||||
" os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OPENAI_API_KEY:\\n\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"\n",
|
||||
"# !pip install -qU langchain langchain-openai\n",
|
||||
"from langchain.chat_models import init_chat_model\n",
|
||||
"\n",
|
||||
"llm = init_chat_model(model=\"gpt-4o\", model_provider=\"openai\", temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_valyu import ValyuSearchTool\n",
|
||||
"from langgraph.prebuilt import create_react_agent\n",
|
||||
"\n",
|
||||
"valyu_search_tool = ValyuSearchTool()\n",
|
||||
"\n",
|
||||
"agent = create_react_agent(llm, [valyu_search_tool])\n",
|
||||
"\n",
|
||||
"user_input = \"What are the key factors driving recent stock market volatility, and how do macroeconomic indicators influence equity prices across different sectors?\"\n",
|
||||
"\n",
|
||||
"for step in agent.stream(\n",
|
||||
" {\"messages\": user_input},\n",
|
||||
" stream_mode=\"values\",\n",
|
||||
"):\n",
|
||||
" step[\"messages\"][-1].pretty_print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all Valyu Context API features and configurations head to the API reference: https://docs.valyu.network/overview"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
@ -6,18 +6,16 @@
|
||||
"source": [
|
||||
"# SAP HANA Cloud Vector Engine\n",
|
||||
"\n",
|
||||
">[SAP HANA Cloud Vector Engine](https://www.sap.com/events/teched/news-guide/ai.html#article8) is a vector store fully integrated into the `SAP HANA Cloud` database.\n",
|
||||
"\n",
|
||||
"You'll need to install `langchain-community` with `pip install -qU langchain-community` to use this integration"
|
||||
">[SAP HANA Cloud Vector Engine](https://help.sap.com/docs/hana-cloud-database/sap-hana-cloud-sap-hana-database-vector-engine-guide/sap-hana-cloud-sap-hana-database-vector-engine-guide) is a vector store fully integrated into the `SAP HANA Cloud` database."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setting up\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"Installation of the HANA database driver."
|
||||
"Install the `langchain-hana` external integration package, as well as the other packages used throughout this notebook."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -26,53 +24,36 @@
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Pip install necessary package\n",
|
||||
"%pip install --upgrade --quiet hdbcli"
|
||||
"%pip install -qU langchain-hana"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For `OpenAIEmbeddings` we use the OpenAI API key from the environment."
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Ensure your SAP HANA instance is running. Load your credentials from environment variables and create a connection:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-09-09T08:02:16.802456Z",
|
||||
"start_time": "2023-09-09T08:02:07.065604Z"
|
||||
}
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"# Use OPENAI_API_KEY env variable\n",
|
||||
"# os.environ[\"OPENAI_API_KEY\"] = \"Your OpenAI API key\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Create a database connection to a HANA Cloud instance."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-09-09T08:02:28.174088Z",
|
||||
"start_time": "2023-09-09T08:02:28.162698Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from hdbcli import dbapi\n",
|
||||
"\n",
|
||||
@ -88,6 +69,92 @@
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Learn more about SAP HANA in [What is SAP HANA?](https://www.sap.com/products/data-cloud/hana/what-is-sap-hana.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Initialization\n",
|
||||
"To initialize a `HanaDB` vector store, you need a database connection and an embedding instance. SAP HANA Cloud Vector Engine supports both external and internal embeddings."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"- #### Using External Embeddings\n",
|
||||
"\n",
|
||||
"import EmbeddingTabs from \"@theme/EmbeddingTabs\";\n",
|
||||
"\n",
|
||||
"<EmbeddingTabs/>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"- #### Using Internal Embeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Alternatively, you can compute embeddings directly in SAP HANA using its native `VECTOR_EMBEDDING()` function. To enable this, create an instance of `HanaInternalEmbeddings` with your internal model ID and pass it to `HanaDB`. Note that the `HanaInternalEmbeddings` instance is specifically designed for use with `HanaDB` and is not intended for use with other vector store implementations. For more information about internal embedding, see the [SAP HANA VECTOR_EMBEDDING Function](https://help.sap.com/docs/hana-cloud-database/sap-hana-cloud-sap-hana-database-vector-engine-guide/vector-embedding-function-vector).\n",
|
||||
"\n",
|
||||
"> **Caution:** Ensure NLP is enabled in your SAP HANA Cloud instance."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_hana import HanaInternalEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = HanaInternalEmbeddings(internal_embedding_model_id=\"SAP_NEB.20240715\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Once you have your connection and embedding instance, create the vector store by passing them to `HanaDB` along with a table name for storing vectors:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_hana import HanaDB\n",
|
||||
"\n",
|
||||
"db = HanaDB(\n",
|
||||
" embedding=embeddings, connection=connection, table_name=\"STATE_OF_THE_UNION\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@ -104,7 +171,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-09-09T08:02:25.452472Z",
|
||||
@ -122,40 +189,16 @@
|
||||
],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import TextLoader\n",
|
||||
"from langchain_community.vectorstores.hanavector import HanaDB\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"from langchain_text_splitters import CharacterTextSplitter\n",
|
||||
"\n",
|
||||
"text_documents = TextLoader(\"../../how_to/state_of_the_union.txt\").load()\n",
|
||||
"text_documents = TextLoader(\n",
|
||||
" \"../../how_to/state_of_the_union.txt\", encoding=\"UTF-8\"\n",
|
||||
").load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",
|
||||
"text_chunks = text_splitter.split_documents(text_documents)\n",
|
||||
"print(f\"Number of document chunks: {len(text_chunks)}\")\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Create a LangChain VectorStore interface for the HANA database and specify the table (collection) to use for accessing the vector embeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-09-09T08:04:16.696625Z",
|
||||
"start_time": "2023-09-09T08:02:31.817790Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db = HanaDB(\n",
|
||||
" embedding=embeddings, connection=connection, table_name=\"STATE_OF_THE_UNION\"\n",
|
||||
")"
|
||||
"print(f\"Number of document chunks: {len(text_chunks)}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -167,7 +210,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -176,7 +219,7 @@
|
||||
"[]"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -199,7 +242,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -235,7 +278,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -254,7 +297,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_community.vectorstores.utils import DistanceStrategy\n",
|
||||
"from langchain_hana.utils import DistanceStrategy\n",
|
||||
"\n",
|
||||
"db = HanaDB(\n",
|
||||
" embedding=embeddings,\n",
|
||||
@ -286,7 +329,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 10,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-09-09T08:05:23.276819Z",
|
||||
@ -336,7 +379,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -411,7 +454,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -420,7 +463,7 @@
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -443,7 +486,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -452,7 +495,7 @@
|
||||
"[]"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -471,7 +514,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -480,7 +523,7 @@
|
||||
"[]"
|
||||
]
|
||||
},
|
||||
"execution_count": 21,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -508,7 +551,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -539,7 +582,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -578,13 +621,14 @@
|
||||
"| `$nin` | Not contained in a set of given values (not in) |\n",
|
||||
"| `$between` | Between the range of two boundary values |\n",
|
||||
"| `$like` | Text equality based on the \"LIKE\" semantics in SQL (using \"%\" as wildcard) |\n",
|
||||
"| `$contains` | Filters documents containing a specific keyword |\n",
|
||||
"| `$and` | Logical \"and\", supporting 2 or more operands |\n",
|
||||
"| `$or` | Logical \"or\", supporting 2 or more operands |"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -592,15 +636,15 @@
|
||||
"docs = [\n",
|
||||
" Document(\n",
|
||||
" page_content=\"First\",\n",
|
||||
" metadata={\"name\": \"adam\", \"is_active\": True, \"id\": 1, \"height\": 10.0},\n",
|
||||
" metadata={\"name\": \"Adam Smith\", \"is_active\": True, \"id\": 1, \"height\": 10.0},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Second\",\n",
|
||||
" metadata={\"name\": \"bob\", \"is_active\": False, \"id\": 2, \"height\": 5.7},\n",
|
||||
" metadata={\"name\": \"Bob Johnson\", \"is_active\": False, \"id\": 2, \"height\": 5.7},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Third\",\n",
|
||||
" metadata={\"name\": \"jane\", \"is_active\": True, \"id\": 3, \"height\": 2.4},\n",
|
||||
" metadata={\"name\": \"Jane Doe\", \"is_active\": True, \"id\": 3, \"height\": 2.4},\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
@ -632,7 +676,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -640,19 +684,19 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Filter: {'id': {'$ne': 1}}\n",
|
||||
"{'name': 'bob', 'is_active': False, 'id': 2, 'height': 5.7}\n",
|
||||
"{'name': 'jane', 'is_active': True, 'id': 3, 'height': 2.4}\n",
|
||||
"{'name': 'Jane Doe', 'is_active': True, 'id': 3, 'height': 2.4}\n",
|
||||
"{'name': 'Bob Johnson', 'is_active': False, 'id': 2, 'height': 5.7}\n",
|
||||
"Filter: {'id': {'$gt': 1}}\n",
|
||||
"{'name': 'bob', 'is_active': False, 'id': 2, 'height': 5.7}\n",
|
||||
"{'name': 'jane', 'is_active': True, 'id': 3, 'height': 2.4}\n",
|
||||
"{'name': 'Jane Doe', 'is_active': True, 'id': 3, 'height': 2.4}\n",
|
||||
"{'name': 'Bob Johnson', 'is_active': False, 'id': 2, 'height': 5.7}\n",
|
||||
"Filter: {'id': {'$gte': 1}}\n",
|
||||
"{'name': 'adam', 'is_active': True, 'id': 1, 'height': 10.0}\n",
|
||||
"{'name': 'bob', 'is_active': False, 'id': 2, 'height': 5.7}\n",
|
||||
"{'name': 'jane', 'is_active': True, 'id': 3, 'height': 2.4}\n",
|
||||
"{'name': 'Adam Smith', 'is_active': True, 'id': 1, 'height': 10.0}\n",
|
||||
"{'name': 'Jane Doe', 'is_active': True, 'id': 3, 'height': 2.4}\n",
|
||||
"{'name': 'Bob Johnson', 'is_active': False, 'id': 2, 'height': 5.7}\n",
|
||||
"Filter: {'id': {'$lt': 1}}\n",
|
||||
"<empty result>\n",
|
||||
"Filter: {'id': {'$lte': 1}}\n",
|
||||
"{'name': 'adam', 'is_active': True, 'id': 1, 'height': 10.0}\n"
|
||||
"{'name': 'Adam Smith', 'is_active': True, 'id': 1, 'height': 10.0}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -687,7 +731,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -695,13 +739,13 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Filter: {'id': {'$between': (1, 2)}}\n",
|
||||
"{'name': 'adam', 'is_active': True, 'id': 1, 'height': 10.0}\n",
|
||||
"{'name': 'bob', 'is_active': False, 'id': 2, 'height': 5.7}\n",
|
||||
"Filter: {'name': {'$in': ['adam', 'bob']}}\n",
|
||||
"{'name': 'adam', 'is_active': True, 'id': 1, 'height': 10.0}\n",
|
||||
"{'name': 'bob', 'is_active': False, 'id': 2, 'height': 5.7}\n",
|
||||
"Filter: {'name': {'$nin': ['adam', 'bob']}}\n",
|
||||
"{'name': 'jane', 'is_active': True, 'id': 3, 'height': 2.4}\n"
|
||||
"{'name': 'Adam Smith', 'is_active': True, 'id': 1, 'height': 10.0}\n",
|
||||
"{'name': 'Bob Johnson', 'is_active': False, 'id': 2, 'height': 5.7}\n",
|
||||
"Filter: {'name': {'$in': ['Adam Smith', 'Bob Johnson']}}\n",
|
||||
"{'name': 'Adam Smith', 'is_active': True, 'id': 1, 'height': 10.0}\n",
|
||||
"{'name': 'Bob Johnson', 'is_active': False, 'id': 2, 'height': 5.7}\n",
|
||||
"Filter: {'name': {'$nin': ['Adam Smith', 'Bob Johnson']}}\n",
|
||||
"{'name': 'Jane Doe', 'is_active': True, 'id': 3, 'height': 2.4}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -710,11 +754,11 @@
|
||||
"print(f\"Filter: {advanced_filter}\")\n",
|
||||
"print_filter_result(db.similarity_search(\"just testing\", k=5, filter=advanced_filter))\n",
|
||||
"\n",
|
||||
"advanced_filter = {\"name\": {\"$in\": [\"adam\", \"bob\"]}}\n",
|
||||
"advanced_filter = {\"name\": {\"$in\": [\"Adam Smith\", \"Bob Johnson\"]}}\n",
|
||||
"print(f\"Filter: {advanced_filter}\")\n",
|
||||
"print_filter_result(db.similarity_search(\"just testing\", k=5, filter=advanced_filter))\n",
|
||||
"\n",
|
||||
"advanced_filter = {\"name\": {\"$nin\": [\"adam\", \"bob\"]}}\n",
|
||||
"advanced_filter = {\"name\": {\"$nin\": [\"Adam Smith\", \"Bob Johnson\"]}}\n",
|
||||
"print(f\"Filter: {advanced_filter}\")\n",
|
||||
"print_filter_result(db.similarity_search(\"just testing\", k=5, filter=advanced_filter))"
|
||||
]
|
||||
@ -728,7 +772,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -736,10 +780,10 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Filter: {'name': {'$like': 'a%'}}\n",
|
||||
"{'name': 'adam', 'is_active': True, 'id': 1, 'height': 10.0}\n",
|
||||
"<empty result>\n",
|
||||
"Filter: {'name': {'$like': '%a%'}}\n",
|
||||
"{'name': 'adam', 'is_active': True, 'id': 1, 'height': 10.0}\n",
|
||||
"{'name': 'jane', 'is_active': True, 'id': 3, 'height': 2.4}\n"
|
||||
"{'name': 'Adam Smith', 'is_active': True, 'id': 1, 'height': 10.0}\n",
|
||||
"{'name': 'Jane Doe', 'is_active': True, 'id': 3, 'height': 2.4}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -753,6 +797,51 @@
|
||||
"print_filter_result(db.similarity_search(\"just testing\", k=5, filter=advanced_filter))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Text filtering with `$contains`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Filter: {'name': {'$contains': 'bob'}}\n",
|
||||
"{'name': 'Bob Johnson', 'is_active': False, 'id': 2, 'height': 5.7}\n",
|
||||
"Filter: {'name': {'$contains': 'bo'}}\n",
|
||||
"<empty result>\n",
|
||||
"Filter: {'name': {'$contains': 'Adam Johnson'}}\n",
|
||||
"<empty result>\n",
|
||||
"Filter: {'name': {'$contains': 'Adam Smith'}}\n",
|
||||
"{'name': 'Adam Smith', 'is_active': True, 'id': 1, 'height': 10.0}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"advanced_filter = {\"name\": {\"$contains\": \"bob\"}}\n",
|
||||
"print(f\"Filter: {advanced_filter}\")\n",
|
||||
"print_filter_result(db.similarity_search(\"just testing\", k=5, filter=advanced_filter))\n",
|
||||
"\n",
|
||||
"advanced_filter = {\"name\": {\"$contains\": \"bo\"}}\n",
|
||||
"print(f\"Filter: {advanced_filter}\")\n",
|
||||
"print_filter_result(db.similarity_search(\"just testing\", k=5, filter=advanced_filter))\n",
|
||||
"\n",
|
||||
"advanced_filter = {\"name\": {\"$contains\": \"Adam Johnson\"}}\n",
|
||||
"print(f\"Filter: {advanced_filter}\")\n",
|
||||
"print_filter_result(db.similarity_search(\"just testing\", k=5, filter=advanced_filter))\n",
|
||||
"\n",
|
||||
"advanced_filter = {\"name\": {\"$contains\": \"Adam Smith\"}}\n",
|
||||
"print(f\"Filter: {advanced_filter}\")\n",
|
||||
"print_filter_result(db.similarity_search(\"just testing\", k=5, filter=advanced_filter))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@ -762,7 +851,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -770,14 +859,15 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Filter: {'$or': [{'id': 1}, {'name': 'bob'}]}\n",
|
||||
"{'name': 'adam', 'is_active': True, 'id': 1, 'height': 10.0}\n",
|
||||
"{'name': 'bob', 'is_active': False, 'id': 2, 'height': 5.7}\n",
|
||||
"{'name': 'Adam Smith', 'is_active': True, 'id': 1, 'height': 10.0}\n",
|
||||
"Filter: {'$and': [{'id': 1}, {'id': 2}]}\n",
|
||||
"<empty result>\n",
|
||||
"Filter: {'$or': [{'id': 1}, {'id': 2}, {'id': 3}]}\n",
|
||||
"{'name': 'adam', 'is_active': True, 'id': 1, 'height': 10.0}\n",
|
||||
"{'name': 'bob', 'is_active': False, 'id': 2, 'height': 5.7}\n",
|
||||
"{'name': 'jane', 'is_active': True, 'id': 3, 'height': 2.4}\n"
|
||||
"{'name': 'Adam Smith', 'is_active': True, 'id': 1, 'height': 10.0}\n",
|
||||
"{'name': 'Jane Doe', 'is_active': True, 'id': 3, 'height': 2.4}\n",
|
||||
"{'name': 'Bob Johnson', 'is_active': False, 'id': 2, 'height': 5.7}\n",
|
||||
"Filter: {'$and': [{'name': {'$contains': 'bob'}}, {'name': {'$contains': 'johnson'}}]}\n",
|
||||
"{'name': 'Bob Johnson', 'is_active': False, 'id': 2, 'height': 5.7}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -792,6 +882,12 @@
|
||||
"\n",
|
||||
"advanced_filter = {\"$or\": [{\"id\": 1}, {\"id\": 2}, {\"id\": 3}]}\n",
|
||||
"print(f\"Filter: {advanced_filter}\")\n",
|
||||
"print_filter_result(db.similarity_search(\"just testing\", k=5, filter=advanced_filter))\n",
|
||||
"\n",
|
||||
"advanced_filter = {\n",
|
||||
" \"$and\": [{\"name\": {\"$contains\": \"bob\"}}, {\"name\": {\"$contains\": \"johnson\"}}]\n",
|
||||
"}\n",
|
||||
"print(f\"Filter: {advanced_filter}\")\n",
|
||||
"print_filter_result(db.similarity_search(\"just testing\", k=5, filter=advanced_filter))"
|
||||
]
|
||||
},
|
||||
@ -804,13 +900,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.memory import ConversationBufferMemory\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"# Access the vector DB with a new table\n",
|
||||
"db = HanaDB(\n",
|
||||
" connection=connection,\n",
|
||||
@ -837,7 +930,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -874,6 +967,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import ConversationalRetrievalChain\n",
|
||||
"from langchain.memory import ConversationBufferMemory\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo\")\n",
|
||||
"memory = ConversationBufferMemory(\n",
|
||||
@ -898,7 +993,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -907,7 +1002,7 @@
|
||||
"text": [
|
||||
"Answer from LLM:\n",
|
||||
"================\n",
|
||||
"The United States has set up joint patrols with Mexico and Guatemala to catch more human traffickers. This collaboration is part of the efforts to address immigration issues and secure the borders in the region.\n",
|
||||
"The United States has set up joint patrols with Mexico and Guatemala to catch more human traffickers at the border. This collaborative effort aims to improve border security and combat illegal activities such as human trafficking.\n",
|
||||
"================\n",
|
||||
"Number of used source document chunks: 5\n"
|
||||
]
|
||||
@ -954,7 +1049,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -963,12 +1058,12 @@
|
||||
"text": [
|
||||
"Answer from LLM:\n",
|
||||
"================\n",
|
||||
"Mexico and Guatemala are involved in joint patrols to catch human traffickers.\n"
|
||||
"Countries like Mexico and Guatemala are participating in joint patrols to catch human traffickers. The United States is also working with partners in South and Central America to host more refugees and secure their borders. Additionally, the U.S. is working with twenty-seven members of the European Union, as well as countries like France, Germany, Italy, the United Kingdom, Canada, Japan, Korea, Australia, New Zealand, and Switzerland.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"question = \"What about other countries?\"\n",
|
||||
"question = \"How many casualties were reported after that?\"\n",
|
||||
"\n",
|
||||
"result = qa_chain.invoke({\"question\": question})\n",
|
||||
"print(\"Answer from LLM:\")\n",
|
||||
@ -996,7 +1091,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -1005,7 +1100,7 @@
|
||||
"[]"
|
||||
]
|
||||
},
|
||||
"execution_count": 35,
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -1038,7 +1133,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 36,
|
||||
"execution_count": 30,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -1101,7 +1196,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 39,
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -1111,7 +1206,7 @@
|
||||
"None\n",
|
||||
"Some other text\n",
|
||||
"{\"start\": 400, \"end\": 450, \"doc_name\": \"other.txt\"}\n",
|
||||
"<memory at 0x7f5edcb18d00>\n"
|
||||
"<memory at 0x110f856c0>\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -1168,7 +1263,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"execution_count": 33,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -1176,9 +1271,9 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"Some other text\n",
|
||||
"Some more text\n",
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"Some more text\n"
|
||||
"Some other text\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -1214,7 +1309,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 41,
|
||||
"execution_count": 34,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -1224,7 +1319,7 @@
|
||||
"Filters on this value are very performant\n",
|
||||
"Some other text\n",
|
||||
"{\"start\": 400, \"end\": 450, \"doc_name\": \"other.txt\", \"CUSTOMTEXT\": \"Filters on this value are very performant\"}\n",
|
||||
"<memory at 0x7f5edcb193c0>\n"
|
||||
"<memory at 0x110f859c0>\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -1291,7 +1386,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"execution_count": 35,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -1299,9 +1394,9 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"Some other text\n",
|
||||
"Some more text\n",
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"Some more text\n"
|
||||
"Some other text\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -1330,9 +1425,9 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "lc3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
"name": "your_env_name"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
@ -1344,7 +1439,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.14"
|
||||
"version": "3.10.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
527 docs/docs/integrations/vectorstores/singlestore.ipynb Normal file
@ -0,0 +1,527 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "1957f5cb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: SingleStore\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ef1f0986",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# SingleStoreVectorStore\n",
|
||||
"\n",
|
||||
">[SingleStore](https://singlestore.com/) is a robust, high-performance distributed SQL database solution designed to excel in both [cloud](https://www.singlestore.com/cloud/) and on-premises environments. Boasting a versatile feature set, it offers seamless deployment options while delivering unparalleled performance.\n",
|
||||
"\n",
|
||||
"A standout feature of SingleStore is its advanced support for vector storage and operations, making it an ideal choice for applications requiring intricate AI capabilities such as text similarity matching. With built-in vector functions like [dot_product](https://docs.singlestore.com/managed-service/en/reference/sql-reference/vector-functions/dot_product.html) and [euclidean_distance](https://docs.singlestore.com/managed-service/en/reference/sql-reference/vector-functions/euclidean_distance.html), SingleStore empowers developers to implement sophisticated algorithms efficiently.\n",
|
||||
"\n",
|
||||
"For developers keen on leveraging vector data within SingleStore, a comprehensive tutorial is available, guiding them through the intricacies of [working with vector data](https://docs.singlestore.com/managed-service/en/developer-resources/functional-extensions/working-with-vector-data.html). This tutorial delves into the Vector Store within SingleStoreDB, showcasing its ability to facilitate searches based on vector similarity. Leveraging vector indexes, queries can be executed with remarkable speed, enabling swift retrieval of relevant data.\n",
|
||||
"\n",
|
||||
"Moreover, SingleStore's Vector Store seamlessly integrates with [full-text indexing based on Lucene](https://docs.singlestore.com/cloud/developer-resources/functional-extensions/working-with-full-text-search/), enabling powerful text similarity searches. Users can filter search results based on selected fields of document metadata objects, enhancing query precision.\n",
|
||||
"\n",
|
||||
"What sets SingleStore apart is its ability to combine vector and full-text searches in various ways, offering flexibility and versatility. Whether prefiltering by text or vector similarity and selecting the most relevant data, or employing a weighted sum approach to compute a final similarity score, developers have multiple options at their disposal.\n",
|
||||
"\n",
|
||||
"In essence, SingleStore provides a comprehensive solution for managing and querying vector data, offering unparalleled performance and flexibility for AI-driven applications.\n",
|
||||
"\n",
|
||||
"| Class | Package | JS support |\n",
|
||||
"| :--- | :--- | :---: |\n",
|
||||
"| SingleStoreVectorStore | langchain_singlestore | ✅ | \n",
|
||||
"\n",
|
||||
":::note\n",
|
||||
"\n",
|
||||
"For the langchain-community version `SingleStoreDB` (deprecated), see\n",
|
||||
"the [v0.2 documentation](https://python.langchain.com/v0.2/docs/integrations/vectorstores/singlestoredb/).\n",
|
||||
"\n",
|
||||
":::"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "36fdc060",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access SingleStore vector stores you'll need to install the `langchain-singlestore` integration package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "64e28aa6",
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "raw"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"%pip install -qU \"langchain-singlestore\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "93df377e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"To initialize `SingleStoreVectorStore`, you need an `Embeddings` object and connection parameters for the SingleStore database.\n",
|
||||
"\n",
|
||||
"### Required Parameters:\n",
|
||||
"- **embedding** (`Embeddings`): A text embedding model.\n",
|
||||
"\n",
|
||||
"### Optional Parameters:\n",
|
||||
"- **distance_strategy** (`DistanceStrategy`): Strategy for calculating vector distances. Defaults to `DOT_PRODUCT`. Options:\n",
|
||||
" - `DOT_PRODUCT`: Computes the scalar product of two vectors.\n",
|
||||
" - `EUCLIDEAN_DISTANCE`: Computes the Euclidean distance between two vectors.\n",
|
||||
"\n",
|
||||
"- **table_name** (`str`): Name of the table. Defaults to `embeddings`.\n",
|
||||
"- **content_field** (`str`): Field for storing content. Defaults to `content`.\n",
|
||||
"- **metadata_field** (`str`): Field for storing metadata. Defaults to `metadata`.\n",
|
||||
"- **vector_field** (`str`): Field for storing vectors. Defaults to `vector`.\n",
|
||||
"- **id_field** (`str`): Field for storing IDs. Defaults to `id`.\n",
|
||||
"\n",
|
||||
"- **use_vector_index** (`bool`): Enables vector indexing (requires SingleStore 8.5+). Defaults to `False`.\n",
|
||||
"- **vector_index_name** (`str`): Name of the vector index. Ignored if `use_vector_index` is `False`.\n",
|
||||
"- **vector_index_options** (`dict`): Options for the vector index. Ignored if `use_vector_index` is `False`.\n",
|
||||
"- **vector_size** (`int`): Size of the vector. Required if `use_vector_index` is `True`.\n",
|
||||
"\n",
|
||||
"- **use_full_text_search** (`bool`): Enables full-text indexing on content. Defaults to `False`.\n",
|
||||
"\n",
|
||||
"### Connection Pool Parameters:\n",
|
||||
"- **pool_size** (`int`): Number of active connections in the pool. Defaults to `5`.\n",
|
||||
"- **max_overflow** (`int`): Maximum connections beyond `pool_size`. Defaults to `10`.\n",
|
||||
"- **timeout** (`float`): Connection timeout in seconds. Defaults to `30`.\n",
|
||||
"\n",
|
||||
"### Database Connection Parameters:\n",
|
||||
"- **host** (`str`): Hostname, IP, or URL for the database.\n",
|
||||
"- **user** (`str`): Database username.\n",
|
||||
"- **password** (`str`): Database password.\n",
|
||||
"- **port** (`int`): Database port. Defaults to `3306`.\n",
|
||||
"- **database** (`str`): Database name.\n",
|
||||
"\n",
|
||||
"### Additional Options:\n",
|
||||
"- **pure_python** (`bool`): Enables pure Python mode.\n",
|
||||
"- **local_infile** (`bool`): Allows local file uploads.\n",
|
||||
"- **charset** (`str`): Character set for string values.\n",
|
||||
"- **ssl_key**, **ssl_cert**, **ssl_ca** (`str`): Paths to SSL files.\n",
|
||||
"- **ssl_disabled** (`bool`): Disables SSL.\n",
|
||||
"- **ssl_verify_cert** (`bool`): Verifies server's certificate.\n",
|
||||
"- **ssl_verify_identity** (`bool`): Verifies server's identity.\n",
|
||||
"- **autocommit** (`bool`): Enables autocommits.\n",
|
||||
"- **results_type** (`str`): Structure of query results (e.g., `tuples`, `dicts`)."
|
||||
]
|
||||
},
|
||||
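As a rough sketch of how several of the options above fit together (the connection values, table name, and OpenAI model are placeholders; check the package for the exact keyword used for the embedding object, since the quick-start cell below spells it `embeddings`):

```python
from langchain_openai import OpenAIEmbeddings
from langchain_singlestore.vectorstores import SingleStoreVectorStore

# Illustrative values only -- host/user/password, table name, and model are placeholders.
vector_store = SingleStoreVectorStore(
    embeddings=OpenAIEmbeddings(model="text-embedding-3-small"),
    host="localhost",
    user="root",
    password="pass",
    database="db",
    table_name="notebook_embeddings",
    use_vector_index=True,       # requires SingleStore 8.5+
    vector_size=1536,            # must match the embedding dimensionality
    use_full_text_search=True,   # needed for TEXT_ONLY and the hybrid strategies
)
```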
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dc37144c-208d-4ab3-9f3a-0407a69fe052",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from langchain_singlestore.vectorstores import SingleStoreVectorStore\n",
|
||||
"\n",
|
||||
"os.environ[\"SINGLESTOREDB_URL\"] = \"root:pass@localhost:3306/db\"\n",
|
||||
"\n",
|
||||
"vector_store = SingleStoreVectorStore(embeddings=embeddings)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ac6071d4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Manage vector store\n",
|
||||
"\n",
|
||||
"The `SingleStoreVectorStore` assumes that a Document's ID is an integer. Below are examples of how to manage the vector store.\n",
|
||||
"\n",
|
||||
"### Add items to vector store\n",
|
||||
"\n",
|
||||
"You can add documents to the vector store as follows:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "26e0c6e6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-core"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "17f5efc0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"docs = [\n",
|
||||
" Document(\n",
|
||||
" page_content=\"\"\"In the parched desert, a sudden rainstorm brought relief,\n",
|
||||
" as the droplets danced upon the thirsty earth, rejuvenating the landscape\n",
|
||||
" with the sweet scent of petrichor.\"\"\",\n",
|
||||
" metadata={\"category\": \"rain\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"\"\"Amidst the bustling cityscape, the rain fell relentlessly,\n",
|
||||
" creating a symphony of pitter-patter on the pavement, while umbrellas\n",
|
||||
" bloomed like colorful flowers in a sea of gray.\"\"\",\n",
|
||||
" metadata={\"category\": \"rain\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"\"\"High in the mountains, the rain transformed into a delicate\n",
|
||||
" mist, enveloping the peaks in a mystical veil, where each droplet seemed to\n",
|
||||
" whisper secrets to the ancient rocks below.\"\"\",\n",
|
||||
" metadata={\"category\": \"rain\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"\"\"Blanketing the countryside in a soft, pristine layer, the\n",
|
||||
" snowfall painted a serene tableau, muffling the world in a tranquil hush\n",
|
||||
" as delicate flakes settled upon the branches of trees like nature's own \n",
|
||||
" lacework.\"\"\",\n",
|
||||
" metadata={\"category\": \"snow\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"\"\"In the urban landscape, snow descended, transforming\n",
|
||||
" bustling streets into a winter wonderland, where the laughter of\n",
|
||||
" children echoed amidst the flurry of snowballs and the twinkle of\n",
|
||||
" holiday lights.\"\"\",\n",
|
||||
" metadata={\"category\": \"snow\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"\"\"Atop the rugged peaks, snow fell with an unyielding\n",
|
||||
" intensity, sculpting the landscape into a pristine alpine paradise,\n",
|
||||
" where the frozen crystals shimmered under the moonlight, casting a\n",
|
||||
" spell of enchantment over the wilderness below.\"\"\",\n",
|
||||
" metadata={\"category\": \"snow\"},\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"vector_store.add_documents(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c738c3e0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Update items in vector store\n",
|
||||
"\n",
|
||||
"To update an existing document in the vector store, use the following code:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f0aa8b71",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"updated_document = Document(\n",
|
||||
" page_content=\"qux\", metadata={\"source\": \"https://another-example.com\"}\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"vector_store.update_documents(document_id=\"1\", document=updated_document)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "dcf1b905",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Delete items from vector store\n",
|
||||
"\n",
|
||||
"To delete documents from the vector store, use the following code:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ef61e188",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vector_store.delete(ids=[\"3\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c3620501",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Query vector store\n",
|
||||
"\n",
|
||||
"Once your vector store has been created and the relevant documents have been added you will most likely wish to query it during the running of your chain or agent. \n",
|
||||
"\n",
|
||||
"### Query directly\n",
|
||||
"\n",
|
||||
"Performing a simple similarity search can be done as follows:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "aa0a16fa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search(query=\"trees in the snow\", k=1)\n",
|
||||
"for doc in results:\n",
|
||||
" print(f\"* {doc.page_content} [{doc.metadata}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3ed9d733",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to execute a similarity search and receive the corresponding scores you can run:\n",
|
||||
"\n",
|
||||
"- TODO: Edit and then run code cell to generate output"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5efd2eaa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"results = vector_store.similarity_search_with_score(query=\"trees in the snow\", k=1)\n",
|
||||
"for doc, score in results:\n",
|
||||
" print(f\"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fdaae211",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Metadata filtering\n",
|
||||
"SingleStoreDB elevates search capabilities by enabling users to enhance and refine search results through prefiltering based on metadata fields. This functionality empowers developers and data analysts to fine-tune queries, ensuring that search results are precisely tailored to their requirements. By filtering search results using specific metadata attributes, users can narrow down the scope of their queries, focusing only on relevant data subsets. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5c9e9989",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"trees branches\"\n",
|
||||
"docs = vector_store.similarity_search(\n",
|
||||
" query, filter={\"category\": \"snow\"}\n",
|
||||
") # Find documents that correspond to the query and has category \"snow\"\n",
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c14b0bc3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Vector index\n",
|
||||
"Enhance your search efficiency with SingleStore DB version 8.5 or above by leveraging [ANN vector indexes](https://docs.singlestore.com/cloud/reference/sql-reference/vector-functions/vector-indexing/). By setting `use_vector_index=True` during vector store object creation, you can activate this feature. Additionally, if your vectors differ in dimensionality from the default OpenAI embedding size of 1536, ensure to specify the `vector_size` parameter accordingly. "
|
||||
]
|
||||
},
|
||||
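For example, a minimal sketch of enabling the ANN index at creation time, reusing the `docs` and `embeddings` objects from the cells above (the `vector_size` value assumes 1536-dimensional embeddings):

```python
# Sketch: build the store with an ANN vector index (SingleStore 8.5+).
docsearch = SingleStoreVectorStore.from_documents(
    docs,
    embeddings,
    use_vector_index=True,  # create an ANN index for faster similarity search
    vector_size=1536,       # set this if your embeddings are not 1536-dimensional
)
```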
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e1f2a202",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Search strategies\n",
|
||||
"SingleStoreDB presents a diverse range of search strategies, each meticulously crafted to cater to specific use cases and user preferences. The default `VECTOR_ONLY` strategy utilizes vector operations such as `dot_product` or `euclidean_distance` to calculate similarity scores directly between vectors, while `TEXT_ONLY` employs Lucene-based full-text search, particularly advantageous for text-centric applications. For users seeking a balanced approach, `FILTER_BY_TEXT` first refines results based on text similarity before conducting vector comparisons, whereas `FILTER_BY_VECTOR` prioritizes vector similarity, filtering results before assessing text similarity for optimal matches. Notably, both `FILTER_BY_TEXT` and `FILTER_BY_VECTOR` necessitate a full-text index for operation. Additionally, `WEIGHTED_SUM` emerges as a sophisticated strategy, calculating the final similarity score by weighing vector and text similarities, albeit exclusively utilizing dot_product distance calculations and also requiring a full-text index. These versatile strategies empower users to fine-tune searches according to their unique needs, facilitating efficient and precise data retrieval and analysis. Moreover, SingleStoreDB's hybrid approaches, exemplified by `FILTER_BY_TEXT`, `FILTER_BY_VECTOR`, and `WEIGHTED_SUM` strategies, seamlessly blend vector and text-based searches to maximize efficiency and accuracy, ensuring users can fully leverage the platform's capabilities for a wide range of applications."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "15093016",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_singlestore.vectorstores import DistanceStrategy\n",
|
||||
"\n",
|
||||
"docsearch = SingleStoreVectorStore.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
" distance_strategy=DistanceStrategy.DOT_PRODUCT, # Use dot product for similarity search\n",
|
||||
" use_vector_index=True, # Use vector index for faster search\n",
|
||||
" use_full_text_search=True, # Use full text index\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"vectorResults = docsearch.similarity_search(\n",
|
||||
" \"rainstorm in parched desert, rain\",\n",
|
||||
" k=1,\n",
|
||||
" search_strategy=SingleStoreVectorStore.SearchStrategy.VECTOR_ONLY,\n",
|
||||
" filter={\"category\": \"rain\"},\n",
|
||||
")\n",
|
||||
"print(vectorResults[0].page_content)\n",
|
||||
"\n",
|
||||
"textResults = docsearch.similarity_search(\n",
|
||||
" \"rainstorm in parched desert, rain\",\n",
|
||||
" k=1,\n",
|
||||
" search_strategy=SingleStoreVectorStore.SearchStrategy.TEXT_ONLY,\n",
|
||||
")\n",
|
||||
"print(textResults[0].page_content)\n",
|
||||
"\n",
|
||||
"filteredByTextResults = docsearch.similarity_search(\n",
|
||||
" \"rainstorm in parched desert, rain\",\n",
|
||||
" k=1,\n",
|
||||
" search_strategy=SingleStoreVectorStore.SearchStrategy.FILTER_BY_TEXT,\n",
|
||||
" filter_threshold=0.1,\n",
|
||||
")\n",
|
||||
"print(filteredByTextResults[0].page_content)\n",
|
||||
"\n",
|
||||
"filteredByVectorResults = docsearch.similarity_search(\n",
|
||||
" \"rainstorm in parched desert, rain\",\n",
|
||||
" k=1,\n",
|
||||
" search_strategy=SingleStoreVectorStore.SearchStrategy.FILTER_BY_VECTOR,\n",
|
||||
" filter_threshold=0.1,\n",
|
||||
")\n",
|
||||
"print(filteredByVectorResults[0].page_content)\n",
|
||||
"\n",
|
||||
"weightedSumResults = docsearch.similarity_search(\n",
|
||||
" \"rainstorm in parched desert, rain\",\n",
|
||||
" k=1,\n",
|
||||
" search_strategy=SingleStoreVectorStore.SearchStrategy.WEIGHTED_SUM,\n",
|
||||
" text_weight=0.2,\n",
|
||||
" vector_weight=0.8,\n",
|
||||
")\n",
|
||||
"print(weightedSumResults[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0c235cdc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Query by turning into retriever\n",
|
||||
"\n",
|
||||
"You can also transform the vector store into a retriever for easier usage in your chains. \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f3460093",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = vector_store.as_retriever(search_kwargs={\"k\": 1})\n",
|
||||
"retriever.invoke(\"trees in the snow\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8bf60ab4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Multi-modal Example: Leveraging CLIP and OpenClip Embeddings\n",
|
||||
"\n",
|
||||
"In the realm of multi-modal data analysis, the integration of diverse information types like images and text has become increasingly crucial. One powerful tool facilitating such integration is [CLIP](https://openai.com/research/clip), a cutting-edge model capable of embedding both images and text into a shared semantic space. By doing so, CLIP enables the retrieval of relevant content across different modalities through similarity search.\n",
|
||||
"\n",
|
||||
"To illustrate, let's consider an application scenario where we aim to effectively analyze multi-modal data. In this example, we harness the capabilities of [OpenClip multimodal embeddings](/docs/integrations/text_embedding/open_clip), which leverage CLIP's framework. With OpenClip, we can seamlessly embed textual descriptions alongside corresponding images, enabling comprehensive analysis and retrieval tasks. Whether it's identifying visually similar images based on textual queries or finding relevant text passages associated with specific visual content, OpenClip empowers users to explore and extract insights from multi-modal data with remarkable efficiency and accuracy."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "710f6be9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -U langchain openai lanchain-singlestore langchain-experimental"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e26825f1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from langchain_experimental.open_clip import OpenCLIPEmbeddings\n",
|
||||
"from langchain_singlestore.vectorstores import SingleStoreVectorStore\n",
|
||||
"\n",
|
||||
"os.environ[\"SINGLESTOREDB_URL\"] = \"root:pass@localhost:3306/db\"\n",
|
||||
"\n",
|
||||
"TEST_IMAGES_DIR = \"../../modules/images\"\n",
|
||||
"\n",
|
||||
"docsearch = SingleStoreVectorStore(OpenCLIPEmbeddings())\n",
|
||||
"\n",
|
||||
"image_uris = sorted(\n",
|
||||
" [\n",
|
||||
" os.path.join(TEST_IMAGES_DIR, image_name)\n",
|
||||
" for image_name in os.listdir(TEST_IMAGES_DIR)\n",
|
||||
" if image_name.endswith(\".jpg\")\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Add images\n",
|
||||
"docsearch.add_images(uris=image_uris)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c87779e8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage for retrieval-augmented generation\n",
|
||||
"\n",
|
||||
"For guides on how to use this vector store for retrieval-augmented generation (RAG), see the following sections:\n",
|
||||
"\n",
|
||||
"- [Tutorials](/docs/tutorials/)\n",
|
||||
"- [How-to: Question and answer with RAG](https://python.langchain.com/docs/how_to/#qa-with-rag)\n",
|
||||
"- [Retrieval conceptual docs](https://python.langchain.com/docs/concepts/retrieval)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6f717924",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all SingleStore Document Loader features and configurations head to the github page: [https://github.com/singlestore-labs/langchain-singlestore/](https://github.com/singlestore-labs/langchain-singlestore/)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
@ -1,326 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2b9582dc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# SingleStoreDB\n",
|
||||
">[SingleStoreDB](https://singlestore.com/) is a robust, high-performance distributed SQL database solution designed to excel in both [cloud](https://www.singlestore.com/cloud/) and on-premises environments. Boasting a versatile feature set, it offers seamless deployment options while delivering unparalleled performance.\n",
|
||||
"\n",
|
||||
"A standout feature of SingleStoreDB is its advanced support for vector storage and operations, making it an ideal choice for applications requiring intricate AI capabilities such as text similarity matching. With built-in vector functions like [dot_product](https://docs.singlestore.com/managed-service/en/reference/sql-reference/vector-functions/dot_product.html) and [euclidean_distance](https://docs.singlestore.com/managed-service/en/reference/sql-reference/vector-functions/euclidean_distance.html), SingleStoreDB empowers developers to implement sophisticated algorithms efficiently.\n",
|
||||
"\n",
|
||||
"For developers keen on leveraging vector data within SingleStoreDB, a comprehensive tutorial is available, guiding them through the intricacies of [working with vector data](https://docs.singlestore.com/managed-service/en/developer-resources/functional-extensions/working-with-vector-data.html). This tutorial delves into the Vector Store within SingleStoreDB, showcasing its ability to facilitate searches based on vector similarity. Leveraging vector indexes, queries can be executed with remarkable speed, enabling swift retrieval of relevant data.\n",
|
||||
"\n",
|
||||
"Moreover, SingleStoreDB's Vector Store seamlessly integrates with [full-text indexing based on Lucene](https://docs.singlestore.com/cloud/developer-resources/functional-extensions/working-with-full-text-search/), enabling powerful text similarity searches. Users can filter search results based on selected fields of document metadata objects, enhancing query precision.\n",
|
||||
"\n",
|
||||
"What sets SingleStoreDB apart is its ability to combine vector and full-text searches in various ways, offering flexibility and versatility. Whether prefiltering by text or vector similarity and selecting the most relevant data, or employing a weighted sum approach to compute a final similarity score, developers have multiple options at their disposal.\n",
|
||||
"\n",
|
||||
"In essence, SingleStoreDB provides a comprehensive solution for managing and querying vector data, offering unparalleled performance and flexibility for AI-driven applications.\n",
|
||||
"\n",
|
||||
"You'll need to install `langchain-community` with `pip install -qU langchain-community` to use this integration"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e4a61a4d",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Establishing a connection to the database is facilitated through the singlestoredb Python connector.\n",
|
||||
"# Please ensure that this connector is installed in your working environment.\n",
|
||||
"%pip install --upgrade --quiet singlestoredb"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "39a0132a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"# We want to use OpenAIEmbeddings so we have to get the OpenAI API Key.\n",
|
||||
"if \"OPENAI_API_KEY\" not in os.environ:\n",
|
||||
" os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6104fde8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.vectorstores import SingleStoreDB\n",
|
||||
"from langchain_community.vectorstores.utils import DistanceStrategy\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"from langchain_openai import OpenAIEmbeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7b45113c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# loading docs\n",
|
||||
"# we will use some artificial data for this example\n",
|
||||
"docs = [\n",
|
||||
" Document(\n",
|
||||
" page_content=\"\"\"In the parched desert, a sudden rainstorm brought relief,\n",
|
||||
" as the droplets danced upon the thirsty earth, rejuvenating the landscape\n",
|
||||
" with the sweet scent of petrichor.\"\"\",\n",
|
||||
" metadata={\"category\": \"rain\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"\"\"Amidst the bustling cityscape, the rain fell relentlessly,\n",
|
||||
" creating a symphony of pitter-patter on the pavement, while umbrellas\n",
|
||||
" bloomed like colorful flowers in a sea of gray.\"\"\",\n",
|
||||
" metadata={\"category\": \"rain\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"\"\"High in the mountains, the rain transformed into a delicate\n",
|
||||
" mist, enveloping the peaks in a mystical veil, where each droplet seemed to\n",
|
||||
" whisper secrets to the ancient rocks below.\"\"\",\n",
|
||||
" metadata={\"category\": \"rain\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"\"\"Blanketing the countryside in a soft, pristine layer, the\n",
|
||||
" snowfall painted a serene tableau, muffling the world in a tranquil hush\n",
|
||||
" as delicate flakes settled upon the branches of trees like nature's own \n",
|
||||
" lacework.\"\"\",\n",
|
||||
" metadata={\"category\": \"snow\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"\"\"In the urban landscape, snow descended, transforming\n",
|
||||
" bustling streets into a winter wonderland, where the laughter of\n",
|
||||
" children echoed amidst the flurry of snowballs and the twinkle of\n",
|
||||
" holiday lights.\"\"\",\n",
|
||||
" metadata={\"category\": \"snow\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"\"\"Atop the rugged peaks, snow fell with an unyielding\n",
|
||||
" intensity, sculpting the landscape into a pristine alpine paradise,\n",
|
||||
" where the frozen crystals shimmered under the moonlight, casting a\n",
|
||||
" spell of enchantment over the wilderness below.\"\"\",\n",
|
||||
" metadata={\"category\": \"snow\"},\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "535b2687",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"There are several ways to establish a [connection](https://singlestoredb-python.labs.singlestore.com/generated/singlestoredb.connect.html) to the database. You can either set up environment variables or pass named parameters to the `SingleStoreDB constructor`. Alternatively, you may provide these parameters to the `from_documents` and `from_texts` methods."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d0b316bf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Setup connection url as environment variable\n",
|
||||
"os.environ[\"SINGLESTOREDB_URL\"] = \"root:pass@localhost:3306/db\"\n",
|
||||
"\n",
|
||||
"# Load documents to the store\n",
|
||||
"docsearch = SingleStoreDB.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
" table_name=\"notebook\", # use table with a custom name\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0eaa4297",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"trees in the snow\"\n",
|
||||
"docs = docsearch.similarity_search(query) # Find documents that correspond to the query\n",
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "51b2b552",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"SingleStoreDB elevates search capabilities by enabling users to enhance and refine search results through prefiltering based on metadata fields. This functionality empowers developers and data analysts to fine-tune queries, ensuring that search results are precisely tailored to their requirements. By filtering search results using specific metadata attributes, users can narrow down the scope of their queries, focusing only on relevant data subsets. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "389bf801",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"trees branches\"\n",
|
||||
"docs = docsearch.similarity_search(\n",
|
||||
" query, filter={\"category\": \"snow\"}\n",
|
||||
") # Find documents that correspond to the query and has category \"snow\"\n",
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "035cba66",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Enhance your search efficiency with SingleStore DB version 8.5 or above by leveraging [ANN vector indexes](https://docs.singlestore.com/cloud/reference/sql-reference/vector-functions/vector-indexing/). By setting `use_vector_index=True` during vector store object creation, you can activate this feature. Additionally, if your vectors differ in dimensionality from the default OpenAI embedding size of 1536, ensure to specify the `vector_size` parameter accordingly. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5308afe5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"SingleStoreDB presents a diverse range of search strategies, each meticulously crafted to cater to specific use cases and user preferences. The default `VECTOR_ONLY` strategy utilizes vector operations such as `dot_product` or `euclidean_distance` to calculate similarity scores directly between vectors, while `TEXT_ONLY` employs Lucene-based full-text search, particularly advantageous for text-centric applications. For users seeking a balanced approach, `FILTER_BY_TEXT` first refines results based on text similarity before conducting vector comparisons, whereas `FILTER_BY_VECTOR` prioritizes vector similarity, filtering results before assessing text similarity for optimal matches. Notably, both `FILTER_BY_TEXT` and `FILTER_BY_VECTOR` necessitate a full-text index for operation. Additionally, `WEIGHTED_SUM` emerges as a sophisticated strategy, calculating the final similarity score by weighing vector and text similarities, albeit exclusively utilizing dot_product distance calculations and also requiring a full-text index. These versatile strategies empower users to fine-tune searches according to their unique needs, facilitating efficient and precise data retrieval and analysis. Moreover, SingleStoreDB's hybrid approaches, exemplified by `FILTER_BY_TEXT`, `FILTER_BY_VECTOR`, and `WEIGHTED_SUM` strategies, seamlessly blend vector and text-based searches to maximize efficiency and accuracy, ensuring users can fully leverage the platform's capabilities for a wide range of applications."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "17db0116",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docsearch = SingleStoreDB.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
" distance_strategy=DistanceStrategy.DOT_PRODUCT, # Use dot product for similarity search\n",
|
||||
" use_vector_index=True, # Use vector index for faster search\n",
|
||||
" use_full_text_search=True, # Use full text index\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"vectorResults = docsearch.similarity_search(\n",
|
||||
" \"rainstorm in parched desert, rain\",\n",
|
||||
" k=1,\n",
|
||||
" search_strategy=SingleStoreDB.SearchStrategy.VECTOR_ONLY,\n",
|
||||
" filter={\"category\": \"rain\"},\n",
|
||||
")\n",
|
||||
"print(vectorResults[0].page_content)\n",
|
||||
"\n",
|
||||
"textResults = docsearch.similarity_search(\n",
|
||||
" \"rainstorm in parched desert, rain\",\n",
|
||||
" k=1,\n",
|
||||
" search_strategy=SingleStoreDB.SearchStrategy.TEXT_ONLY,\n",
|
||||
")\n",
|
||||
"print(textResults[0].page_content)\n",
|
||||
"\n",
|
||||
"filteredByTextResults = docsearch.similarity_search(\n",
|
||||
" \"rainstorm in parched desert, rain\",\n",
|
||||
" k=1,\n",
|
||||
" search_strategy=SingleStoreDB.SearchStrategy.FILTER_BY_TEXT,\n",
|
||||
" filter_threshold=0.1,\n",
|
||||
")\n",
|
||||
"print(filteredByTextResults[0].page_content)\n",
|
||||
"\n",
|
||||
"filteredByVectorResults = docsearch.similarity_search(\n",
|
||||
" \"rainstorm in parched desert, rain\",\n",
|
||||
" k=1,\n",
|
||||
" search_strategy=SingleStoreDB.SearchStrategy.FILTER_BY_VECTOR,\n",
|
||||
" filter_threshold=0.1,\n",
|
||||
")\n",
|
||||
"print(filteredByVectorResults[0].page_content)\n",
|
||||
"\n",
|
||||
"weightedSumResults = docsearch.similarity_search(\n",
|
||||
" \"rainstorm in parched desert, rain\",\n",
|
||||
" k=1,\n",
|
||||
" search_strategy=SingleStoreDB.SearchStrategy.WEIGHTED_SUM,\n",
|
||||
" text_weight=0.2,\n",
|
||||
" vector_weight=0.8,\n",
|
||||
")\n",
|
||||
"print(weightedSumResults[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "86efff90",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Multi-modal Example: Leveraging CLIP and OpenClip Embeddings\n",
|
||||
"\n",
|
||||
"In the realm of multi-modal data analysis, the integration of diverse information types like images and text has become increasingly crucial. One powerful tool facilitating such integration is [CLIP](https://openai.com/research/clip), a cutting-edge model capable of embedding both images and text into a shared semantic space. By doing so, CLIP enables the retrieval of relevant content across different modalities through similarity search.\n",
|
||||
"\n",
|
||||
"To illustrate, let's consider an application scenario where we aim to effectively analyze multi-modal data. In this example, we harness the capabilities of [OpenClip multimodal embeddings](/docs/integrations/text_embedding/open_clip), which leverage CLIP's framework. With OpenClip, we can seamlessly embed textual descriptions alongside corresponding images, enabling comprehensive analysis and retrieval tasks. Whether it's identifying visually similar images based on textual queries or finding relevant text passages associated with specific visual content, OpenClip empowers users to explore and extract insights from multi-modal data with remarkable efficiency and accuracy."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9c0bce88",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -U langchain openai singlestoredb langchain-experimental # (newest versions required for multi-modal)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "21a8c25c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from langchain_community.vectorstores import SingleStoreDB\n",
|
||||
"from langchain_experimental.open_clip import OpenCLIPEmbeddings\n",
|
||||
"\n",
|
||||
"os.environ[\"SINGLESTOREDB_URL\"] = \"root:pass@localhost:3306/db\"\n",
|
||||
"\n",
|
||||
"TEST_IMAGES_DIR = \"../../modules/images\"\n",
|
||||
"\n",
|
||||
"docsearch = SingleStoreDB(OpenCLIPEmbeddings())\n",
|
||||
"\n",
|
||||
"image_uris = sorted(\n",
|
||||
" [\n",
|
||||
" os.path.join(TEST_IMAGES_DIR, image_name)\n",
|
||||
" for image_name in os.listdir(TEST_IMAGES_DIR)\n",
|
||||
" if image_name.endswith(\".jpg\")\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Add images\n",
|
||||
"docsearch.add_images(uris=image_uris)"
|
||||
]
|
||||
}
|
||||
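,
{
"cell_type": "markdown",
"id": "clip-query-sketch-note",
"metadata": {},
"source": [
"As a rough sketch of cross-modal retrieval (assuming the images above were added successfully), the store can now be queried with plain text; the query string is only an illustrative example."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "clip-query-sketch",
"metadata": {},
"outputs": [],
"source": [
"# Sketch only: the text query is embedded with the same CLIP model as the images,\n",
"# so the top result should be the stored image that best matches the description.\n",
"results = docsearch.similarity_search(\"a photo of a tree\", k=1)\n",
"print(results[0].page_content)  # for image documents, the page content is the image URI"
]
}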
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
@ -89,7 +89,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": null,
|
||||
"id": "39f3ce3e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -118,15 +118,13 @@
|
||||
" language: str = Field(description=\"The language the text is written in\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# LLM\n",
|
||||
"llm = ChatOpenAI(temperature=0, model=\"gpt-4o-mini\").with_structured_output(\n",
|
||||
" Classification\n",
|
||||
")"
|
||||
"# Structured LLM\n",
|
||||
"structured_llm = llm.with_structured_output(Classification)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": null,
|
||||
"id": "5509b6a6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -144,7 +142,7 @@
|
||||
"source": [
|
||||
"inp = \"Estoy increiblemente contento de haberte conocido! Creo que seremos muy buenos amigos!\"\n",
|
||||
"prompt = tagging_prompt.invoke({\"input\": inp})\n",
|
||||
"response = llm.invoke(prompt)\n",
|
||||
"response = structured_llm.invoke(prompt)\n",
|
||||
"\n",
|
||||
"response"
|
||||
]
|
||||
@ -159,7 +157,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": null,
|
||||
"id": "9154474c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -177,7 +175,7 @@
|
||||
"source": [
|
||||
"inp = \"Estoy muy enojado con vos! Te voy a dar tu merecido!\"\n",
|
||||
"prompt = tagging_prompt.invoke({\"input\": inp})\n",
|
||||
"response = llm.invoke(prompt)\n",
|
||||
"response = structured_llm.invoke(prompt)\n",
|
||||
"\n",
|
||||
"response.model_dump()"
|
||||
]
|
||||
|
@ -145,15 +145,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": null,
|
||||
"id": "a5e490f6-35ad-455e-8ae4-2bae021583ff",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing import Optional\n",
|
||||
"\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
|
||||
"from pydantic import BaseModel, Field\n",
|
||||
"\n",
|
||||
"# Define a custom prompt to provide instructions and any additional context.\n",
|
||||
"# 1) You can add examples into the prompt template to improve extraction quality\n",
|
||||
|
@ -135,6 +135,13 @@ ${llmVarName} = AzureChatOpenAI(
|
||||
apiKeyName: "AZURE_OPENAI_API_KEY",
|
||||
packageName: "langchain[openai]",
|
||||
},
|
||||
{
|
||||
value: "google_genai",
|
||||
label: "Google Gemini",
|
||||
model: "gemini-2.0-flash",
|
||||
apiKeyName: "GOOGLE_API_KEY",
|
||||
packageName: "langchain[google-genai]",
|
||||
},
|
||||
{
|
||||
value: "google_vertexai",
|
||||
label: "Google Vertex",
|
||||
|
@ -366,6 +366,12 @@ const FEATURE_TABLES = {
|
||||
package: "langchain-openai",
|
||||
apiLink: "https://python.langchain.com/api_reference/openai/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html"
|
||||
},
|
||||
{
|
||||
name: "Google Gemini",
|
||||
link: "google-generative-ai",
|
||||
package: "langchain-google-genai",
|
||||
apiLink: "https://python.langchain.com/api_reference/google_genai/embeddings/langchain_google_genai.embeddings.GoogleGenerativeAIEmbeddings.html"
|
||||
},
|
||||
{
|
||||
name: "Together",
|
||||
link: "together",
|
||||
|
1 docs/static/js/google_analytics.js (vendored)
@ -3,3 +3,4 @@ function gtag(){dataLayer.push(arguments);}
|
||||
gtag('js', new Date());
|
||||
|
||||
gtag('config', 'G-9B66JQQH2F');
|
||||
gtag('config', 'G-47WX3HKKY2');
|
||||
|
@ -134,6 +134,18 @@
|
||||
"source": "/docs/integrations/retrievers/weaviate-hybrid(/?)",
|
||||
"destination": "/docs/integrations/vectorstores/weaviate/#search-mechanism"
|
||||
},
|
||||
{
|
||||
"source": "/docs/integrations/vectorstores/singlestoredb(/?)",
|
||||
"destination": "https://python.langchain.com/v0.2/docs/integrations/vectorstores/singlestoredb/"
|
||||
},
|
||||
{
|
||||
"source": "/docs/integrations/providers/singlestoredb(/?)",
|
||||
"destination": "/docs/integrations/providers/singlestore/"
|
||||
},
|
||||
{
|
||||
"source": "/docs/integrations/retrievers/singlestoredb(/?)",
|
||||
"destination": "https://python.langchain.com/v0.2/docs/integrations/retrievers/singlestoredb/"
|
||||
},
|
||||
{
|
||||
"source": "/api_reference/mongodb/:path(.*/?)*",
|
||||
"destination": "https://langchain-mongodb.readthedocs.io/en/latest/langchain_mongodb/api_docs.html"
|
||||
|
@ -1,2 +1,4 @@
|
||||
httpx
|
||||
grpcio
|
||||
aiohttp<3.11
|
||||
protobuf<3.21
|
||||
|
@ -2489,6 +2489,18 @@ class OpenSearchSemanticCache(BaseCache):
|
||||
del self._cache_dict[index_name]
|
||||
|
||||
|
||||
@deprecated(
|
||||
since="0.3.22",
|
||||
message=(
|
||||
"This class is pending deprecation and may be removed in a future version. "
|
||||
"You can swap to using the `SingleStoreSemanticCache` "
|
||||
"implementation in `langchain_singlestore`. "
|
||||
"See <https://github.com/singlestore-labs/langchain-singlestore> for details "
|
||||
" about the new implementation."
|
||||
),
|
||||
alternative="from langchain_singlestore import SingleStoreSemanticCache",
|
||||
pending=True,
|
||||
)
|
||||
class SingleStoreDBSemanticCache(BaseCache):
|
||||
"""Cache that uses SingleStore DB as a backend"""
|
||||
|
||||
|
@ -10,6 +10,38 @@ from langchain_core.messages import AIMessage
|
||||
from langchain_core.outputs import ChatGeneration, LLMResult
|
||||
|
||||
MODEL_COST_PER_1K_TOKENS = {
|
||||
# GPT-4.1 input
|
||||
"gpt-4.1": 0.002,
|
||||
"gpt-4.1-2025-04-14": 0.002,
|
||||
"gpt-4.1-cached": 0.0005,
|
||||
"gpt-4.1-2025-04-14-cached": 0.0005,
|
||||
# GPT-4.1 output
|
||||
"gpt-4.1-completion": 0.008,
|
||||
"gpt-4.1-2025-04-14-completion": 0.008,
|
||||
# GPT-4.1-mini input
|
||||
"gpt-4.1-mini": 0.0004,
|
||||
"gpt-4.1-mini-2025-04-14": 0.0004,
|
||||
"gpt-4.1-mini-cached": 0.0001,
|
||||
"gpt-4.1-mini-2025-04-14-cached": 0.0001,
|
||||
# GPT-4.1-mini output
|
||||
"gpt-4.1-mini-completion": 0.0016,
|
||||
"gpt-4.1-mini-2025-04-14-completion": 0.0016,
|
||||
# GPT-4.1-nano input
|
||||
"gpt-4.1-nano": 0.0001,
|
||||
"gpt-4.1-nano-2025-04-14": 0.0001,
|
||||
"gpt-4.1-nano-cached": 0.000025,
|
||||
"gpt-4.1-nano-2025-04-14-cached": 0.000025,
|
||||
# GPT-4.1-nano output
|
||||
"gpt-4.1-nano-completion": 0.0004,
|
||||
"gpt-4.1-nano-2025-04-14-completion": 0.0004,
|
||||
# GPT-4.5-preview input
|
||||
"gpt-4.5-preview": 0.075,
|
||||
"gpt-4.5-preview-2025-02-27": 0.075,
|
||||
"gpt-4.5-preview-cached": 0.0375,
|
||||
"gpt-4.5-preview-2025-02-27-cached": 0.0375,
|
||||
# GPT-4.5-preview output
|
||||
"gpt-4.5-preview-completion": 0.15,
|
||||
"gpt-4.5-preview-2025-02-27-completion": 0.15,
|
||||
# OpenAI o1 input
|
||||
"o1": 0.015,
|
||||
"o1-2024-12-17": 0.015,
|
||||
@ -18,6 +50,28 @@ MODEL_COST_PER_1K_TOKENS = {
|
||||
# OpenAI o1 output
|
||||
"o1-completion": 0.06,
|
||||
"o1-2024-12-17-completion": 0.06,
|
||||
# OpenAI o1-pro input
|
||||
"o1-pro": 0.15,
|
||||
"o1-pro-2025-03-19": 0.15,
|
||||
# OpenAI o1-pro output
|
||||
"o1-pro-completion": 0.6,
|
||||
"o1-pro-2025-03-19-completion": 0.6,
|
||||
# OpenAI o3 input
|
||||
"o3": 0.01,
|
||||
"o3-2025-04-16": 0.01,
|
||||
"o3-cached": 0.0025,
|
||||
"o3-2025-04-16-cached": 0.0025,
|
||||
# OpenAI o3 output
|
||||
"o3-completion": 0.04,
|
||||
"o3-2025-04-16-completion": 0.04,
|
||||
# OpenAI o4-mini input
|
||||
"o4-mini": 0.0011,
|
||||
"o4-mini-2025-04-16": 0.0011,
|
||||
"o4-mini-cached": 0.000275,
|
||||
"o4-mini-2025-04-16-cached": 0.000275,
|
||||
# OpenAI o4-mini output
|
||||
"o4-mini-completion": 0.0044,
|
||||
"o4-mini-2025-04-16-completion": 0.0044,
|
||||
# OpenAI o3-mini input
|
||||
"o3-mini": 0.0011,
|
||||
"o3-mini-2025-01-31": 0.0011,
|
||||
@ -26,6 +80,14 @@ MODEL_COST_PER_1K_TOKENS = {
|
||||
# OpenAI o3-mini output
|
||||
"o3-mini-completion": 0.0044,
|
||||
"o3-mini-2025-01-31-completion": 0.0044,
|
||||
# OpenAI o1-mini input (updated pricing)
|
||||
"o1-mini": 0.0011,
|
||||
"o1-mini-cached": 0.00055,
|
||||
"o1-mini-2024-09-12": 0.0011,
|
||||
"o1-mini-2024-09-12-cached": 0.00055,
|
||||
# OpenAI o1-mini output (updated pricing)
|
||||
"o1-mini-completion": 0.0044,
|
||||
"o1-mini-2024-09-12-completion": 0.0044,
|
||||
# OpenAI o1-preview input
|
||||
"o1-preview": 0.015,
|
||||
"o1-preview-cached": 0.0075,
|
||||
@ -34,22 +96,6 @@ MODEL_COST_PER_1K_TOKENS = {
|
||||
# OpenAI o1-preview output
|
||||
"o1-preview-completion": 0.06,
|
||||
"o1-preview-2024-09-12-completion": 0.06,
|
||||
# OpenAI o1-mini input
|
||||
"o1-mini": 0.003,
|
||||
"o1-mini-cached": 0.0015,
|
||||
"o1-mini-2024-09-12": 0.003,
|
||||
"o1-mini-2024-09-12-cached": 0.0015,
|
||||
# OpenAI o1-mini output
|
||||
"o1-mini-completion": 0.012,
|
||||
"o1-mini-2024-09-12-completion": 0.012,
|
||||
# GPT-4o-mini input
|
||||
"gpt-4o-mini": 0.00015,
|
||||
"gpt-4o-mini-cached": 0.000075,
|
||||
"gpt-4o-mini-2024-07-18": 0.00015,
|
||||
"gpt-4o-mini-2024-07-18-cached": 0.000075,
|
||||
# GPT-4o-mini output
|
||||
"gpt-4o-mini-completion": 0.0006,
|
||||
"gpt-4o-mini-2024-07-18-completion": 0.0006,
|
||||
# GPT-4o input
|
||||
"gpt-4o": 0.0025,
|
||||
"gpt-4o-cached": 0.00125,
|
||||
@ -63,6 +109,65 @@ MODEL_COST_PER_1K_TOKENS = {
|
||||
"gpt-4o-2024-05-13-completion": 0.015,
|
||||
"gpt-4o-2024-08-06-completion": 0.01,
|
||||
"gpt-4o-2024-11-20-completion": 0.01,
|
||||
# GPT-4o-audio-preview input
|
||||
"gpt-4o-audio-preview": 0.0025,
|
||||
"gpt-4o-audio-preview-2024-12-17": 0.0025,
|
||||
"gpt-4o-audio-preview-2024-10-01": 0.0025,
|
||||
# GPT-4o-audio-preview output
|
||||
"gpt-4o-audio-preview-completion": 0.01,
|
||||
"gpt-4o-audio-preview-2024-12-17-completion": 0.01,
|
||||
"gpt-4o-audio-preview-2024-10-01-completion": 0.01,
|
||||
# GPT-4o-realtime-preview input
|
||||
"gpt-4o-realtime-preview": 0.005,
|
||||
"gpt-4o-realtime-preview-2024-12-17": 0.005,
|
||||
"gpt-4o-realtime-preview-2024-10-01": 0.005,
|
||||
"gpt-4o-realtime-preview-cached": 0.0025,
|
||||
"gpt-4o-realtime-preview-2024-12-17-cached": 0.0025,
|
||||
"gpt-4o-realtime-preview-2024-10-01-cached": 0.0025,
|
||||
# GPT-4o-realtime-preview output
|
||||
"gpt-4o-realtime-preview-completion": 0.02,
|
||||
"gpt-4o-realtime-preview-2024-12-17-completion": 0.02,
|
||||
"gpt-4o-realtime-preview-2024-10-01-completion": 0.02,
|
||||
# GPT-4o-mini input
|
||||
"gpt-4o-mini": 0.00015,
|
||||
"gpt-4o-mini-cached": 0.000075,
|
||||
"gpt-4o-mini-2024-07-18": 0.00015,
|
||||
"gpt-4o-mini-2024-07-18-cached": 0.000075,
|
||||
# GPT-4o-mini output
|
||||
"gpt-4o-mini-completion": 0.0006,
|
||||
"gpt-4o-mini-2024-07-18-completion": 0.0006,
|
||||
# GPT-4o-mini-audio-preview input
|
||||
"gpt-4o-mini-audio-preview": 0.00015,
|
||||
"gpt-4o-mini-audio-preview-2024-12-17": 0.00015,
|
||||
# GPT-4o-mini-audio-preview output
|
||||
"gpt-4o-mini-audio-preview-completion": 0.0006,
|
||||
"gpt-4o-mini-audio-preview-2024-12-17-completion": 0.0006,
|
||||
# GPT-4o-mini-realtime-preview input
|
||||
"gpt-4o-mini-realtime-preview": 0.0006,
|
||||
"gpt-4o-mini-realtime-preview-2024-12-17": 0.0006,
|
||||
"gpt-4o-mini-realtime-preview-cached": 0.0003,
|
||||
"gpt-4o-mini-realtime-preview-2024-12-17-cached": 0.0003,
|
||||
# GPT-4o-mini-realtime-preview output
|
||||
"gpt-4o-mini-realtime-preview-completion": 0.0024,
|
||||
"gpt-4o-mini-realtime-preview-2024-12-17-completion": 0.0024,
|
||||
# GPT-4o-mini-search-preview input
|
||||
"gpt-4o-mini-search-preview": 0.00015,
|
||||
"gpt-4o-mini-search-preview-2025-03-11": 0.00015,
|
||||
# GPT-4o-mini-search-preview output
|
||||
"gpt-4o-mini-search-preview-completion": 0.0006,
|
||||
"gpt-4o-mini-search-preview-2025-03-11-completion": 0.0006,
|
||||
# GPT-4o-search-preview input
|
||||
"gpt-4o-search-preview": 0.0025,
|
||||
"gpt-4o-search-preview-2025-03-11": 0.0025,
|
||||
# GPT-4o-search-preview output
|
||||
"gpt-4o-search-preview-completion": 0.01,
|
||||
"gpt-4o-search-preview-2025-03-11-completion": 0.01,
|
||||
# Computer-use-preview input
|
||||
"computer-use-preview": 0.003,
|
||||
"computer-use-preview-2025-03-11": 0.003,
|
||||
# Computer-use-preview output
|
||||
"computer-use-preview-completion": 0.012,
|
||||
"computer-use-preview-2025-03-11-completion": 0.012,
|
||||
# GPT-4 input
|
||||
"gpt-4": 0.03,
|
||||
"gpt-4-0314": 0.03,
|
||||
@ -219,6 +324,7 @@ def standardize_model_name(
|
||||
or model_name.startswith("gpt-35")
|
||||
or model_name.startswith("o1-")
|
||||
or model_name.startswith("o3-")
|
||||
or model_name.startswith("o4-")
|
||||
or ("finetuned" in model_name and "legacy" not in model_name)
|
||||
):
|
||||
return model_name + "-completion"
|
||||
@ -226,8 +332,10 @@ def standardize_model_name(
|
||||
token_type == TokenType.PROMPT_CACHED
|
||||
and (
|
||||
model_name.startswith("gpt-4o")
|
||||
or model_name.startswith("gpt-4.1")
|
||||
or model_name.startswith("o1")
|
||||
or model_name.startswith("o3")
|
||||
or model_name.startswith("o4")
|
||||
)
|
||||
and not (model_name.startswith("gpt-4o-2024-05-13"))
|
||||
):
|
||||
|
@ -6,6 +6,7 @@ from typing import (
|
||||
List,
|
||||
)
|
||||
|
||||
from langchain_core._api import deprecated
|
||||
from langchain_core.chat_history import BaseChatMessageHistory
|
||||
from langchain_core.messages import (
|
||||
BaseMessage,
|
||||
@ -16,6 +17,18 @@ from langchain_core.messages import (
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@deprecated(
|
||||
since="0.3.22",
|
||||
message=(
|
||||
"This class is pending deprecation and may be removed in a future version. "
|
||||
"You can swap to using the `SingleStoreChatMessageHistory` "
|
||||
"implementation in `langchain_singlestore`. "
|
||||
"See <https://github.com/singlestore-labs/langchain-singlestore> for details "
|
||||
" about the new implementation."
|
||||
),
|
||||
alternative="from langchain_singlestore import SingleStoreChatMessageHistory",
|
||||
pending=True,
|
||||
)
|
||||
class SingleStoreDBChatMessageHistory(BaseChatMessageHistory):
|
||||
"""Chat message history stored in a SingleStoreDB database."""
|
||||
|
||||
|
@ -452,6 +452,7 @@ class ChatLiteLLM(BaseChatModel):
|
||||
params = {**params, **kwargs, "stream": True}
|
||||
|
||||
default_chunk_class = AIMessageChunk
|
||||
added_model_name = False
|
||||
for chunk in self.completion_with_retry(
|
||||
messages=message_dicts, run_manager=run_manager, **params
|
||||
):
|
||||
@ -460,7 +461,15 @@ class ChatLiteLLM(BaseChatModel):
|
||||
if len(chunk["choices"]) == 0:
|
||||
continue
|
||||
delta = chunk["choices"][0]["delta"]
|
||||
usage = chunk.get("usage", {})
|
||||
chunk = _convert_delta_to_message_chunk(delta, default_chunk_class)
|
||||
if isinstance(chunk, AIMessageChunk):
|
||||
if not added_model_name:
|
||||
chunk.response_metadata = {
|
||||
"model_name": self.model_name or self.model
|
||||
}
|
||||
added_model_name = True
|
||||
chunk.usage_metadata = _create_usage_metadata(usage)
|
||||
default_chunk_class = chunk.__class__
|
||||
cg_chunk = ChatGenerationChunk(message=chunk)
|
||||
if run_manager:
|
||||
@ -478,6 +487,7 @@ class ChatLiteLLM(BaseChatModel):
|
||||
params = {**params, **kwargs, "stream": True}
|
||||
|
||||
default_chunk_class = AIMessageChunk
|
||||
added_model_name = False
|
||||
async for chunk in await acompletion_with_retry(
|
||||
self, messages=message_dicts, run_manager=run_manager, **params
|
||||
):
|
||||
@ -486,7 +496,15 @@ class ChatLiteLLM(BaseChatModel):
|
||||
if len(chunk["choices"]) == 0:
|
||||
continue
|
||||
delta = chunk["choices"][0]["delta"]
|
||||
usage = chunk.get("usage", {})
|
||||
chunk = _convert_delta_to_message_chunk(delta, default_chunk_class)
|
||||
if isinstance(chunk, AIMessageChunk):
|
||||
if not added_model_name:
|
||||
chunk.response_metadata = {
|
||||
"model_name": self.model_name or self.model
|
||||
}
|
||||
added_model_name = True
|
||||
chunk.usage_metadata = _create_usage_metadata(usage)
|
||||
default_chunk_class = chunk.__class__
|
||||
cg_chunk = ChatGenerationChunk(message=chunk)
|
||||
if run_manager:
|
||||
|
@ -1,4 +1,4 @@
|
||||
from typing import Iterator, Optional, Sequence
|
||||
from typing import Any, Dict, Iterator, Optional, Sequence
|
||||
|
||||
from langchain_core.documents import Document
|
||||
|
||||
@ -8,7 +8,7 @@ from langchain_community.document_loaders.base import BaseLoader
|
||||
class BrowserbaseLoader(BaseLoader):
|
||||
"""Load pre-rendered web pages using a headless browser hosted on Browserbase.
|
||||
|
||||
Depends on `browserbase` package.
|
||||
Depends on `browserbase` and `playwright` packages.
|
||||
Get your API key from https://browserbase.com
|
||||
"""
|
||||
|
||||
@ -24,6 +24,7 @@ class BrowserbaseLoader(BaseLoader):
|
||||
self.urls = urls
|
||||
self.text_content = text_content
|
||||
self.session_id = session_id
|
||||
self.project_id = project_id
|
||||
self.proxy = proxy
|
||||
|
||||
try:
|
||||
@ -32,22 +33,57 @@ class BrowserbaseLoader(BaseLoader):
|
||||
raise ImportError(
|
||||
"You must run "
|
||||
"`pip install --upgrade "
|
||||
"browserbase` "
|
||||
"browserbase playwright` "
|
||||
"to use the Browserbase loader."
|
||||
)
|
||||
|
||||
self.browserbase = Browserbase(api_key, project_id)
|
||||
self.browserbase = Browserbase(api_key=api_key)
|
||||
|
||||
def lazy_load(self) -> Iterator[Document]:
|
||||
"""Load pages from URLs"""
|
||||
pages = self.browserbase.load_urls(
|
||||
self.urls, self.text_content, self.session_id, self.proxy
|
||||
)
|
||||
|
||||
for i, page in enumerate(pages):
|
||||
yield Document(
|
||||
page_content=page,
|
||||
metadata={
|
||||
"url": self.urls[i],
|
||||
},
|
||||
try:
|
||||
from playwright.sync_api import sync_playwright
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"playwright is required for BrowserbaseLoader. "
|
||||
"Please run `pip install --upgrade playwright`."
|
||||
)
|
||||
|
||||
for url in self.urls:
|
||||
with sync_playwright() as playwright:
|
||||
# Create or use existing session
|
||||
if self.session_id:
|
||||
session = self.browserbase.sessions.retrieve(id=self.session_id)
|
||||
else:
|
||||
if not self.project_id:
|
||||
raise ValueError("project_id is required to create a session")
|
||||
session_params: Dict[str, Any] = {"project_id": self.project_id}
|
||||
if self.proxy is not None:
|
||||
session_params["proxy"] = bool(self.proxy)
|
||||
session = self.browserbase.sessions.create(**session_params)
|
||||
|
||||
# Connect to the remote session
|
||||
browser = playwright.chromium.connect_over_cdp(session.connect_url)
|
||||
context = browser.contexts[0]
|
||||
page = context.pages[0]
|
||||
|
||||
# Navigate to URL and get content
|
||||
page.goto(url)
|
||||
# Get content based on the text_content flag
|
||||
if self.text_content:
|
||||
page_text = page.inner_text("body")
|
||||
content = str(page_text)
|
||||
else:
|
||||
page_html = page.content()
|
||||
content = str(page_html)
|
||||
|
||||
# Close browser
|
||||
page.close()
|
||||
browser.close()
|
||||
|
||||
yield Document(
|
||||
page_content=content,
|
||||
metadata={
|
||||
"url": url,
|
||||
},
|
||||
)
|
||||
|
@ -1,4 +1,4 @@
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
from langchain_core.documents import Document
|
||||
|
||||
@ -31,6 +31,7 @@ class OracleAutonomousDatabaseLoader(BaseLoader):
|
||||
wallet_password: Optional[str] = None,
|
||||
connection_string: Optional[str] = None,
|
||||
metadata: Optional[List[str]] = None,
|
||||
parameters: Optional[Union[list, tuple, dict]] = None,
|
||||
):
|
||||
"""
|
||||
init method
|
||||
@ -44,6 +45,7 @@ class OracleAutonomousDatabaseLoader(BaseLoader):
|
||||
:param wallet_password: password of wallet
|
||||
:param connection_string: connection string to connect to adb instance
|
||||
:param metadata: metadata used in document
|
||||
:param parameters: bind variable to use in query
|
||||
"""
|
||||
# Mandatory required arguments.
|
||||
self.query = query
|
||||
@ -67,6 +69,9 @@ class OracleAutonomousDatabaseLoader(BaseLoader):
|
||||
# metadata column
|
||||
self.metadata = metadata
|
||||
|
||||
# parameters, e.g bind variable
|
||||
self.parameters = parameters
|
||||
|
||||
# dsn
|
||||
self.dsn: Optional[str]
|
||||
self._set_dsn()
|
||||
@ -96,7 +101,10 @@ class OracleAutonomousDatabaseLoader(BaseLoader):
|
||||
cursor = connection.cursor()
|
||||
if self.schema:
|
||||
cursor.execute(f"alter session set current_schema={self.schema}")
|
||||
cursor.execute(self.query)
|
||||
if self.parameters:
|
||||
cursor.execute(self.query, self.parameters)
|
||||
else:
|
||||
cursor.execute(self.query)
|
||||
columns = [col[0] for col in cursor.description]
|
||||
data = cursor.fetchall()
|
||||
data = [
|
||||
|
@ -185,7 +185,7 @@ class SitemapLoader(WebBaseLoader):
|
||||
|
||||
els.append(
|
||||
{
|
||||
tag: prop.text
|
||||
tag: prop.text.strip()
|
||||
for tag in ["loc", "lastmod", "changefreq", "priority"]
|
||||
if (prop := url.find(tag))
|
||||
}
|
||||
|
@ -668,7 +668,9 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
|
||||
# NOTE: to keep things simple, we assume the list may contain texts longer
|
||||
# than the maximum context and use length-safe embedding function.
|
||||
engine = cast(str, self.deployment)
|
||||
return self._get_len_safe_embeddings(texts, engine=engine)
|
||||
return self._get_len_safe_embeddings(
|
||||
texts, engine=engine, chunk_size=chunk_size
|
||||
)
|
||||
|
||||
async def aembed_documents(
|
||||
self, texts: List[str], chunk_size: Optional[int] = 0
|
||||
@ -686,7 +688,9 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
|
||||
# NOTE: to keep things simple, we assume the list may contain texts longer
|
||||
# than the maximum context and use length-safe embedding function.
|
||||
engine = cast(str, self.deployment)
|
||||
return await self._aget_len_safe_embeddings(texts, engine=engine)
|
||||
return await self._aget_len_safe_embeddings(
|
||||
texts, engine=engine, chunk_size=chunk_size
|
||||
)
|
||||
|
||||
def embed_query(self, text: str) -> List[float]:
|
||||
"""Call out to OpenAI's embedding endpoint for embedding query text.
|
||||
|
@ -1,6 +1,7 @@
|
||||
# HANA Translator/query constructor
|
||||
from typing import Dict, Tuple, Union
|
||||
|
||||
from langchain_core._api import deprecated
|
||||
from langchain_core.structured_query import (
|
||||
Comparator,
|
||||
Comparison,
|
||||
@ -11,8 +12,25 @@ from langchain_core.structured_query import (
|
||||
)
|
||||
|
||||
|
||||
@deprecated(
|
||||
since="0.3.23",
|
||||
removal="1.0",
|
||||
message=(
|
||||
"This class is deprecated and will be removed in a future version. "
|
||||
"Please use query_constructors.HanaTranslator from the "
|
||||
"langchain_hana package instead. "
|
||||
"See https://github.com/SAP/langchain-integration-for-sap-hana-cloud "
|
||||
"for details."
|
||||
),
|
||||
alternative="from langchain_hana.query_constructors import HanaTranslator;",
|
||||
pending=False,
|
||||
)
|
||||
class HanaTranslator(Visitor):
|
||||
"""
|
||||
**DEPRECATED**: This class is deprecated and will no longer be maintained.
|
||||
Please use query_constructors.HanaTranslator from the langchain_hana
|
||||
package instead. It offers an improved implementation and full support.
|
||||
|
||||
Translate internal query language elements to valid filters params for
|
||||
HANA vectorstore.
|
||||
"""
|
||||
|
@ -19,6 +19,7 @@ from typing import (
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
from langchain_core._api import deprecated
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.embeddings import Embeddings
|
||||
from langchain_core.runnables.config import run_in_executor
|
||||
@ -66,9 +67,25 @@ default_vector_column: str = "VEC_VECTOR"
|
||||
default_vector_column_length: int = -1 # -1 means dynamic length
|
||||
|
||||
|
||||
@deprecated(
|
||||
since="0.3.23",
|
||||
removal="1.0",
|
||||
message=(
|
||||
"This class is deprecated and will be removed in a future version. "
|
||||
"Please use HanaDB from the langchain_hana package instead. "
|
||||
"See https://github.com/SAP/langchain-integration-for-sap-hana-cloud "
|
||||
"for details."
|
||||
),
|
||||
alternative="from langchain_hana import HanaDB;",
|
||||
pending=False,
|
||||
)
|
||||
class HanaDB(VectorStore):
|
||||
"""SAP HANA Cloud Vector Engine
|
||||
|
||||
**DEPRECATED**: This class is deprecated and will no longer be maintained.
|
||||
Please use HanaDB from the langchain_hana package instead. It offers an
|
||||
improved implementation and full support.
|
||||
|
||||
The prerequisite for using this class is the installation of the ``hdbcli``
|
||||
Python package.
|
||||
|
||||
|
@ -480,6 +480,8 @@ class OpenSearchVectorSearch(VectorStore):
|
||||
bulk_size = bulk_size if bulk_size is not None else self.bulk_size
|
||||
_validate_embeddings_and_bulk_size(len(embeddings), bulk_size)
|
||||
index_name = kwargs.get("index_name", self.index_name)
|
||||
if self.index_name is None:
|
||||
raise ValueError("index_name must be provided.")
|
||||
text_field = kwargs.get("text_field", "text")
|
||||
dim = len(embeddings[0])
|
||||
engine = kwargs.get("engine", self.engine)
|
||||
@ -522,6 +524,8 @@ class OpenSearchVectorSearch(VectorStore):
|
||||
bulk_size = bulk_size if bulk_size is not None else self.bulk_size
|
||||
_validate_embeddings_and_bulk_size(len(embeddings), bulk_size)
|
||||
index_name = kwargs.get("index_name", self.index_name)
|
||||
if self.index_name is None:
|
||||
raise ValueError("index_name must be provided.")
|
||||
text_field = kwargs.get("text_field", "text")
|
||||
dim = len(embeddings[0])
|
||||
engine = kwargs.get("engine", self.engine)
|
||||
@ -735,12 +739,14 @@ class OpenSearchVectorSearch(VectorStore):
|
||||
raise ImportError(IMPORT_OPENSEARCH_PY_ERROR)
|
||||
|
||||
body = []
|
||||
|
||||
index_name = kwargs.get("index_name", self.index_name)
|
||||
if self.index_name is None:
|
||||
raise ValueError("index_name must be provided.")
|
||||
if ids is None:
|
||||
raise ValueError("ids must be provided.")
|
||||
|
||||
for _id in ids:
|
||||
body.append({"_op_type": "delete", "_index": self.index_name, "_id": _id})
|
||||
body.append({"_op_type": "delete", "_index": index_name, "_id": _id})
|
||||
|
||||
if len(body) > 0:
|
||||
try:
|
||||
@ -766,8 +772,10 @@ class OpenSearchVectorSearch(VectorStore):
|
||||
"""
|
||||
if ids is None:
|
||||
raise ValueError("No ids provided to delete.")
|
||||
|
||||
actions = [{"delete": {"_index": self.index_name, "_id": id_}} for id_ in ids]
|
||||
index_name = kwargs.get("index_name", self.index_name)
|
||||
if self.index_name is None:
|
||||
raise ValueError("index_name must be provided.")
|
||||
actions = [{"delete": {"_index": index_name, "_id": id_}} for id_ in ids]
|
||||
response = await self.async_client.bulk(body=actions, **kwargs)
|
||||
return not any(
|
||||
item.get("delete", {}).get("error") for item in response["items"]
|
||||
@ -1096,6 +1104,8 @@ class OpenSearchVectorSearch(VectorStore):
|
||||
search_type = kwargs.get("search_type", "approximate_search")
|
||||
vector_field = kwargs.get("vector_field", "vector_field")
|
||||
index_name = kwargs.get("index_name", self.index_name)
|
||||
if self.index_name is None:
|
||||
raise ValueError("index_name must be provided.")
|
||||
filter = kwargs.get("filter", {})
|
||||
|
||||
if (
|
||||
@ -1289,7 +1299,12 @@ class OpenSearchVectorSearch(VectorStore):
|
||||
return [
|
||||
Document(
|
||||
page_content=results[i]["_source"][text_field],
|
||||
metadata=results[i]["_source"][metadata_field],
|
||||
metadata=(
|
||||
results[i]["_source"]
|
||||
if metadata_field == "*"
|
||||
or metadata_field not in results[i]["_source"]
|
||||
else results[i]["_source"][metadata_field]
|
||||
),
|
||||
id=results[i]["_id"],
|
||||
)
|
||||
for i in mmr_selected
|
||||
|
@ -13,6 +13,7 @@ from typing import (
|
||||
Type,
|
||||
)
|
||||
|
||||
from langchain_core._api import deprecated
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.embeddings import Embeddings
|
||||
from langchain_core.vectorstores import VectorStore, VectorStoreRetriever
|
||||
@ -28,6 +29,18 @@ ORDERING_DIRECTIVE: dict = {
|
||||
}
|
||||
|
||||
|
||||
@deprecated(
|
||||
since="0.3.22",
|
||||
message=(
|
||||
"This class is pending deprecation and may be removed in a future version. "
|
||||
"You can swap to using the `SingleStoreVectorStore` "
|
||||
"implementation in `langchain_singlestore`. "
|
||||
"See <https://github.com/singlestore-labs/langchain-singlestore> for details "
|
||||
"about the new implementation."
|
||||
),
|
||||
alternative="from langchain_singlestore import SingleStoreVectorStore",
|
||||
pending=True,
|
||||
)
|
||||
class SingleStoreDB(VectorStore):
|
||||
"""`SingleStore DB` vector store.
|
||||
|
||||
|
@ -7,8 +7,8 @@ authors = []
|
||||
license = { text = "MIT" }
|
||||
requires-python = "<4.0,>=3.9"
|
||||
dependencies = [
|
||||
"langchain-core<1.0.0,>=0.3.51",
|
||||
"langchain<1.0.0,>=0.3.23",
|
||||
"langchain-core<1.0.0,>=0.3.55",
|
||||
"langchain<1.0.0,>=0.3.24",
|
||||
"SQLAlchemy<3,>=1.4",
|
||||
"requests<3,>=2",
|
||||
"PyYAML>=5.3",
|
||||
@ -22,7 +22,7 @@ dependencies = [
|
||||
"numpy>=2.1.0; python_version>='3.13'",
|
||||
]
|
||||
name = "langchain-community"
|
||||
version = "0.3.21"
|
||||
version = "0.3.22"
|
||||
description = "Community contributed LangChain integrations."
|
||||
readme = "README.md"
|
||||
|
||||
|
@ -2,7 +2,6 @@
|
||||
|
||||
from typing import Type
|
||||
|
||||
import pytest
|
||||
from langchain_core.language_models import BaseChatModel
|
||||
from langchain_tests.integration_tests import ChatModelIntegrationTests
|
||||
|
||||
@ -16,8 +15,8 @@ class TestLiteLLMStandard(ChatModelIntegrationTests):
|
||||
|
||||
@property
|
||||
def chat_model_params(self) -> dict:
|
||||
return {"model": "ollama/mistral"}
|
||||
|
||||
@pytest.mark.xfail(reason="Not yet implemented.")
|
||||
def test_usage_metadata_streaming(self, model: BaseChatModel) -> None:
|
||||
super().test_usage_metadata_streaming(model)
|
||||
return {
|
||||
"model": "ollama/mistral",
|
||||
# Needed to get the usage object when streaming. See https://docs.litellm.ai/docs/completion/usage#streaming-usage
|
||||
"model_kwargs": {"stream_options": {"include_usage": True}},
|
||||
}
|
||||
|
@ -1,7 +1,12 @@
|
||||
import os
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain_community.embeddings.openai import OpenAIEmbeddings
|
||||
|
||||
os.environ["OPENAI_API_KEY"] = "foo"
|
||||
|
||||
|
||||
@pytest.mark.requires("openai")
|
||||
def test_openai_invalid_model_kwargs() -> None:
|
||||
@ -14,3 +19,20 @@ def test_openai_incorrect_field() -> None:
|
||||
with pytest.warns(match="not default parameter"):
|
||||
llm = OpenAIEmbeddings(foo="bar", openai_api_key="foo") # type: ignore[call-arg]
|
||||
assert llm.model_kwargs == {"foo": "bar"}
|
||||
|
||||
|
||||
@pytest.mark.requires("openai")
|
||||
def test_embed_documents_with_custom_chunk_size() -> None:
|
||||
embeddings = OpenAIEmbeddings(chunk_size=2)
|
||||
texts = ["text1", "text2", "text3", "text4"]
|
||||
custom_chunk_size = 3
|
||||
|
||||
with patch.object(embeddings.client, "create") as mock_create:
|
||||
mock_create.side_effect = [
|
||||
{"data": [{"embedding": [0.1, 0.2]}, {"embedding": [0.3, 0.4]}]},
|
||||
{"data": [{"embedding": [0.5, 0.6]}, {"embedding": [0.7, 0.8]}]},
|
||||
]
|
||||
|
||||
embeddings.embed_documents(texts, chunk_size=custom_chunk_size)
|
||||
mock_create.assert_any_call(input=[[1342, 19]], **embeddings._invocation_params)
|
||||
mock_create.assert_any_call(input=[[1342, 19]], **embeddings._invocation_params)
|
||||
|
@ -1,155 +0,0 @@
|
||||
import importlib
|
||||
import inspect
|
||||
import pkgutil
|
||||
from types import ModuleType
|
||||
|
||||
from langchain_core.load.mapping import SERIALIZABLE_MAPPING
|
||||
|
||||
|
||||
def import_all_modules(package_name: str) -> dict:
|
||||
package = importlib.import_module(package_name)
|
||||
classes: dict = {}
|
||||
|
||||
def _handle_module(module: ModuleType) -> None:
|
||||
# Iterate over all members of the module
|
||||
|
||||
names = dir(module)
|
||||
|
||||
if hasattr(module, "__all__"):
|
||||
names += list(module.__all__)
|
||||
|
||||
names = sorted(set(names))
|
||||
|
||||
for name in names:
|
||||
# Check if it's a class or function
|
||||
attr = getattr(module, name)
|
||||
|
||||
if not inspect.isclass(attr):
|
||||
continue
|
||||
|
||||
if not hasattr(attr, "is_lc_serializable") or not isinstance(attr, type):
|
||||
continue
|
||||
|
||||
if (
|
||||
isinstance(attr.is_lc_serializable(), bool)
|
||||
and attr.is_lc_serializable()
|
||||
):
|
||||
key = tuple(attr.lc_id())
|
||||
value = tuple(attr.__module__.split(".") + [attr.__name__])
|
||||
if key in classes and classes[key] != value:
|
||||
raise ValueError
|
||||
classes[key] = value
|
||||
|
||||
_handle_module(package)
|
||||
|
||||
for importer, modname, ispkg in pkgutil.walk_packages(
|
||||
package.__path__, package.__name__ + "."
|
||||
):
|
||||
try:
|
||||
module = importlib.import_module(modname)
|
||||
except ModuleNotFoundError:
|
||||
continue
|
||||
_handle_module(module)
|
||||
|
||||
return classes
|
||||
|
||||
|
||||
def test_import_all_modules() -> None:
|
||||
"""Test import all modules works as expected"""
|
||||
all_modules = import_all_modules("langchain")
|
||||
filtered_modules = [
|
||||
k
|
||||
for k in all_modules
|
||||
if len(k) == 4 and tuple(k[:2]) == ("langchain", "chat_models")
|
||||
]
|
||||
# This test will need to be updated if new serializable classes are added
|
||||
# to community
|
||||
assert sorted(filtered_modules) == sorted(
|
||||
[
|
||||
("langchain", "chat_models", "azure_openai", "AzureChatOpenAI"),
|
||||
("langchain", "chat_models", "bedrock", "BedrockChat"),
|
||||
("langchain", "chat_models", "anthropic", "ChatAnthropic"),
|
||||
("langchain", "chat_models", "fireworks", "ChatFireworks"),
|
||||
("langchain", "chat_models", "google_palm", "ChatGooglePalm"),
|
||||
("langchain", "chat_models", "openai", "ChatOpenAI"),
|
||||
("langchain", "chat_models", "vertexai", "ChatVertexAI"),
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
def test_serializable_mapping() -> None:
|
||||
to_skip = {
|
||||
# This should have had a different namespace, as it was never
|
||||
# exported from the langchain module, but we keep for whoever has
|
||||
# already serialized it.
|
||||
("langchain", "prompts", "image", "ImagePromptTemplate"): (
|
||||
"langchain_core",
|
||||
"prompts",
|
||||
"image",
|
||||
"ImagePromptTemplate",
|
||||
),
|
||||
# This is not exported from langchain, only langchain_core
|
||||
("langchain_core", "prompts", "structured", "StructuredPrompt"): (
|
||||
"langchain_core",
|
||||
"prompts",
|
||||
"structured",
|
||||
"StructuredPrompt",
|
||||
),
|
||||
# This is not exported from langchain, only langchain_core
|
||||
("langchain", "schema", "messages", "RemoveMessage"): (
|
||||
"langchain_core",
|
||||
"messages",
|
||||
"modifier",
|
||||
"RemoveMessage",
|
||||
),
|
||||
("langchain", "chat_models", "mistralai", "ChatMistralAI"): (
|
||||
"langchain_mistralai",
|
||||
"chat_models",
|
||||
"ChatMistralAI",
|
||||
),
|
||||
("langchain_groq", "chat_models", "ChatGroq"): (
|
||||
"langchain_groq",
|
||||
"chat_models",
|
||||
"ChatGroq",
|
||||
),
|
||||
("langchain_sambanova", "chat_models", "ChatSambaNovaCloud"): (
|
||||
"langchain_sambanova",
|
||||
"chat_models",
|
||||
"ChatSambaNovaCloud",
|
||||
),
|
||||
("langchain_sambanova", "chat_models", "ChatSambaStudio"): (
|
||||
"langchain_sambanova",
|
||||
"chat_models",
|
||||
"ChatSambaStudio",
|
||||
),
|
||||
# TODO(0.3): For now we're skipping the below two tests. Need to fix
|
||||
# so that it only runs when langchain-aws, langchain-google-genai
|
||||
# are installed.
|
||||
("langchain", "chat_models", "bedrock", "ChatBedrock"): (
|
||||
"langchain_aws",
|
||||
"chat_models",
|
||||
"bedrock",
|
||||
"ChatBedrock",
|
||||
),
|
||||
("langchain_google_genai", "chat_models", "ChatGoogleGenerativeAI"): (
|
||||
"langchain_google_genai",
|
||||
"chat_models",
|
||||
"ChatGoogleGenerativeAI",
|
||||
),
|
||||
}
|
||||
serializable_modules = import_all_modules("langchain")
|
||||
|
||||
missing = set(SERIALIZABLE_MAPPING).difference(
|
||||
set(serializable_modules).union(to_skip)
|
||||
)
|
||||
assert missing == set()
|
||||
extra = set(serializable_modules).difference(SERIALIZABLE_MAPPING)
|
||||
assert extra == set()
|
||||
|
||||
for k, import_path in serializable_modules.items():
|
||||
import_dir, import_obj = import_path[:-1], import_path[-1]
|
||||
# Import module
|
||||
mod = importlib.import_module(".".join(import_dir))
|
||||
# Import class
|
||||
cls = getattr(mod, import_obj)
|
||||
assert list(k) == cls.lc_id()
|
@ -1,5 +1,4 @@
|
||||
version = 1
|
||||
revision = 1
|
||||
requires-python = ">=3.9, <4.0"
|
||||
resolution-markers = [
|
||||
"python_full_version >= '3.13' and platform_python_implementation == 'PyPy'",
|
||||
@ -1498,7 +1497,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "langchain"
|
||||
version = "0.3.23"
|
||||
version = "0.3.24"
|
||||
source = { editable = "../langchain" }
|
||||
dependencies = [
|
||||
{ name = "async-timeout", marker = "python_full_version < '3.11'" },
|
||||
@ -1539,7 +1538,6 @@ requires-dist = [
|
||||
{ name = "requests", specifier = ">=2,<3" },
|
||||
{ name = "sqlalchemy", specifier = ">=1.4,<3" },
|
||||
]
|
||||
provides-extras = ["community", "anthropic", "openai", "azure-ai", "cohere", "google-vertexai", "google-genai", "fireworks", "ollama", "together", "mistralai", "huggingface", "groq", "aws", "deepseek", "xai", "perplexity"]
|
||||
|
||||
[package.metadata.requires-dev]
|
||||
codespell = [{ name = "codespell", specifier = ">=2.2.0,<3.0.0" }]
|
||||
@ -1596,7 +1594,7 @@ test-integration = [
|
||||
typing = [
|
||||
{ name = "langchain-core", editable = "../core" },
|
||||
{ name = "langchain-text-splitters", editable = "../text-splitters" },
|
||||
{ name = "mypy", specifier = ">=1.10,<2.0" },
|
||||
{ name = "mypy", specifier = ">=1.15,<2.0" },
|
||||
{ name = "mypy-protobuf", specifier = ">=3.0.0,<4.0.0" },
|
||||
{ name = "numpy", marker = "python_full_version < '3.13'", specifier = ">=1.26.4" },
|
||||
{ name = "numpy", marker = "python_full_version >= '3.13'", specifier = ">=2.1.0" },
|
||||
@ -1610,7 +1608,7 @@ typing = [
|
||||
|
||||
[[package]]
|
||||
name = "langchain-community"
|
||||
version = "0.3.21"
|
||||
version = "0.3.22"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "aiohttp" },
|
||||
@ -1757,7 +1755,7 @@ typing = [
|
||||
|
||||
[[package]]
|
||||
name = "langchain-core"
|
||||
version = "0.3.51"
|
||||
version = "0.3.55"
|
||||
source = { editable = "../core" }
|
||||
dependencies = [
|
||||
{ name = "jsonpatch" },
|
||||
@ -1816,7 +1814,7 @@ typing = [
|
||||
|
||||
[[package]]
|
||||
name = "langchain-tests"
|
||||
version = "0.3.17"
|
||||
version = "0.3.19"
|
||||
source = { editable = "../standard-tests" }
|
||||
dependencies = [
|
||||
{ name = "httpx" },
|
||||
|
@ -8,14 +8,12 @@ from typing import Any, Optional
|
||||
|
||||
from typing_extensions import override
|
||||
|
||||
from langchain_core._api import beta
|
||||
from langchain_core.callbacks import BaseCallbackHandler
|
||||
from langchain_core.messages import AIMessage
|
||||
from langchain_core.messages.ai import UsageMetadata, add_usage
|
||||
from langchain_core.outputs import ChatGeneration, LLMResult
|
||||
|
||||
|
||||
@beta()
|
||||
class UsageMetadataCallbackHandler(BaseCallbackHandler):
|
||||
"""Callback Handler that tracks AIMessage.usage_metadata.
|
||||
|
||||
@ -90,7 +88,6 @@ class UsageMetadataCallbackHandler(BaseCallbackHandler):
|
||||
|
||||
|
||||
@contextmanager
|
||||
@beta()
|
||||
def get_usage_metadata_callback(
|
||||
name: str = "usage_metadata_callback",
|
||||
) -> Generator[UsageMetadataCallbackHandler, None, None]:
|
||||
|
139 libs/core/langchain_core/language_models/_utils.py (Normal file)
@ -0,0 +1,139 @@
|
||||
import re
|
||||
from collections.abc import Sequence
|
||||
from typing import Optional
|
||||
|
||||
from langchain_core.messages import BaseMessage
|
||||
|
||||
|
||||
def _is_openai_data_block(block: dict) -> bool:
|
||||
"""Check if the block contains multimodal data in OpenAI Chat Completions format."""
|
||||
if block.get("type") == "image_url":
|
||||
if (
|
||||
(set(block.keys()) <= {"type", "image_url", "detail"})
|
||||
and (image_url := block.get("image_url"))
|
||||
and isinstance(image_url, dict)
|
||||
):
|
||||
url = image_url.get("url")
|
||||
if isinstance(url, str):
|
||||
return True
|
||||
|
||||
elif block.get("type") == "file":
|
||||
if (file := block.get("file")) and isinstance(file, dict):
|
||||
file_data = file.get("file_data")
|
||||
if isinstance(file_data, str):
|
||||
return True
|
||||
|
||||
elif block.get("type") == "input_audio": # noqa: SIM102
|
||||
if (input_audio := block.get("input_audio")) and isinstance(input_audio, dict):
|
||||
audio_data = input_audio.get("data")
|
||||
audio_format = input_audio.get("format")
|
||||
if isinstance(audio_data, str) and isinstance(audio_format, str):
|
||||
return True
|
||||
|
||||
else:
|
||||
return False
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def _parse_data_uri(uri: str) -> Optional[dict]:
|
||||
"""Parse a data URI into its components. If parsing fails, return None.
|
||||
|
||||
Example:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
data_uri = "data:image/jpeg;base64,/9j/4AAQSkZJRg..."
|
||||
parsed = _parse_data_uri(data_uri)
|
||||
|
||||
assert parsed == {
|
||||
"source_type": "base64",
|
||||
"mime_type": "image/jpeg",
|
||||
"data": "/9j/4AAQSkZJRg...",
|
||||
}
|
||||
"""
|
||||
regex = r"^data:(?P<mime_type>[^;]+);base64,(?P<data>.+)$"
|
||||
match = re.match(regex, uri)
|
||||
if match is None:
|
||||
return None
|
||||
return {
|
||||
"source_type": "base64",
|
||||
"data": match.group("data"),
|
||||
"mime_type": match.group("mime_type"),
|
||||
}
|
||||
|
||||
|
||||
def _convert_openai_format_to_data_block(block: dict) -> dict:
|
||||
"""Convert OpenAI image content block to standard data content block.
|
||||
|
||||
If parsing fails, pass-through.
|
||||
|
||||
Args:
|
||||
block: The OpenAI image content block to convert.
|
||||
|
||||
Returns:
|
||||
The converted standard data content block.
|
||||
"""
|
||||
if block["type"] == "image_url":
|
||||
parsed = _parse_data_uri(block["image_url"]["url"])
|
||||
if parsed is not None:
|
||||
parsed["type"] = "image"
|
||||
return parsed
|
||||
return block
|
||||
|
||||
if block["type"] == "file":
|
||||
parsed = _parse_data_uri(block["file"]["file_data"])
|
||||
if parsed is not None:
|
||||
parsed["type"] = "file"
|
||||
if filename := block["file"].get("filename"):
|
||||
parsed["filename"] = filename
|
||||
return parsed
|
||||
return block
|
||||
|
||||
if block["type"] == "input_audio":
|
||||
data = block["input_audio"].get("data")
|
||||
format = block["input_audio"].get("format")
|
||||
if data and format:
|
||||
return {
|
||||
"type": "audio",
|
||||
"source_type": "base64",
|
||||
"data": data,
|
||||
"mime_type": f"audio/{format}",
|
||||
}
|
||||
return block
|
||||
|
||||
return block
|
||||
|
||||
|
||||
def _normalize_messages(messages: Sequence[BaseMessage]) -> list[BaseMessage]:
|
||||
"""Extend support for message formats.
|
||||
|
||||
Chat models implement support for images in OpenAI Chat Completions format, as well
|
||||
as other multimodal data as standard data blocks. This function extends support to
|
||||
audio and file data in OpenAI Chat Completions format by converting them to standard
|
||||
data blocks.
|
||||
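
Example (illustrative only; the message content below is an assumed input):

.. code-block:: python

    from langchain_core.messages import HumanMessage

    message = HumanMessage(
        content=[
            {
                "type": "input_audio",
                "input_audio": {"data": "<base64 audio>", "format": "wav"},
            }
        ]
    )
    [normalized] = _normalize_messages([message])
    # normalized.content[0] is now a standard data block:
    # {"type": "audio", "source_type": "base64",
    #  "data": "<base64 audio>", "mime_type": "audio/wav"}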
"""
|
||||
formatted_messages = []
|
||||
for message in messages:
|
||||
formatted_message = message
|
||||
if isinstance(message.content, list):
|
||||
for idx, block in enumerate(message.content):
|
||||
if (
|
||||
isinstance(block, dict)
|
||||
# Subset to (PDF) files and audio, as most relevant chat models
|
||||
# support images in OAI format (and some may not yet support the
|
||||
# standard data block format)
|
||||
and block.get("type") in ("file", "input_audio")
|
||||
and _is_openai_data_block(block)
|
||||
):
|
||||
if formatted_message is message:
|
||||
formatted_message = message.model_copy()
|
||||
# Also shallow-copy content
|
||||
formatted_message.content = list(formatted_message.content)
|
||||
|
||||
formatted_message.content[idx] = ( # type: ignore[index] # mypy confused by .model_copy
|
||||
_convert_openai_format_to_data_block(block)
|
||||
)
|
||||
formatted_messages.append(formatted_message)
|
||||
|
||||
return formatted_messages
|
@ -40,6 +40,7 @@ from langchain_core.callbacks import (
|
||||
Callbacks,
|
||||
)
|
||||
from langchain_core.globals import get_llm_cache
|
||||
from langchain_core.language_models._utils import _normalize_messages
|
||||
from langchain_core.language_models.base import (
|
||||
BaseLanguageModel,
|
||||
LangSmithParams,
|
||||
@ -489,7 +490,8 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
|
||||
self.rate_limiter.acquire(blocking=True)
|
||||
|
||||
try:
|
||||
for chunk in self._stream(messages, stop=stop, **kwargs):
|
||||
input_messages = _normalize_messages(messages)
|
||||
for chunk in self._stream(input_messages, stop=stop, **kwargs):
|
||||
if chunk.message.id is None:
|
||||
chunk.message.id = f"run-{run_manager.run_id}"
|
||||
chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
|
||||
@ -574,8 +576,9 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
|
||||
|
||||
generation: Optional[ChatGenerationChunk] = None
|
||||
try:
|
||||
input_messages = _normalize_messages(messages)
|
||||
async for chunk in self._astream(
|
||||
messages,
|
||||
input_messages,
|
||||
stop=stop,
|
||||
**kwargs,
|
||||
):
|
||||
@ -753,7 +756,10 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
|
||||
batch_size=len(messages),
|
||||
)
|
||||
results = []
|
||||
for i, m in enumerate(messages):
|
||||
input_messages = [
|
||||
_normalize_messages(message_list) for message_list in messages
|
||||
]
|
||||
for i, m in enumerate(input_messages):
|
||||
try:
|
||||
results.append(
|
||||
self._generate_with_cache(
|
||||
@ -865,6 +871,9 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
|
||||
run_id=run_id,
|
||||
)
|
||||
|
||||
input_messages = [
|
||||
_normalize_messages(message_list) for message_list in messages
|
||||
]
|
||||
results = await asyncio.gather(
|
||||
*[
|
||||
self._agenerate_with_cache(
|
||||
@ -873,7 +882,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
|
||||
run_manager=run_managers[i] if run_managers else None,
|
||||
**kwargs,
|
||||
)
|
||||
for i, m in enumerate(messages)
|
||||
for i, m in enumerate(input_messages)
|
||||
],
|
||||
return_exceptions=True,
|
||||
)
|
||||
|
@ -540,6 +540,12 @@ SERIALIZABLE_MAPPING: dict[tuple[str, ...], tuple[str, ...]] = {
|
||||
"chat_models",
|
||||
"ChatSambaStudio",
|
||||
),
|
||||
("langchain_core", "prompts", "message", "_DictMessagePromptTemplate"): (
|
||||
"langchain_core",
|
||||
"prompts",
|
||||
"dict",
|
||||
"DictPromptTemplate",
|
||||
),
|
||||
}
|
||||
|
||||
# Needed for backwards compatibility for old versions of LangChain where things
|
||||
|
@ -33,6 +33,7 @@ if TYPE_CHECKING:
|
||||
)
|
||||
from langchain_core.messages.chat import ChatMessage, ChatMessageChunk
|
||||
from langchain_core.messages.content_blocks import (
|
||||
convert_to_openai_data_block,
|
||||
convert_to_openai_image_block,
|
||||
is_data_content_block,
|
||||
)
|
||||
@ -83,6 +84,7 @@ __all__ = (
|
||||
"ToolMessageChunk",
|
||||
"RemoveMessage",
|
||||
"_message_from_dict",
|
||||
"convert_to_openai_data_block",
|
||||
"convert_to_openai_image_block",
|
||||
"convert_to_messages",
|
||||
"get_buffer_string",
|
||||
@ -124,6 +126,7 @@ _dynamic_imports = {
|
||||
"MessageLikeRepresentation": "utils",
|
||||
"_message_from_dict": "utils",
|
||||
"convert_to_messages": "utils",
|
||||
"convert_to_openai_data_block": "content_blocks",
|
||||
"convert_to_openai_image_block": "content_blocks",
|
||||
"convert_to_openai_messages": "utils",
|
||||
"filter_messages": "utils",
|
||||
|
@ -1,5 +1,6 @@
|
||||
"""Types for content blocks."""
|
||||
|
||||
import warnings
|
||||
from typing import Any, Literal, Union
|
||||
|
||||
from pydantic import TypeAdapter, ValidationError
|
||||
@ -108,3 +109,47 @@ def convert_to_openai_image_block(content_block: dict[str, Any]) -> dict:
|
||||
}
|
||||
error_message = "Unsupported source type. Only 'url' and 'base64' are supported."
|
||||
raise ValueError(error_message)
|
||||
|
||||
|
||||
def convert_to_openai_data_block(block: dict) -> dict:
|
||||
"""Format standard data content block to format expected by OpenAI."""
|
||||
if block["type"] == "image":
|
||||
formatted_block = convert_to_openai_image_block(block)
|
||||
|
||||
elif block["type"] == "file":
|
||||
if block["source_type"] == "base64":
|
||||
file = {"file_data": f"data:{block['mime_type']};base64,{block['data']}"}
|
||||
if filename := block.get("filename"):
|
||||
file["filename"] = filename
|
||||
elif (metadata := block.get("metadata")) and ("filename" in metadata):
|
||||
file["filename"] = metadata["filename"]
|
||||
else:
|
||||
warnings.warn(
|
||||
"OpenAI may require a filename for file inputs. Specify a filename "
|
||||
"in the content block: {'type': 'file', 'source_type': 'base64', "
|
||||
"'mime_type': 'application/pdf', 'data': '...', "
|
||||
"'filename': 'my-pdf'}",
|
||||
stacklevel=1,
|
||||
)
|
||||
formatted_block = {"type": "file", "file": file}
|
||||
elif block["source_type"] == "id":
|
||||
formatted_block = {"type": "file", "file": {"file_id": block["id"]}}
|
||||
else:
|
||||
error_msg = "source_type base64 or id is required for file blocks."
|
||||
raise ValueError(error_msg)
|
||||
|
||||
elif block["type"] == "audio":
|
||||
if block["source_type"] == "base64":
|
||||
format = block["mime_type"].split("/")[-1]
|
||||
formatted_block = {
|
||||
"type": "input_audio",
|
||||
"input_audio": {"data": block["data"], "format": format},
|
||||
}
|
||||
else:
|
||||
error_msg = "source_type base64 is required for audio blocks."
|
||||
raise ValueError(error_msg)
|
||||
else:
|
||||
error_msg = f"Block of type {block['type']} is not supported."
|
||||
raise ValueError(error_msg)
|
||||
|
||||
return formatted_block
|
||||
|
@@ -12,6 +12,7 @@ from __future__ import annotations
import base64
import inspect
import json
import logging
import math
from collections.abc import Iterable, Sequence
from functools import partial
@@ -30,6 +31,7 @@ from typing import (
from pydantic import Discriminator, Field, Tag

from langchain_core.exceptions import ErrorCode, create_message
from langchain_core.messages import convert_to_openai_data_block, is_data_content_block
from langchain_core.messages.ai import AIMessage, AIMessageChunk
from langchain_core.messages.base import BaseMessage, BaseMessageChunk
from langchain_core.messages.chat import ChatMessage, ChatMessageChunk
@@ -46,6 +48,8 @@ if TYPE_CHECKING:
    from langchain_core.prompt_values import PromptValue
    from langchain_core.runnables.base import Runnable

logger = logging.getLogger(__name__)


def _get_type(v: Any) -> str:
    """Get the type associated with the object for serialization purposes."""
@@ -1067,6 +1071,17 @@ def convert_to_openai_messages(
                            "image_url": block["image_url"],
                        }
                    )
                # Standard multi-modal content block
                elif is_data_content_block(block):
                    formatted_block = convert_to_openai_data_block(block)
                    if (
                        formatted_block.get("type") == "file"
                        and "file" in formatted_block
                        and "filename" not in formatted_block["file"]
                    ):
                        logger.info("Generating a fallback filename.")
                        formatted_block["file"]["filename"] = "LC_AUTOGENERATED"
                    content.append(formatted_block)
                # Anthropic and Bedrock converse format
                elif (block.get("type") == "image") or "image" in block:
                    # Anthropic
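A minimal sketch of the fallback-filename behavior added here, assuming the public convert_to_openai_messages API; the message content is hypothetical and the expected result mirrors the unit test further down in this diff.

from langchain_core.messages import HumanMessage, convert_to_openai_messages

messages = [
    HumanMessage(
        content=[
            {
                "type": "file",
                "source_type": "base64",
                "data": "<base64 string>",
                "mime_type": "application/pdf",
                # No filename given: convert_to_openai_data_block warns, and
                # convert_to_openai_messages fills in a placeholder filename.
            }
        ]
    )
]
result = convert_to_openai_messages(messages, text_format="block")
assert result[0]["content"][0]["file"]["filename"] == "LC_AUTOGENERATED"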
@@ -2,6 +2,7 @@

import copy
import json
import logging
from json import JSONDecodeError
from typing import Annotated, Any, Optional

@@ -16,6 +17,8 @@ from langchain_core.outputs import ChatGeneration, Generation
from langchain_core.utils.json import parse_partial_json
from langchain_core.utils.pydantic import TypeBaseModel

logger = logging.getLogger(__name__)


def parse_tool_call(
    raw_tool_call: dict[str, Any],
@@ -250,6 +253,14 @@ class JsonOutputKeyToolsParser(JsonOutputToolsParser):
        return parsed_result


# Common cause of ValidationError is truncated output due to max_tokens.
_MAX_TOKENS_ERROR = (
    "Output parser received a `max_tokens` stop reason. "
    "The output is likely incomplete—please increase `max_tokens` "
    "or shorten your prompt."
)


class PydanticToolsParser(JsonOutputToolsParser):
    """Parse tools from OpenAI response."""

@@ -296,6 +307,14 @@ class PydanticToolsParser(JsonOutputToolsParser):
            except (ValidationError, ValueError):
                if partial:
                    continue
                has_max_tokens_stop_reason = any(
                    generation.message.response_metadata.get("stop_reason")
                    == "max_tokens"
                    for generation in result
                    if isinstance(generation, ChatGeneration)
                )
                if has_max_tokens_stop_reason:
                    logger.exception(_MAX_TOKENS_ERROR)
                raise
        if self.first_tool_only:
            return pydantic_objects[0] if pydantic_objects else None
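A rough sketch of how the new guard surfaces truncated tool calls. The Person schema and the truncated tool call are hypothetical; the flow mirrors the test_max_tokens_error test added later in this diff.

from pydantic import BaseModel, ValidationError

from langchain_core.messages import AIMessage
from langchain_core.output_parsers.openai_tools import PydanticToolsParser


class Person(BaseModel):  # hypothetical schema
    name: str
    age: int


parser = PydanticToolsParser(tools=[Person], first_tool_only=True)
truncated = AIMessage(
    content="",
    # "age" is missing because generation stopped early, so validation fails.
    tool_calls=[{"id": "call_1", "name": "Person", "args": {"name": "Erick"}}],
    response_metadata={"stop_reason": "max_tokens"},
)
try:
    parser.invoke(truncated)
except ValidationError:
    # Because stop_reason == "max_tokens", the parser logs _MAX_TOKENS_ERROR
    # via logger.exception before re-raising the ValidationError.
    pass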
@@ -44,6 +44,7 @@ if TYPE_CHECKING:
        MessagesPlaceholder,
        SystemMessagePromptTemplate,
    )
    from langchain_core.prompts.dict import DictPromptTemplate
    from langchain_core.prompts.few_shot import (
        FewShotChatMessagePromptTemplate,
        FewShotPromptTemplate,
@@ -68,6 +69,7 @@ __all__ = (
    "BasePromptTemplate",
    "ChatMessagePromptTemplate",
    "ChatPromptTemplate",
    "DictPromptTemplate",
    "FewShotPromptTemplate",
    "FewShotPromptWithTemplates",
    "FewShotChatMessagePromptTemplate",
@@ -94,6 +96,7 @@ _dynamic_imports = {
    "BaseChatPromptTemplate": "chat",
    "ChatMessagePromptTemplate": "chat",
    "ChatPromptTemplate": "chat",
    "DictPromptTemplate": "dict",
    "HumanMessagePromptTemplate": "chat",
    "MessagesPlaceholder": "chat",
    "SystemMessagePromptTemplate": "chat",
@@ -37,10 +37,10 @@ from langchain_core.messages import (
from langchain_core.messages.base import get_msg_title_repr
from langchain_core.prompt_values import ChatPromptValue, ImageURL, PromptValue
from langchain_core.prompts.base import BasePromptTemplate
from langchain_core.prompts.dict import DictPromptTemplate
from langchain_core.prompts.image import ImagePromptTemplate
from langchain_core.prompts.message import (
    BaseMessagePromptTemplate,
    _DictMessagePromptTemplate,
)
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.prompts.string import (
@@ -396,9 +396,7 @@ class _StringImageMessagePromptTemplate(BaseMessagePromptTemplate):

    prompt: Union[
        StringPromptTemplate,
        list[
            Union[StringPromptTemplate, ImagePromptTemplate, _DictMessagePromptTemplate]
        ],
        list[Union[StringPromptTemplate, ImagePromptTemplate, DictPromptTemplate]],
    ]
    """Prompt template."""
    additional_kwargs: dict = Field(default_factory=dict)
@@ -447,7 +445,12 @@ class _StringImageMessagePromptTemplate(BaseMessagePromptTemplate):
                raise ValueError(msg)
            prompt = []
            for tmpl in template:
                if isinstance(tmpl, str) or isinstance(tmpl, dict) and "text" in tmpl:
                if (
                    isinstance(tmpl, str)
                    or isinstance(tmpl, dict)
                    and "text" in tmpl
                    and set(tmpl.keys()) <= {"type", "text"}
                ):
                    if isinstance(tmpl, str):
                        text: str = tmpl
                    else:
@@ -457,7 +460,15 @@ class _StringImageMessagePromptTemplate(BaseMessagePromptTemplate):
                            text, template_format=template_format
                        )
                    )
                elif isinstance(tmpl, dict) and "image_url" in tmpl:
                elif (
                    isinstance(tmpl, dict)
                    and "image_url" in tmpl
                    and set(tmpl.keys())
                    <= {
                        "type",
                        "image_url",
                    }
                ):
                    img_template = cast("_ImageTemplateParam", tmpl)["image_url"]
                    input_variables = []
                    if isinstance(img_template, str):
@@ -503,7 +514,7 @@ class _StringImageMessagePromptTemplate(BaseMessagePromptTemplate):
                        "format."
                    )
                    raise ValueError(msg)
                data_template_obj = _DictMessagePromptTemplate(
                data_template_obj = DictPromptTemplate(
                    template=cast("dict[str, Any]", tmpl),
                    template_format=template_format,
                )
@@ -592,7 +603,7 @@ class _StringImageMessagePromptTemplate(BaseMessagePromptTemplate):
            elif isinstance(prompt, ImagePromptTemplate):
                formatted = prompt.format(**inputs)
                content.append({"type": "image_url", "image_url": formatted})
            elif isinstance(prompt, _DictMessagePromptTemplate):
            elif isinstance(prompt, DictPromptTemplate):
                formatted = prompt.format(**inputs)
                content.append(formatted)
        return self._msg_class(
@@ -624,7 +635,7 @@ class _StringImageMessagePromptTemplate(BaseMessagePromptTemplate):
            elif isinstance(prompt, ImagePromptTemplate):
                formatted = await prompt.aformat(**inputs)
                content.append({"type": "image_url", "image_url": formatted})
            elif isinstance(prompt, _DictMessagePromptTemplate):
            elif isinstance(prompt, DictPromptTemplate):
                formatted = prompt.format(**inputs)
                content.append(formatted)
        return self._msg_class(
137 libs/core/langchain_core/prompts/dict.py Normal file
@@ -0,0 +1,137 @@
"""Dict prompt template."""

import warnings
from functools import cached_property
from typing import Any, Literal, Optional

from langchain_core.load import dumpd
from langchain_core.prompts.string import (
    DEFAULT_FORMATTER_MAPPING,
    get_template_variables,
)
from langchain_core.runnables import RunnableConfig, RunnableSerializable
from langchain_core.runnables.config import ensure_config


class DictPromptTemplate(RunnableSerializable[dict, dict]):
    """Template represented by a dict.

    Recognizes variables in f-string or mustache formatted string dict values. Does NOT
    recognize variables in dict keys. Applies recursively.
    """

    template: dict[str, Any]
    template_format: Literal["f-string", "mustache"]

    @property
    def input_variables(self) -> list[str]:
        """Template input variables."""
        return _get_input_variables(self.template, self.template_format)

    def format(self, **kwargs: Any) -> dict[str, Any]:
        """Format the prompt with the inputs."""
        return _insert_input_variables(self.template, kwargs, self.template_format)

    async def aformat(self, **kwargs: Any) -> dict[str, Any]:
        """Format the prompt with the inputs."""
        return self.format(**kwargs)

    def invoke(
        self, input: dict, config: Optional[RunnableConfig] = None, **kwargs: Any
    ) -> dict:
        """Invoke the prompt."""
        return self._call_with_config(
            lambda x: self.format(**x),
            input,
            ensure_config(config),
            run_type="prompt",
            serialized=self._serialized,
            **kwargs,
        )

    @property
    def _prompt_type(self) -> str:
        return "dict-prompt"

    @cached_property
    def _serialized(self) -> dict[str, Any]:
        return dumpd(self)

    @classmethod
    def is_lc_serializable(cls) -> bool:
        """Return whether or not the class is serializable.

        Returns: True.
        """
        return True

    @classmethod
    def get_lc_namespace(cls) -> list[str]:
        """Serialization namespace."""
        return ["langchain_core", "prompts", "dict"]

    def pretty_repr(self, *, html: bool = False) -> str:
        """Human-readable representation.

        Args:
            html: Whether to format as HTML. Defaults to False.

        Returns:
            Human-readable representation.
        """
        raise NotImplementedError


def _get_input_variables(
    template: dict, template_format: Literal["f-string", "mustache"]
) -> list[str]:
    input_variables = []
    for v in template.values():
        if isinstance(v, str):
            input_variables += get_template_variables(v, template_format)
        elif isinstance(v, dict):
            input_variables += _get_input_variables(v, template_format)
        elif isinstance(v, (list, tuple)):
            for x in v:
                if isinstance(x, str):
                    input_variables += get_template_variables(x, template_format)
                elif isinstance(x, dict):
                    input_variables += _get_input_variables(x, template_format)
        else:
            pass
    return list(set(input_variables))


def _insert_input_variables(
    template: dict[str, Any],
    inputs: dict[str, Any],
    template_format: Literal["f-string", "mustache"],
) -> dict[str, Any]:
    formatted = {}
    formatter = DEFAULT_FORMATTER_MAPPING[template_format]
    for k, v in template.items():
        if isinstance(v, str):
            formatted[k] = formatter(v, **inputs)
        elif isinstance(v, dict):
            if k == "image_url" and "path" in v:
                msg = (
                    "Specifying image inputs via file path in environments with "
                    "user-input paths is a security vulnerability. Out of an abundance "
                    "of caution, the utility has been removed to prevent possible "
                    "misuse."
                )
                warnings.warn(msg, stacklevel=2)
            formatted[k] = _insert_input_variables(v, inputs, template_format)
        elif isinstance(v, (list, tuple)):
            formatted_v = []
            for x in v:
                if isinstance(x, str):
                    formatted_v.append(formatter(x, **inputs))
                elif isinstance(x, dict):
                    formatted_v.append(
                        _insert_input_variables(x, inputs, template_format)
                    )
            formatted[k] = type(v)(formatted_v)
        else:
            formatted[k] = v
    return formatted
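A small usage sketch of the new DictPromptTemplate, consistent with the test_dict.py file added later in this diff; the template values are made up.

from langchain_core.prompts.dict import DictPromptTemplate

prompt = DictPromptTemplate(
    template={
        "type": "text",
        "text": "{text1}",
        "cache_control": {"type": "{cache_type}"},
    },
    template_format="f-string",
)
# Variables are collected recursively from string values (not from keys).
assert set(prompt.input_variables) == {"text1", "cache_type"}
# format() (and invoke(), since it is a Runnable) fills the variables in place.
assert prompt.format(text1="important message", cache_type="ephemeral") == {
    "type": "text",
    "text": "important message",
    "cache_control": {"type": "ephemeral"},
}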
@@ -3,14 +3,10 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any, Literal
from typing import TYPE_CHECKING, Any

from langchain_core.load import Serializable
from langchain_core.messages import BaseMessage, convert_to_messages
from langchain_core.prompts.string import (
    DEFAULT_FORMATTER_MAPPING,
    get_template_variables,
)
from langchain_core.messages import BaseMessage
from langchain_core.utils.interactive_env import is_interactive_env

if TYPE_CHECKING:
@@ -98,89 +94,3 @@ class BaseMessagePromptTemplate(Serializable, ABC):

        prompt = ChatPromptTemplate(messages=[self])
        return prompt + other


class _DictMessagePromptTemplate(BaseMessagePromptTemplate):
    """Template represented by a dict that recursively fills input vars in string vals.

    Special handling of image_url dicts to load local paths. These look like:
    ``{"type": "image_url", "image_url": {"path": "..."}}``
    """

    template: dict[str, Any]
    template_format: Literal["f-string", "mustache"]

    def format_messages(self, **kwargs: Any) -> list[BaseMessage]:
        msg_dict = _insert_input_variables(self.template, kwargs, self.template_format)
        return convert_to_messages([msg_dict])

    @property
    def input_variables(self) -> list[str]:
        return _get_input_variables(self.template, self.template_format)

    @property
    def _prompt_type(self) -> str:
        return "message-dict-prompt"

    @classmethod
    def get_lc_namespace(cls) -> list[str]:
        return ["langchain_core", "prompts", "message"]

    def format(
        self,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Format the prompt with the inputs."""
        return _insert_input_variables(self.template, kwargs, self.template_format)


def _get_input_variables(
    template: dict, template_format: Literal["f-string", "mustache"]
) -> list[str]:
    input_variables = []
    for v in template.values():
        if isinstance(v, str):
            input_variables += get_template_variables(v, template_format)
        elif isinstance(v, dict):
            input_variables += _get_input_variables(v, template_format)
        elif isinstance(v, (list, tuple)):
            for x in v:
                if isinstance(x, str):
                    input_variables += get_template_variables(x, template_format)
                elif isinstance(x, dict):
                    input_variables += _get_input_variables(x, template_format)
    return list(set(input_variables))


def _insert_input_variables(
    template: dict[str, Any],
    inputs: dict[str, Any],
    template_format: Literal["f-string", "mustache"],
) -> dict[str, Any]:
    formatted = {}
    formatter = DEFAULT_FORMATTER_MAPPING[template_format]
    for k, v in template.items():
        if isinstance(v, str):
            formatted[k] = formatter(v, **inputs)
        elif isinstance(v, dict):
            # No longer support loading local images.
            if k == "image_url" and "path" in v:
                msg = (
                    "Specifying image inputs via file path in environments with "
                    "user-input paths is a security vulnerability. Out of an abundance "
                    "of caution, the utility has been removed to prevent possible "
                    "misuse."
                )
                raise ValueError(msg)
            formatted[k] = _insert_input_variables(v, inputs, template_format)
        elif isinstance(v, (list, tuple)):
            formatted_v = []
            for x in v:
                if isinstance(x, str):
                    formatted_v.append(formatter(x, **inputs))
                elif isinstance(x, dict):
                    formatted_v.append(
                        _insert_input_variables(x, inputs, template_format)
                    )
            formatted[k] = type(v)(formatted_v)
    return formatted
@@ -108,7 +108,7 @@ class Node(NamedTuple):

    id: str
    name: str
    data: Union[type[BaseModel], RunnableType]
    data: Union[type[BaseModel], RunnableType, None]
    metadata: Optional[dict[str, Any]]

    def copy(self, *, id: Optional[str] = None, name: Optional[str] = None) -> Node:
@@ -181,7 +181,7 @@ class MermaidDrawMethod(Enum):
    API = "api"  # Uses Mermaid.INK API to render the graph


def node_data_str(id: str, data: Union[type[BaseModel], RunnableType]) -> str:
def node_data_str(id: str, data: Union[type[BaseModel], RunnableType, None]) -> str:
    """Convert the data of a node to a string.

    Args:
@@ -193,7 +193,7 @@ def node_data_str(id: str, data: Union[type[BaseModel], RunnableType]) -> str:
    """
    from langchain_core.runnables.base import Runnable

    if not is_uuid(id):
    if not is_uuid(id) or data is None:
        return id
    data_str = data.get_name() if isinstance(data, Runnable) else data.__name__
    return data_str if not data_str.startswith("Runnable") else data_str[8:]
@@ -215,8 +215,10 @@ def node_data_json(
    from langchain_core.load.serializable import to_json_not_implemented
    from langchain_core.runnables.base import Runnable, RunnableSerializable

    if isinstance(node.data, RunnableSerializable):
        json: dict[str, Any] = {
    if node.data is None:
        json: dict[str, Any] = {}
    elif isinstance(node.data, RunnableSerializable):
        json = {
            "type": "runnable",
            "data": {
                "id": node.data.lc_id(),
@@ -317,7 +319,7 @@ class Graph:

    def add_node(
        self,
        data: Union[type[BaseModel], RunnableType],
        data: Union[type[BaseModel], RunnableType, None],
        id: Optional[str] = None,
        *,
        metadata: Optional[dict[str, Any]] = None,
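A minimal sketch of what the relaxed Node/add_node typing permits, assuming the existing Graph and node_data_str APIs from this module; the node id below is made up.

from langchain_core.runnables.graph import Graph, node_data_str

graph = Graph()
node = graph.add_node(None, id="start")  # data may now be None
# For data-less nodes, node_data_str falls back to the node id.
assert node_data_str(node.id, node.data) == "start"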
@@ -1,3 +1,3 @@
"""langchain-core version information and utilities."""

VERSION = "0.3.54"
VERSION = "0.3.56"
@@ -17,7 +17,7 @@ dependencies = [
    "pydantic<3.0.0,>=2.7.4; python_full_version >= \"3.12.4\"",
]
name = "langchain-core"
version = "0.3.54"
version = "0.3.56"
description = "Building applications with LLMs through composability"
readme = "README.md"
@@ -13,6 +13,7 @@ from langchain_core.language_models import (
    FakeListChatModel,
    ParrotFakeChatModel,
)
from langchain_core.language_models._utils import _normalize_messages
from langchain_core.language_models.fake_chat_models import FakeListChatModelError
from langchain_core.messages import (
    AIMessage,
@@ -455,3 +456,143 @@ def test_trace_images_in_openai_format() -> None:
            "url": "https://example.com/image.png",
        }
    ]


def test_extend_support_to_openai_multimodal_formats() -> None:
    """Test that chat models normalize OpenAI file and audio inputs."""
    llm = ParrotFakeChatModel()
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Hello"},
                {
                    "type": "image_url",
                    "image_url": {"url": "https://example.com/image.png"},
                },
                {
                    "type": "image_url",
                    "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQSkZJRg..."},
                },
                {
                    "type": "file",
                    "file": {
                        "filename": "draconomicon.pdf",
                        "file_data": "data:application/pdf;base64,<base64 string>",
                    },
                },
                {
                    "type": "file",
                    "file": {
                        "file_data": "data:application/pdf;base64,<base64 string>",
                    },
                },
                {
                    "type": "file",
                    "file": {"file_id": "<file id>"},
                },
                {
                    "type": "input_audio",
                    "input_audio": {"data": "<base64 data>", "format": "wav"},
                },
            ],
        },
    ]
    expected_content = [
        {"type": "text", "text": "Hello"},
        {
            "type": "image_url",
            "image_url": {"url": "https://example.com/image.png"},
        },
        {
            "type": "image_url",
            "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQSkZJRg..."},
        },
        {
            "type": "file",
            "source_type": "base64",
            "data": "<base64 string>",
            "mime_type": "application/pdf",
            "filename": "draconomicon.pdf",
        },
        {
            "type": "file",
            "source_type": "base64",
            "data": "<base64 string>",
            "mime_type": "application/pdf",
        },
        {
            "type": "file",
            "file": {"file_id": "<file id>"},
        },
        {
            "type": "audio",
            "source_type": "base64",
            "data": "<base64 data>",
            "mime_type": "audio/wav",
        },
    ]
    response = llm.invoke(messages)
    assert response.content == expected_content

    # Test no mutation
    assert messages[0]["content"] == [
        {"type": "text", "text": "Hello"},
        {
            "type": "image_url",
            "image_url": {"url": "https://example.com/image.png"},
        },
        {
            "type": "image_url",
            "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQSkZJRg..."},
        },
        {
            "type": "file",
            "file": {
                "filename": "draconomicon.pdf",
                "file_data": "data:application/pdf;base64,<base64 string>",
            },
        },
        {
            "type": "file",
            "file": {
                "file_data": "data:application/pdf;base64,<base64 string>",
            },
        },
        {
            "type": "file",
            "file": {"file_id": "<file id>"},
        },
        {
            "type": "input_audio",
            "input_audio": {"data": "<base64 data>", "format": "wav"},
        },
    ]


def test_normalize_messages_edge_cases() -> None:
    # Test some blocks that should pass through
    messages = [
        HumanMessage(
            content=[
                {
                    "type": "file",
                    "file": "uri",
                },
                {
                    "type": "input_file",
                    "file_data": "uri",
                    "filename": "file-name",
                },
                {
                    "type": "input_audio",
                    "input_audio": "uri",
                },
                {
                    "type": "input_image",
                    "image_url": "uri",
                },
            ]
        )
    ]
    assert messages == _normalize_messages(messages)
@@ -33,6 +33,7 @@ EXPECTED_ALL = [
    "filter_messages",
    "merge_message_runs",
    "trim_messages",
    "convert_to_openai_data_block",
    "convert_to_openai_image_block",
    "convert_to_openai_messages",
]
@@ -1186,6 +1186,76 @@ def test_convert_to_openai_messages_developer() -> None:
    assert result == [{"role": "developer", "content": "a"}] * 2


def test_convert_to_openai_messages_multimodal() -> None:
    messages = [
        HumanMessage(
            content=[
                {"type": "text", "text": "Text message"},
                {
                    "type": "image",
                    "source_type": "url",
                    "url": "https://example.com/test.png",
                },
                {
                    "type": "image",
                    "source_type": "base64",
                    "data": "<base64 string>",
                    "mime_type": "image/png",
                },
                {
                    "type": "file",
                    "source_type": "base64",
                    "data": "<base64 string>",
                    "mime_type": "application/pdf",
                    "filename": "test.pdf",
                },
                {
                    "type": "file",
                    "source_type": "id",
                    "id": "file-abc123",
                },
                {
                    "type": "audio",
                    "source_type": "base64",
                    "data": "<base64 string>",
                    "mime_type": "audio/wav",
                },
            ]
        )
    ]
    result = convert_to_openai_messages(messages, text_format="block")
    assert len(result) == 1
    message = result[0]
    assert len(message["content"]) == 6

    # Test adding filename
    messages = [
        HumanMessage(
            content=[
                {
                    "type": "file",
                    "source_type": "base64",
                    "data": "<base64 string>",
                    "mime_type": "application/pdf",
                },
            ]
        )
    ]
    with pytest.warns(match="filename"):
        result = convert_to_openai_messages(messages, text_format="block")
    assert len(result) == 1
    message = result[0]
    assert len(message["content"]) == 1
    block = message["content"][0]
    assert block == {
        "type": "file",
        "file": {
            "file_data": "data:application/pdf;base64,<base64 string>",
            "filename": "LC_AUTOGENERATED",
        },
    }


def test_count_tokens_approximately_empty_messages() -> None:
    # Test with empty message list
    assert count_tokens_approximately([]) == 0
@@ -2,7 +2,7 @@ from collections.abc import AsyncIterator, Iterator
from typing import Any

import pytest
from pydantic import BaseModel, Field
from pydantic import BaseModel, Field, ValidationError

from langchain_core.messages import (
    AIMessage,
@@ -635,3 +635,24 @@ def test_parse_with_different_pydantic_1_proper() -> None:
            forecast="Sunny",
        )
    ]


def test_max_tokens_error(caplog: Any) -> None:
    parser = PydanticToolsParser(tools=[NameCollector], first_tool_only=True)
    input = AIMessage(
        content="",
        tool_calls=[
            {
                "id": "call_OwL7f5PE",
                "name": "NameCollector",
                "args": {"names": ["suz", "jerm"]},
            }
        ],
        response_metadata={"stop_reason": "max_tokens"},
    )
    with pytest.raises(ValidationError):
        _ = parser.invoke(input)
    assert any(
        "`max_tokens` stop reason" in msg and record.levelname == "ERROR"
        for record, msg in zip(caplog.records, caplog.messages)
    )
@@ -3135,6 +3135,27 @@
            'name': 'PromptTemplate',
            'type': 'constructor',
          }),
          dict({
            'id': list([
              'langchain_core',
              'prompts',
              'dict',
              'DictPromptTemplate',
            ]),
            'kwargs': dict({
              'template': dict({
                'cache_control': dict({
                  'type': '{foo}',
                }),
                'text': "What's in this image?",
                'type': 'text',
              }),
              'template_format': 'f-string',
            }),
            'lc': 1,
            'name': 'DictPromptTemplate',
            'type': 'constructor',
          }),
          dict({
            'id': list([
              'langchain',
@@ -973,6 +973,11 @@ def test_chat_tmpl_serdes(snapshot: SnapshotAssertion) -> None:
                "hello",
                {"text": "What's in this image?"},
                {"type": "text", "text": "What's in this image?"},
                {
                    "type": "text",
                    "text": "What's in this image?",
                    "cache_control": {"type": "{foo}"},
                },
                {
                    "type": "image_url",
                    "image_url": "data:image/jpeg;base64,{my_image}",
@@ -1012,7 +1017,7 @@ def test_chat_tmpl_serdes(snapshot: SnapshotAssertion) -> None:
@pytest.mark.xfail(
    reason=(
        "In a breaking release, we can update `_convert_to_message_template` to use "
        "_DictMessagePromptTemplate for all `dict` inputs, allowing for templatization "
        "DictPromptTemplate for all `dict` inputs, allowing for templatization "
        "of message attributes outside content blocks. That would enable the below "
        "test to pass."
    )
34 libs/core/tests/unit_tests/prompts/test_dict.py Normal file
@@ -0,0 +1,34 @@
from langchain_core.load import load
from langchain_core.prompts.dict import DictPromptTemplate


def test__dict_message_prompt_template_fstring() -> None:
    template = {
        "type": "text",
        "text": "{text1}",
        "cache_control": {"type": "{cache_type}"},
    }
    prompt = DictPromptTemplate(template=template, template_format="f-string")
    expected = {
        "type": "text",
        "text": "important message",
        "cache_control": {"type": "ephemeral"},
    }
    actual = prompt.format(text1="important message", cache_type="ephemeral")
    assert actual == expected


def test_deserialize_legacy() -> None:
    ser = {
        "type": "constructor",
        "lc": 1,
        "id": ["langchain_core", "prompts", "message", "_DictMessagePromptTemplate"],
        "kwargs": {
            "template_format": "f-string",
            "template": {"type": "audio", "audio": "{audio_data}"},
        },
    }
    expected = DictPromptTemplate(
        template={"type": "audio", "audio": "{audio_data}"}, template_format="f-string"
    )
    assert load(ser) == expected
@@ -6,6 +6,7 @@ EXPECTED_ALL = [
    "BasePromptTemplate",
    "ChatMessagePromptTemplate",
    "ChatPromptTemplate",
    "DictPromptTemplate",
    "FewShotPromptTemplate",
    "FewShotPromptWithTemplates",
    "FewShotChatMessagePromptTemplate",
@@ -1,61 +0,0 @@
from pathlib import Path

from langchain_core.messages import AIMessage, BaseMessage, ToolMessage
from langchain_core.prompts.message import _DictMessagePromptTemplate

CUR_DIR = Path(__file__).parent.absolute().resolve()


def test__dict_message_prompt_template_fstring() -> None:
    template = {
        "role": "assistant",
        "content": [
            {"type": "text", "text": "{text1}", "cache_control": {"type": "ephemeral"}},
        ],
        "name": "{name1}",
        "tool_calls": [
            {
                "name": "{tool_name1}",
                "args": {"arg1": "{tool_arg1}"},
                "id": "1",
                "type": "tool_call",
            }
        ],
    }
    prompt = _DictMessagePromptTemplate(template=template, template_format="f-string")
    expected: BaseMessage = AIMessage(
        [
            {
                "type": "text",
                "text": "important message",
                "cache_control": {"type": "ephemeral"},
            },
        ],
        name="foo",
        tool_calls=[
            {
                "name": "do_stuff",
                "args": {"arg1": "important arg1"},
                "id": "1",
                "type": "tool_call",
            }
        ],
    )
    actual = prompt.format_messages(
        text1="important message",
        name1="foo",
        tool_arg1="important arg1",
        tool_name1="do_stuff",
    )[0]
    assert actual == expected

    template = {
        "role": "tool",
        "content": "{content1}",
        "tool_call_id": "1",
        "name": "{name1}",
    }
    prompt = _DictMessagePromptTemplate(template=template, template_format="f-string")
    expected = ToolMessage("foo", name="bar", tool_call_id="1")
    actual = prompt.format_messages(content1="foo", name1="bar")[0]
    assert actual == expected
Some files were not shown because too many files have changed in this diff.