mirror of
https://github.com/hwchase17/langchain.git
synced 2026-04-04 11:25:11 +00:00
220 lines
4.8 KiB
Plaintext
220 lines
4.8 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "b843b5c4",
|
|
"metadata": {},
|
|
"source": [
|
|
"# How to cache LLM responses\n",
|
|
"\n",
|
|
"LangChain provides an optional caching layer for LLMs. This is useful for two reasons:\n",
|
|
"\n",
|
|
"- It can save you money by reducing the number of API calls you make to the LLM provider, if you're often requesting the same completion multiple times.\n",
|
|
"- It can speed up your application by reducing the number of API calls you make to the LLM provider.\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "0aa6d335",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from langchain.globals import set_llm_cache\n",
|
|
"from langchain_openai import OpenAI\n",
|
|
"\n",
|
|
"# To make the caching really obvious, let's use a slower model.\n",
|
|
"llm = OpenAI(model_name=\"gpt-3.5-turbo-instruct\", n=2, best_of=2)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"id": "f168ff0d",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"CPU times: user 13.7 ms, sys: 6.54 ms, total: 20.2 ms\n",
|
|
"Wall time: 330 ms\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"\"\\n\\nWhy couldn't the bicycle stand up by itself? Because it was two-tired!\""
|
|
]
|
|
},
|
|
"execution_count": 12,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"%%time\n",
|
|
"from langchain.cache import InMemoryCache\n",
|
|
"\n",
|
|
"set_llm_cache(InMemoryCache())\n",
|
|
"\n",
|
|
"# The first time, it is not yet in cache, so it should take longer\n",
|
|
"llm.predict(\"Tell me a joke\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"id": "ce7620fb",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"CPU times: user 436 µs, sys: 921 µs, total: 1.36 ms\n",
|
|
"Wall time: 1.36 ms\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"\"\\n\\nWhy couldn't the bicycle stand up by itself? Because it was two-tired!\""
|
|
]
|
|
},
|
|
"execution_count": 13,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"%%time\n",
|
|
"# The second time, the response is already in the cache, so the call returns much faster\n",
|
|
"llm.predict(\"Tell me a joke\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "4ab452f4",
|
|
"metadata": {},
|
|
"source": [
|
|
"## SQLite Cache"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"id": "2e65de83",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"!rm .langchain.db"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"id": "0be83715",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# We can do the same thing with a SQLite cache\n",
|
|
"from langchain.cache import SQLiteCache\n",
|
|
"\n",
|
|
"set_llm_cache(SQLiteCache(database_path=\".langchain.db\"))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"id": "9b427ce7",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"CPU times: user 29.3 ms, sys: 17.3 ms, total: 46.7 ms\n",
|
|
"Wall time: 364 ms\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"'\\n\\nWhy did the tomato turn red?\\n\\nBecause it saw the salad dressing!'"
|
|
]
|
|
},
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"%%time\n",
|
|
"# The first time, it is not yet in cache, so it should take longer\n",
|
|
"llm.predict(\"Tell me a joke\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"id": "87f52611",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"CPU times: user 4.58 ms, sys: 2.23 ms, total: 6.8 ms\n",
|
|
"Wall time: 4.68 ms\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"'\\n\\nWhy did the tomato turn red?\\n\\nBecause it saw the salad dressing!'"
|
|
]
|
|
},
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"%%time\n",
|
|
"# The second time, the response is already in the cache, so the call returns much faster\n",
|
|
"llm.predict(\"Tell me a joke\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "6a9bb158",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.1"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|