langchain/docs/versioned_docs/version-0.2.x/how_to/llm_caching.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "b843b5c4",
   "metadata": {},
   "source": [
    "# How to cache LLM responses\n",
    "\n",
    "LangChain provides an optional caching layer for LLMs. This is useful for two reasons:\n",
    "\n",
    "It can save you money by reducing the number of API calls you make to the LLM provider, if you're often requesting the same completion multiple times.\n",
    "It can speed up your application by reducing the number of API calls you make to the LLM provider.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "0aa6d335",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.globals import set_llm_cache\n",
    "from langchain_openai import OpenAI\n",
    "\n",
    "# To make the caching really obvious, lets use a slower model.\n",
    "llm = OpenAI(model_name=\"gpt-3.5-turbo-instruct\", n=2, best_of=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "f168ff0d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 13.7 ms, sys: 6.54 ms, total: 20.2 ms\n",
      "Wall time: 330 ms\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "\"\\n\\nWhy couldn't the bicycle stand up by itself? Because it was two-tired!\""
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "from langchain.cache import InMemoryCache\n",
    "\n",
    "set_llm_cache(InMemoryCache())\n",
    "\n",
    "# The first time, it is not yet in cache, so it should take longer\n",
    "llm.predict(\"Tell me a joke\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "ce7620fb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 436 µs, sys: 921 µs, total: 1.36 ms\n",
      "Wall time: 1.36 ms\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "\"\\n\\nWhy couldn't the bicycle stand up by itself? Because it was two-tired!\""
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "# The second time it is, so it goes faster\n",
    "llm.predict(\"Tell me a joke\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4ab452f4",
   "metadata": {},
   "source": [
    "## SQLite Cache"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "2e65de83",
   "metadata": {},
   "outputs": [],
   "source": [
    "!rm .langchain.db"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "0be83715",
   "metadata": {},
   "outputs": [],
   "source": [
    "# We can do the same thing with a SQLite cache\n",
    "from langchain.cache import SQLiteCache\n",
    "\n",
    "set_llm_cache(SQLiteCache(database_path=\".langchain.db\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "9b427ce7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 29.3 ms, sys: 17.3 ms, total: 46.7 ms\n",
      "Wall time: 364 ms\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'\\n\\nWhy did the tomato turn red?\\n\\nBecause it saw the salad dressing!'"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "# The first time, it is not yet in cache, so it should take longer\n",
    "llm.predict(\"Tell me a joke\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "87f52611",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 4.58 ms, sys: 2.23 ms, total: 6.8 ms\n",
      "Wall time: 4.68 ms\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'\\n\\nWhy did the tomato turn red?\\n\\nBecause it saw the salad dressing!'"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "# The second time it is, so it goes faster\n",
    "llm.predict(\"Tell me a joke\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6a9bb158",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}