changes to llm chain (#6328)

- return raw and full output (but keep run shortcut method functional) - change output parser to take in generations (good for working with messages) - add output parser to base class, always run (default to same as current) --------- Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
2025-09-05 04:55:14 +00:00 · 2023-06-18 22:49:47 -07:00
parent d3c2eab0b3
commit 6a4a950a3c
16 changed files with 704 additions and 298 deletions
--- a/docs/extras/modules/chains/additional/extraction.ipynb
+++ b/docs/extras/modules/chains/additional/extraction.ipynb
@@ -17,7 +17,16 @@
   "execution_count": 1,
   "id": "34f04daf",
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/harrisonchase/.pyenv/versions/3.9.1/envs/langchain/lib/python3.9/site-packages/deeplake/util/check_latest_version.py:32: UserWarning: A newer version of deeplake (3.6.4) is available. It's recommended that you update to the latest version using `pip install -U deeplake`.\n",
+      "  warnings.warn(\n"
+     ]
+    }
+   ],
   "source": [
    "from langchain.chat_models import ChatOpenAI\n",
    "from langchain.chains import create_extraction_chain, create_extraction_chain_pydantic\n",
@@ -71,7 +80,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 4,
   "id": "640bd005",
   "metadata": {},
   "outputs": [],
@@ -84,7 +93,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 5,
   "id": "64313214",
   "metadata": {},
   "outputs": [],
@@ -102,7 +111,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 6,
   "id": "cc5436ed",
   "metadata": {},
   "outputs": [
@@ -119,7 +128,7 @@
       "  'person_hair_color': 'brunette'}]"
      ]
     },
-     "execution_count": 8,
+     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -150,7 +159,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 7,
   "id": "6792866b",
   "metadata": {},
   "outputs": [],
@@ -161,7 +170,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 8,
   "id": "36a63761",
   "metadata": {},
   "outputs": [],
@@ -176,7 +185,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 9,
   "id": "8ffd1e57",
   "metadata": {},
   "outputs": [],
@@ -186,7 +195,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 10,
   "id": "24baa954",
   "metadata": {
    "scrolled": false
@@ -220,7 +229,7 @@
       " Properties(person_name='Claudia', person_height=6, person_hair_color='brunette', dog_breed=None, dog_name=None)]"
      ]
     },
-     "execution_count": 13,
+     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -228,13 +237,21 @@
   "source": [
    "chain.run(inp)"
   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0df61283",
+   "metadata": {},
+   "outputs": [],
+   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "general",
+   "display_name": "Python 3 (ipykernel)",
   "language": "python",
-   "name": "general"
+   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
@@ -246,7 +263,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.6"
+   "version": "3.9.1"
  }
 },
 "nbformat": 4,
--- a/docs/extras/modules/chains/additional/qa_citations.ipynb
+++ b/docs/extras/modules/chains/additional/qa_citations.ipynb
@@ -0,0 +1,181 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "9b5c258f",
+   "metadata": {},
+   "source": [
+    "# Question-Answering Citations\n",
+    "\n",
+    "This notebook shows how to use the OpenAI functions capability to extract citations from text."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "eae4ca3e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/harrisonchase/.pyenv/versions/3.9.1/envs/langchain/lib/python3.9/site-packages/deeplake/util/check_latest_version.py:32: UserWarning: A newer version of deeplake (3.6.4) is available. It's recommended that you update to the latest version using `pip install -U deeplake`.\n",
+      "  warnings.warn(\n"
+     ]
+    }
+   ],
+   "source": [
+    "from langchain.chains import create_citation_fuzzy_match_chain\n",
+    "from langchain.chat_models import ChatOpenAI"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "2c6e62ee",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "question = \"What did the author do during college?\"\n",
+    "context = \"\"\"\n",
+    "My name is Jason Liu, and I grew up in Toronto Canada but I was born in China.\n",
+    "I went to an arts highschool but in university I studied Computational Mathematics and physics. \n",
+    "As part of coop I worked at many companies including Stitchfix, Facebook.\n",
+    "I also started the Data Science club at the University of Waterloo and I was the president of the club for 2 years.\n",
+    "\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "078e0300",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "02cad6d0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "chain = create_citation_fuzzy_match_chain(llm)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "e3c6e7ba",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "result = chain.run(question=question, context=context)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "6f7615f2",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "question='What did the author do during college?' answer=[FactWithEvidence(fact='The author studied Computational Mathematics and physics in university.', substring_quote=['in university I studied Computational Mathematics and physics']), FactWithEvidence(fact='The author started the Data Science club at the University of Waterloo.', substring_quote=['I also started the Data Science club at the University of Waterloo']), FactWithEvidence(fact='The author was the president of the Data Science club for 2 years.', substring_quote=['I was the president of the club for 2 years'])]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(result)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "3be6f366",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def highlight(text, span):\n",
+    "    return (\n",
+    "        \"...\"\n",
+    "        + text[span[0] - 20 : span[0]]\n",
+    "        + \"*\"\n",
+    "        + \"\\033[91m\"\n",
+    "        + text[span[0] : span[1]]\n",
+    "        + \"\\033[0m\"\n",
+    "        + \"*\"\n",
+    "        + text[span[1] : span[1] + 20]\n",
+    "        + \"...\"\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "636c4528",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Statement: The author studied Computational Mathematics and physics in university.\n",
+      "Citation: ...arts highschool but *\u001b[91min university I studied Computational Mathematics and physics\u001b[0m*. \n",
+      "As part of coop I...\n",
+      "\n",
+      "Statement: The author started the Data Science club at the University of Waterloo.\n",
+      "Citation: ...titchfix, Facebook.\n",
+      "*\u001b[91mI also started the Data Science club at the University of Waterloo\u001b[0m* and I was the presi...\n",
+      "\n",
+      "Statement: The author was the president of the Data Science club for 2 years.\n",
+      "Citation: ...ity of Waterloo and *\u001b[91mI was the president of the club for 2 years\u001b[0m*.\n",
+      "...\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "for fact in result.answer:\n",
+    "    print(\"Statement:\", fact.fact)\n",
+    "    for span in fact.get_spans(context):\n",
+    "        print(\"Citation:\", highlight(context, span))\n",
+    "    print()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8409cab0",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/docs/extras/modules/chains/additional/tagging.ipynb
+++ b/docs/extras/modules/chains/additional/tagging.ipynb
@@ -17,7 +17,16 @@
   "execution_count": 1,
   "id": "bafb496a",
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/harrisonchase/.pyenv/versions/3.9.1/envs/langchain/lib/python3.9/site-packages/deeplake/util/check_latest_version.py:32: UserWarning: A newer version of deeplake (3.6.4) is available. It's recommended that you update to the latest version using `pip install -U deeplake`.\n",
+      "  warnings.warn(\n"
+     ]
+    }
+   ],
   "source": [
    "from langchain.chat_models import ChatOpenAI\n",
    "from langchain.chains import create_tagging_chain, create_tagging_chain_pydantic\n",
@@ -52,7 +61,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
   "id": "8329f943",
   "metadata": {},
   "outputs": [],
@@ -68,7 +77,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 4,
   "id": "6146ae70",
   "metadata": {},
   "outputs": [],
@@ -88,7 +97,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 59,
+   "execution_count": 5,
   "id": "5509b6a6",
   "metadata": {},
   "outputs": [
@@ -98,7 +107,7 @@
       "{'sentiment': 'positive', 'language': 'Spanish'}"
      ]
     },
-     "execution_count": 59,
+     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -110,17 +119,17 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 60,
+   "execution_count": 6,
   "id": "9154474c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "{'sentiment': 'enojado', 'aggressiveness': 1, 'language': 'Spanish'}"
+       "{'sentiment': 'enojado', 'aggressiveness': 1, 'language': 'es'}"
      ]
     },
-     "execution_count": 60,
+     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -132,7 +141,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 61,
+   "execution_count": 7,
   "id": "aae85b27",
   "metadata": {},
   "outputs": [
@@ -142,7 +151,7 @@
       "{'sentiment': 'positive', 'aggressiveness': 0, 'language': 'English'}"
      ]
     },
-     "execution_count": 61,
+     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -176,7 +185,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 8,
   "id": "6a5f7961",
   "metadata": {},
   "outputs": [],
@@ -200,7 +209,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 9,
   "id": "e5a5881f",
   "metadata": {},
   "outputs": [],
@@ -218,7 +227,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 10,
   "id": "d9b9d53d",
   "metadata": {},
   "outputs": [
@@ -228,7 +237,7 @@
       "{'sentiment': 'happy', 'aggressiveness': 0, 'language': 'spanish'}"
      ]
     },
-     "execution_count": 13,
+     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -240,7 +249,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 11,
   "id": "1c12fa00",
   "metadata": {},
   "outputs": [
@@ -250,7 +259,7 @@
       "{'sentiment': 'sad', 'aggressiveness': 10, 'language': 'spanish'}"
      ]
     },
-     "execution_count": 14,
+     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -262,7 +271,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 12,
   "id": "0bdfcb05",
   "metadata": {},
   "outputs": [
@@ -272,7 +281,7 @@
       "{'sentiment': 'neutral', 'aggressiveness': 0, 'language': 'english'}"
      ]
     },
-     "execution_count": 15,
+     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -304,7 +313,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 13,
   "id": "bf1f367e",
   "metadata": {},
   "outputs": [],
@@ -315,7 +324,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 14,
   "id": "83a2e826",
   "metadata": {},
   "outputs": [],
@@ -334,7 +343,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 15,
   "id": "6e404892",
   "metadata": {},
   "outputs": [],
@@ -344,7 +353,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 16,
   "id": "b5fc43c4",
   "metadata": {},
   "outputs": [],
@@ -355,7 +364,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 17,
   "id": "5074bcc3",
   "metadata": {},
   "outputs": [
@@ -365,7 +374,7 @@
       "Tags(sentiment='sad', aggressiveness=10, language='spanish')"
      ]
     },
-     "execution_count": 26,
+     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -377,9 +386,9 @@
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "general",
+   "display_name": "Python 3 (ipykernel)",
   "language": "python",
-   "name": "general"
+   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
@@ -391,7 +400,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.6"
+   "version": "3.9.1"
  }
 },
 "nbformat": 4,