Enable streaming for OpenAI LLM (#986)

* Support a callback `on_llm_new_token` that users can implement when
`OpenAI.streaming` is set to `True`
This commit is contained in:
Ankush Gola
2023-02-14 15:06:14 -08:00
committed by GitHub
parent f05f025e41
commit caa8e4742e
26 changed files with 1311 additions and 155 deletions

View File

@@ -14,7 +14,9 @@
"cell_type": "code",
"execution_count": 1,
"id": "70c4e529",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
@@ -36,7 +38,9 @@
"cell_type": "code",
"execution_count": 2,
"id": "01c46e92",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.document_loaders import TextLoader\n",
@@ -56,7 +60,9 @@
"cell_type": "code",
"execution_count": 3,
"id": "433363a5",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# loaders = [....]\n",
@@ -75,9 +81,11 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"id": "a8930cf7",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
@@ -106,9 +114,11 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 5,
"id": "7b4110f3",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"qa = ChatVectorDBChain.from_llm(OpenAI(temperature=0), vectorstore)"
@@ -126,7 +136,9 @@
"cell_type": "code",
"execution_count": 6,
"id": "7fe3e730",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"chat_history = []\n",
@@ -136,9 +148,11 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 7,
"id": "bfff9cc8",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
@@ -146,7 +160,7 @@
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He also said that she is a consensus builder and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans.\""
]
},
"execution_count": 8,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -165,9 +179,11 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 8,
"id": "00b4cf00",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"chat_history = [(query, result[\"answer\"])]\n",
@@ -177,9 +193,11 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 9,
"id": "f01828d1",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
@@ -187,7 +205,7 @@
"' Justice Stephen Breyer'"
]
},
"execution_count": 11,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -196,10 +214,90 @@
"result['answer']"
]
},
{
"cell_type": "markdown",
"id": "2324cdc6-98bf-4708-b8cd-02a98b1e5b67",
"metadata": {},
"source": [
"## Chat Vector DB with streaming to `stdout`\n",
"\n",
"Output from the chain will be streamed to `stdout` token by token in this example."
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "2efacec3-2690-4b05-8de3-a32fd2ac3911",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.chains.llm import LLMChain\n",
"from langchain.callbacks.base import CallbackManager\n",
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
"from langchain.chains.chat_vector_db.prompts import CONDENSE_QUESTION_PROMPT, QA_PROMPT\n",
"from langchain.chains.question_answering import load_qa_chain\n",
"\n",
"# Construct a ChatVectorDBChain with a streaming llm for combine docs\n",
"# and a separate, non-streaming llm for question generation\n",
"llm = OpenAI(temperature=0)\n",
"streaming_llm = OpenAI(streaming=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True, temperature=0)\n",
"\n",
"question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)\n",
"doc_chain = load_qa_chain(streaming_llm, chain_type=\"stuff\", prompt=QA_PROMPT)\n",
"\n",
"qa = ChatVectorDBChain(vectorstore=vectorstore, combine_docs_chain=doc_chain, question_generator=question_generator)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "fd6d43f4-7428-44a4-81bc-26fe88a98762",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He also said that she is a consensus builder and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans."
]
}
],
"source": [
"chat_history = []\n",
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"result = qa({\"question\": query, \"chat_history\": chat_history})"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "5ab38978-f3e8-4fa7-808c-c79dec48379a",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Justice Stephen Breyer"
]
}
],
"source": [
"chat_history = [(query, result[\"answer\"])]\n",
"query = \"Did he mention who she suceeded\"\n",
"result = qa({\"question\": query, \"chat_history\": chat_history})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d0f869c6",
"id": "a7ea93ff-1899-4171-9c24-85df20ae1a3d",
"metadata": {},
"outputs": [],
"source": []
@@ -221,7 +319,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.10.9"
}
},
"nbformat": 4,

View File

@@ -18,7 +18,9 @@
"cell_type": "code",
"execution_count": 1,
"id": "df924055",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.llms import OpenAI"
@@ -207,14 +209,6 @@
"source": [
"llm.get_num_tokens(\"what a joke\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b004ffdd",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {

View File

@@ -8,6 +8,7 @@ They are split into two categories:
1. `Generic Functionality <./generic_how_to.html>`_: Covering generic functionality all LLMs should have.
2. `Integrations <./integrations.html>`_: Covering integrations with various LLM providers.
3. `Asynchronous <./async_llm.html>`_: Covering asynchronous functionality.
4. `Streaming <./streaming_llm.html>`_: Covering streaming functionality.
.. toctree::
:maxdepth: 1

View File

@@ -0,0 +1,140 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "6eaf7e66-f49c-42da-8d11-22ea13bef718",
"metadata": {},
"source": [
"# Streaming with LLMs\n",
"\n",
"LangChain provides streaming support for LLMs. Currently, we only support streaming for the `OpenAI` LLM implementation, but streaming support for other LLM implementations is on the roadmap. To utilize streaming, use a [`CallbackHandler`](https://github.com/hwchase17/langchain/blob/master/langchain/callbacks/base.py) that implements `on_llm_new_token`. In this example, we are using [`StreamingStdOutCallbackHandler`](https://github.com/hwchase17/langchain/blob/master/langchain/callbacks/streaming_stdout.py)."
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "4ac0ff54-540a-4f2b-8d9a-b590fec7fe07",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"Verse 1\n",
"I'm sippin' on sparkling water,\n",
"It's so refreshing and light,\n",
"It's the perfect way to quench my thirst,\n",
"On a hot summer night.\n",
"\n",
"Chorus\n",
"Sparkling water, sparkling water,\n",
"It's the best way to stay hydrated,\n",
"It's so refreshing and light,\n",
"It's the perfect way to stay alive.\n",
"\n",
"Verse 2\n",
"I'm sippin' on sparkling water,\n",
"It's so bubbly and bright,\n",
"It's the perfect way to cool me down,\n",
"On a hot summer night.\n",
"\n",
"Chorus\n",
"Sparkling water, sparkling water,\n",
"It's the best way to stay hydrated,\n",
"It's so refreshing and light,\n",
"It's the perfect way to stay alive.\n",
"\n",
"Verse 3\n",
"I'm sippin' on sparkling water,\n",
"It's so crisp and clean,\n",
"It's the perfect way to keep me going,\n",
"On a hot summer day.\n",
"\n",
"Chorus\n",
"Sparkling water, sparkling water,\n",
"It's the best way to stay hydrated,\n",
"It's so refreshing and light,\n",
"It's the perfect way to stay alive."
]
}
],
"source": [
"from langchain.llms import OpenAI\n",
"from langchain.callbacks.base import CallbackManager\n",
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
"\n",
"\n",
"llm = OpenAI(streaming=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True, temperature=0)\n",
"resp = llm(\"Write me a song about sparkling water.\")"
]
},
{
"cell_type": "markdown",
"id": "61fb6de7-c6c8-48d0-a48e-1204c027a23c",
"metadata": {
"tags": []
},
"source": [
"We still have access to the final `LLMResult` when using `generate`. However, `token_usage` is not currently supported for streaming, so it comes back empty in `llm_output`."
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "a35373f1-9ee6-4753-a343-5aee749b8527",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"Q: What did the fish say when it hit the wall?\n",
"A: Dam!"
]
},
{
"data": {
"text/plain": [
"LLMResult(generations=[[Generation(text='\\n\\nQ: What did the fish say when it hit the wall?\\nA: Dam!', generation_info={'finish_reason': 'stop', 'logprobs': None})]], llm_output={'token_usage': {}})"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"llm.generate([\"Tell me a joke.\"])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}