commit 66265aaac4 (parent 8dac0fb3f1)
@@ -14,21 +14,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {
     "tags": []
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Note: you may need to restart the kernel to use updated packages.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
-    "%pip install --upgrade --quiet gpt4all > /dev/null"
+    "%pip install --upgrade --quiet langchain-community gpt4all"
    ]
   },
   {
@@ -47,9 +39,7 @@
    },
    "outputs": [],
    "source": [
-    "from langchain.chains import LLMChain\n",
     "from langchain_community.llms import GPT4All\n",
-    "from langchain_core.callbacks import StreamingStdOutCallbackHandler\n",
     "from langchain_core.prompts import PromptTemplate"
    ]
   },
@@ -92,64 +82,79 @@
     "\n",
     "For more info, visit https://github.com/nomic-ai/gpt4all.\n",
     "\n",
-    "---"
+    "---\n",
+    "\n",
+    "This integration does not yet support streaming in chunks via the [`.stream()`](https://python.langchain.com/v0.2/docs/how_to/streaming/) method. The below example uses a callback handler with `streaming=True`:"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
     "local_path = (\n",
-    "    \"./models/ggml-gpt4all-l13b-snoozy.bin\"  # replace with your desired local file path\n",
+    "    \"./models/Meta-Llama-3-8B-Instruct.Q4_0.gguf\"  # replace with your local file path\n",
     ")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Token: Justin\n",
+      "Token: Bieber\n",
+      "Token: was\n",
+      "Token: born\n",
+      "Token: on\n",
+      "Token: March\n",
+      "Token: \n",
+      "Token: 1\n",
+      "Token: ,\n",
+      "Token: \n"
+     ]
+    }
+   ],
    "source": [
-    "# Callbacks support token-wise streaming\n",
-    "callbacks = [StreamingStdOutCallbackHandler()]\n",
+    "from langchain_core.callbacks import BaseCallbackHandler\n",
+    "\n",
+    "count = 0\n",
+    "\n",
+    "\n",
+    "class MyCustomHandler(BaseCallbackHandler):\n",
+    "    def on_llm_new_token(self, token: str, **kwargs) -> None:\n",
+    "        global count\n",
+    "        if count < 10:\n",
+    "            print(f\"Token: {token}\")\n",
+    "            count += 1\n",
+    "\n",
     "\n",
     "# Verbose is required to pass to the callback manager\n",
-    "llm = GPT4All(model=local_path, callbacks=callbacks, verbose=True)\n",
+    "llm = GPT4All(model=local_path, callbacks=[MyCustomHandler()], streaming=True)\n",
     "\n",
     "# If you want to use a custom model add the backend parameter\n",
     "# Check https://docs.gpt4all.io/gpt4all_python.html for supported backends\n",
-    "llm = GPT4All(model=local_path, backend=\"gptj\", callbacks=callbacks, verbose=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "llm_chain = LLMChain(prompt=prompt, llm=llm)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
+    "# llm = GPT4All(model=local_path, backend=\"gptj\", callbacks=callbacks, streaming=True)\n",
+    "\n",
+    "chain = prompt | llm\n",
+    "\n",
     "question = \"What NFL team won the Super Bowl in the year Justin Bieber was born?\"\n",
     "\n",
-    "llm_chain.run(question)"
+    "# Streamed tokens will be logged/aggregated via the passed callback\n",
+    "res = chain.invoke({\"question\": question})"
    ]
   },
   {
-   "attachments": {},
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": null,
    "metadata": {},
-   "source": [
-    "Justin Bieber was born on March 1, 1994. In 1994, The Cowboys won Super Bowl XXVIII."
-   ]
+   "outputs": [],
+   "source": []
   }
  ],
 "metadata": {
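Read together, the new cell sources in the hunk above amount to roughly the following script. This is a sketch assembled from the diff rather than a cell-for-cell copy: the `prompt` used by `chain = prompt | llm` is defined in an unchanged cell that the hunk does not show, so the template text below is only an assumed placeholder.

```python
from langchain_community.llms import GPT4All
from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.prompts import PromptTemplate

count = 0


class MyCustomHandler(BaseCallbackHandler):
    """Print the first ten streamed tokens, as in the updated notebook cell."""

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        global count
        if count < 10:
            print(f"Token: {token}")
            count += 1


# Assumed placeholder; the notebook's actual prompt lives in an unchanged cell.
prompt = PromptTemplate.from_template(
    "Question: {question}\n\nAnswer: Let's think step by step."
)

local_path = "./models/Meta-Llama-3-8B-Instruct.Q4_0.gguf"  # replace with your local file path

# streaming=True routes generated tokens through the callback handler,
# since chunked streaming via .stream() is not supported by this integration.
llm = GPT4All(model=local_path, callbacks=[MyCustomHandler()], streaming=True)

chain = prompt | llm

question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"
res = chain.invoke({"question": question})
```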
@@ -7,7 +7,7 @@ This page covers how to use the `GPT4All` wrapper within LangChain. The tutorial
 - Install the Python package with `pip install gpt4all`
 - Download a [GPT4All model](https://gpt4all.io/index.html) and place it in your desired directory
 
-In this example, We are using `mistral-7b-openorca.Q4_0.gguf`(Best overall fast chat model):
+In this example, we are using `mistral-7b-openorca.Q4_0.gguf`:
 
 ```bash
 mkdir models
@@ -30,7 +30,7 @@ model = GPT4All(model="./models/mistral-7b-openorca.Q4_0.gguf", n_threads=8)
 response = model.invoke("Once upon a time, ")
 ```
 
-You can also customize the generation parameters, such as n_predict, temp, top_p, top_k, and others.
+You can also customize the generation parameters, such as `n_predict`, `temp`, `top_p`, `top_k`, and others.
 
 To stream the model's predictions, add in a CallbackManager.
 
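As a quick illustration of the "customize the generation parameters" sentence in the hunk above, a minimal sketch might look like this; the parameter names are the ones listed in the doc, and the values are illustrative rather than recommended defaults:

```python
from langchain_community.llms import GPT4All

# Illustrative values only; tune them for your model and hardware.
model = GPT4All(
    model="./models/mistral-7b-openorca.Q4_0.gguf",
    n_threads=8,
    n_predict=256,  # maximum number of tokens to generate
    temp=0.7,       # sampling temperature
    top_p=0.95,
    top_k=40,
)

response = model.invoke("Once upon a time, ")
```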
@@ -45,11 +45,11 @@ callbacks = [StreamingStdOutCallbackHandler()]
 model = GPT4All(model="./models/mistral-7b-openorca.Q4_0.gguf", n_threads=8)
 
 # Generate text. Tokens are streamed through the callback manager.
-model("Once upon a time, ", callbacks=callbacks)
+model.invoke("Once upon a time, ", callbacks=callbacks)
 ```
 
 ## Model File
 
-You can find links to model file downloads in the [https://gpt4all.io/](https://gpt4all.io/index.html).
+You can download model files from the GPT4All client. You can download the client from the [GPT4All](https://gpt4all.io/index.html) website.
 
 For a more detailed walkthrough of this, see [this notebook](/docs/integrations/llms/gpt4all)