docs: add quantization to vllm and update API (#16950)
- **Description:** Update vLLM docs to include instructions on how to use quantized models, as well as to replace the deprecated methods.
This commit is contained in:
parent 2a510c71a0
commit 71f9ea33b6
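For context, the API update this commit applies to the notebook follows the pattern sketched below. This is a minimal illustration, not part of the diff: it assumes the `VLLM` wrapper from `langchain_community.llms`, and the model name and prompt are drawn from the surrounding notebook cells for illustration only.

```python
from langchain_community.llms import VLLM

# Illustrative setup; any vLLM-supported Hugging Face model works the same way.
llm = VLLM(
    model="mosaicml/mpt-7b",   # example model, not prescribed by this commit
    trust_remote_code=True,    # mandatory for hf models
    max_new_tokens=128,
    temperature=0.8,
)

# Deprecated call style removed from the docs by this commit:
#   print(llm("What is the capital of France ?"))
# Replacement used throughout the updated notebook:
print(llm.invoke("What is the capital of France ?"))
```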
@@ -82,7 +82,7 @@
 " temperature=0.8,\n",
 ")\n",
 "\n",
-"print(llm(\"What is the capital of France ?\"))"
+"print(llm.invoke(\"What is the capital of France ?\"))"
 ]
 },
 {
@@ -117,8 +117,7 @@
 "1. The first Pokemon game was released in 1996.\n",
 "2. The president was Bill Clinton.\n",
 "3. Clinton was president from 1993 to 2001.\n",
-"4. The answer is Clinton.\n",
-"\n"
+"4. The answer is Clinton.\n"
 ]
 },
 {
@@ -142,7 +141,7 @@
 "\n",
 "question = \"Who was the US president in the year the first Pokemon game was released?\"\n",
 "\n",
-"print(llm_chain.run(question))"
+"print(llm_chain.invoke(question))"
 ]
 },
 {
@@ -172,7 +171,36 @@
 " trust_remote_code=True, # mandatory for hf models\n",
 ")\n",
 "\n",
-"llm(\"What is the future of AI?\")"
+"llm.invoke(\"What is the future of AI?\")"
 ]
 },
+{
+"cell_type": "markdown",
+"id": "d6ca8fd911d25faa",
+"metadata": {
+"collapsed": false
+},
+"source": [
+"## Quantization\n",
+"\n",
+"vLLM supports `awq` quantization. To enable it, pass `quantization` to `vllm_kwargs`."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "2cada3174c46a0ea",
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"llm_q = VLLM(\n",
+" model=\"TheBloke/Llama-2-7b-Chat-AWQ\",\n",
+" trust_remote_code=True,\n",
+" max_new_tokens=512,\n",
+" vllm_kwargs={\"quantization\": \"awq\"},\n",
+")"
+]
+},
 {
@@ -216,7 +244,7 @@
 " model_name=\"tiiuae/falcon-7b\",\n",
 " model_kwargs={\"stop\": [\".\"]},\n",
 ")\n",
-"print(llm(\"Rome is\"))"
+"print(llm.invoke(\"Rome is\"))"
 ]
 }
 ],
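As a follow-up to the new Quantization cells added above, here is a hedged usage sketch. It assumes AWQ-quantized weights and a vLLM-compatible GPU are available; the prompt is illustrative and not part of the commit.

```python
from langchain_community.llms import VLLM

# Mirrors the new notebook cell: pass `quantization` through `vllm_kwargs`.
llm_q = VLLM(
    model="TheBloke/Llama-2-7b-Chat-AWQ",
    trust_remote_code=True,
    max_new_tokens=512,
    vllm_kwargs={"quantization": "awq"},
)

# The quantized model is invoked the same way as the unquantized one.
print(llm_q.invoke("What is the future of AI?"))
```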