diff --git a/docs/docs/integrations/llms/vllm.ipynb b/docs/docs/integrations/llms/vllm.ipynb
index eb4208b97bf..6abf9a1376b 100644
--- a/docs/docs/integrations/llms/vllm.ipynb
+++ b/docs/docs/integrations/llms/vllm.ipynb
@@ -82,7 +82,7 @@
     " temperature=0.8,\n",
     ")\n",
     "\n",
-    "print(llm(\"What is the capital of France ?\"))"
+    "print(llm.invoke(\"What is the capital of France ?\"))"
    ]
   },
   {
@@ -117,8 +117,7 @@
       "1. The first Pokemon game was released in 1996.\n",
       "2. The president was Bill Clinton.\n",
       "3. Clinton was president from 1993 to 2001.\n",
-      "4. The answer is Clinton.\n",
-      "\n"
+      "4. The answer is Clinton.\n"
      ]
     },
     {
@@ -142,7 +141,7 @@
     "\n",
     "question = \"Who was the US president in the year the first Pokemon game was released?\"\n",
     "\n",
-    "print(llm_chain.run(question))"
+    "print(llm_chain.invoke(question)[\"text\"])"
    ]
   },
   {
@@ -172,7 +171,36 @@
     " trust_remote_code=True, # mandatory for hf models\n",
     ")\n",
     "\n",
-    "llm(\"What is the future of AI?\")"
+    "llm.invoke(\"What is the future of AI?\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d6ca8fd911d25faa",
+   "metadata": {
+    "collapsed": false
+   },
+   "source": [
+    "## Quantization\n",
+    "\n",
+    "vLLM supports `awq` quantization. To enable it, pass `quantization` to `vllm_kwargs`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2cada3174c46a0ea",
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "llm_q = VLLM(\n",
+    " model=\"TheBloke/Llama-2-7b-Chat-AWQ\",\n",
+    " trust_remote_code=True,\n",
+    " max_new_tokens=512,\n",
+    " vllm_kwargs={\"quantization\": \"awq\"},\n",
+    ")"
    ]
   },
   {
@@ -216,7 +244,7 @@
     " model_name=\"tiiuae/falcon-7b\",\n",
     " model_kwargs={\"stop\": [\".\"]},\n",
     ")\n",
-    "print(llm(\"Rome is\"))"
+    "print(llm.invoke(\"Rome is\"))"
    ]
   }
  ],