From 41c841ec8596e666a458af078999bb4c4bc8b63a Mon Sep 17 00:00:00 2001 From: Lance Martin <122662504+rlancemartin@users.noreply.github.com> Date: Tue, 18 Jul 2023 15:13:27 -0700 Subject: [PATCH] Add Llama-v2 to Llama.cpp notebook (#7913) --- .../models/llms/integrations/llamacpp.ipynb | 103 +++++++++++++++--- 1 file changed, 87 insertions(+), 16 deletions(-) diff --git a/docs/extras/modules/model_io/models/llms/integrations/llamacpp.ipynb b/docs/extras/modules/model_io/models/llms/integrations/llamacpp.ipynb index cbf2a05c7b8..345aa0b7dc4 100644 --- a/docs/extras/modules/model_io/models/llms/integrations/llamacpp.ipynb +++ b/docs/extras/modules/model_io/models/llms/integrations/llamacpp.ipynb @@ -1,7 +1,6 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -14,7 +13,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -40,7 +38,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -61,7 +58,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -78,7 +74,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -99,7 +94,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -116,7 +110,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -124,7 +117,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -135,7 +127,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": { "tags": [] }, @@ -148,7 +140,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -157,7 +148,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": { "tags": [] }, @@ -172,7 +163,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": { "tags": [] }, @@ -184,13 +175,96 @@ ] }, { 
- "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "### CPU" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`Llama-v2`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Make sure the model path is correct for your system!\n", + "llm = LlamaCpp(\n", + " model_path=\"/Users/rlm/Desktop/Code/llama/llama-2-7b-ggml/llama-2-7b-chat.ggmlv3.q4_0.bin\",\n", + " temperature=0.75, max_tokens=2000, top_p=1,  # sampling options are LlamaCpp fields (max_tokens, not max_length)\n", + " callback_manager=callback_manager,\n", + " verbose=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Stephen Colbert:\n", + "Yo, John, I heard you've been talkin' smack about me on your show.\n", + "Let me tell you somethin', pal, I'm the king of late-night TV\n", + "My satire is sharp as a razor, it cuts deeper than a knife\n", + "While you're just a british bloke tryin' to be funny with your accent and your wit.\n", + "John Oliver:\n", + "Oh Stephen, don't be ridiculous, you may have the ratings but I got the real talk.\n", + "My show is the one that people actually watch and listen to, not just for the laughs but for the facts.\n", + "While you're busy talkin' trash, I'm out here bringing the truth to light.\n", + "Stephen Colbert:\n", + "Truth? Ha! You think your show is about truth? 
Please, it's all just a joke to you.\n", + "You're just a fancy-pants british guy tryin' to be funny with your news and your jokes.\n", + "While I'm the one who's really makin' a difference, with my sat" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 358.60 ms\n", + "llama_print_timings: sample time = 172.55 ms / 256 runs ( 0.67 ms per token, 1483.59 tokens per second)\n", + "llama_print_timings: prompt eval time = 613.36 ms / 16 tokens ( 38.33 ms per token, 26.09 tokens per second)\n", + "llama_print_timings: eval time = 10151.17 ms / 255 runs ( 39.81 ms per token, 25.12 tokens per second)\n", + "llama_print_timings: total time = 11332.41 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "\"\\nStephen Colbert:\\nYo, John, I heard you've been talkin' smack about me on your show.\\nLet me tell you somethin', pal, I'm the king of late-night TV\\nMy satire is sharp as a razor, it cuts deeper than a knife\\nWhile you're just a british bloke tryin' to be funny with your accent and your wit.\\nJohn Oliver:\\nOh Stephen, don't be ridiculous, you may have the ratings but I got the real talk.\\nMy show is the one that people actually watch and listen to, not just for the laughs but for the facts.\\nWhile you're busy talkin' trash, I'm out here bringing the truth to light.\\nStephen Colbert:\\nTruth? Ha! You think your show is about truth? 
Please, it's all just a joke to you.\\nYou're just a fancy-pants british guy tryin' to be funny with your news and your jokes.\\nWhile I'm the one who's really makin' a difference, with my sat\"" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "prompt = \"\"\"\n", + "Question: A rap battle between Stephen Colbert and John Oliver\n", + "\"\"\"\n", + "llm(prompt)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`Llama-v1`" + ] + }, { "cell_type": "code", "execution_count": 18, @@ -260,7 +334,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -366,7 +439,6 @@ "source": [] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -405,7 +477,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [