diff --git a/examples/obgpt_index.ipynb b/examples/obgpt_index.ipynb
index e69de29bb..bb7b014a9 100644
--- a/examples/obgpt_index.ipynb
+++ b/examples/obgpt_index.ipynb
@@ -0,0 +1,121 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/magic/miniconda3/envs/gpt_env/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      " from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    }
+   ],
+   "source": [
+    "from llama_index import SimpleDirectoryReader, LangchainEmbedding, GPTListIndex, GPTSimpleVectorIndex, PromptHelper\n",
+    "from langchain.embeddings.huggingface import HuggingFaceEmbeddings\n",
+    "from llama_index import LLMPredictor\n",
+    "import torch\n",
+    "from langchain.llms.base import LLM\n",
+    "from transformers import pipeline\n",
+    "\n",
+    "import os\n",
+    "os.environ[\"PYTORCH_CUDA_ALLOC_CONF\"] = \"max_split_size_mb:512\"\n",
+    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"",
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Build the generation pipeline once at module level. Declaring it as a\n",
+    "# class attribute on the pydantic-based LLM subclass makes pydantic\n",
+    "# deep-copy the loaded weights, which can exhaust GPU memory.\n",
+    "model_name = \"../../vicuna-7b\"\n",
+    "hf_pipeline = pipeline(\"text-generation\", model=model_name, device=0, model_kwargs={\n",
+    "    \"torch_dtype\": torch.bfloat16\n",
+    "})\n",
+    "\n",
+    "class FlanLLM(LLM):\n",
+    "    def _call(self, prompt, stop=None):\n",
+    "        return hf_pipeline(prompt, max_length=9999)[0][\"generated_text\"]\n",
+    "\n",
+    "    @property\n",
+    "    def _identifying_params(self):\n",
+    "        return {\"name_of_model\": model_name}\n",
+    "\n",
+    "    @property\n",
+    "    def _llm_type(self):\n",
+    "        return \"custom\"\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "llm_predictor = LLMPredictor(llm=FlanLLM())\n",
+    "hfemb = HuggingFaceEmbeddings()\n",
+    "embed_model = LangchainEmbedding(hfemb)"
+   ]
+  },
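+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The imports above pull in `SimpleDirectoryReader`, `GPTSimpleVectorIndex` and `PromptHelper`, but the notebook stops after wrapping the model and embeddings. The next cell is a minimal, not-yet-run sketch of how those pieces could be wired together: the `../data` path, the prompt-helper sizes and the example question are placeholders, and the exact index constructor may differ between llama_index versions."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch only: build a simple vector index over local documents and query it.\n",
+    "# \"../data\" is a placeholder path; point it at a real document folder.\n",
+    "documents = SimpleDirectoryReader(\"../data\").load_data()\n",
+    "\n",
+    "# Illustrative sizes for the prompt helper.\n",
+    "prompt_helper = PromptHelper(max_input_size=2048, num_output=256, max_chunk_overlap=20)\n",
+    "\n",
+    "index = GPTSimpleVectorIndex(\n",
+    "    documents,\n",
+    "    llm_predictor=llm_predictor,\n",
+    "    embed_model=embed_model,\n",
+    "    prompt_helper=prompt_helper,\n",
+    ")\n",
+    "\n",
+    "print(index.query(\"What are these documents about?\"))"
+   ]
+  }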
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "gpt_env",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.16"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}