{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# AtlasDB\n",
    "\n",
    "This notebook shows you how to use functionality related to the AtlasDB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "is_executing": true
    }
   },
   "outputs": [],
   "source": [
    "import time\n",
    "from langchain.embeddings.openai import OpenAIEmbeddings\n",
    "from langchain.text_splitter import SpacyTextSplitter\n",
    "from langchain.vectorstores import AtlasDB\n",
    "from langchain.document_loaders import TextLoader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true,
    "pycharm": {
     "is_executing": true
    }
   },
   "outputs": [],
   "source": [
    "!python -m spacy download en_core_web_sm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "ATLAS_TEST_API_KEY = '7xDPkYXSYDc1_ErdTPIcoAR9RNd8YDlkS3nVNXcVoIMZ6'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "loader = TextLoader('../../state_of_the_union.txt')\n",
    "documents = loader.load()\n",
    "text_splitter = SpacyTextSplitter(separator='|')\n",
    "texts = []\n",
    "for doc in text_splitter.split_documents(documents):\n",
    "    texts.extend(doc.page_content.split('|'))\n",
    "                 \n",
    "texts = [e.strip() for e in texts]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "is_executing": true
    }
   },
   "outputs": [],
   "source": [
    "db = AtlasDB.from_texts(texts=texts,\n",
    "                        name='test_index_'+str(time.time()), # unique name for your vector store\n",
    "                        description='test_index', #a description for your vector store\n",
    "                        api_key=ATLAS_TEST_API_KEY,\n",
    "                        index_kwargs={'build_topic_model': True})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "db.project.wait_for_project_lock()"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "            test_index_1677255228.136989\n",
       "            
\n",
       "            A description for your project 508 datums inserted.\n",
       "            
\n",
       "            1 index built.\n",
       "            
Projections\n",
       "