{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# AtlasDB\n",
"\n",
"This notebook shows you how to use functionality related to the AtlasDB"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"import time\n",
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.text_splitter import SpacyTextSplitter\n",
"from langchain.vectorstores import AtlasDB\n",
"from langchain.document_loaders import TextLoader"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting en-core-web-sm==3.5.0\n",
" Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0-py3-none-any.whl (12.8 MB)\n",
"\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m12.8/12.8 MB\u001B[0m \u001B[31m90.8 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m00:01\u001B[0m00:01\u001B[0m\n",
"\u001B[?25hRequirement already satisfied: spacy<3.6.0,>=3.5.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from en-core-web-sm==3.5.0) (3.5.0)\n",
"Requirement already satisfied: packaging>=20.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (23.0)\n",
"Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (1.1.1)\n",
"Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (3.3.0)\n",
"Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (2.4.5)\n",
"Requirement already satisfied: pathy>=0.10.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (0.10.1)\n",
"Requirement already satisfied: setuptools in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (67.4.0)\n",
"Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (4.64.1)\n",
"Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (1.0.4)\n",
"Requirement already satisfied: smart-open<7.0.0,>=5.2.1 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (6.3.0)\n",
"Requirement already satisfied: thinc<8.2.0,>=8.1.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (8.1.7)\n",
"Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (2.0.7)\n",
"Requirement already satisfied: typer<0.8.0,>=0.3.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (0.7.0)\n",
"Requirement already satisfied: requests<3.0.0,>=2.13.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (2.28.2)\n",
"Requirement already satisfied: jinja2 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (3.1.2)\n",
"Requirement already satisfied: pydantic!=1.8,!=1.8.1,<1.11.0,>=1.7.4 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (1.10.5)\n",
"Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (2.0.8)\n",
"Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (3.0.12)\n",
"Requirement already satisfied: numpy>=1.15.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (1.24.2)\n",
"Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (1.0.9)\n",
"Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (3.0.8)\n",
"Requirement already satisfied: typing-extensions>=4.2.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from pydantic!=1.8,!=1.8.1,<1.11.0,>=1.7.4->spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (4.5.0)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (3.0.1)\n",
"Requirement already satisfied: idna<4,>=2.5 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (3.4)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (2022.12.7)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (1.26.14)\n",
"Requirement already satisfied: blis<0.8.0,>=0.7.8 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from thinc<8.2.0,>=8.1.0->spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (0.7.9)\n",
"Requirement already satisfied: confection<1.0.0,>=0.0.1 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from thinc<8.2.0,>=8.1.0->spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (0.0.4)\n",
"Requirement already satisfied: click<9.0.0,>=7.1.1 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from typer<0.8.0,>=0.3.0->spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (8.1.3)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from jinja2->spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (2.1.2)\n",
"\n",
"\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip is available: \u001B[0m\u001B[31;49m23.0\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m23.0.1\u001B[0m\n",
"\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\n",
"\u001B[38;5;2m✔ Download and installation successful\u001B[0m\n",
"You can now load the package via spacy.load('en_core_web_sm')\n"
]
}
],
"source": [
"!python -m spacy download en_core_web_sm"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"ATLAS_TEST_API_KEY = '7xDPkYXSYDc1_ErdTPIcoAR9RNd8YDlkS3nVNXcVoIMZ6'"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"loader = TextLoader('../../state_of_the_union.txt')\n",
"documents = loader.load()\n",
"text_splitter = SpacyTextSplitter(separator='|')\n",
"texts = []\n",
"for doc in text_splitter.split_documents(documents):\n",
" texts.extend(doc.page_content.split('|'))\n",
" \n",
"texts = [e.strip() for e in texts]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-02-24 16:13:49.696 | INFO | nomic.project:_create_project:884 - Creating project `test_index_1677255228.136989` in organization `Atlas Demo`\n",
"2023-02-24 16:13:51.087 | INFO | nomic.project:wait_for_project_lock:993 - test_index_1677255228.136989: Project lock is released.\n",
"2023-02-24 16:13:51.225 | INFO | nomic.project:wait_for_project_lock:993 - test_index_1677255228.136989: Project lock is released.\n",
"2023-02-24 16:13:51.481 | INFO | nomic.project:add_text:1351 - Uploading text to Atlas.\n",
"1it [00:00, 1.20it/s]\n",
"2023-02-24 16:13:52.318 | INFO | nomic.project:add_text:1422 - Text upload succeeded.\n",
"2023-02-24 16:13:52.628 | INFO | nomic.project:wait_for_project_lock:993 - test_index_1677255228.136989: Project lock is released.\n",
"2023-02-24 16:13:53.380 | INFO | nomic.project:create_index:1192 - Created map `test_index_1677255228.136989_index` in project `test_index_1677255228.136989`: https://atlas.nomic.ai/map/ee2354a3-7f9a-4c6b-af43-b0cda09d7198/db996d77-8981-48a0-897a-ff2c22bbf541\n"
]
}
],
"source": [
"db = AtlasDB.from_texts(texts=texts,\n",
" name='test_index_'+str(time.time()),\n",
" description='test_index',\n",
" api_key=ATLAS_TEST_API_KEY,\n",
" index_kwargs={'build_topic_model': True})"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-02-24 16:14:09.106 | INFO | nomic.project:wait_for_project_lock:993 - test_index_1677255228.136989: Project lock is released.\n"
]
}
],
"source": [
"with db.project.wait_for_project_lock():\n",
" time.sleep(1)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" test_index_1677255228.136989\n",
"
\n",
" A description for your project 508 datums inserted.\n",
"
\n",
" 1 index built.\n",
"
Projections\n",
"