mirror of
				https://github.com/hwchase17/langchain.git
				synced 2025-10-30 23:29:54 +00:00 
			
		
		
		
	- Updated errors in the AtlasDB vector store documentation - Removed extraneous output logs in example notebook.
		
			
				
	
	
		
			201 lines
		
	
	
		
			7.5 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			201 lines
		
	
	
		
			7.5 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| {
 | |
|  "cells": [
 | |
|   {
 | |
|    "cell_type": "markdown",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "# AtlasDB\n",
 | |
|     "\n",
 | |
|     "This notebook shows you how to use functionality related to the AtlasDB vector store."
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": null,
 | |
|    "metadata": {
 | |
|     "pycharm": {
 | |
|      "is_executing": true
 | |
|     }
 | |
|    },
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "import time\n",
 | |
|     "from langchain.embeddings.openai import OpenAIEmbeddings\n",
 | |
|     "from langchain.text_splitter import SpacyTextSplitter\n",
 | |
|     "from langchain.vectorstores import AtlasDB\n",
 | |
|     "from langchain.document_loaders import TextLoader"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": null,
 | |
|    "metadata": {
 | |
|     "scrolled": true,
 | |
|     "pycharm": {
 | |
|      "is_executing": true
 | |
|     }
 | |
|    },
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "!python -m spacy download en_core_web_sm"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 3,
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "ATLAS_TEST_API_KEY = '7xDPkYXSYDc1_ErdTPIcoAR9RNd8YDlkS3nVNXcVoIMZ6'"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 4,
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "loader = TextLoader('../../state_of_the_union.txt')\n",
 | |
|     "documents = loader.load()\n",
 | |
|     "text_splitter = SpacyTextSplitter(separator='|')\n",
 | |
|     "texts = []\n",
 | |
|     "for doc in text_splitter.split_documents(documents):\n",
 | |
|     "    texts.extend(doc.page_content.split('|'))\n",
 | |
|     "                 \n",
 | |
|     "texts = [e.strip() for e in texts]"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": null,
 | |
|    "metadata": {
 | |
|     "pycharm": {
 | |
|      "is_executing": true
 | |
|     }
 | |
|    },
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "db = AtlasDB.from_texts(texts=texts,\n",
 | |
|     "                        name='test_index_'+str(time.time()), # unique name for your vector store\n",
 | |
|     "                        description='test_index',  # a description for your vector store\n",
 | |
|     "                        api_key=ATLAS_TEST_API_KEY,\n",
 | |
|     "                        index_kwargs={'build_topic_model': True})"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": null,
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "db.project.wait_for_project_lock()"
 | |
|    ],
 | |
|    "metadata": {
 | |
|     "collapsed": false
 | |
|    }
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 7,
 | |
|    "metadata": {},
 | |
|    "outputs": [
 | |
|     {
 | |
|      "data": {
 | |
|       "text/html": [
 | |
|        "\n",
 | |
|        "            <strong><a href=\"https://atlas.nomic.ai/dashboard/project/ee2354a3-7f9a-4c6b-af43-b0cda09d7198\">test_index_1677255228.136989</strong></a>\n",
 | |
|        "            <br>\n",
 | |
|        "            A description for your project 508 datums inserted.\n",
 | |
|        "            <br>\n",
 | |
|        "            1 index built.\n",
 | |
|        "            <br><strong>Projections</strong>\n",
 | |
|        "<ul>\n",
 | |
|        "<li>test_index_1677255228.136989_index. Status Completed. <a target=\"_blank\" href=\"https://atlas.nomic.ai/map/ee2354a3-7f9a-4c6b-af43-b0cda09d7198/db996d77-8981-48a0-897a-ff2c22bbf541\">view online</a></li></ul><hr><script>\n",
 | |
|        "            destroy = function() {\n",
 | |
|        "                document.getElementById(\"iframedb996d77-8981-48a0-897a-ff2c22bbf541\").remove()\n",
 | |
|        "            }\n",
 | |
|        "        </script>\n",
 | |
|        "\n",
 | |
|        "        <h4>Projection ID: db996d77-8981-48a0-897a-ff2c22bbf541</h4>\n",
 | |
|        "        <div class=\"actions\">\n",
 | |
|        "            <div id=\"hide\" class=\"action\" onclick=\"destroy()\">Hide embedded project</div>\n",
 | |
|        "            <div class=\"action\" id=\"out\">\n",
 | |
|        "                <a href=\"https://atlas.nomic.ai/map/ee2354a3-7f9a-4c6b-af43-b0cda09d7198/db996d77-8981-48a0-897a-ff2c22bbf541\" target=\"_blank\">Explore on atlas.nomic.ai</a>\n",
 | |
|        "            </div>\n",
 | |
|        "        </div>\n",
 | |
|        "        \n",
 | |
|        "        <iframe class=\"iframe\" id=\"iframedb996d77-8981-48a0-897a-ff2c22bbf541\" allow=\"clipboard-read; clipboard-write\" src=\"https://atlas.nomic.ai/map/ee2354a3-7f9a-4c6b-af43-b0cda09d7198/db996d77-8981-48a0-897a-ff2c22bbf541\">\n",
 | |
|        "        </iframe>\n",
 | |
|        "\n",
 | |
|        "        <style>\n",
 | |
|        "            .iframe {\n",
 | |
|        "                /* vh can be **very** large in vscode ipynb. */\n",
 | |
|        "                height: min(75vh, 66vw);\n",
 | |
|        "                width: 100%;\n",
 | |
|        "            }\n",
 | |
|        "        </style>\n",
 | |
|        "        \n",
 | |
|        "        <style>\n",
 | |
|        "            .actions {\n",
 | |
|        "              display: block;\n",
 | |
|        "            }\n",
 | |
|        "            .action {\n",
 | |
|        "              min-height: 18px;\n",
 | |
|        "              margin: 5px;\n",
 | |
|        "              transition: all 500ms ease-in-out;\n",
 | |
|        "            }\n",
 | |
|        "            .action:hover {\n",
 | |
|        "              cursor: pointer;\n",
 | |
|        "            }\n",
 | |
|        "            #hide:hover::after {\n",
 | |
|        "                content: \" X\";\n",
 | |
|        "            }\n",
 | |
|        "            #out:hover::after {\n",
 | |
|        "                content: \"\";\n",
 | |
|        "            }\n",
 | |
|        "        </style>\n",
 | |
|        "        "
 | |
|       ],
 | |
|       "text/plain": [
 | |
|        "AtlasProject: <{'id': 'ee2354a3-7f9a-4c6b-af43-b0cda09d7198', 'owner': '9c29afbb-a002-4d49-958e-ecf5ae1351ac', 'project_name': 'test_index_1677255228.136989', 'creator': 'auth0|63efc4b5462246f4d9a6ecf2', 'description': 'A description for your project', 'opensearch_index_id': 'f61fb8dd-0abf-4f31-9130-41870e443902', 'is_public': True, 'project_fields': ['atlas_id', 'text'], 'unique_id_field': 'atlas_id', 'modality': 'text', 'total_datums_in_project': 508, 'created_timestamp': '2023-02-24T16:13:50.313363+00:00', 'atlas_indices': [{'id': 'b1b01833-0964-4597-a4bc-a2d60700949d', 'project_id': 'ee2354a3-7f9a-4c6b-af43-b0cda09d7198', 'index_name': 'test_index_1677255228.136989_index', 'indexed_field': 'text', 'created_timestamp': '2023-02-24T16:13:52.957101+00:00', 'updated_timestamp': '2023-02-24T16:14:03.469621+00:00', 'atoms': ['charchunk', 'document'], 'colorable_fields': [], 'embedders': [{'id': '7ec0868a-4eed-4414-a482-25cce9803e1b', 'atlas_index_id': 'b1b01833-0964-4597-a4bc-a2d60700949d', 'ready': True, 'model_name': 'NomicEmbed', 'hyperparameters': {'norm': 'both', 'batch_size': 20, 'polymerize_by': 'charchunk', 'dataset_buffer_size': 1000}}], 'nearest_neighbor_indices': [{'id': '86f8e3ff-e07c-4678-a4d7-144db4b0301d', 'index_name': 'NomicOrganize', 'ready': True, 'hyperparameters': {'dim': 384, 'space': 'l2'}, 'atom_strategies': ['document']}], 'projections': [{'id': 'db996d77-8981-48a0-897a-ff2c22bbf541', 'projection_name': 'NomicProject', 'ready': True, 'hyperparameters': {'spread': 1.0, 'n_epochs': 50, 'n_neighbors': 15}, 'atom_strategies': ['document'], 'created_timestamp': '2023-02-24T16:13:52.979561+00:00', 'updated_timestamp': '2023-02-24T16:14:03.466309+00:00'}]}], 'insert_update_delete_lock': False}>"
 | |
|       ]
 | |
|      },
 | |
|      "execution_count": 7,
 | |
|      "metadata": {},
 | |
|      "output_type": "execute_result"
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "db.project"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": null,
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": []
 | |
|   }
 | |
|  ],
 | |
|  "metadata": {
 | |
|   "kernelspec": {
 | |
|    "display_name": "Python 3 (ipykernel)",
 | |
|    "language": "python",
 | |
|    "name": "python3"
 | |
|   },
 | |
|   "language_info": {
 | |
|    "codemirror_mode": {
 | |
|     "name": "ipython",
 | |
|     "version": 3
 | |
|    },
 | |
|    "file_extension": ".py",
 | |
|    "mimetype": "text/x-python",
 | |
|    "name": "python",
 | |
|    "nbconvert_exporter": "python",
 | |
|    "pygments_lexer": "ipython3",
 | |
|    "version": "3.9.4"
 | |
|   }
 | |
|  },
 | |
|  "nbformat": 4,
 | |
|  "nbformat_minor": 1
 | |
| }
 |