mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-07 22:11:51 +00:00
community[minor]: integrate China Mobile Ecloud vector search (#15298)
- **Description:** integrate China Mobile Ecloud vector search, - **Dependencies:** elasticsearch==7.10.1 Co-authored-by: liuyongheng <liuyongheng@cmss.chinamobile.com> Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
317
docs/docs/integrations/vectorstores/ecloud_vector_search.ipynb
Normal file
317
docs/docs/integrations/vectorstores/ecloud_vector_search.ipynb
Normal file
@@ -0,0 +1,317 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# China Mobile ECloud ElasticSearch VectorSearch\n",
|
||||
"\n",
|
||||
">[China Mobile ECloud VectorSearch](https://ecloud.10086.cn/portal/product/elasticsearch) is a fully managed, enterprise-level distributed search and analysis service. China Mobile ECloud VectorSearch provides low-cost, high-performance, and reliable retrieval and analysis platform level product services for structured/unstructured data. As a vector database , it supports multiple index types and similarity distance methods. \n",
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the `ECloud ElasticSearch VectorStore`.\n",
|
||||
"To run, you should have an [China Mobile ECloud VectorSearch](https://ecloud.10086.cn/portal/product/elasticsearch) instance up and running:\n",
|
||||
"\n",
|
||||
"Read the [help document](https://ecloud.10086.cn/op-help-center/doc/category/1094) to quickly familiarize and configure China Mobile ECloud ElasticSearch instance."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"After the instance is up and running, follow these steps to split documents, get embeddings, connect to the baidu cloud elasticsearch instance, index documents, and perform vector retrieval."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install elasticsearch == 7.10.1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, we want to use `OpenAIEmbeddings` so we have to get the OpenAI API Key."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Secondly, split documents and get embeddings."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores import EcloudESVectorStore"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"\n",
|
||||
"ES_URL = \"http://localhost:9200\"\n",
|
||||
"USER = \"your user name\"\n",
|
||||
"PASSWORD = \"your password\"\n",
|
||||
"indexname = \"your index name\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"then, index documents"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docsearch = EcloudESVectorStore.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
" es_url=ES_URL,\n",
|
||||
" user=USER,\n",
|
||||
" password=PASSWORD,\n",
|
||||
" index_name=indexname,\n",
|
||||
" refresh_indices=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Finally, Query and retrive data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = docsearch.similarity_search(query, k=10)\n",
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"A commonly used case"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def test_dense_float_vectore_lsh_cosine() -> None:\n",
|
||||
" \"\"\"\n",
|
||||
" Test indexing with vectore type knn_dense_float_vector and model-similarity of lsh-cosine\n",
|
||||
" this mapping is compatible with model of exact and similarity of l2/cosine\n",
|
||||
" this mapping is compatible with model of lsh and similarity of cosine\n",
|
||||
" \"\"\"\n",
|
||||
" docsearch = EcloudESVectorStore.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
" es_url=ES_URL,\n",
|
||||
" user=USER,\n",
|
||||
" password=PASSWORD,\n",
|
||||
" index_name=indexname,\n",
|
||||
" refresh_indices=True,\n",
|
||||
" text_field=\"my_text\",\n",
|
||||
" vector_field=\"my_vec\",\n",
|
||||
" vector_type=\"knn_dense_float_vector\",\n",
|
||||
" vector_params={\"model\": \"lsh\", \"similarity\": \"cosine\", \"L\": 99, \"k\": 1},\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" docs = docsearch.similarity_search(\n",
|
||||
" query,\n",
|
||||
" k=10,\n",
|
||||
" search_params={\n",
|
||||
" \"model\": \"exact\",\n",
|
||||
" \"vector_field\": \"my_vec\",\n",
|
||||
" \"text_field\": \"my_text\",\n",
|
||||
" },\n",
|
||||
" )\n",
|
||||
" print(docs[0].page_content)\n",
|
||||
"\n",
|
||||
" docs = docsearch.similarity_search(\n",
|
||||
" query,\n",
|
||||
" k=10,\n",
|
||||
" search_params={\n",
|
||||
" \"model\": \"exact\",\n",
|
||||
" \"similarity\": \"l2\",\n",
|
||||
" \"vector_field\": \"my_vec\",\n",
|
||||
" \"text_field\": \"my_text\",\n",
|
||||
" },\n",
|
||||
" )\n",
|
||||
" print(docs[0].page_content)\n",
|
||||
"\n",
|
||||
" docs = docsearch.similarity_search(\n",
|
||||
" query,\n",
|
||||
" k=10,\n",
|
||||
" search_params={\n",
|
||||
" \"model\": \"exact\",\n",
|
||||
" \"similarity\": \"cosine\",\n",
|
||||
" \"vector_field\": \"my_vec\",\n",
|
||||
" \"text_field\": \"my_text\",\n",
|
||||
" },\n",
|
||||
" )\n",
|
||||
" print(docs[0].page_content)\n",
|
||||
"\n",
|
||||
" docs = docsearch.similarity_search(\n",
|
||||
" query,\n",
|
||||
" k=10,\n",
|
||||
" search_params={\n",
|
||||
" \"model\": \"lsh\",\n",
|
||||
" \"similarity\": \"cosine\",\n",
|
||||
" \"candidates\": 10,\n",
|
||||
" \"vector_field\": \"my_vec\",\n",
|
||||
" \"text_field\": \"my_text\",\n",
|
||||
" },\n",
|
||||
" )\n",
|
||||
" print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"With filter case"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def test_dense_float_vectore_exact_with_filter() -> None:\n",
|
||||
" \"\"\"\n",
|
||||
" Test indexing with vectore type knn_dense_float_vector and default model/similarity\n",
|
||||
" this mapping is compatible with model of exact and similarity of l2/cosine\n",
|
||||
" \"\"\"\n",
|
||||
" docsearch = EcloudESVectorStore.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
" es_url=ES_URL,\n",
|
||||
" user=USER,\n",
|
||||
" password=PASSWORD,\n",
|
||||
" index_name=indexname,\n",
|
||||
" refresh_indices=True,\n",
|
||||
" text_field=\"my_text\",\n",
|
||||
" vector_field=\"my_vec\",\n",
|
||||
" vector_type=\"knn_dense_float_vector\",\n",
|
||||
" )\n",
|
||||
" # filter={\"match_all\": {}} ,default\n",
|
||||
" docs = docsearch.similarity_search(\n",
|
||||
" query,\n",
|
||||
" k=10,\n",
|
||||
" filter={\"match_all\": {}},\n",
|
||||
" search_params={\n",
|
||||
" \"model\": \"exact\",\n",
|
||||
" \"vector_field\": \"my_vec\",\n",
|
||||
" \"text_field\": \"my_text\",\n",
|
||||
" },\n",
|
||||
" )\n",
|
||||
" print(docs[0].page_content)\n",
|
||||
"\n",
|
||||
" # filter={\"term\": {\"my_text\": \"Jackson\"}}\n",
|
||||
" docs = docsearch.similarity_search(\n",
|
||||
" query,\n",
|
||||
" k=10,\n",
|
||||
" filter={\"term\": {\"my_text\": \"Jackson\"}},\n",
|
||||
" search_params={\n",
|
||||
" \"model\": \"exact\",\n",
|
||||
" \"vector_field\": \"my_vec\",\n",
|
||||
" \"text_field\": \"my_text\",\n",
|
||||
" },\n",
|
||||
" )\n",
|
||||
" print(docs[0].page_content)\n",
|
||||
"\n",
|
||||
" # filter={\"term\": {\"my_text\": \"president\"}}\n",
|
||||
" docs = docsearch.similarity_search(\n",
|
||||
" query,\n",
|
||||
" k=10,\n",
|
||||
" filter={\"term\": {\"my_text\": \"president\"}},\n",
|
||||
" search_params={\n",
|
||||
" \"model\": \"exact\",\n",
|
||||
" \"similarity\": \"l2\",\n",
|
||||
" \"vector_field\": \"my_vec\",\n",
|
||||
" \"text_field\": \"my_text\",\n",
|
||||
" },\n",
|
||||
" )\n",
|
||||
" print(docs[0].page_content)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
Reference in New Issue
Block a user