From 95883a99a9ee9010e76f72b614c138bfd42500e7 Mon Sep 17 00:00:00 2001
From: Gabriele Ghisleni <74197369+GabrieleGhisleni@users.noreply.github.com>
Date: Thu, 6 Jun 2024 16:36:43 +0200
Subject: [PATCH] docs: ElasticsearchCacheStore in stores integrations
 documentation (#22612)

The package for LangChain integrations with Elasticsearch
https://github.com/langchain-ai/langchain-elastic contains an
Elasticsearch byte store cache integration (see
https://github.com/langchain-ai/langchain-elastic/pull/27). This is the
documentation contribution on the page dedicated to stores integrations.

Co-authored-by: Gabriele Ghisleni
---
 .../integrations/stores/elasticsearch.ipynb | 139 ++++++++++++++++++
 1 file changed, 139 insertions(+)
 create mode 100644 docs/docs/integrations/stores/elasticsearch.ipynb

diff --git a/docs/docs/integrations/stores/elasticsearch.ipynb b/docs/docs/integrations/stores/elasticsearch.ipynb
new file mode 100644
index 00000000000..f55919c23d6
--- /dev/null
+++ b/docs/docs/integrations/stores/elasticsearch.ipynb
@@ -0,0 +1,139 @@
{
 "cells": [
  {
   "cell_type": "raw",
   "metadata": {},
   "source": [
    "---\n",
    "sidebar_label: Elasticsearch\n",
    "---"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ElasticsearchEmbeddingsCache\n",
    "\n",
    "The `ElasticsearchEmbeddingsCache` is a `ByteStore` implementation that uses your Elasticsearch instance for efficient storage and retrieval of embeddings.\n",
    "\n",
    "First, install the LangChain integration with Elasticsearch."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install -U langchain-elasticsearch"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": "It can be instantiated and wired into an embedder using the `CacheBackedEmbeddings.from_bytes_store` method."
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.embeddings import CacheBackedEmbeddings\n",
    "from langchain_elasticsearch import ElasticsearchEmbeddingsCache\n",
    "from langchain_openai import OpenAIEmbeddings\n",
    "\n",
    "underlying_embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")\n",
    "\n",
    "store = ElasticsearchEmbeddingsCache(\n",
    "    es_url=\"http://localhost:9200\",\n",
    "    index_name=\"llm-chat-cache\",\n",
    "    metadata={\"project\": \"my_chatgpt_project\"},\n",
    "    namespace=\"my_chatgpt_project\",\n",
    ")\n",
    "\n",
    "embeddings = CacheBackedEmbeddings.from_bytes_store(\n",
    "    underlying_embeddings=underlying_embeddings,\n",
    "    document_embedding_cache=store,\n",
    "    query_embedding_cache=store,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The `index_name` parameter can also accept aliases. This makes it possible to use [ILM: Manage the index lifecycle](https://www.elastic.co/guide/en/elasticsearch/reference/current/index-lifecycle-management.html), which we suggest considering for managing retention and controlling cache growth.\n",
    "\n",
    "Look at the class docstring for all parameters."
   ]
  },
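  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a rough sketch of that setup (the policy, index, and alias names below are illustrative assumptions, not part of `langchain-elasticsearch`), you could create an ILM policy and a rollover alias with the official `elasticsearch` Python client, then pass the alias as `index_name`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from elasticsearch import Elasticsearch\n",
    "\n",
    "es = Elasticsearch(\"http://localhost:9200\")\n",
    "\n",
    "# Illustrative policy: roll the backing index over weekly and delete\n",
    "# rolled-over indices (i.e. old cache entries) after 30 days.\n",
    "es.ilm.put_lifecycle(\n",
    "    name=\"llm-chat-cache-policy\",\n",
    "    policy={\n",
    "        \"phases\": {\n",
    "            \"hot\": {\"actions\": {\"rollover\": {\"max_age\": \"7d\"}}},\n",
    "            \"delete\": {\"min_age\": \"30d\", \"actions\": {\"delete\": {}}},\n",
    "        }\n",
    "    },\n",
    ")\n",
    "\n",
    "# First backing index, with a write alias managed by the policy.\n",
    "es.indices.create(\n",
    "    index=\"llm-chat-cache-000001\",\n",
    "    aliases={\"llm-chat-cache\": {\"is_write_index\": True}},\n",
    "    settings={\n",
    "        \"index.lifecycle.name\": \"llm-chat-cache-policy\",\n",
    "        \"index.lifecycle.rollover_alias\": \"llm-chat-cache\",\n",
    "    },\n",
    ")\n",
    "\n",
    "# The alias \"llm-chat-cache\" can now be passed as index_name above."
   ]
  },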
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Any, Dict, List\n", + "\n", + "from langchain_elasticsearch import ElasticsearchEmbeddingsCache\n", + "\n", + "\n", + "class SearchableElasticsearchStore(ElasticsearchEmbeddingsCache):\n", + " @property\n", + " def mapping(self) -> Dict[str, Any]:\n", + " mapping = super().mapping\n", + " mapping[\"mappings\"][\"properties\"][\"vector\"] = {\n", + " \"type\": \"dense_vector\",\n", + " \"dims\": 1536,\n", + " \"index\": True,\n", + " \"similarity\": \"dot_product\",\n", + " }\n", + " return mapping\n", + "\n", + " def build_document(self, llm_input: str, vector: List[float]) -> Dict[str, Any]:\n", + " body = super().build_document(llm_input, vector)\n", + " body[\"vector\"] = vector\n", + " return body" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": "When overriding the mapping and the document building, please only make additive modifications, keeping the base mapping intact." + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}