diff --git a/docs/modules/models/text_embedding/examples/elasticsearch.ipynb b/docs/modules/models/text_embedding/examples/elasticsearch.ipynb index a9aa79884da..3a9b6b7d0ab 100644 --- a/docs/modules/models/text_embedding/examples/elasticsearch.ipynb +++ b/docs/modules/models/text_embedding/examples/elasticsearch.ipynb @@ -1,124 +1,252 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "1eZl1oaVUNeC" + }, + "source": [ + "# Elasticsearch\n", + "Walkthrough of how to generate embeddings using a hosted embedding model in Elasticsearch\n", + "\n", + "The easiest way to instantiate the `ElasticsearchEmbeddings` class is either\n", + "- using the `from_credentials` constructor if you are using Elastic Cloud\n", + "- or using the `from_es_connection` constructor with any Elasticsearch cluster" + ] }, - "cells": [ - { - "cell_type": "code", - "source": [ - "!pip -q install elasticsearch langchain" - ], - "metadata": { - "id": "6dJxqebov4eU" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "import elasticsearch\n", - "from langchain.embeddings.elasticsearch import ElasticsearchEmbeddings" - ], - "metadata": { - "id": "RV7C3DUmv4aq" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# Define the model ID\n", - "model_id = 'your_model_id'" - ], - "metadata": { - "id": "MrT3jplJvp09" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# Instantiate ElasticsearchEmbeddings using credentials\n", - "embeddings = ElasticsearchEmbeddings.from_credentials(\n", - " model_id,\n", - " es_cloud_id='your_cloud_id', \n", - " es_user='your_user', \n", - " es_password='your_password'\n", - ")\n" - ], - "metadata": { - "id": 
"svtdnC-dvpxR" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# Create embeddings for multiple documents\n", - "documents = [\n", - " 'This is an example document.', \n", - " 'Another example document to generate embeddings for.'\n", - "]\n", - "document_embeddings = embeddings.embed_documents(documents)\n" - ], - "metadata": { - "id": "7DXZAK7Kvpth" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# Print document embeddings\n", - "for i, embedding in enumerate(document_embeddings):\n", - " print(f\"Embedding for document {i+1}: {embedding}\")\n" - ], - "metadata": { - "id": "K8ra75W_vpqy" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# Create an embedding for a single query\n", - "query = 'This is a single query.'\n", - "query_embedding = embeddings.embed_query(query)\n" - ], - "metadata": { - "id": "V4Q5kQo9vpna" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# Print query embedding\n", - "print(f\"Embedding for query: {query_embedding}\")\n" - ], - "metadata": { - "id": "O0oQDzGKvpkz" - }, - "execution_count": null, - "outputs": [] - } - ] -} \ No newline at end of file + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6dJxqebov4eU" + }, + "outputs": [], + "source": [ + "!pip -q install elasticsearch langchain" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RV7C3DUmv4aq" + }, + "outputs": [], + "source": [ + "import elasticsearch\n", + "from langchain.embeddings.elasticsearch import ElasticsearchEmbeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MrT3jplJvp09" + }, + "outputs": [], + "source": [ + "# Define the model ID\n", + "model_id = 'your_model_id'" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j5F-nwLVS_Zu" + }, + "source": [ 
+ "## Testing with `from_credentials`\n", + "This requires an Elastic Cloud `cloud_id`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "svtdnC-dvpxR" + }, + "outputs": [], + "source": [ + "# Instantiate ElasticsearchEmbeddings using credentials\n", + "embeddings = ElasticsearchEmbeddings.from_credentials(\n", + " model_id,\n", + " es_cloud_id='your_cloud_id', \n", + " es_user='your_user', \n", + " es_password='your_password'\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7DXZAK7Kvpth" + }, + "outputs": [], + "source": [ + "# Create embeddings for multiple documents\n", + "documents = [\n", + " 'This is an example document.', \n", + " 'Another example document to generate embeddings for.'\n", + "]\n", + "document_embeddings = embeddings.embed_documents(documents)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "K8ra75W_vpqy" + }, + "outputs": [], + "source": [ + "# Print document embeddings\n", + "for i, embedding in enumerate(document_embeddings):\n", + " print(f\"Embedding for document {i+1}: {embedding}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "V4Q5kQo9vpna" + }, + "outputs": [], + "source": [ + "# Create an embedding for a single query\n", + "query = 'This is a single query.'\n", + "query_embedding = embeddings.embed_query(query)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "O0oQDzGKvpkz" + }, + "outputs": [], + "source": [ + "# Print query embedding\n", + "print(f\"Embedding for query: {query_embedding}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rHN03yV6TJ5q" + }, + "source": [ + "## Testing with Existing Elasticsearch client connection\n", + "This can be used with any Elasticsearch deployment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GMQcJDwBTJFm" + }, + 
"outputs": [], + "source": [ + "# Create Elasticsearch connection\n", + "es_connection = Elasticsearch(\n", + " hosts=['https://es_cluster_url:port'], \n", + " basic_auth=('user', 'password')\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WTYIU4u3TJO1" + }, + "outputs": [], + "source": [ + "# Instantiate ElasticsearchEmbeddings using es_connection\n", + "embeddings = ElasticsearchEmbeddings.from_es_connection(\n", + " model_id,\n", + " es_connection,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4gdAUHwoTJO3" + }, + "outputs": [], + "source": [ + "# Create embeddings for multiple documents\n", + "documents = [\n", + " 'This is an example document.', \n", + " 'Another example document to generate embeddings for.'\n", + "]\n", + "document_embeddings = embeddings.embed_documents(documents)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RC_-tov6TJO3" + }, + "outputs": [], + "source": [ + "# Print document embeddings\n", + "for i, embedding in enumerate(document_embeddings):\n", + " print(f\"Embedding for document {i+1}: {embedding}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6GEnHBqETJO3" + }, + "outputs": [], + "source": [ + "# Create an embedding for a single query\n", + "query = 'This is a single query.'\n", + "query_embedding = embeddings.embed_query(query)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-kyUQAXDTJO4" + }, + "outputs": [], + "source": [ + "# Print query embedding\n", + "print(f\"Embedding for query: {query_embedding}\")\n" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + 
"mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/langchain/embeddings/elasticsearch.py b/langchain/embeddings/elasticsearch.py index 78d7dec04cf..9d3b1192310 100644 --- a/langchain/embeddings/elasticsearch.py +++ b/langchain/embeddings/elasticsearch.py @@ -5,6 +5,7 @@ from typing import TYPE_CHECKING, List, Optional from langchain.utils import get_from_env if TYPE_CHECKING: + from elasticsearch import Elasticsearch from elasticsearch.client import MlClient from langchain.embeddings.base import Embeddings @@ -110,6 +111,68 @@ class ElasticsearchEmbeddings(Embeddings): client = MlClient(es_connection) return cls(client, model_id, input_field=input_field) + @classmethod + def from_es_connection( + cls, + model_id: str, + es_connection: Elasticsearch, + input_field: str = "text_field", + ) -> ElasticsearchEmbeddings: + """ + Instantiate embeddings from an existing Elasticsearch connection. + + This method provides a way to create an instance of the ElasticsearchEmbeddings + class using an existing Elasticsearch connection. The connection object is used + to create an MlClient, which is then used to initialize the + ElasticsearchEmbeddings instance. + + Args: + model_id (str): The model_id of the model deployed in the Elasticsearch cluster. + es_connection (elasticsearch.Elasticsearch): An existing Elasticsearch + connection object. input_field (str, optional): The name of the key for the + input text field in the document. Defaults to 'text_field'. + + Returns: + ElasticsearchEmbeddings: An instance of the ElasticsearchEmbeddings class. 
+ + Example Usage: + from elasticsearch import Elasticsearch + from langchain.embeddings import ElasticsearchEmbeddings + + # Define the model ID and input field name (if different from default) + model_id = "your_model_id" + # Optional, only if different from 'text_field' + input_field = "your_input_field" + + # Create Elasticsearch connection + es_connection = Elasticsearch( + hosts=["localhost:9200"], http_auth=("user", "password") + ) + + # Instantiate ElasticsearchEmbeddings using the existing connection + embeddings = ElasticsearchEmbeddings.from_es_connection( + model_id, + es_connection, + input_field=input_field, + ) + + documents = [ + "This is an example document.", + "Another example document to generate embeddings for.", + ] + embeddings_generator.embed_documents(documents) + """ + # Importing MlClient from elasticsearch.client within the method to + # avoid unnecessary import if the method is not used + from elasticsearch.client import MlClient + + # Create an MlClient from the given Elasticsearch connection + client = MlClient(es_connection) + + # Return a new instance of the ElasticsearchEmbeddings class with + # the MlClient, model_id, and input_field + return cls(client, model_id, input_field=input_field) + def _embedding_func(self, texts: List[str]) -> List[List[float]]: """ Generate embeddings for the given texts using the Elasticsearch model.