From d7c14cb6f9871056e17171fa161c7d73d0691b3f Mon Sep 17 00:00:00 2001
From: hulitaitai <146365078+hulitaitai@users.noreply.github.com>
Date: Tue, 26 Mar 2024 23:06:58 +0800
Subject: [PATCH] community[minor]: Add embeddings integration for text2vec
 (#19267)

Add a class that makes the open-source "text2vec" embedding model usable
from LangChain.

Install the model package first by running 'pip install -U text2vec'.
Example of calling the model through LangChain:

from langchain_community.embeddings.text2vec import Text2vecEmbeddings

            embedding = Text2vecEmbeddings()
            embedding.embed_documents([
                "This is a CoSENT(Cosine Sentence) model.",
                "It maps sentences to a 768 dimensional dense vector space.",
            ])
            embedding.embed_query(
                "It can be used for text matching or semantic search."
            )

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
Co-authored-by: Eugene Yurtsev <eugene@langchain.dev>
Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
---
 .../embeddings/text2vec.py                    | 78 +++++++++++++++++++
 1 file changed, 78 insertions(+)
 create mode 100644 libs/community/langchain_community/embeddings/text2vec.py

diff --git a/libs/community/langchain_community/embeddings/text2vec.py b/libs/community/langchain_community/embeddings/text2vec.py
new file mode 100644
index 00000000000..98cf5498c14
--- /dev/null
+++ b/libs/community/langchain_community/embeddings/text2vec.py
@@ -0,0 +1,78 @@
+"""Wrapper around text2vec embedding models."""
+
+from typing import Any, List, Optional
+
+from langchain_core.embeddings import Embeddings
+from langchain_core.pydantic_v1 import BaseModel
+
+
+class Text2vecEmbeddings(Embeddings, BaseModel):
+    """text2vec embedding models.
+
+    Install text2vec first, run 'pip install -U text2vec'.
+
+    Example:
+        .. code-block:: python
+
+            from langchain_community.embeddings.text2vec import Text2vecEmbeddings
+
+            embedding = Text2vecEmbeddings()
+            bookend.embed_documents([
+                "This is a CoSENT(Cosine Sentence) model.",
+                "It maps sentences to a 768 dimensional dense vector space.",
+            ])
+            bookend.embed_query(
+                "It can be used for text matching or semantic search."
+            )
+    """
+
+    model_name_or_path: Optional[str] = None
+    encoder_type: Any = "MEAN"
+    max_seq_length: int = 256
+    device: Optional[str] = None
+    model: Any = None
+
+    def __init__(
+        self,
+        *,
+        model: Any = None,
+        model_name_or_path: Optional[str] = None,
+        **kwargs: Any,
+    ):
+        try:
+            from text2vec import SentenceModel
+        except ImportError as e:
+            raise ImportError(
+                "Unable to import text2vec, please install with "
+                "`pip install -U text2vec`."
+            ) from e
+
+        model_kwargs = {}
+        if model_name_or_path is not None:
+            model_kwargs["model_name_or_path"] = model_name_or_path
+        model = model or SentenceModel(**model_kwargs, **kwargs)
+        super().__init__(model=model, model_name_or_path=model_name_or_path, **kwargs)
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        """Embed documents using the text2vec embeddings model.
+
+        Args:
+            texts: The list of texts to embed.
+
+        Returns:
+            List of embeddings, one for each text.
+        """
+
+        return self.model.encode(texts)
+
+    def embed_query(self, text: str) -> List[float]:
+        """Embed a query using the text2vec embeddings model.
+
+        Args:
+            text: The text to embed.
+
+        Returns:
+            Embeddings for the text.
+        """
+
+        return self.model.encode(text)