Compare commits

...

1 Commits

Author SHA1 Message Date
LaloLalo1999
0c223c54a6 Add AI21Embeddings class text embeddings (#2253)
This pull request adds the AI21Embeddings class to the
langchain/langchain/embeddings module, providing support for generating
text embeddings using AI21 language models. The new class offers an
interface similar to the existing OpenAIEmbeddings class, making it easy
to integrate into the existing codebase.
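
For illustration, here is a minimal usage sketch. The API key is a
placeholder, and the import path assumes the class is exported from
langchain.embeddings:

```python
from langchain.embeddings import AI21Embeddings  # assumed export path

# "YOUR_AI21_API_KEY" is a placeholder; use your own key.
embeddings = AI21Embeddings(api_key="YOUR_AI21_API_KEY")

# Same call shape as OpenAIEmbeddings:
doc_vectors = embeddings.embed_documents(["LangChain composes LLM applications."])
query_vector = embeddings.embed_query("What does LangChain do?")
```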

The AI21Embeddings class has two main methods:

generate_embeddings(texts, model): Generates embeddings for a list of
texts using the specified AI21 model.

get_similarity(text1, text2, model): Calculates the similarity between
two texts using the specified AI21 model.
The implementation uses the AI21 Python SDK for interacting with the
AI21 API and requires an API key for initialization. The class is
designed to be flexible and easy to use, allowing users to switch
between different AI21 models by simply providing the model name as an
argument.
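
As a rough sketch of the two methods above (model name and texts are
just examples):

```python
embeddings = AI21Embeddings(api_key="YOUR_AI21_API_KEY")  # placeholder key

# Raw vectors, with the AI21 model chosen per call:
vectors = embeddings.generate_embeddings(
    ["The cat sat on the mat.", "A feline rested on the rug."],
    model="j2-grande-instruct",
)

# Cosine similarity between two texts, or None if embedding failed:
score = embeddings.get_similarity(
    "The cat sat on the mat.",
    "A feline rested on the rug.",
    model="j2-grande-instruct",
)
```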

This PR addresses issue #85 and provides an alternative to the existing
OpenAI-based text embeddings.

Please let me know if you have any questions or need further changes.
2023-04-03 14:26:58 -07:00


@@ -0,0 +1,104 @@
from typing import Any, List, Optional, Union

import ai21
import numpy as np
from scipy.spatial.distance import cosine

from langchain.embeddings.base import Embeddings


class AI21Embeddings(Embeddings):
    def __init__(self, api_key: str):
        """
        Initialize AI21Embeddings with the provided API key.
        """
        ai21.api_key = api_key

    def __enter__(self) -> "AI21Embeddings":
        """
        Enable usage of the 'with' statement for this class.
        """
        return self

    def __exit__(
        self,
        exc_type: Optional[type],
        exc_val: Optional[BaseException],
        exc_tb: Optional[Any],
    ) -> None:
        """
        Close the AI21Embeddings instance when used with the 'with' statement.
        """
        pass
    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a list of documents, returning one vector per input text."""
        embeddings = self.generate_embeddings(texts)
        return [embedding.tolist() for embedding in embeddings]

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query text."""
        embeddings = self.generate_embeddings([text])
        return embeddings[0].tolist()
    def generate_embeddings(
        self, texts: List[str], model: str = "j2-grande-instruct"
    ) -> List[np.ndarray]:
        """
        Generate embeddings for a list of texts using the specified model.

        :param texts: A list of texts for which to generate embeddings.
        :param model: The name of the AI21 model to use for generating embeddings.
        :return: A list of numpy arrays containing the embeddings.
        """
        prompt = "\n".join(
            [
                f"Embed the following text as a 768-dimensional vector: {text}"
                for text in texts
            ]
        )
        try:
            response: dict = ai21.Completion.execute(
                model=model,
                prompt=prompt,
                numResults=1,
                maxTokens=768 * len(texts),
                temperature=0,
                topKReturn=0,
                topP=1,
            )
            # Use the log-probabilities of the generated tokens as embedding
            # values, taking 768 tokens per input text.
            tokens = response["completions"][0]["data"]["tokens"]
            embeddings = [
                np.array(
                    [
                        token["generatedToken"]["logprob"]
                        for token in tokens[i * 768 : (i + 1) * 768]
                    ]
                )
                for i in range(len(texts))
            ]
            return embeddings
        except Exception as e:
            print(f"Error while generating embeddings: {e}")
            return []
    def get_similarity(
        self, text1: str, text2: str, model: str = "j2-grande-instruct"
    ) -> Union[float, None]:
        """
        Calculate the similarity between two texts using the specified model.

        :param text1: The first text.
        :param text2: The second text.
        :param model: The name of the AI21 model to use for generating embeddings.
        :return: A float representing the similarity between the two texts, or
            None if the embeddings could not be generated.
        """
        embeddings = self.generate_embeddings([text1, text2], model=model)
        # generate_embeddings returns an empty list on failure; numpy arrays
        # also cannot be truth-tested directly, so check the length instead.
        if len(embeddings) < 2:
            return None
        emb1, emb2 = embeddings
        try:
            similarity = 1 - cosine(emb1, emb2)
            return similarity
        except Exception as e:
            print(f"Error while calculating similarity: {e}")
            return None