# langchain/libs/community/langchain_community/embeddings/clova.py

from __future__ import annotations

from typing import Any, Dict, List, Optional, cast

import requests
from langchain_core._api.deprecation import deprecated
from langchain_core.embeddings import Embeddings
from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env
from pydantic import BaseModel, ConfigDict, SecretStr, model_validator


@deprecated(
    since="0.3.4",
    removal="1.0.0",
    alternative_import="langchain_community.ClovaXEmbeddings",
)
class ClovaEmbeddings(BaseModel, Embeddings):
    """
    Clova's embedding service.

    To use this service, you should have the following environment variables
    set with your API tokens and application ID, or pass them as named
    parameters to the constructor:

    - ``CLOVA_EMB_API_KEY``: API key for accessing Clova's embedding service.
    - ``CLOVA_EMB_APIGW_API_KEY``: API gateway key for enhanced security.
    - ``CLOVA_EMB_APP_ID``: Application ID for identifying your application.

    Every text is embedded with its own HTTP POST to
    ``{endpoint_url}/{model}/{app_id}``; ``timeout`` bounds how long each of
    those requests may wait on the server.

    Example:
        .. code-block:: python

            from langchain_community.embeddings import ClovaEmbeddings
            embeddings = ClovaEmbeddings(
                clova_emb_api_key='your_clova_emb_api_key',
                clova_emb_apigw_api_key='your_clova_emb_apigw_api_key',
                app_id='your_app_id'
            )

            query_text = "This is a test query."
            query_result = embeddings.embed_query(query_text)

            document_text = "This is a test document."
            document_result = embeddings.embed_documents([document_text])

    """

    endpoint_url: str = (
        "https://clovastudio.apigw.ntruss.com/testapp/v1/api-tools/embedding"
    )
    """Endpoint URL to use."""
    model: str = "clir-emb-dolphin"
    """Embedding model name to use."""
    clova_emb_api_key: Optional[SecretStr] = None
    """API key for accessing Clova's embedding service."""
    clova_emb_apigw_api_key: Optional[SecretStr] = None
    """API gateway key for enhanced security."""
    app_id: Optional[SecretStr] = None
    """Application ID for identifying your application."""
    timeout: Optional[float] = 60.0
    """Seconds to wait for the embedding API before giving up.

    Passed straight to ``requests.post``. Set to ``None`` to wait indefinitely.
    """

    model_config = ConfigDict(
        extra="forbid",
    )

    @model_validator(mode="before")
    @classmethod
    def validate_environment(cls, values: Dict) -> Any:
        """Resolve the credentials from the constructor input or environment.

        Each credential is looked up in ``values`` first and then in its
        environment variable, and is stored back as a secret string so it is
        not exposed when the model is printed.

        Args:
            values: Raw constructor input, updated in place.

        Returns:
            The same mapping with the three credentials filled in.
        """
        # (field name, environment variable) pairs, resolved identically.
        credential_sources = (
            ("clova_emb_api_key", "CLOVA_EMB_API_KEY"),
            ("clova_emb_apigw_api_key", "CLOVA_EMB_APIGW_API_KEY"),
            ("app_id", "CLOVA_EMB_APP_ID"),
        )
        for field_name, env_name in credential_sources:
            values[field_name] = convert_to_secret_str(
                get_from_dict_or_env(values, field_name, env_name)
            )
        return values

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """
        Embed a list of texts and return their embeddings.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text, in input order.

        Raises:
            ValueError: If any request fails or returns an unusable payload.
        """
        return [self._embed_text(text) for text in texts]

    def embed_query(self, text: str) -> List[float]:
        """
        Embed a single query text and return its embedding.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.

        Raises:
            ValueError: If the request fails or returns an unusable payload.
        """
        return self._embed_text(text)

    def _embed_text(self, text: str) -> List[float]:
        """
        Internal method to call the embedding API and handle the response.

        Args:
            text: The text to embed.

        Returns:
            The value found at ``result.embedding`` in the JSON response.

        Raises:
            ValueError: If the API answers with a non-200 status, a body that
                is not JSON, or JSON without ``result.embedding``.
        """
        # HTTP headers for authorization
        headers = {
            "X-NCP-CLOVASTUDIO-API-KEY": cast(
                SecretStr, self.clova_emb_api_key
            ).get_secret_value(),
            "X-NCP-APIGW-API-KEY": cast(
                SecretStr, self.clova_emb_apigw_api_key
            ).get_secret_value(),
            "Content-Type": "application/json",
        }

        # send request; the timeout keeps a stalled connection from blocking
        # the caller forever
        app_id = cast(SecretStr, self.app_id).get_secret_value()
        response = requests.post(
            f"{self.endpoint_url}/{self.model}/{app_id}",
            headers=headers,
            json={"text": text},
            timeout=self.timeout,
        )

        # check for errors
        if response.status_code != 200:
            raise ValueError(
                f"API request failed with status {response.status_code}: {response.text}"
            )

        try:
            response_data = response.json()
        except ValueError as err:
            # The JSON decode errors raised by requests subclass ValueError;
            # re-raise with the body attached so the failure is diagnosable.
            raise ValueError(
                f"API returned a non-JSON body with status "
                f"{response.status_code}: {response.text}"
            ) from err

        # Guard the shape before indexing: a JSON ``null`` or list body would
        # otherwise surface as an unrelated TypeError.
        result = (
            response_data.get("result") if isinstance(response_data, dict) else None
        )
        if not isinstance(result, dict) or "embedding" not in result:
            raise ValueError(
                f"API response is missing 'result.embedding' "
                f"(status {response.status_code}): {response.text}"
            )
        return result["embedding"]