From 3ecb903d49b44abc1e7a0fa6300b3340fdf38b6f Mon Sep 17 00:00:00 2001 From: Phat Vo <50920961+phtvo@users.noreply.github.com> Date: Fri, 8 Mar 2024 10:47:44 +0700 Subject: [PATCH] community[patch] : Tidy up and update Clarifai SDK functions (#18314) Description : * Tidy up, add missing docstring and fix unused params * Enable using session token --- .../embeddings/clarifai.py | 85 +++++++------------ .../langchain_community/llms/clarifai.py | 77 +++++++---------- .../vectorstores/clarifai.py | 83 ++++++++++++------ 3 files changed, 117 insertions(+), 128 deletions(-) diff --git a/libs/community/langchain_community/embeddings/clarifai.py b/libs/community/langchain_community/embeddings/clarifai.py index f3dd4022e01..e846df927a7 100644 --- a/libs/community/langchain_community/embeddings/clarifai.py +++ b/libs/community/langchain_community/embeddings/clarifai.py @@ -1,9 +1,8 @@ import logging -from typing import Dict, List, Optional +from typing import Any, Dict, List, Optional from langchain_core.embeddings import Embeddings -from langchain_core.pydantic_v1 import BaseModel, Extra, root_validator -from langchain_core.utils import get_from_dict_or_env +from langchain_core.pydantic_v1 import BaseModel, Extra, Field, root_validator logger = logging.getLogger(__name__) @@ -37,8 +36,11 @@ class ClarifaiEmbeddings(BaseModel, Embeddings): """Clarifai application id to use.""" user_id: Optional[str] = None """Clarifai user id to use.""" - pat: Optional[str] = None + pat: Optional[str] = Field(default=None, exclude=True) """Clarifai personal access token to use.""" + token: Optional[str] = Field(default=None, exclude=True) + """Clarifai session token to use.""" + model: Any = Field(default=None, exclude=True) #: :meta private: api_base: str = "https://api.clarifai.com" class Config: @@ -51,21 +53,32 @@ class ClarifaiEmbeddings(BaseModel, Embeddings): """Validate that we have all required info to access Clarifai platform and python package exists in environment.""" - values["pat"] = get_from_dict_or_env(values, "pat", "CLARIFAI_PAT") + try: + from clarifai.client.model import Model + except ImportError: + raise ImportError( + "Could not import clarifai python package. " + "Please install it with `pip install clarifai`." + ) user_id = values.get("user_id") app_id = values.get("app_id") model_id = values.get("model_id") + model_version_id = values.get("model_version_id") model_url = values.get("model_url") + api_base = values.get("api_base") + pat = values.get("pat") + token = values.get("token") - if model_url is not None and model_id is not None: - raise ValueError("Please provide either model_url or model_id, not both.") - - if model_url is None and model_id is None: - raise ValueError("Please provide one of model_url or model_id.") - - if model_url is None and model_id is not None: - if user_id is None or app_id is None: - raise ValueError("Please provide a user_id and app_id.") + values["model"] = Model( + url=model_url, + app_id=app_id, + user_id=user_id, + model_version=dict(id=model_version_id), + pat=pat, + token=token, + model_id=model_id, + base_url=api_base, + ) return values @@ -78,27 +91,9 @@ class ClarifaiEmbeddings(BaseModel, Embeddings): Returns: List of embeddings, one for each text. """ - try: - from clarifai.client.input import Inputs - from clarifai.client.model import Model - except ImportError: - raise ImportError( - "Could not import clarifai python package. " - "Please install it with `pip install clarifai`." - ) - if self.pat is not None: - pat = self.pat - if self.model_url is not None: - _model_init = Model(url=self.model_url, pat=pat) - else: - _model_init = Model( - model_id=self.model_id, - user_id=self.user_id, - app_id=self.app_id, - pat=pat, - ) + from clarifai.client.input import Inputs - input_obj = Inputs(pat=pat) + input_obj = Inputs.from_auth_helper(self.model.auth_helper) batch_size = 32 embeddings = [] @@ -109,7 +104,7 @@ class ClarifaiEmbeddings(BaseModel, Embeddings): input_obj.get_text_input(input_id=str(id), raw_text=inp) for id, inp in enumerate(batch) ] - predict_response = _model_init.predict(input_batch) + predict_response = self.model.predict(input_batch) embeddings.extend( [ list(output.data.embeddings[0].vector) @@ -131,27 +126,9 @@ class ClarifaiEmbeddings(BaseModel, Embeddings): Returns: Embeddings for the text. """ - try: - from clarifai.client.model import Model - except ImportError: - raise ImportError( - "Could not import clarifai python package. " - "Please install it with `pip install clarifai`." - ) - if self.pat is not None: - pat = self.pat - if self.model_url is not None: - _model_init = Model(url=self.model_url, pat=pat) - else: - _model_init = Model( - model_id=self.model_id, - user_id=self.user_id, - app_id=self.app_id, - pat=pat, - ) try: - predict_response = _model_init.predict_by_bytes( + predict_response = self.model.predict_by_bytes( bytes(text, "utf-8"), input_type="text" ) embeddings = [ diff --git a/libs/community/langchain_community/llms/clarifai.py b/libs/community/langchain_community/llms/clarifai.py index 7690bd9bdd2..78a9115d6b4 100644 --- a/libs/community/langchain_community/llms/clarifai.py +++ b/libs/community/langchain_community/llms/clarifai.py @@ -4,8 +4,7 @@ from typing import Any, Dict, List, Optional from langchain_core.callbacks import CallbackManagerForLLMRun from langchain_core.language_models.llms import LLM from langchain_core.outputs import Generation, LLMResult -from langchain_core.pydantic_v1 import Extra, root_validator -from langchain_core.utils import get_from_dict_or_env +from langchain_core.pydantic_v1 import Extra, Field, root_validator from langchain_community.llms.utils import enforce_stop_tokens @@ -42,8 +41,11 @@ class Clarifai(LLM): """Clarifai application id to use.""" user_id: Optional[str] = None """Clarifai user id to use.""" - pat: Optional[str] = None + pat: Optional[str] = Field(default=None, exclude=True) #: :meta private: """Clarifai personal access token to use.""" + token: Optional[str] = Field(default=None, exclude=True) #: :meta private: + """Clarifai session token to use.""" + model: Any = Field(default=None, exclude=True) #: :meta private: api_base: str = "https://api.clarifai.com" class Config: @@ -55,21 +57,32 @@ class Clarifai(LLM): def validate_environment(cls, values: Dict) -> Dict: """Validate that we have all required info to access Clarifai platform and python package exists in environment.""" - values["pat"] = get_from_dict_or_env(values, "pat", "CLARIFAI_PAT") + try: + from clarifai.client.model import Model + except ImportError: + raise ImportError( + "Could not import clarifai python package. " + "Please install it with `pip install clarifai`." + ) user_id = values.get("user_id") app_id = values.get("app_id") model_id = values.get("model_id") + model_version_id = values.get("model_version_id") model_url = values.get("model_url") + api_base = values.get("api_base") + pat = values.get("pat") + token = values.get("token") - if model_url is not None and model_id is not None: - raise ValueError("Please provide either model_url or model_id, not both.") - - if model_url is None and model_id is None: - raise ValueError("Please provide one of model_url or model_id.") - - if model_url is None and model_id is not None: - if user_id is None or app_id is None: - raise ValueError("Please provide a user_id and app_id.") + values["model"] = Model( + url=model_url, + app_id=app_id, + user_id=user_id, + model_version=dict(id=model_version_id), + pat=pat, + token=token, + model_id=model_id, + base_url=api_base, + ) return values @@ -117,28 +130,10 @@ class Clarifai(LLM): response = clarifai_llm("Tell me a joke.") """ - # If version_id None, Defaults to the latest model version - try: - from clarifai.client.model import Model - except ImportError: - raise ImportError( - "Could not import clarifai python package. " - "Please install it with `pip install clarifai`." - ) - if self.pat is not None: - pat = self.pat - if self.model_url is not None: - _model_init = Model(url=self.model_url, pat=pat) - else: - _model_init = Model( - model_id=self.model_id, - user_id=self.user_id, - app_id=self.app_id, - pat=pat, - ) + try: (inference_params := {}) if inference_params is None else inference_params - predict_response = _model_init.predict_by_bytes( + predict_response = self.model.predict_by_bytes( bytes(prompt, "utf-8"), input_type="text", inference_params=inference_params, @@ -165,27 +160,15 @@ class Clarifai(LLM): # TODO: add caching here. try: from clarifai.client.input import Inputs - from clarifai.client.model import Model except ImportError: raise ImportError( "Could not import clarifai python package. " "Please install it with `pip install clarifai`." ) - if self.pat is not None: - pat = self.pat - if self.model_url is not None: - _model_init = Model(url=self.model_url, pat=pat) - else: - _model_init = Model( - model_id=self.model_id, - user_id=self.user_id, - app_id=self.app_id, - pat=pat, - ) generations = [] batch_size = 32 - input_obj = Inputs(pat=pat) + input_obj = Inputs.from_auth_helper(self.model.auth_helper) try: for i in range(0, len(prompts), batch_size): batch = prompts[i : i + batch_size] @@ -196,7 +179,7 @@ class Clarifai(LLM): ( inference_params := {} ) if inference_params is None else inference_params - predict_response = _model_init.predict( + predict_response = self.model.predict( inputs=input_batch, inference_params=inference_params ) diff --git a/libs/community/langchain_community/vectorstores/clarifai.py b/libs/community/langchain_community/vectorstores/clarifai.py index 5d8ae8c86b0..34ff88421cc 100644 --- a/libs/community/langchain_community/vectorstores/clarifai.py +++ b/libs/community/langchain_community/vectorstores/clarifai.py @@ -36,8 +36,10 @@ class Clarifai(VectorStore): self, user_id: Optional[str] = None, app_id: Optional[str] = None, - number_of_docs: Optional[int] = None, + number_of_docs: Optional[int] = 4, pat: Optional[str] = None, + token: Optional[str] = None, + api_base: Optional[str] = "https://api.clarifai.com", ) -> None: """Initialize with Clarifai client. @@ -45,6 +47,7 @@ class Clarifai(VectorStore): user_id (Optional[str], optional): User ID. Defaults to None. app_id (Optional[str], optional): App ID. Defaults to None. pat (Optional[str], optional): Personal access token. Defaults to None. + token (Optional[str], optional): Session token. Defaults to None. number_of_docs (Optional[int], optional): Number of documents to return during vector search. Defaults to None. api_base (Optional[str], optional): API base. Defaults to None. @@ -52,21 +55,33 @@ class Clarifai(VectorStore): Raises: ValueError: If user ID, app ID or personal access token is not provided. """ - self._user_id = user_id or os.environ.get("CLARIFAI_USER_ID") - self._app_id = app_id or os.environ.get("CLARIFAI_APP_ID") - if pat: - os.environ["CLARIFAI_PAT"] = pat - self._pat = os.environ.get("CLARIFAI_PAT") - if self._user_id is None or self._app_id is None or self._pat is None: + _user_id = user_id or os.environ.get("CLARIFAI_USER_ID") + _app_id = app_id or os.environ.get("CLARIFAI_APP_ID") + if _user_id is None or _app_id is None: raise ValueError( - "Could not find CLARIFAI_USER_ID, CLARIFAI_APP_ID or\ - CLARIFAI_PAT in your environment. " - "Please set those env variables with a valid user ID, \ - app ID and personal access token \ - from https://clarifai.com/settings/security." + "Could not find CLARIFAI_USER_ID " + "or CLARIFAI_APP_ID in your environment. " + "Please set those env variables with a valid user ID, app ID" ) self._number_of_docs = number_of_docs + try: + from clarifai.client.search import Search + except ImportError as e: + raise ImportError( + "Could not import clarifai python package. " + "Please install it with `pip install clarifai`." + ) from e + + self._auth = Search( + user_id=_user_id, + app_id=_app_id, + top_k=number_of_docs, + pat=pat, + token=token, + base_url=api_base, + ).auth_helper + def add_texts( self, texts: Iterable[str], @@ -109,7 +124,7 @@ class Clarifai(VectorStore): ids ), "Number of text inputs and input ids should be the same." - input_obj = Inputs(app_id=self._app_id, user_id=self._user_id) + input_obj = Inputs.from_auth_helper(auth=self._auth) batch_size = 32 input_job_ids = [] for idx in range(0, length, batch_size): @@ -149,7 +164,7 @@ class Clarifai(VectorStore): def similarity_search_with_score( self, query: str, - k: int = 4, + k: Optional[int] = None, filters: Optional[dict] = None, **kwargs: Any, ) -> List[Tuple[Document, float]]: @@ -157,7 +172,8 @@ class Clarifai(VectorStore): Args: query (str): Query text to search for. - k (int): Number of results to return. Defaults to 4. + k (Optional[int]): Number of results to return. If not set, + it'll take _number_of_docs. Defaults to None. filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None. @@ -175,10 +191,9 @@ class Clarifai(VectorStore): ) from e # Get number of docs to return - if self._number_of_docs is not None: - k = self._number_of_docs + top_k = k or self._number_of_docs - search_obj = Search(user_id=self._user_id, app_id=self._app_id, top_k=k) + search_obj = Search.from_auth_helper(auth=self._auth, top_k=top_k) rank = [{"text_raw": query}] # Add filter by metadata if provided. if filters is not None: @@ -193,7 +208,7 @@ class Clarifai(VectorStore): def hit_to_document(hit: resources_pb2.Hit) -> Tuple[Document, float]: metadata = json_format.MessageToDict(hit.input.data.metadata) - h = {"Authorization": f"Key {self._pat}"} + h = dict(self._auth.metadata) request = requests.get(hit.input.data.text.url, headers=h) # override encoding by real educated guess as provided by chardet @@ -215,19 +230,20 @@ class Clarifai(VectorStore): def similarity_search( self, query: str, - k: int = 4, + k: Optional[int] = None, **kwargs: Any, ) -> List[Document]: """Run similarity search using Clarifai. Args: query: Text to look up documents similar to. - k: Number of Documents to return. Defaults to 4. + k: Number of Documents to return. + If not set, it'll take _number_of_docs. Defaults to None. Returns: List of Documents most similar to the query and score for each """ - docs_and_scores = self.similarity_search_with_score(query, **kwargs) + docs_and_scores = self.similarity_search_with_score(query, k=k, **kwargs) return [doc for doc, _ in docs_and_scores] @classmethod @@ -240,6 +256,7 @@ class Clarifai(VectorStore): app_id: Optional[str] = None, number_of_docs: Optional[int] = None, pat: Optional[str] = None, + token: Optional[str] = None, **kwargs: Any, ) -> Clarifai: """Create a Clarifai vectorstore from a list of texts. @@ -248,10 +265,14 @@ class Clarifai(VectorStore): user_id (str): User ID. app_id (str): App ID. texts (List[str]): List of texts to add. - number_of_docs (Optional[int]): Number of documents to return - during vector search. Defaults to None. - metadatas (Optional[List[dict]]): Optional list of metadatas. + number_of_docs (Optional[int]): Number of documents + to return during vector search. Defaults to None. + pat (Optional[str], optional): Personal access token. Defaults to None. + token (Optional[str], optional): Session token. Defaults to None. + metadatas (Optional[List[dict]]): Optional list + of metadatas. Defaults to None. + **kwargs: Additional keyword arguments to be passed to the Search. Returns: Clarifai: Clarifai vectorstore. @@ -261,6 +282,8 @@ class Clarifai(VectorStore): app_id=app_id, number_of_docs=number_of_docs, pat=pat, + token=token, + **kwargs, ) clarifai_vector_db.add_texts(texts=texts, metadatas=metadatas) return clarifai_vector_db @@ -274,6 +297,7 @@ class Clarifai(VectorStore): app_id: Optional[str] = None, number_of_docs: Optional[int] = None, pat: Optional[str] = None, + token: Optional[str] = None, **kwargs: Any, ) -> Clarifai: """Create a Clarifai vectorstore from a list of documents. @@ -282,8 +306,11 @@ class Clarifai(VectorStore): user_id (str): User ID. app_id (str): App ID. documents (List[Document]): List of documents to add. - number_of_docs (Optional[int]): Number of documents to return - during vector search. Defaults to None. + number_of_docs (Optional[int]): Number of documents + to return during vector search. Defaults to None. + pat (Optional[str], optional): Personal access token. Defaults to None. + token (Optional[str], optional): Session token. Defaults to None. + **kwargs: Additional keyword arguments to be passed to the Search. Returns: Clarifai: Clarifai vectorstore. @@ -297,4 +324,6 @@ class Clarifai(VectorStore): number_of_docs=number_of_docs, pat=pat, metadatas=metadatas, + token=token, + **kwargs, )