community[patch]: Tidy up and update Clarifai SDK functions (#18314)

Description:
* Tidy up, add missing docstrings, and fix unused params
* Enable using a session token (see the usage sketch below)
Phat Vo 2024-03-08 10:47:44 +07:00 committed by GitHub
parent 93b87f2bfb
commit 3ecb903d49
3 changed files with 117 additions and 128 deletions
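
As a quick orientation for reviewers, here is a rough usage sketch of the new session-token option. It assumes the classes are exported from `langchain_community.embeddings` as usual, and the model URL and token values are placeholders; either `pat=` or the new `token=` is forwarded to the Clarifai SDK client that the validators below now build once.

    from langchain_community.embeddings import ClarifaiEmbeddings

    # A Clarifai session token can now be passed instead of a personal
    # access token; both are handed to the underlying clarifai SDK client.
    embeddings = ClarifaiEmbeddings(
        model_url="https://clarifai.com/<user>/<app>/models/<embedding-model>",
        token="<CLARIFAI_SESSION_TOKEN>",  # or pat="<CLARIFAI_PAT>"
    )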

View File: langchain_community/embeddings/clarifai.py

@@ -1,9 +1,8 @@
 import logging
-from typing import Dict, List, Optional
+from typing import Any, Dict, List, Optional

 from langchain_core.embeddings import Embeddings
-from langchain_core.pydantic_v1 import BaseModel, Extra, root_validator
-from langchain_core.utils import get_from_dict_or_env
+from langchain_core.pydantic_v1 import BaseModel, Extra, Field, root_validator

 logger = logging.getLogger(__name__)

@@ -37,8 +36,11 @@ class ClarifaiEmbeddings(BaseModel, Embeddings):
     """Clarifai application id to use."""
     user_id: Optional[str] = None
     """Clarifai user id to use."""
-    pat: Optional[str] = None
+    pat: Optional[str] = Field(default=None, exclude=True)
     """Clarifai personal access token to use."""
+    token: Optional[str] = Field(default=None, exclude=True)
+    """Clarifai session token to use."""
+    model: Any = Field(default=None, exclude=True)  #: :meta private:
     api_base: str = "https://api.clarifai.com"

     class Config:

@@ -51,21 +53,32 @@ class ClarifaiEmbeddings(BaseModel, Embeddings):
         """Validate that we have all required info to access Clarifai
         platform and python package exists in environment."""
-        values["pat"] = get_from_dict_or_env(values, "pat", "CLARIFAI_PAT")
+        try:
+            from clarifai.client.model import Model
+        except ImportError:
+            raise ImportError(
+                "Could not import clarifai python package. "
+                "Please install it with `pip install clarifai`."
+            )
         user_id = values.get("user_id")
         app_id = values.get("app_id")
         model_id = values.get("model_id")
+        model_version_id = values.get("model_version_id")
         model_url = values.get("model_url")
+        api_base = values.get("api_base")
+        pat = values.get("pat")
+        token = values.get("token")

-        if model_url is not None and model_id is not None:
-            raise ValueError("Please provide either model_url or model_id, not both.")
-
-        if model_url is None and model_id is None:
-            raise ValueError("Please provide one of model_url or model_id.")
-
-        if model_url is None and model_id is not None:
-            if user_id is None or app_id is None:
-                raise ValueError("Please provide a user_id and app_id.")
+        values["model"] = Model(
+            url=model_url,
+            app_id=app_id,
+            user_id=user_id,
+            model_version=dict(id=model_version_id),
+            pat=pat,
+            token=token,
+            model_id=model_id,
+            base_url=api_base,
+        )

         return values

@@ -78,27 +91,9 @@ class ClarifaiEmbeddings(BaseModel, Embeddings):
         Returns:
             List of embeddings, one for each text.
         """
-        try:
-            from clarifai.client.input import Inputs
-            from clarifai.client.model import Model
-        except ImportError:
-            raise ImportError(
-                "Could not import clarifai python package. "
-                "Please install it with `pip install clarifai`."
-            )
-        if self.pat is not None:
-            pat = self.pat
-        if self.model_url is not None:
-            _model_init = Model(url=self.model_url, pat=pat)
-        else:
-            _model_init = Model(
-                model_id=self.model_id,
-                user_id=self.user_id,
-                app_id=self.app_id,
-                pat=pat,
-            )
-        input_obj = Inputs(pat=pat)
+        from clarifai.client.input import Inputs
+
+        input_obj = Inputs.from_auth_helper(self.model.auth_helper)
         batch_size = 32
         embeddings = []

@@ -109,7 +104,7 @@ class ClarifaiEmbeddings(BaseModel, Embeddings):
                 input_obj.get_text_input(input_id=str(id), raw_text=inp)
                 for id, inp in enumerate(batch)
             ]
-            predict_response = _model_init.predict(input_batch)
+            predict_response = self.model.predict(input_batch)
             embeddings.extend(
                 [
                     list(output.data.embeddings[0].vector)

@@ -131,27 +126,9 @@ class ClarifaiEmbeddings(BaseModel, Embeddings):
         Returns:
             Embeddings for the text.
         """
-        try:
-            from clarifai.client.model import Model
-        except ImportError:
-            raise ImportError(
-                "Could not import clarifai python package. "
-                "Please install it with `pip install clarifai`."
-            )
-        if self.pat is not None:
-            pat = self.pat
-        if self.model_url is not None:
-            _model_init = Model(url=self.model_url, pat=pat)
-        else:
-            _model_init = Model(
-                model_id=self.model_id,
-                user_id=self.user_id,
-                app_id=self.app_id,
-                pat=pat,
-            )
         try:
-            predict_response = _model_init.predict_by_bytes(
+            predict_response = self.model.predict_by_bytes(
                 bytes(text, "utf-8"), input_type="text"
             )
             embeddings = [
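
Continuing the usage sketch from the commit summary above, the embed calls now reuse the single Model client built in the root validator (method names are those shown in this diff; inputs are placeholders):

    # Both methods reuse embeddings.model (built once in the validator)
    # and batch inputs 32 at a time via Inputs.from_auth_helper.
    doc_vectors = embeddings.embed_documents(["hello clarifai", "tokens work too"])
    query_vector = embeddings.embed_query("hello")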

View File: langchain_community/llms/clarifai.py

@@ -4,8 +4,7 @@ from typing import Any, Dict, List, Optional
 from langchain_core.callbacks import CallbackManagerForLLMRun
 from langchain_core.language_models.llms import LLM
 from langchain_core.outputs import Generation, LLMResult
-from langchain_core.pydantic_v1 import Extra, root_validator
-from langchain_core.utils import get_from_dict_or_env
+from langchain_core.pydantic_v1 import Extra, Field, root_validator

 from langchain_community.llms.utils import enforce_stop_tokens

@@ -42,8 +41,11 @@ class Clarifai(LLM):
     """Clarifai application id to use."""
     user_id: Optional[str] = None
     """Clarifai user id to use."""
-    pat: Optional[str] = None
+    pat: Optional[str] = Field(default=None, exclude=True)  #: :meta private:
     """Clarifai personal access token to use."""
+    token: Optional[str] = Field(default=None, exclude=True)  #: :meta private:
+    """Clarifai session token to use."""
+    model: Any = Field(default=None, exclude=True)  #: :meta private:
     api_base: str = "https://api.clarifai.com"

     class Config:

@@ -55,21 +57,32 @@ class Clarifai(LLM):
     def validate_environment(cls, values: Dict) -> Dict:
         """Validate that we have all required info to access Clarifai
         platform and python package exists in environment."""
-        values["pat"] = get_from_dict_or_env(values, "pat", "CLARIFAI_PAT")
+        try:
+            from clarifai.client.model import Model
+        except ImportError:
+            raise ImportError(
+                "Could not import clarifai python package. "
+                "Please install it with `pip install clarifai`."
+            )
         user_id = values.get("user_id")
         app_id = values.get("app_id")
         model_id = values.get("model_id")
+        model_version_id = values.get("model_version_id")
         model_url = values.get("model_url")
+        api_base = values.get("api_base")
+        pat = values.get("pat")
+        token = values.get("token")

-        if model_url is not None and model_id is not None:
-            raise ValueError("Please provide either model_url or model_id, not both.")
-
-        if model_url is None and model_id is None:
-            raise ValueError("Please provide one of model_url or model_id.")
-
-        if model_url is None and model_id is not None:
-            if user_id is None or app_id is None:
-                raise ValueError("Please provide a user_id and app_id.")
+        values["model"] = Model(
+            url=model_url,
+            app_id=app_id,
+            user_id=user_id,
+            model_version=dict(id=model_version_id),
+            pat=pat,
+            token=token,
+            model_id=model_id,
+            base_url=api_base,
+        )

         return values

@@ -117,28 +130,10 @@ class Clarifai(LLM):
                 response = clarifai_llm("Tell me a joke.")
         """
-        # If version_id None, Defaults to the latest model version
-        try:
-            from clarifai.client.model import Model
-        except ImportError:
-            raise ImportError(
-                "Could not import clarifai python package. "
-                "Please install it with `pip install clarifai`."
-            )
-        if self.pat is not None:
-            pat = self.pat
-        if self.model_url is not None:
-            _model_init = Model(url=self.model_url, pat=pat)
-        else:
-            _model_init = Model(
-                model_id=self.model_id,
-                user_id=self.user_id,
-                app_id=self.app_id,
-                pat=pat,
-            )
         try:
             (inference_params := {}) if inference_params is None else inference_params
-            predict_response = _model_init.predict_by_bytes(
+            predict_response = self.model.predict_by_bytes(
                 bytes(prompt, "utf-8"),
                 input_type="text",
                 inference_params=inference_params,

@@ -165,27 +160,15 @@ class Clarifai(LLM):
         # TODO: add caching here.
         try:
             from clarifai.client.input import Inputs
-            from clarifai.client.model import Model
         except ImportError:
             raise ImportError(
                 "Could not import clarifai python package. "
                 "Please install it with `pip install clarifai`."
             )
-        if self.pat is not None:
-            pat = self.pat
-        if self.model_url is not None:
-            _model_init = Model(url=self.model_url, pat=pat)
-        else:
-            _model_init = Model(
-                model_id=self.model_id,
-                user_id=self.user_id,
-                app_id=self.app_id,
-                pat=pat,
-            )
         generations = []
         batch_size = 32
-        input_obj = Inputs(pat=pat)
+        input_obj = Inputs.from_auth_helper(self.model.auth_helper)
         try:
             for i in range(0, len(prompts), batch_size):
                 batch = prompts[i : i + batch_size]

@@ -196,7 +179,7 @@ class Clarifai(LLM):
                 (
                     inference_params := {}
                 ) if inference_params is None else inference_params
-                predict_response = _model_init.predict(
+                predict_response = self.model.predict(
                     inputs=input_batch, inference_params=inference_params
                 )
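
A corresponding sketch for the LLM wrapper, assuming the class is exported from `langchain_community.llms`; the IDs and token are placeholders, and `invoke` is the standard LangChain entry point that routes into `_call` above:

    from langchain_community.llms import Clarifai

    # user_id/app_id/model_id can be given instead of model_url; token= is
    # accepted alongside or instead of pat=, mirroring the validator change.
    clarifai_llm = Clarifai(
        user_id="<USER_ID>",
        app_id="<APP_ID>",
        model_id="<MODEL_ID>",
        token="<CLARIFAI_SESSION_TOKEN>",
    )
    print(clarifai_llm.invoke("Tell me a joke."))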

View File: langchain_community/vectorstores/clarifai.py

@@ -36,8 +36,10 @@ class Clarifai(VectorStore):
         self,
         user_id: Optional[str] = None,
         app_id: Optional[str] = None,
-        number_of_docs: Optional[int] = None,
+        number_of_docs: Optional[int] = 4,
         pat: Optional[str] = None,
+        token: Optional[str] = None,
+        api_base: Optional[str] = "https://api.clarifai.com",
     ) -> None:
         """Initialize with Clarifai client.

@@ -45,6 +47,7 @@ class Clarifai(VectorStore):
             user_id (Optional[str], optional): User ID. Defaults to None.
             app_id (Optional[str], optional): App ID. Defaults to None.
             pat (Optional[str], optional): Personal access token. Defaults to None.
+            token (Optional[str], optional): Session token. Defaults to None.
             number_of_docs (Optional[int], optional): Number of documents to return
                 during vector search. Defaults to None.
             api_base (Optional[str], optional): API base. Defaults to None.

@@ -52,21 +55,33 @@ class Clarifai(VectorStore):
         Raises:
             ValueError: If user ID, app ID or personal access token is not provided.
         """
-        self._user_id = user_id or os.environ.get("CLARIFAI_USER_ID")
-        self._app_id = app_id or os.environ.get("CLARIFAI_APP_ID")
-        if pat:
-            os.environ["CLARIFAI_PAT"] = pat
-        self._pat = os.environ.get("CLARIFAI_PAT")
-        if self._user_id is None or self._app_id is None or self._pat is None:
+        _user_id = user_id or os.environ.get("CLARIFAI_USER_ID")
+        _app_id = app_id or os.environ.get("CLARIFAI_APP_ID")
+        if _user_id is None or _app_id is None:
             raise ValueError(
-                "Could not find CLARIFAI_USER_ID, CLARIFAI_APP_ID or\
-                CLARIFAI_PAT in your environment. "
-                "Please set those env variables with a valid user ID, \
-                app ID and personal access token \
-                from https://clarifai.com/settings/security."
+                "Could not find CLARIFAI_USER_ID "
+                "or CLARIFAI_APP_ID in your environment. "
+                "Please set those env variables with a valid user ID, app ID"
             )
         self._number_of_docs = number_of_docs

+        try:
+            from clarifai.client.search import Search
+        except ImportError as e:
+            raise ImportError(
+                "Could not import clarifai python package. "
+                "Please install it with `pip install clarifai`."
+            ) from e
+        self._auth = Search(
+            user_id=_user_id,
+            app_id=_app_id,
+            top_k=number_of_docs,
+            pat=pat,
+            token=token,
+            base_url=api_base,
+        ).auth_helper
+
     def add_texts(
         self,
         texts: Iterable[str],

@@ -109,7 +124,7 @@ class Clarifai(VectorStore):
             ids
         ), "Number of text inputs and input ids should be the same."

-        input_obj = Inputs(app_id=self._app_id, user_id=self._user_id)
+        input_obj = Inputs.from_auth_helper(auth=self._auth)
         batch_size = 32
         input_job_ids = []
         for idx in range(0, length, batch_size):

@@ -149,7 +164,7 @@ class Clarifai(VectorStore):
     def similarity_search_with_score(
         self,
         query: str,
-        k: int = 4,
+        k: Optional[int] = None,
         filters: Optional[dict] = None,
         **kwargs: Any,
     ) -> List[Tuple[Document, float]]:

@@ -157,7 +172,8 @@ class Clarifai(VectorStore):
         Args:
             query (str): Query text to search for.
-            k (int): Number of results to return. Defaults to 4.
+            k (Optional[int]): Number of results to return. If not set,
+                it'll take _number_of_docs. Defaults to None.
             filter (Optional[Dict[str, str]]): Filter by metadata.
                 Defaults to None.

@@ -175,10 +191,9 @@ class Clarifai(VectorStore):
             ) from e

         # Get number of docs to return
-        if self._number_of_docs is not None:
-            k = self._number_of_docs
+        top_k = k or self._number_of_docs

-        search_obj = Search(user_id=self._user_id, app_id=self._app_id, top_k=k)
+        search_obj = Search.from_auth_helper(auth=self._auth, top_k=top_k)
         rank = [{"text_raw": query}]
         # Add filter by metadata if provided.
         if filters is not None:

@@ -193,7 +208,7 @@ class Clarifai(VectorStore):
         def hit_to_document(hit: resources_pb2.Hit) -> Tuple[Document, float]:
             metadata = json_format.MessageToDict(hit.input.data.metadata)
-            h = {"Authorization": f"Key {self._pat}"}
+            h = dict(self._auth.metadata)
             request = requests.get(hit.input.data.text.url, headers=h)

             # override encoding by real educated guess as provided by chardet

@@ -215,19 +230,20 @@ class Clarifai(VectorStore):
     def similarity_search(
         self,
         query: str,
-        k: int = 4,
+        k: Optional[int] = None,
         **kwargs: Any,
     ) -> List[Document]:
         """Run similarity search using Clarifai.

         Args:
             query: Text to look up documents similar to.
-            k: Number of Documents to return. Defaults to 4.
+            k: Number of Documents to return.
+                If not set, it'll take _number_of_docs. Defaults to None.

         Returns:
             List of Documents most similar to the query and score for each
         """
-        docs_and_scores = self.similarity_search_with_score(query, **kwargs)
+        docs_and_scores = self.similarity_search_with_score(query, k=k, **kwargs)
         return [doc for doc, _ in docs_and_scores]

     @classmethod

@@ -240,6 +256,7 @@ class Clarifai(VectorStore):
         app_id: Optional[str] = None,
         number_of_docs: Optional[int] = None,
         pat: Optional[str] = None,
+        token: Optional[str] = None,
         **kwargs: Any,
     ) -> Clarifai:
         """Create a Clarifai vectorstore from a list of texts.

@@ -248,10 +265,14 @@ class Clarifai(VectorStore):
             user_id (str): User ID.
             app_id (str): App ID.
             texts (List[str]): List of texts to add.
-            number_of_docs (Optional[int]): Number of documents to return
-                during vector search. Defaults to None.
-            metadatas (Optional[List[dict]]): Optional list of metadatas.
-                Defaults to None.
+            number_of_docs (Optional[int]): Number of documents
+                to return during vector search. Defaults to None.
+            pat (Optional[str], optional): Personal access token.
+                Defaults to None.
+            token (Optional[str], optional): Session token. Defaults to None.
+            metadatas (Optional[List[dict]]): Optional list
+                of metadatas. Defaults to None.
+            **kwargs: Additional keyword arguments to be passed to the Search.

         Returns:
             Clarifai: Clarifai vectorstore.

@@ -261,6 +282,8 @@ class Clarifai(VectorStore):
             app_id=app_id,
             number_of_docs=number_of_docs,
             pat=pat,
+            token=token,
+            **kwargs,
         )
         clarifai_vector_db.add_texts(texts=texts, metadatas=metadatas)
         return clarifai_vector_db

@@ -274,6 +297,7 @@ class Clarifai(VectorStore):
         app_id: Optional[str] = None,
         number_of_docs: Optional[int] = None,
         pat: Optional[str] = None,
+        token: Optional[str] = None,
         **kwargs: Any,
     ) -> Clarifai:
         """Create a Clarifai vectorstore from a list of documents.

@@ -282,8 +306,11 @@ class Clarifai(VectorStore):
             user_id (str): User ID.
             app_id (str): App ID.
             documents (List[Document]): List of documents to add.
-            number_of_docs (Optional[int]): Number of documents to return
-                during vector search. Defaults to None.
+            number_of_docs (Optional[int]): Number of documents
+                to return during vector search. Defaults to None.
+            pat (Optional[str], optional): Personal access token. Defaults to None.
+            token (Optional[str], optional): Session token. Defaults to None.
+            **kwargs: Additional keyword arguments to be passed to the Search.

         Returns:
             Clarifai: Clarifai vectorstore.

@@ -297,4 +324,6 @@ class Clarifai(VectorStore):
             number_of_docs=number_of_docs,
             pat=pat,
             metadatas=metadatas,
+            token=token,
+            **kwargs,
         )
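
Finally, a rough sketch of the vectorstore path. It assumes the class is exported from `langchain_community.vectorstores` and that `from_texts` can be called without an embedding object since the Clarifai app computes embeddings server-side; IDs and the token are placeholders.

    from langchain_community.vectorstores import Clarifai as ClarifaiVectorStore

    vectorstore = ClarifaiVectorStore.from_texts(
        texts=["I really enjoy spending time with you", "The weather is terrible"],
        user_id="<USER_ID>",
        app_id="<APP_ID>",
        token="<CLARIFAI_SESSION_TOKEN>",  # or pat="<CLARIFAI_PAT>"
        number_of_docs=2,
    )

    # k now falls back to number_of_docs when not passed explicitly.
    docs = vectorstore.similarity_search("good times")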