mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-10 13:27:36 +00:00
community[patch] : Tidy up and update Clarifai SDK functions (#18314)
Description: * Tidy up, add missing docstrings, and fix unused params * Enable authentication with a session token
This commit is contained in:
parent
93b87f2bfb
commit
3ecb903d49
@ -1,9 +1,8 @@
|
||||
import logging
|
||||
from typing import Dict, List, Optional
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from langchain_core.embeddings import Embeddings
|
||||
from langchain_core.pydantic_v1 import BaseModel, Extra, root_validator
|
||||
from langchain_core.utils import get_from_dict_or_env
|
||||
from langchain_core.pydantic_v1 import BaseModel, Extra, Field, root_validator
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@ -37,8 +36,11 @@ class ClarifaiEmbeddings(BaseModel, Embeddings):
|
||||
"""Clarifai application id to use."""
|
||||
user_id: Optional[str] = None
|
||||
"""Clarifai user id to use."""
|
||||
pat: Optional[str] = None
|
||||
pat: Optional[str] = Field(default=None, exclude=True)
|
||||
"""Clarifai personal access token to use."""
|
||||
token: Optional[str] = Field(default=None, exclude=True)
|
||||
"""Clarifai session token to use."""
|
||||
model: Any = Field(default=None, exclude=True) #: :meta private:
|
||||
api_base: str = "https://api.clarifai.com"
|
||||
|
||||
class Config:
|
||||
@ -51,21 +53,32 @@ class ClarifaiEmbeddings(BaseModel, Embeddings):
|
||||
"""Validate that we have all required info to access Clarifai
|
||||
platform and python package exists in environment."""
|
||||
|
||||
values["pat"] = get_from_dict_or_env(values, "pat", "CLARIFAI_PAT")
|
||||
try:
|
||||
from clarifai.client.model import Model
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Could not import clarifai python package. "
|
||||
"Please install it with `pip install clarifai`."
|
||||
)
|
||||
user_id = values.get("user_id")
|
||||
app_id = values.get("app_id")
|
||||
model_id = values.get("model_id")
|
||||
model_version_id = values.get("model_version_id")
|
||||
model_url = values.get("model_url")
|
||||
api_base = values.get("api_base")
|
||||
pat = values.get("pat")
|
||||
token = values.get("token")
|
||||
|
||||
if model_url is not None and model_id is not None:
|
||||
raise ValueError("Please provide either model_url or model_id, not both.")
|
||||
|
||||
if model_url is None and model_id is None:
|
||||
raise ValueError("Please provide one of model_url or model_id.")
|
||||
|
||||
if model_url is None and model_id is not None:
|
||||
if user_id is None or app_id is None:
|
||||
raise ValueError("Please provide a user_id and app_id.")
|
||||
values["model"] = Model(
|
||||
url=model_url,
|
||||
app_id=app_id,
|
||||
user_id=user_id,
|
||||
model_version=dict(id=model_version_id),
|
||||
pat=pat,
|
||||
token=token,
|
||||
model_id=model_id,
|
||||
base_url=api_base,
|
||||
)
|
||||
|
||||
return values
|
||||
|
||||
@ -78,27 +91,9 @@ class ClarifaiEmbeddings(BaseModel, Embeddings):
|
||||
Returns:
|
||||
List of embeddings, one for each text.
|
||||
"""
|
||||
try:
|
||||
from clarifai.client.input import Inputs
|
||||
from clarifai.client.model import Model
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Could not import clarifai python package. "
|
||||
"Please install it with `pip install clarifai`."
|
||||
)
|
||||
if self.pat is not None:
|
||||
pat = self.pat
|
||||
if self.model_url is not None:
|
||||
_model_init = Model(url=self.model_url, pat=pat)
|
||||
else:
|
||||
_model_init = Model(
|
||||
model_id=self.model_id,
|
||||
user_id=self.user_id,
|
||||
app_id=self.app_id,
|
||||
pat=pat,
|
||||
)
|
||||
from clarifai.client.input import Inputs
|
||||
|
||||
input_obj = Inputs(pat=pat)
|
||||
input_obj = Inputs.from_auth_helper(self.model.auth_helper)
|
||||
batch_size = 32
|
||||
embeddings = []
|
||||
|
||||
@ -109,7 +104,7 @@ class ClarifaiEmbeddings(BaseModel, Embeddings):
|
||||
input_obj.get_text_input(input_id=str(id), raw_text=inp)
|
||||
for id, inp in enumerate(batch)
|
||||
]
|
||||
predict_response = _model_init.predict(input_batch)
|
||||
predict_response = self.model.predict(input_batch)
|
||||
embeddings.extend(
|
||||
[
|
||||
list(output.data.embeddings[0].vector)
|
||||
@ -131,27 +126,9 @@ class ClarifaiEmbeddings(BaseModel, Embeddings):
|
||||
Returns:
|
||||
Embeddings for the text.
|
||||
"""
|
||||
try:
|
||||
from clarifai.client.model import Model
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Could not import clarifai python package. "
|
||||
"Please install it with `pip install clarifai`."
|
||||
)
|
||||
if self.pat is not None:
|
||||
pat = self.pat
|
||||
if self.model_url is not None:
|
||||
_model_init = Model(url=self.model_url, pat=pat)
|
||||
else:
|
||||
_model_init = Model(
|
||||
model_id=self.model_id,
|
||||
user_id=self.user_id,
|
||||
app_id=self.app_id,
|
||||
pat=pat,
|
||||
)
|
||||
|
||||
try:
|
||||
predict_response = _model_init.predict_by_bytes(
|
||||
predict_response = self.model.predict_by_bytes(
|
||||
bytes(text, "utf-8"), input_type="text"
|
||||
)
|
||||
embeddings = [
|
||||
|
@ -4,8 +4,7 @@ from typing import Any, Dict, List, Optional
|
||||
from langchain_core.callbacks import CallbackManagerForLLMRun
|
||||
from langchain_core.language_models.llms import LLM
|
||||
from langchain_core.outputs import Generation, LLMResult
|
||||
from langchain_core.pydantic_v1 import Extra, root_validator
|
||||
from langchain_core.utils import get_from_dict_or_env
|
||||
from langchain_core.pydantic_v1 import Extra, Field, root_validator
|
||||
|
||||
from langchain_community.llms.utils import enforce_stop_tokens
|
||||
|
||||
@ -42,8 +41,11 @@ class Clarifai(LLM):
|
||||
"""Clarifai application id to use."""
|
||||
user_id: Optional[str] = None
|
||||
"""Clarifai user id to use."""
|
||||
pat: Optional[str] = None
|
||||
pat: Optional[str] = Field(default=None, exclude=True) #: :meta private:
|
||||
"""Clarifai personal access token to use."""
|
||||
token: Optional[str] = Field(default=None, exclude=True) #: :meta private:
|
||||
"""Clarifai session token to use."""
|
||||
model: Any = Field(default=None, exclude=True) #: :meta private:
|
||||
api_base: str = "https://api.clarifai.com"
|
||||
|
||||
class Config:
|
||||
@ -55,21 +57,32 @@ class Clarifai(LLM):
|
||||
def validate_environment(cls, values: Dict) -> Dict:
|
||||
"""Validate that we have all required info to access Clarifai
|
||||
platform and python package exists in environment."""
|
||||
values["pat"] = get_from_dict_or_env(values, "pat", "CLARIFAI_PAT")
|
||||
try:
|
||||
from clarifai.client.model import Model
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Could not import clarifai python package. "
|
||||
"Please install it with `pip install clarifai`."
|
||||
)
|
||||
user_id = values.get("user_id")
|
||||
app_id = values.get("app_id")
|
||||
model_id = values.get("model_id")
|
||||
model_version_id = values.get("model_version_id")
|
||||
model_url = values.get("model_url")
|
||||
api_base = values.get("api_base")
|
||||
pat = values.get("pat")
|
||||
token = values.get("token")
|
||||
|
||||
if model_url is not None and model_id is not None:
|
||||
raise ValueError("Please provide either model_url or model_id, not both.")
|
||||
|
||||
if model_url is None and model_id is None:
|
||||
raise ValueError("Please provide one of model_url or model_id.")
|
||||
|
||||
if model_url is None and model_id is not None:
|
||||
if user_id is None or app_id is None:
|
||||
raise ValueError("Please provide a user_id and app_id.")
|
||||
values["model"] = Model(
|
||||
url=model_url,
|
||||
app_id=app_id,
|
||||
user_id=user_id,
|
||||
model_version=dict(id=model_version_id),
|
||||
pat=pat,
|
||||
token=token,
|
||||
model_id=model_id,
|
||||
base_url=api_base,
|
||||
)
|
||||
|
||||
return values
|
||||
|
||||
@ -117,28 +130,10 @@ class Clarifai(LLM):
|
||||
|
||||
response = clarifai_llm("Tell me a joke.")
|
||||
"""
|
||||
# If version_id None, Defaults to the latest model version
|
||||
try:
|
||||
from clarifai.client.model import Model
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Could not import clarifai python package. "
|
||||
"Please install it with `pip install clarifai`."
|
||||
)
|
||||
if self.pat is not None:
|
||||
pat = self.pat
|
||||
if self.model_url is not None:
|
||||
_model_init = Model(url=self.model_url, pat=pat)
|
||||
else:
|
||||
_model_init = Model(
|
||||
model_id=self.model_id,
|
||||
user_id=self.user_id,
|
||||
app_id=self.app_id,
|
||||
pat=pat,
|
||||
)
|
||||
|
||||
try:
|
||||
(inference_params := {}) if inference_params is None else inference_params
|
||||
predict_response = _model_init.predict_by_bytes(
|
||||
predict_response = self.model.predict_by_bytes(
|
||||
bytes(prompt, "utf-8"),
|
||||
input_type="text",
|
||||
inference_params=inference_params,
|
||||
@ -165,27 +160,15 @@ class Clarifai(LLM):
|
||||
# TODO: add caching here.
|
||||
try:
|
||||
from clarifai.client.input import Inputs
|
||||
from clarifai.client.model import Model
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Could not import clarifai python package. "
|
||||
"Please install it with `pip install clarifai`."
|
||||
)
|
||||
if self.pat is not None:
|
||||
pat = self.pat
|
||||
if self.model_url is not None:
|
||||
_model_init = Model(url=self.model_url, pat=pat)
|
||||
else:
|
||||
_model_init = Model(
|
||||
model_id=self.model_id,
|
||||
user_id=self.user_id,
|
||||
app_id=self.app_id,
|
||||
pat=pat,
|
||||
)
|
||||
|
||||
generations = []
|
||||
batch_size = 32
|
||||
input_obj = Inputs(pat=pat)
|
||||
input_obj = Inputs.from_auth_helper(self.model.auth_helper)
|
||||
try:
|
||||
for i in range(0, len(prompts), batch_size):
|
||||
batch = prompts[i : i + batch_size]
|
||||
@ -196,7 +179,7 @@ class Clarifai(LLM):
|
||||
(
|
||||
inference_params := {}
|
||||
) if inference_params is None else inference_params
|
||||
predict_response = _model_init.predict(
|
||||
predict_response = self.model.predict(
|
||||
inputs=input_batch, inference_params=inference_params
|
||||
)
|
||||
|
||||
|
@ -36,8 +36,10 @@ class Clarifai(VectorStore):
|
||||
self,
|
||||
user_id: Optional[str] = None,
|
||||
app_id: Optional[str] = None,
|
||||
number_of_docs: Optional[int] = None,
|
||||
number_of_docs: Optional[int] = 4,
|
||||
pat: Optional[str] = None,
|
||||
token: Optional[str] = None,
|
||||
api_base: Optional[str] = "https://api.clarifai.com",
|
||||
) -> None:
|
||||
"""Initialize with Clarifai client.
|
||||
|
||||
@ -45,6 +47,7 @@ class Clarifai(VectorStore):
|
||||
user_id (Optional[str], optional): User ID. Defaults to None.
|
||||
app_id (Optional[str], optional): App ID. Defaults to None.
|
||||
pat (Optional[str], optional): Personal access token. Defaults to None.
|
||||
token (Optional[str], optional): Session token. Defaults to None.
|
||||
number_of_docs (Optional[int], optional): Number of documents to return
|
||||
during vector search. Defaults to 4.
|
||||
api_base (Optional[str], optional): API base. Defaults to "https://api.clarifai.com".
|
||||
@ -52,21 +55,33 @@ class Clarifai(VectorStore):
|
||||
Raises:
|
||||
ValueError: If user ID or app ID is not provided.
|
||||
"""
|
||||
self._user_id = user_id or os.environ.get("CLARIFAI_USER_ID")
|
||||
self._app_id = app_id or os.environ.get("CLARIFAI_APP_ID")
|
||||
if pat:
|
||||
os.environ["CLARIFAI_PAT"] = pat
|
||||
self._pat = os.environ.get("CLARIFAI_PAT")
|
||||
if self._user_id is None or self._app_id is None or self._pat is None:
|
||||
_user_id = user_id or os.environ.get("CLARIFAI_USER_ID")
|
||||
_app_id = app_id or os.environ.get("CLARIFAI_APP_ID")
|
||||
if _user_id is None or _app_id is None:
|
||||
raise ValueError(
|
||||
"Could not find CLARIFAI_USER_ID, CLARIFAI_APP_ID or\
|
||||
CLARIFAI_PAT in your environment. "
|
||||
"Please set those env variables with a valid user ID, \
|
||||
app ID and personal access token \
|
||||
from https://clarifai.com/settings/security."
|
||||
"Could not find CLARIFAI_USER_ID "
|
||||
"or CLARIFAI_APP_ID in your environment. "
|
||||
"Please set those env variables with a valid user ID, app ID"
|
||||
)
|
||||
self._number_of_docs = number_of_docs
|
||||
|
||||
try:
|
||||
from clarifai.client.search import Search
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"Could not import clarifai python package. "
|
||||
"Please install it with `pip install clarifai`."
|
||||
) from e
|
||||
|
||||
self._auth = Search(
|
||||
user_id=_user_id,
|
||||
app_id=_app_id,
|
||||
top_k=number_of_docs,
|
||||
pat=pat,
|
||||
token=token,
|
||||
base_url=api_base,
|
||||
).auth_helper
|
||||
|
||||
def add_texts(
|
||||
self,
|
||||
texts: Iterable[str],
|
||||
@ -109,7 +124,7 @@ class Clarifai(VectorStore):
|
||||
ids
|
||||
), "Number of text inputs and input ids should be the same."
|
||||
|
||||
input_obj = Inputs(app_id=self._app_id, user_id=self._user_id)
|
||||
input_obj = Inputs.from_auth_helper(auth=self._auth)
|
||||
batch_size = 32
|
||||
input_job_ids = []
|
||||
for idx in range(0, length, batch_size):
|
||||
@ -149,7 +164,7 @@ class Clarifai(VectorStore):
|
||||
def similarity_search_with_score(
|
||||
self,
|
||||
query: str,
|
||||
k: int = 4,
|
||||
k: Optional[int] = None,
|
||||
filters: Optional[dict] = None,
|
||||
**kwargs: Any,
|
||||
) -> List[Tuple[Document, float]]:
|
||||
@ -157,7 +172,8 @@ class Clarifai(VectorStore):
|
||||
|
||||
Args:
|
||||
query (str): Query text to search for.
|
||||
k (int): Number of results to return. Defaults to 4.
|
||||
k (Optional[int]): Number of results to return. If not set,
|
||||
it'll take _number_of_docs. Defaults to None.
|
||||
filters (Optional[Dict[str, str]]): Filter by metadata.
|
||||
Defaults to None.
|
||||
|
||||
@ -175,10 +191,9 @@ class Clarifai(VectorStore):
|
||||
) from e
|
||||
|
||||
# Get number of docs to return
|
||||
if self._number_of_docs is not None:
|
||||
k = self._number_of_docs
|
||||
top_k = k or self._number_of_docs
|
||||
|
||||
search_obj = Search(user_id=self._user_id, app_id=self._app_id, top_k=k)
|
||||
search_obj = Search.from_auth_helper(auth=self._auth, top_k=top_k)
|
||||
rank = [{"text_raw": query}]
|
||||
# Add filter by metadata if provided.
|
||||
if filters is not None:
|
||||
@ -193,7 +208,7 @@ class Clarifai(VectorStore):
|
||||
|
||||
def hit_to_document(hit: resources_pb2.Hit) -> Tuple[Document, float]:
|
||||
metadata = json_format.MessageToDict(hit.input.data.metadata)
|
||||
h = {"Authorization": f"Key {self._pat}"}
|
||||
h = dict(self._auth.metadata)
|
||||
request = requests.get(hit.input.data.text.url, headers=h)
|
||||
|
||||
# override encoding by real educated guess as provided by chardet
|
||||
@ -215,19 +230,20 @@ class Clarifai(VectorStore):
|
||||
def similarity_search(
|
||||
self,
|
||||
query: str,
|
||||
k: int = 4,
|
||||
k: Optional[int] = None,
|
||||
**kwargs: Any,
|
||||
) -> List[Document]:
|
||||
"""Run similarity search using Clarifai.
|
||||
|
||||
Args:
|
||||
query: Text to look up documents similar to.
|
||||
k: Number of Documents to return. Defaults to 4.
|
||||
k: Number of Documents to return.
|
||||
If not set, it'll take _number_of_docs. Defaults to None.
|
||||
|
||||
Returns:
|
||||
List of Documents most similar to the query.
|
||||
"""
|
||||
docs_and_scores = self.similarity_search_with_score(query, **kwargs)
|
||||
docs_and_scores = self.similarity_search_with_score(query, k=k, **kwargs)
|
||||
return [doc for doc, _ in docs_and_scores]
|
||||
|
||||
@classmethod
|
||||
@ -240,6 +256,7 @@ class Clarifai(VectorStore):
|
||||
app_id: Optional[str] = None,
|
||||
number_of_docs: Optional[int] = None,
|
||||
pat: Optional[str] = None,
|
||||
token: Optional[str] = None,
|
||||
**kwargs: Any,
|
||||
) -> Clarifai:
|
||||
"""Create a Clarifai vectorstore from a list of texts.
|
||||
@ -248,10 +265,14 @@ class Clarifai(VectorStore):
|
||||
user_id (str): User ID.
|
||||
app_id (str): App ID.
|
||||
texts (List[str]): List of texts to add.
|
||||
number_of_docs (Optional[int]): Number of documents to return
|
||||
during vector search. Defaults to None.
|
||||
metadatas (Optional[List[dict]]): Optional list of metadatas.
|
||||
number_of_docs (Optional[int]): Number of documents
|
||||
to return during vector search. Defaults to None.
|
||||
pat (Optional[str], optional): Personal access token.
|
||||
Defaults to None.
|
||||
token (Optional[str], optional): Session token. Defaults to None.
|
||||
metadatas (Optional[List[dict]]): Optional list
|
||||
of metadatas. Defaults to None.
|
||||
**kwargs: Additional keyword arguments to be passed to the Search.
|
||||
|
||||
Returns:
|
||||
Clarifai: Clarifai vectorstore.
|
||||
@ -261,6 +282,8 @@ class Clarifai(VectorStore):
|
||||
app_id=app_id,
|
||||
number_of_docs=number_of_docs,
|
||||
pat=pat,
|
||||
token=token,
|
||||
**kwargs,
|
||||
)
|
||||
clarifai_vector_db.add_texts(texts=texts, metadatas=metadatas)
|
||||
return clarifai_vector_db
|
||||
@ -274,6 +297,7 @@ class Clarifai(VectorStore):
|
||||
app_id: Optional[str] = None,
|
||||
number_of_docs: Optional[int] = None,
|
||||
pat: Optional[str] = None,
|
||||
token: Optional[str] = None,
|
||||
**kwargs: Any,
|
||||
) -> Clarifai:
|
||||
"""Create a Clarifai vectorstore from a list of documents.
|
||||
@ -282,8 +306,11 @@ class Clarifai(VectorStore):
|
||||
user_id (str): User ID.
|
||||
app_id (str): App ID.
|
||||
documents (List[Document]): List of documents to add.
|
||||
number_of_docs (Optional[int]): Number of documents to return
|
||||
during vector search. Defaults to None.
|
||||
number_of_docs (Optional[int]): Number of documents
|
||||
to return during vector search. Defaults to None.
|
||||
pat (Optional[str], optional): Personal access token. Defaults to None.
|
||||
token (Optional[str], optional): Session token. Defaults to None.
|
||||
**kwargs: Additional keyword arguments to be passed to the Search.
|
||||
|
||||
Returns:
|
||||
Clarifai: Clarifai vectorstore.
|
||||
@ -297,4 +324,6 @@ class Clarifai(VectorStore):
|
||||
number_of_docs=number_of_docs,
|
||||
pat=pat,
|
||||
metadatas=metadatas,
|
||||
token=token,
|
||||
**kwargs,
|
||||
)
|
||||
|
Loading…
Reference in New Issue
Block a user