mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-31 08:32:32 +00:00
community[patch]: [Fiddler] ensure dataset is not added if model is present (#19293)
**Description:** - Minor PR to speed up onboarding by not trying to add a dataset if a model is already present. - Replace the batch publish API with the streaming API when only a single event is published. **Dependencies:** any dependencies required for this change **Twitter handle:** behalder Co-authored-by: Barun Halder <barun@fiddler.ai>
This commit is contained in:
parent
6e090280fd
commit
9246ec6b36
@ -131,43 +131,42 @@ class FiddlerCallbackHandler(BaseCallbackHandler):
|
||||
dataset_info.columns[i].data_type = self.fdl.DataType.CATEGORY
|
||||
dataset_info.columns[i].possible_values = [SUCCESS, FAILURE]
|
||||
|
||||
if self.model not in self.fiddler_client.get_dataset_names(self.project):
|
||||
print( # noqa: T201
|
||||
f"adding dataset {self.model} to project {self.project}."
|
||||
"This only has to be done once."
|
||||
)
|
||||
try:
|
||||
self.fiddler_client.upload_dataset(
|
||||
project_id=self.project,
|
||||
dataset_id=self.model,
|
||||
dataset={"train": self._df},
|
||||
info=dataset_info,
|
||||
)
|
||||
except Exception as e:
|
||||
print( # noqa: T201
|
||||
f"Error adding dataset {self.model}: {e}."
|
||||
"Fiddler integration will not work."
|
||||
)
|
||||
raise e
|
||||
|
||||
model_info = self.fdl.ModelInfo.from_dataset_info(
|
||||
dataset_info=dataset_info,
|
||||
dataset_id="train",
|
||||
model_task=self.fdl.ModelTask.LLM,
|
||||
features=[PROMPT, CONTEXT, RESPONSE],
|
||||
target=FEEDBACK,
|
||||
metadata_cols=[
|
||||
RUN_ID,
|
||||
TOTAL_TOKENS,
|
||||
PROMPT_TOKENS,
|
||||
COMPLETION_TOKENS,
|
||||
MODEL_NAME,
|
||||
DURATION,
|
||||
],
|
||||
custom_features=self.custom_features,
|
||||
)
|
||||
|
||||
if self.model not in self.fiddler_client.get_model_names(self.project):
|
||||
if self.model not in self.fiddler_client.get_dataset_names(self.project):
|
||||
print( # noqa: T201
|
||||
f"adding dataset {self.model} to project {self.project}."
|
||||
"This only has to be done once."
|
||||
)
|
||||
try:
|
||||
self.fiddler_client.upload_dataset(
|
||||
project_id=self.project,
|
||||
dataset_id=self.model,
|
||||
dataset={"train": self._df},
|
||||
info=dataset_info,
|
||||
)
|
||||
except Exception as e:
|
||||
print( # noqa: T201
|
||||
f"Error adding dataset {self.model}: {e}."
|
||||
"Fiddler integration will not work."
|
||||
)
|
||||
raise e
|
||||
|
||||
model_info = self.fdl.ModelInfo.from_dataset_info(
|
||||
dataset_info=dataset_info,
|
||||
dataset_id="train",
|
||||
model_task=self.fdl.ModelTask.LLM,
|
||||
features=[PROMPT, CONTEXT, RESPONSE],
|
||||
target=FEEDBACK,
|
||||
metadata_cols=[
|
||||
RUN_ID,
|
||||
TOTAL_TOKENS,
|
||||
PROMPT_TOKENS,
|
||||
COMPLETION_TOKENS,
|
||||
MODEL_NAME,
|
||||
DURATION,
|
||||
],
|
||||
custom_features=self.custom_features,
|
||||
)
|
||||
print( # noqa: T201
|
||||
f"adding model {self.model} to project {self.project}."
|
||||
"This only has to be done once." # noqa: T201
|
||||
@ -286,7 +285,13 @@ class FiddlerCallbackHandler(BaseCallbackHandler):
|
||||
df[key] = [value] * prompt_count if isinstance(value, int) else value
|
||||
|
||||
try:
|
||||
self.fiddler_client.publish_events_batch(self.project, self.model, df)
|
||||
if df.shape[0] > 1:
|
||||
self.fiddler_client.publish_events_batch(self.project, self.model, df)
|
||||
else:
|
||||
df_dict = df.to_dict(orient="records")
|
||||
self.fiddler_client.publish_event(
|
||||
self.project, self.model, event=df_dict[0]
|
||||
)
|
||||
except Exception as e:
|
||||
print( # noqa: T201
|
||||
f"Error publishing events to fiddler: {e}. continuing..."
|
||||
|
Loading…
Reference in New Issue
Block a user