Mirror of https://github.com/hwchase17/langchain.git (synced 2025-08-08 04:25:46 +00:00)
community[patch]: [Fiddler] ensure dataset is not added if model is present (#19293)
**Description:** Minor PR to speed up onboarding by not trying to add a dataset if a model is already present, and by replacing the batch publish API with the streaming (single-event) publish API when only a single event is published.
**Dependencies:** any dependencies required for this change
**Twitter handle:** behalder

Co-authored-by: Barun Halder <barun@fiddler.ai>
parent 6e090280fd
commit 9246ec6b36
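For context, the onboarding path touched by this change runs when the callback handler is set up, so users hit it simply by creating the handler. A minimal, hypothetical usage sketch follows; the constructor parameter names are assumptions and are not taken from this diff:

```python
# Hypothetical usage sketch; the parameter names (url, org, project, model,
# api_key) are assumptions and should be checked against the real signature.
from langchain_community.callbacks.fiddler_callback import FiddlerCallbackHandler

handler = FiddlerCallbackHandler(
    url="https://your_instance.fiddler.ai",  # assumed Fiddler deployment URL
    org="your_org",                          # assumed organization id
    project="langchain_project",             # project the dataset/model live in
    model="llm_model",                       # name used for both dataset and model
    api_key="your_api_key",                  # assumed auth token
)
# Building the handler is where the dataset/model onboarding below happens, so
# skipping the dataset upload when the model already exists speeds this up.
```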
@@ -131,43 +131,42 @@ class FiddlerCallbackHandler(BaseCallbackHandler):
                 dataset_info.columns[i].data_type = self.fdl.DataType.CATEGORY
                 dataset_info.columns[i].possible_values = [SUCCESS, FAILURE]
 
-        if self.model not in self.fiddler_client.get_dataset_names(self.project):
-            print(  # noqa: T201
-                f"adding dataset {self.model} to project {self.project}."
-                "This only has to be done once."
-            )
-            try:
-                self.fiddler_client.upload_dataset(
-                    project_id=self.project,
-                    dataset_id=self.model,
-                    dataset={"train": self._df},
-                    info=dataset_info,
-                )
-            except Exception as e:
-                print(  # noqa: T201
-                    f"Error adding dataset {self.model}: {e}."
-                    "Fiddler integration will not work."
-                )
-                raise e
-
-        model_info = self.fdl.ModelInfo.from_dataset_info(
-            dataset_info=dataset_info,
-            dataset_id="train",
-            model_task=self.fdl.ModelTask.LLM,
-            features=[PROMPT, CONTEXT, RESPONSE],
-            target=FEEDBACK,
-            metadata_cols=[
-                RUN_ID,
-                TOTAL_TOKENS,
-                PROMPT_TOKENS,
-                COMPLETION_TOKENS,
-                MODEL_NAME,
-                DURATION,
-            ],
-            custom_features=self.custom_features,
-        )
-
         if self.model not in self.fiddler_client.get_model_names(self.project):
+            if self.model not in self.fiddler_client.get_dataset_names(self.project):
+                print(  # noqa: T201
+                    f"adding dataset {self.model} to project {self.project}."
+                    "This only has to be done once."
+                )
+                try:
+                    self.fiddler_client.upload_dataset(
+                        project_id=self.project,
+                        dataset_id=self.model,
+                        dataset={"train": self._df},
+                        info=dataset_info,
+                    )
+                except Exception as e:
+                    print(  # noqa: T201
+                        f"Error adding dataset {self.model}: {e}."
+                        "Fiddler integration will not work."
+                    )
+                    raise e
+
+            model_info = self.fdl.ModelInfo.from_dataset_info(
+                dataset_info=dataset_info,
+                dataset_id="train",
+                model_task=self.fdl.ModelTask.LLM,
+                features=[PROMPT, CONTEXT, RESPONSE],
+                target=FEEDBACK,
+                metadata_cols=[
+                    RUN_ID,
+                    TOTAL_TOKENS,
+                    PROMPT_TOKENS,
+                    COMPLETION_TOKENS,
+                    MODEL_NAME,
+                    DURATION,
+                ],
+                custom_features=self.custom_features,
+            )
             print(  # noqa: T201
                 f"adding model {self.model} to project {self.project}."
                 "This only has to be done once."  # noqa: T201
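Net effect of the hunk above: the dataset upload and model-info construction are now nested under the model-existence check, so a project that already has the model skips onboarding entirely. A condensed sketch of the resulting flow follows; the attribute and method names are taken from the diff, and the arguments are trimmed for brevity:

```python
# Condensed sketch of the new onboarding flow; not the full method body.
# self.fiddler_client, self.project, self.model, self._df, dataset_info and
# self.fdl all come from the surrounding handler code shown in the diff.
if self.model not in self.fiddler_client.get_model_names(self.project):
    # The dataset is only registered when the model is missing ...
    if self.model not in self.fiddler_client.get_dataset_names(self.project):
        self.fiddler_client.upload_dataset(
            project_id=self.project,
            dataset_id=self.model,
            dataset={"train": self._df},
            info=dataset_info,
        )
    # ... and the model info is only built in that same case.
    model_info = self.fdl.ModelInfo.from_dataset_info(
        dataset_info=dataset_info,
        dataset_id="train",
        model_task=self.fdl.ModelTask.LLM,
        # remaining arguments (features, target, metadata_cols, ...) trimmed
    )
# If the model already exists, none of the above runs, which is the
# onboarding speed-up described in the PR.
```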
@@ -286,7 +285,13 @@ class FiddlerCallbackHandler(BaseCallbackHandler):
             df[key] = [value] * prompt_count if isinstance(value, int) else value
 
         try:
-            self.fiddler_client.publish_events_batch(self.project, self.model, df)
+            if df.shape[0] > 1:
+                self.fiddler_client.publish_events_batch(self.project, self.model, df)
+            else:
+                df_dict = df.to_dict(orient="records")
+                self.fiddler_client.publish_event(
+                    self.project, self.model, event=df_dict[0]
+                )
         except Exception as e:
             print(  # noqa: T201
                 f"Error publishing events to fiddler: {e}. continuing..."
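In this second hunk, single-row frames go through the single-event publish call while multi-row frames keep using the batch API. The same branching is shown below as a standalone helper for clarity; `client` stands in for `self.fiddler_client`, and the two publish methods are the ones used in the diff:

```python
import pandas as pd


def publish_to_fiddler(client, project: str, model: str, df: pd.DataFrame) -> None:
    """Mirror the branching added in this PR when sending events to Fiddler."""
    if df.shape[0] > 1:
        # Several rows: publish them in one batch call.
        client.publish_events_batch(project, model, df)
    else:
        # A single row: convert it to a record dict and publish it as one event.
        # (As in the diff, an empty DataFrame is not handled here.)
        df_dict = df.to_dict(orient="records")
        client.publish_event(project, model, event=df_dict[0])
```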