From 9246ec6b3672bd57ee5b230d48ff9e24b1ab1756 Mon Sep 17 00:00:00 2001
From: Barun Amalkumar Halder <barunhaldernvs@gmail.com>
Date: Mon, 25 Mar 2024 17:28:05 -0700
Subject: [PATCH] community[patch] : [Fiddler] ensure dataset is not added if
 model is present (#19293)

**Description:**
- Minor PR to speed up onboarding by not trying to add a dataset if the
model is already present.
- Replace the batch publish API with the streaming publish API when only a
single event is published.

**Dependencies:** None.
**Twitter handle:** behalder

Co-authored-by: Barun Halder <barun@fiddler.ai>
---
 .../callbacks/fiddler_callback.py             | 79 ++++++++++---------
 1 file changed, 42 insertions(+), 37 deletions(-)

diff --git a/libs/community/langchain_community/callbacks/fiddler_callback.py b/libs/community/langchain_community/callbacks/fiddler_callback.py
index 3f5edefde23..c8dc9f0494e 100644
--- a/libs/community/langchain_community/callbacks/fiddler_callback.py
+++ b/libs/community/langchain_community/callbacks/fiddler_callback.py
@@ -131,43 +131,42 @@ class FiddlerCallbackHandler(BaseCallbackHandler):
                 dataset_info.columns[i].data_type = self.fdl.DataType.CATEGORY
                 dataset_info.columns[i].possible_values = [SUCCESS, FAILURE]
 
-        if self.model not in self.fiddler_client.get_dataset_names(self.project):
-            print(  # noqa: T201
-                f"adding dataset {self.model} to project {self.project}."
-                "This only has to be done once."
-            )
-            try:
-                self.fiddler_client.upload_dataset(
-                    project_id=self.project,
-                    dataset_id=self.model,
-                    dataset={"train": self._df},
-                    info=dataset_info,
-                )
-            except Exception as e:
-                print(  # noqa: T201
-                    f"Error adding dataset {self.model}: {e}."
-                    "Fiddler integration will not work."
-                )
-                raise e
-
-        model_info = self.fdl.ModelInfo.from_dataset_info(
-            dataset_info=dataset_info,
-            dataset_id="train",
-            model_task=self.fdl.ModelTask.LLM,
-            features=[PROMPT, CONTEXT, RESPONSE],
-            target=FEEDBACK,
-            metadata_cols=[
-                RUN_ID,
-                TOTAL_TOKENS,
-                PROMPT_TOKENS,
-                COMPLETION_TOKENS,
-                MODEL_NAME,
-                DURATION,
-            ],
-            custom_features=self.custom_features,
-        )
-
         if self.model not in self.fiddler_client.get_model_names(self.project):
+            if self.model not in self.fiddler_client.get_dataset_names(self.project):
+                print(  # noqa: T201
+                    f"adding dataset {self.model} to project {self.project}."
+                    "This only has to be done once."
+                )
+                try:
+                    self.fiddler_client.upload_dataset(
+                        project_id=self.project,
+                        dataset_id=self.model,
+                        dataset={"train": self._df},
+                        info=dataset_info,
+                    )
+                except Exception as e:
+                    print(  # noqa: T201
+                        f"Error adding dataset {self.model}: {e}."
+                        "Fiddler integration will not work."
+                    )
+                    raise e
+
+            model_info = self.fdl.ModelInfo.from_dataset_info(
+                dataset_info=dataset_info,
+                dataset_id="train",
+                model_task=self.fdl.ModelTask.LLM,
+                features=[PROMPT, CONTEXT, RESPONSE],
+                target=FEEDBACK,
+                metadata_cols=[
+                    RUN_ID,
+                    TOTAL_TOKENS,
+                    PROMPT_TOKENS,
+                    COMPLETION_TOKENS,
+                    MODEL_NAME,
+                    DURATION,
+                ],
+                custom_features=self.custom_features,
+            )
             print(  # noqa: T201
                 f"adding model {self.model} to project {self.project}."
                 "This only has to be done once."  # noqa: T201
@@ -286,7 +285,13 @@ class FiddlerCallbackHandler(BaseCallbackHandler):
                 df[key] = [value] * prompt_count if isinstance(value, int) else value
 
         try:
-            self.fiddler_client.publish_events_batch(self.project, self.model, df)
+            if df.shape[0] > 1:
+                self.fiddler_client.publish_events_batch(self.project, self.model, df)
+            else:
+                df_dict = df.to_dict(orient="records")
+                self.fiddler_client.publish_event(
+                    self.project, self.model, event=df_dict[0]
+                )
         except Exception as e:
             print(  # noqa: T201
                 f"Error publishing events to fiddler: {e}. continuing..."