fix(chat_dashboard): improve JSON parsing robustness in output parser

Adds error handling and recovery mechanisms for malformed JSON responses from LLMs in the chat dashboard scene. Changes: (1) add try/except blocks with progressive fallback strategies for JSON parsing; (2) implement the _clean_json_string() method to handle common JSON formatting issues; (3) add the _extract_json_fallback() method for regex-based JSON extraction; (4) add proper logging for JSON parsing failures and recovery attempts.
2025-07-31 15:47:05 +00:00 · 2025-07-15 14:56:14 +08:00 · 2025-07-15 14:56:14 +08:00 · 0e260e2aea
commit 0e260e2aea
parent 441cb55377
1 changed files with 51 additions and 1 deletions
--- a/packages/dbgpt-app/src/dbgpt_app/scene/chat_dashboard/out_parser.py
+++ b/packages/dbgpt-app/src/dbgpt_app/scene/chat_dashboard/out_parser.py
@ -23,7 +23,20 @@ class ChatDashboardOutputParser(BaseOutputParser):
    def parse_prompt_response(self, model_out_text):
        clean_str = super().parse_prompt_response(model_out_text)
        print("clean prompt response:", clean_str)
-        response = json.loads(clean_str)
+
+        try:
+            response = json.loads(clean_str)
+        except json.JSONDecodeError as e:
+            logger.warning(f"JSON parsing failed: {e}. Attempting to clean and retry.")
+            cleaned_str = self._clean_json_string(clean_str)
+            try:
+                response = json.loads(cleaned_str)
+            except json.JSONDecodeError:
+                logger.warning("JSON cleaning failed. Attempting fallback extraction.")
+                response = self._extract_json_fallback(clean_str)
+                if response is None:
+                    raise ValueError(f"Unable to parse JSON from response: {clean_str}")
+
        chart_items: List[ChartItem] = []
        if not isinstance(response, list):
            response = [response]
@ -38,6 +51,43 @@ class ChatDashboardOutputParser(BaseOutputParser):
            )
        return chart_items

+    def _clean_json_string(self, json_str: str) -> str:
+        """Repair common formatting problems in a JSON string produced by an LLM.
+
+        The repairs are applied in order:
+
+        1. Strip surrounding whitespace.
+        2. Remove a wrapping Markdown code fence, including a language tag on
+           the opening fence and the fence written on a single line.
+        3. Only if the text still does not parse, undo one level of
+           over-escaping (backslash-quote becomes a quote, then a doubled
+           backslash becomes a single backslash).
+
+        Args:
+            json_str: Text that ``json.loads`` rejected.
+
+        Returns:
+            The cleaned text. It may still be invalid JSON, so the caller has
+            to parse it and handle the failure.
+        """
+        json_str = json_str.strip()
+
+        if json_str.startswith("```"):
+            fence_line, newline, remainder = json_str.partition("\n")
+            if newline:
+                # The opening fence has its own line (``` or ```json): drop it.
+                json_str = remainder
+            else:
+                # Whole payload sits on the fence line: drop only the backticks
+                # and an optional "json" tag.
+                json_str = fence_line[3:].lstrip()
+                if json_str[:4].lower() == "json":
+                    json_str = json_str[4:]
+            json_str = json_str.strip()
+            if json_str.endswith("```"):
+                json_str = json_str[:-3].strip()
+
+        # Valid JSON legitimately contains \" inside string values; unescaping
+        # it would corrupt the document, so only do it when parsing still fails.
+        try:
+            json.loads(json_str)
+        except json.JSONDecodeError:
+            json_str = json_str.replace('\\"', '"').replace("\\\\", "\\")
+
+        return json_str.strip()
+
+    def _extract_json_fallback(self, text: str) -> "dict | list | None":
+        """Find the first JSON object, or array of objects, embedded in *text*.
+
+        Every ``{`` and ``[`` is tried as a starting point with
+        ``json.JSONDecoder.raw_decode``, which parses one complete JSON value
+        and ignores whatever follows it. Unlike a regular expression this copes
+        with arbitrarily deep nesting and with braces inside string values, and
+        it returns a top-level array whole instead of only its first element.
+
+        Args:
+            text: Model output that may wrap JSON in surrounding prose.
+
+        Returns:
+            The decoded ``dict``, or a non-empty ``list`` of ``dict`` items, or
+            ``None`` when no such value can be decoded.
+        """
+        decoder = json.JSONDecoder()
+
+        for start, char in enumerate(text):
+            if char not in "{[":
+                continue
+            try:
+                value, _end = decoder.raw_decode(text, start)
+            except json.JSONDecodeError:
+                continue  # Not valid JSON from this position; try the next one.
+
+            if isinstance(value, dict):
+                return value
+            # Skip bracketed prose such as "[1]": only a list of objects counts.
+            if value and all(isinstance(item, dict) for item in value):
+                return value
+
+        return None
+
    def parse_view_response(self, speak, data, prompt_response) -> str:
        return json.dumps(data.prepare_dict(), ensure_ascii=False)