diff --git a/pilot/model/llm_out/proxy_llm.py b/pilot/model/llm_out/proxy_llm.py
index 68512ec3c..6dd1bfc2b 100644
--- a/pilot/model/llm_out/proxy_llm.py
+++ b/pilot/model/llm_out/proxy_llm.py
@@ -66,6 +66,7 @@ def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048)
         "messages": history,
         "temperature": params.get("temperature"),
         "max_tokens": params.get("max_new_tokens"),
+        "stream": True
     }
 
     res = requests.post(
@@ -75,8 +76,32 @@ def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048)
     text = ""
     for line in res.iter_lines():
         if line:
-            decoded_line = line.decode("utf-8")
-            json_line = json.loads(decoded_line)
-            print(json_line)
-            text += json_line["choices"][0]["message"]["content"]
+            json_data = line.split(b': ', 1)[1]
+            decoded_line = json_data.decode("utf-8")
+            if decoded_line.lower() != '[DONE]'.lower():
+                obj = json.loads(json_data)
+                if obj['choices'][0]['delta'].get('content') is not None:
+                    content = obj['choices'][0]['delta']['content']
+                    text += content
             yield text
+
+    # native result.
+    # payloads = {
+    #     "model": "gpt-3.5-turbo",  # just for test, remove this later
+    #     "messages": history,
+    #     "temperature": params.get("temperature"),
+    #     "max_tokens": params.get("max_new_tokens"),
+    # }
+    #
+    # res = requests.post(
+    #     CFG.proxy_server_url, headers=headers, json=payloads, stream=True
+    # )
+    #
+    # text = ""
+    # line = res.content
+    # if line:
+    #     decoded_line = line.decode("utf-8")
+    #     json_line = json.loads(decoded_line)
+    #     print(json_line)
+    #     text += json_line["choices"][0]["message"]["content"]
+    # yield text
\ No newline at end of file
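
For reference, a minimal and self-contained sketch of the server-sent-events parsing the new loop performs: each streamed line of the form "data: {...}" carries a partial completion in choices[0].delta.content, and the stream ends with a "data: [DONE]" sentinel. The sample byte lines and the helper name parse_sse_stream below are hypothetical illustrations; in the patched function the lines come from res.iter_lines() on the streaming response.

import json

def parse_sse_stream(lines):
    # Accumulate delta content from OpenAI-style streaming chunks and
    # yield the cumulative text after every chunk, mirroring the loop above.
    text = ""
    for line in lines:
        if not line:
            continue  # iter_lines() can emit empty keep-alive lines
        json_data = line.split(b": ", 1)[1]   # strip the "data: " prefix
        decoded_line = json_data.decode("utf-8")
        if decoded_line == "[DONE]":
            break                             # end-of-stream sentinel
        obj = json.loads(decoded_line)
        delta = obj["choices"][0]["delta"]
        if delta.get("content") is not None:
            text += delta["content"]          # append this chunk's tokens
        yield text

# Hypothetical chunk payloads, for illustration only:
sample = [
    b'data: {"choices": [{"delta": {"content": "Hel"}}]}',
    b'data: {"choices": [{"delta": {"content": "lo"}}]}',
    b'data: [DONE]',
]
for partial in parse_sse_stream(sample):
    print(partial)  # prints "Hel", then "Hello"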