diff --git a/pilot/model/llm_out/proxy_llm.py b/pilot/model/llm_out/proxy_llm.py
index c4423a1a6..3ec5d8504 100644
--- a/pilot/model/llm_out/proxy_llm.py
+++ b/pilot/model/llm_out/proxy_llm.py
@@ -77,11 +77,15 @@ def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048)
     text = ""
     for line in res.iter_lines():
         if line:
-            json_data = line.split(b": ", 1)[1]
-            decoded_line = json_data.decode("utf-8")
-            if decoded_line.lower() != "[DONE]".lower():
-                obj = json.loads(json_data)
-                if obj["choices"][0]["delta"].get("content") is not None:
-                    content = obj["choices"][0]["delta"]["content"]
-                    text += content
-                    yield text
+            if not line.startswith(b"data: "):
+                error_message = line.decode("utf-8")
+                yield error_message
+            else:
+                json_data = line.split(b": ", 1)[1]
+                decoded_line = json_data.decode("utf-8")
+                if decoded_line.lower() != "[DONE]".lower():
+                    obj = json.loads(json_data)
+                    if obj["choices"][0]["delta"].get("content") is not None:
+                        content = obj["choices"][0]["delta"]["content"]
+                        text += content
+                        yield text
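
For context, the new branch forwards any line that is not an SSE `data: ` frame (typically a plain-text error body returned by the proxy) to the caller instead of failing on the `line.split(b": ", 1)[1]` index lookup. Below is a minimal self-contained sketch of the same parsing logic, assuming an OpenAI-style streaming endpoint consumed with `requests`; the function name, URL, and payload are placeholders for illustration, not part of this patch:

```python
import json

import requests


def stream_chat_text(url: str, payload: dict, headers: dict):
    """Yield accumulated text from an OpenAI-style SSE stream.

    Mirrors the patched logic: lines that do not start with b"data: "
    (e.g. a raw error message from the proxy) are yielded as-is rather
    than raising an IndexError on the split below.
    """
    res = requests.post(url, headers=headers, json=payload, stream=True)
    text = ""
    for line in res.iter_lines():
        if not line:
            continue
        if not line.startswith(b"data: "):
            # Not an SSE data frame: surface it so the caller sees the error.
            yield line.decode("utf-8")
            continue
        json_data = line.split(b": ", 1)[1]
        decoded_line = json_data.decode("utf-8")
        if decoded_line.lower() == "[done]":
            break
        obj = json.loads(json_data)
        delta = obj["choices"][0]["delta"]
        if delta.get("content") is not None:
            text += delta["content"]
            yield text


# Hypothetical usage (endpoint, model name, and key are placeholders):
# for chunk in stream_chat_text(
#     "https://api.openai.com/v1/chat/completions",
#     {"model": "gpt-3.5-turbo", "stream": True,
#      "messages": [{"role": "user", "content": "hello"}]},
#     {"Authorization": "Bearer <OPENAI_API_KEY>"},
# ):
#     print(chunk)
```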