diff --git a/pilot/server/vicuna_server.py b/pilot/server/vicuna_server.py index 91aba5dec..568503d6c 100644 --- a/pilot/server/vicuna_server.py +++ b/pilot/server/vicuna_server.py @@ -48,8 +48,8 @@ def generate_stream_gate(params): params, DEVICE, MAX_POSITION_EMBEDDINGS, - 2, ): + print("output: ", output) ret = { "text": output, "error_code": 0, @@ -68,7 +68,7 @@ async def api_generate_stream(request: Request): global model_semaphore, global_counter global_counter += 1 params = await request.json() - + print(model, tokenizer, params, DEVICE) if model_semaphore is None: model_semaphore = asyncio.Semaphore(LIMIT_MODEL_CONCURRENCY) await model_semaphore.acquire() diff --git a/pilot/server/webserver.py b/pilot/server/webserver.py index db5eca86c..7b37895a1 100644 --- a/pilot/server/webserver.py +++ b/pilot/server/webserver.py @@ -156,12 +156,13 @@ def http_bot(state, temperature, max_new_tokens, request: gr.Request): prompt = state.get_prompt() skip_echo_len = compute_skip_echo_len(prompt) + logger.info(f"State: {state}") payload = { "model": model_name, "prompt": prompt, "temperature": temperature, "max_new_tokens": max_new_tokens, - "stop": state.sep if state.sep_style == SeparatorStyle.SINGLE else None, + "stop": state.sep, } logger.info(f"Request: \n {payload}") @@ -179,6 +180,7 @@ def http_bot(state, temperature, max_new_tokens, request: gr.Request): for chunk in response.iter_lines(decode_unicode=False, delimiter=b"\0"): if chunk: data = json.loads(chunk.decode()) + logger.info(f"Response: {data}") if data["error_code"] == 0: output = data["text"][skip_echo_len].strip() output = post_process_code(output)