From 3d101103148c6ac6ec1a5fb84520aae515e7cd24 Mon Sep 17 00:00:00 2001
From: Andriy Mulyar
Date: Mon, 24 Jul 2023 11:34:50 -0400
Subject: [PATCH] Moved model check into cpu only paths

---
 gpt4all-api/gpt4all_api/app/api_v1/routes/completions.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gpt4all-api/gpt4all_api/app/api_v1/routes/completions.py b/gpt4all-api/gpt4all_api/app/api_v1/routes/completions.py
index 660b2000..5df9ddb7 100644
--- a/gpt4all-api/gpt4all_api/app/api_v1/routes/completions.py
+++ b/gpt4all-api/gpt4all_api/app/api_v1/routes/completions.py
@@ -111,10 +111,6 @@ async def completions(request: CompletionRequest):
     '''
     Completes a GPT4All model response.
     '''
-
-    if request.model != settings.model:
-        raise HTTPException(status_code=400, detail=f"The GPT4All inference server is booted to only infer: `{settings.model}`")
-
     if settings.inference_mode == "gpu":
         params = request.dict(exclude={'model', 'prompt', 'max_tokens', 'n'})
         params["max_new_tokens"] = request.max_tokens
@@ -170,6 +166,10 @@
 
     else:
 
+        if request.model != settings.model:
+            raise HTTPException(status_code=400,
+                                detail=f"The GPT4All inference server is booted to only infer: `{settings.model}`")
+
         if isinstance(request.prompt, list):
             if len(request.prompt) > 1:
                 raise HTTPException(status_code=400, detail="Can only infer one inference per request in CPU mode.")
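
Reviewer note (not part of the commit): the net effect of the two hunks is that the booted-model guard now applies only on the CPU path, while GPU mode forwards the request's `model` field without validation. Below is a condensed, self-contained sketch of the resulting control flow, for review only. It stubs `settings` with a SimpleNamespace and reduces the route bodies to placeholders, so the model name "example-model", the route path, and the returned dicts are illustrative assumptions, not taken from the patch.

    # Hedged sketch of the post-patch control flow in completions.py.
    # Assumes FastAPI/pydantic; `settings` is a stub, not the real settings module.
    from types import SimpleNamespace

    from fastapi import FastAPI, HTTPException
    from pydantic import BaseModel

    app = FastAPI()

    # Hypothetical stand-in for the server's settings object.
    settings = SimpleNamespace(inference_mode="cpu", model="example-model")

    class CompletionRequest(BaseModel):
        model: str
        prompt: str
        max_tokens: int = 16

    @app.post("/v1/completions")
    async def completions(request: CompletionRequest):
        if settings.inference_mode == "gpu":
            # GPU path: after this patch, the booted-model check no longer
            # runs here, so any `model` value is accepted and passed through.
            return {"mode": "gpu", "model": request.model}
        else:
            # CPU path: the check moved here; a CPU server is booted with a
            # single model and rejects requests naming any other model.
            if request.model != settings.model:
                raise HTTPException(
                    status_code=400,
                    detail=f"The GPT4All inference server is booted to only infer: `{settings.model}`",
                )
            return {"mode": "cpu", "model": request.model}

With this stub, a POST to /v1/completions naming a model other than the configured one returns 400 in CPU mode but succeeds in GPU mode, which matches the intent stated in the subject line.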