From 3d101103148c6ac6ec1a5fb84520aae515e7cd24 Mon Sep 17 00:00:00 2001
From: Andriy Mulyar
Date: Mon, 24 Jul 2023 11:34:50 -0400
Subject: [PATCH] Moved model check into cpu only paths

---
 gpt4all-api/gpt4all_api/app/api_v1/routes/completions.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gpt4all-api/gpt4all_api/app/api_v1/routes/completions.py b/gpt4all-api/gpt4all_api/app/api_v1/routes/completions.py
index 660b2000..5df9ddb7 100644
--- a/gpt4all-api/gpt4all_api/app/api_v1/routes/completions.py
+++ b/gpt4all-api/gpt4all_api/app/api_v1/routes/completions.py
@@ -111,10 +111,6 @@ async def completions(request: CompletionRequest):
     '''
     Completes a GPT4All model response.
     '''
-
-    if request.model != settings.model:
-        raise HTTPException(status_code=400, detail=f"The GPT4All inference server is booted to only infer: `{settings.model}`")
-
     if settings.inference_mode == "gpu":
         params = request.dict(exclude={'model', 'prompt', 'max_tokens', 'n'})
         params["max_new_tokens"] = request.max_tokens
@@ -170,6 +166,10 @@
 
     else:
 
+        if request.model != settings.model:
+            raise HTTPException(status_code=400,
+                                detail=f"The GPT4All inference server is booted to only infer: `{settings.model}`")
+
         if isinstance(request.prompt, list):
             if len(request.prompt) > 1:
                 raise HTTPException(status_code=400, detail="Can only infer one inference per request in CPU mode.")
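
Reviewer note (not part of the commit): the net effect of the two hunks is that the booted-model guard now applies only on the CPU path, while GPU mode forwards the request's `model` field without validation. Below is a condensed, self-contained sketch of the resulting control flow, for review only. It stubs `settings` with a SimpleNamespace and reduces the route bodies to placeholders, so the model name "example-model", the route path, and the returned dicts are illustrative assumptions, not taken from the patch.

    # Hedged sketch of the post-patch control flow in completions.py.
    # Assumes FastAPI/pydantic; `settings` is a stub, not the real settings module.
    from types import SimpleNamespace

    from fastapi import FastAPI, HTTPException
    from pydantic import BaseModel

    app = FastAPI()

    # Hypothetical stand-in for the server's settings object.
    settings = SimpleNamespace(inference_mode="cpu", model="example-model")

    class CompletionRequest(BaseModel):
        model: str
        prompt: str
        max_tokens: int = 16

    @app.post("/v1/completions")
    async def completions(request: CompletionRequest):
        if settings.inference_mode == "gpu":
            # GPU path: after this patch, the booted-model check no longer
            # runs here, so any `model` value is accepted and passed through.
            return {"mode": "gpu", "model": request.model}
        else:
            # CPU path: the check moved here; a CPU server is booted with a
            # single model and rejects requests naming any other model.
            if request.model != settings.model:
                raise HTTPException(
                    status_code=400,
                    detail=f"The GPT4All inference server is booted to only infer: `{settings.model}`",
                )
            return {"mode": "cpu", "model": request.model}

With this stub, a POST to /v1/completions naming a model other than the configured one returns 400 in CPU mode but succeeds in GPU mode, which matches the intent stated in the subject line.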