mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2025-08-06 10:33:38 +00:00
Moved model check into cpu only paths
This commit is contained in:
parent
8aba2c9009
commit
3d10110314
@ -111,10 +111,6 @@ async def completions(request: CompletionRequest):
|
|||||||
'''
|
'''
|
||||||
Completes a GPT4All model response.
|
Completes a GPT4All model response.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
if request.model != settings.model:
|
|
||||||
raise HTTPException(status_code=400, detail=f"The GPT4All inference server is booted to only infer: `{settings.model}`")
|
|
||||||
|
|
||||||
if settings.inference_mode == "gpu":
|
if settings.inference_mode == "gpu":
|
||||||
params = request.dict(exclude={'model', 'prompt', 'max_tokens', 'n'})
|
params = request.dict(exclude={'model', 'prompt', 'max_tokens', 'n'})
|
||||||
params["max_new_tokens"] = request.max_tokens
|
params["max_new_tokens"] = request.max_tokens
|
||||||
@ -170,6 +166,10 @@ async def completions(request: CompletionRequest):
|
|||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
|
if request.model != settings.model:
|
||||||
|
raise HTTPException(status_code=400,
|
||||||
|
detail=f"The GPT4All inference server is booted to only infer: `{settings.model}`")
|
||||||
|
|
||||||
if isinstance(request.prompt, list):
|
if isinstance(request.prompt, list):
|
||||||
if len(request.prompt) > 1:
|
if len(request.prompt) > 1:
|
||||||
raise HTTPException(status_code=400, detail="Can only infer one inference per request in CPU mode.")
|
raise HTTPException(status_code=400, detail="Can only infer one inference per request in CPU mode.")
|
||||||
|
Loading…
Reference in New Issue
Block a user