gpt4all/gpt4all-api/gpt4all_api/app/tests/test_endpoints.py
Zach Nussbaum 8aba2c9009
GPU Inference Server (#1112)
* feat: local inference server

* fix: source to use bash + vars

* chore: isort and black

* fix: make file + inference mode

* chore: logging

* refactor: remove old links

* fix: add new env vars

* feat: hf inference server

* refactor: remove old links

* test: batch and single response

* chore: black + isort

* separate gpu and cpu dockerfiles

* moved gpu to separate dockerfile

* Fixed test endpoints

* Edits to API. server won't start due to failed instantiation error

* Method signature

* fix: gpu_infer

* tests: fix tests

---------

Co-authored-by: Andriy Mulyar <andriy.mulyar@gmail.com>
2023-07-21 15:13:29 -04:00

46 lines
1.3 KiB
Python

"""
Use the OpenAI python API to test gpt4all models.
"""
import openai
openai.api_base = "http://localhost:4891/v1"
openai.api_key = "not needed for a local LLM"
def test_completion():
model = "ggml-mpt-7b-chat.bin"
prompt = "Who is Michael Jordan?"
response = openai.Completion.create(
model=model, prompt=prompt, max_tokens=50, temperature=0.28, top_p=0.95, n=1, echo=True, stream=False
)
assert len(response['choices'][0]['text']) > len(prompt)
def test_streaming_completion():
model = "ggml-mpt-7b-chat.bin"
prompt = "Who is Michael Jordan?"
tokens = []
for resp in openai.Completion.create(
model=model,
prompt=prompt,
max_tokens=50,
temperature=0.28,
top_p=0.95,
n=1,
echo=True,
stream=True):
tokens.append(resp.choices[0].text)
assert (len(tokens) > 0)
assert (len("".join(tokens)) > len(prompt))
def test_batched_completion():
model = "ggml-mpt-7b-chat.bin"
prompt = "Who is Michael Jordan?"
response = openai.Completion.create(
model=model, prompt=[prompt] * 3, max_tokens=50, temperature=0.28, top_p=0.95, n=1, echo=True, stream=False
)
assert len(response['choices'][0]['text']) > len(prompt)
assert len(response['choices']) == 3