Mirror of https://github.com/nomic-ai/gpt4all.git (synced 2025-06-23 05:58:48 +00:00)
Update .gitignore and Dockerfile, add .env file and modify test batch

commit db70f1752a, parent f3eaa33ce7
.gitignore (vendored)

@@ -183,4 +183,7 @@ build_*
 build-*
 
 # IntelliJ
 .idea/
+
+# LLM models
+*.gguf
docker-compose.yaml

@@ -7,14 +7,16 @@ services:
     restart: always #restart on error (usually code compilation from save during bad state)
     ports:
       - "4891:4891"
+    env_file:
+      - .env
     environment:
       - APP_ENVIRONMENT=dev
       - WEB_CONCURRENCY=2
       - LOGLEVEL=debug
       - PORT=4891
-      - model=${MODEL_ID}
+      - model=${MODEL_BIN} # using variable from .env file
       - inference_mode=cpu
     volumes:
       - './gpt4all_api/app:/app'
-      - './gpt4all_api/models:/models'
+      - './gpt4all_api/models:/models' # models are mounted in the container
     command: ["/start-reload.sh"]
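The compose file now reads its configuration from a .env file: docker compose interpolates ${MODEL_BIN} from a .env file placed next to docker-compose.yaml, and env_file additionally injects those variables into the container. The diff does not show the .env file itself, so the following is only a sketch with example values; MODEL_BIN and EMBEDDING are the names the test changes below read, and the file names are the models mentioned elsewhere in this diff:

    # .env (example values only)
    MODEL_BIN=ggml-mpt-7b-chat.bin
    EMBEDDING=ggml-all-MiniLM-L6-v2-f16.bin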
Dockerfile

@@ -1,8 +1,6 @@
 # syntax=docker/dockerfile:1.0.0-experimental
 FROM tiangolo/uvicorn-gunicorn:python3.11
 
-ARG MODEL_BIN=ggml-mpt-7b-chat.bin
-
 # Put first so anytime this file changes other cached layers are invalidated.
 COPY gpt4all_api/requirements.txt /requirements.txt
 

@@ -17,7 +15,3 @@ COPY gpt4all_api/app /app
 
 RUN mkdir -p /models
 
-# Include the following line to bake a model into the image and not have to download it on API start.
-RUN wget -q --show-progress=off https://gpt4all.io/models/${MODEL_BIN} -P /models \
-    && md5sum /models/${MODEL_BIN}
-
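With the bake-in step removed, the image no longer ships a model; a matching file must be present in ./gpt4all_api/models (mounted at /models by the compose file) before the API starts. A one-time host-side fetch mirroring the removed lines might look like this, assuming gpt4all.io still serves the file named by MODEL_BIN; this command is illustrative and not part of the commit:

    wget -q --show-progress=off https://gpt4all.io/models/${MODEL_BIN} -P gpt4all_api/models \
        && md5sum gpt4all_api/models/${MODEL_BIN}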
API test module (gpt4all_api/app/tests)

@@ -2,16 +2,26 @@
 Use the OpenAI python API to test gpt4all models.
 """
 from typing import List, get_args
+import os
+from dotenv import load_dotenv
 
 import openai
 
 openai.api_base = "http://localhost:4891/v1"
 
 openai.api_key = "not needed for a local LLM"
 
+# Load the .env file
+env_path = 'gpt4all-api/gpt4all_api/.env'
+load_dotenv(dotenv_path=env_path)
+# Fetch MODEL_ID from .env file
+model_id = os.getenv('MODEL_BIN', 'default_model_id')
+embedding = os.getenv('EMBEDDING', 'default_embedding_model_id')
+print(model_id)
+print(embedding)
 
 def test_completion():
-    model = "ggml-mpt-7b-chat.bin"
+    model = model_id
     prompt = "Who is Michael Jordan?"
     response = openai.Completion.create(
         model=model, prompt=prompt, max_tokens=50, temperature=0.28, top_p=0.95, n=1, echo=True, stream=False
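One consequence of the defaults passed to os.getenv: if the .env file is missing, load_dotenv silently does nothing (it returns False rather than raising), and the tests fall back to placeholder names that no server would accept. A small sketch of that behaviour, using the names from the diff:

    import os
    from dotenv import load_dotenv

    os.environ.pop('MODEL_BIN', None)          # start from a clean environment
    load_dotenv(dotenv_path='does/not/exist')  # no-op, returns False
    assert os.getenv('MODEL_BIN', 'default_model_id') == 'default_model_id'

So the .env file effectively remains required for the tests to pass against a running server.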
@@ -19,7 +29,7 @@ def test_completion():
     assert len(response['choices'][0]['text']) > len(prompt)
 
 def test_streaming_completion():
-    model = "ggml-mpt-7b-chat.bin"
+    model = model_id
     prompt = "Who is Michael Jordan?"
     tokens = []
     for resp in openai.Completion.create(
@@ -36,19 +46,27 @@ def test_streaming_completion():
     assert (len(tokens) > 0)
     assert (len("".join(tokens)) > len(prompt))
 
+# Modified test batch, problems with KeyError in response
 def test_batched_completion():
-    model = "ggml-mpt-7b-chat.bin"
+    model = model_id  # replace with your specific model ID
     prompt = "Who is Michael Jordan?"
-    response = openai.Completion.create(
-        model=model, prompt=[prompt] * 3, max_tokens=50, temperature=0.28, top_p=0.95, n=1, echo=True, stream=False
-    )
-    assert len(response['choices'][0]['text']) > len(prompt)
-    assert len(response['choices']) == 3
+    responses = []
+
+    # Loop to create completions one at a time
+    for _ in range(3):
+        response = openai.Completion.create(
+            model=model, prompt=prompt, max_tokens=50, temperature=0.28, top_p=0.95, n=1, echo=True, stream=False
+        )
+        responses.append(response)
+
+    # Assertions to check the responses
+    for response in responses:
+        assert len(response['choices'][0]['text']) > len(prompt)
+
+    assert len(responses) == 3
 
 def test_embedding():
-    model = "ggml-all-MiniLM-L6-v2-f16.bin"
+    model = embedding
     prompt = "Who is Michael Jordan?"
     response = openai.Embedding.create(model=model, input=prompt)
     output = response["data"][0]["embedding"]

@@ -56,4 +74,4 @@ def test_embedding():
 
     assert response["model"] == model
     assert isinstance(output, list)
    assert all(isinstance(x, args) for x in output)
models directory README

@@ -1 +1 @@
-# Drop GGUF compatible models here, make sure it matches MODEL_BIN on your .env file
+### Drop GGUF compatible models here, make sure it matches MODEL_BIN on your .env file
gpt4all_api/requirements.txt

@@ -7,6 +7,7 @@ fastapi>=0.95.0
 Jinja2>=3.0
 gpt4all>=1.0.0
 pytest
-openai
+openai==0.28.0
 black
 isort
+python-dotenv
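The openai==0.28.0 pin matters: the 1.x releases of the openai package removed the legacy module-level interface (openai.Completion.create, openai.Embedding.create) that these tests call. A quick guard one could add to the test module, purely illustrative and not part of the commit:

    from importlib.metadata import version

    # The tests use the pre-1.0 module-level API; fail fast if a newer
    # openai package has slipped into the environment.
    assert int(version("openai").split(".")[0]) < 1, \
        "openai>=1.0 removed the legacy API; pin openai==0.28.0"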
Makefile

@@ -14,7 +14,7 @@ testenv_gpu: clean_testenv test_build
	docker compose -f docker-compose.yaml -f docker-compose.gpu.yaml up --build
 
 testenv_d: clean_testenv test_build
-	docker compose up --build -d
+	docker compose env up --build -d
 
 test:
	docker compose exec $(APP_NAME) pytest -svv --disable-warnings -p no:cacheprovider /app/tests

@@ -28,19 +28,19 @@ clean_testenv:
 fresh_testenv: clean_testenv testenv
 
 venv:
-	if [ ! -d $(ROOT_DIR)/env ]; then $(PYTHON) -m venv $(ROOT_DIR)/env; fi
+	if [ ! -d $(ROOT_DIR)/venv ]; then $(PYTHON) -m venv $(ROOT_DIR)/venv; fi
 
 dependencies: venv
-	source $(ROOT_DIR)/env/bin/activate; $(PYTHON) -m pip install -r $(ROOT_DIR)/$(APP_NAME)/requirements.txt
+	source $(ROOT_DIR)/venv/bin/activate; $(PYTHON) -m pip install -r $(ROOT_DIR)/$(APP_NAME)/requirements.txt
 
 clean: clean_testenv
	# Remove existing environment
-	rm -rf $(ROOT_DIR)/env;
+	rm -rf $(ROOT_DIR)/venv;
	rm -rf $(ROOT_DIR)/$(APP_NAME)/*.pyc;
 
 
 black:
-	source $(ROOT_DIR)/env/bin/activate; black -l 120 -S --target-version py38 $(APP_NAME)
+	source $(ROOT_DIR)/venv/bin/activate; black -l 120 -S --target-version py38 $(APP_NAME)
 
 isort:
-	source $(ROOT_DIR)/env/bin/activate; isort --ignore-whitespace --atomic -w 120 $(APP_NAME)
+	source $(ROOT_DIR)/venv/bin/activate; isort --ignore-whitespace --atomic -w 120 $(APP_NAME)
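Taken together with the compose changes, a typical local loop using targets that appear in the hunks above (APP_NAME is assumed to be defined earlier in the makefile, which these hunks do not show):

    make fresh_testenv   # clean_testenv + testenv: rebuild and start the API
    make test            # run pytest inside the container
    make clean_testenv   # tear everything down again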