diff --git a/gpt4all-chat/.flake8 b/gpt4all-chat/.flake8
index 535d840a..a339d60f 100644
--- a/gpt4all-chat/.flake8
+++ b/gpt4all-chat/.flake8
@@ -2,4 +2,4 @@
 [flake8]
 exclude = .*,__pycache__
 max-line-length = 120
-extend-ignore = B001,C408,D,DAR,E221,E303,E722,E741,E800,N801,N806,P101,S101,S324,S404,S406,S410,S603,WPS100,WPS110,WPS111,WPS113,WPS114,WPS115,WPS120,WPS2,WPS300,WPS301,WPS304,WPS305,WPS306,WPS309,WPS316,WPS317,WPS318,WPS319,WPS322,WPS323,WPS326,WPS329,WPS330,WPS332,WPS336,WPS337,WPS347,WPS360,WPS361,WPS414,WPS420,WPS421,WPS429,WPS430,WPS431,WPS432,WPS433,WPS437,WPS440,WPS440,WPS441,WPS442,WPS457,WPS458,WPS460,WPS462,WPS463,WPS473,WPS501,WPS504,WPS505,WPS508,WPS509,WPS510,WPS515,WPS516,WPS519,WPS529,WPS531,WPS602,WPS604,WPS605,WPS608,WPS609,WPS613,WPS615
+extend-ignore = B001,C408,D,DAR,E221,E303,E722,E741,E800,N801,N806,P101,S101,S324,S404,S406,S410,S603,WPS100,WPS110,WPS111,WPS113,WPS114,WPS115,WPS120,WPS2,WPS300,WPS301,WPS304,WPS305,WPS306,WPS309,WPS316,WPS317,WPS318,WPS319,WPS322,WPS323,WPS326,WPS329,WPS330,WPS332,WPS336,WPS337,WPS347,WPS360,WPS361,WPS407,WPS414,WPS420,WPS421,WPS429,WPS430,WPS431,WPS432,WPS433,WPS437,WPS440,WPS440,WPS441,WPS442,WPS457,WPS458,WPS460,WPS462,WPS463,WPS473,WPS501,WPS504,WPS505,WPS508,WPS509,WPS510,WPS515,WPS516,WPS519,WPS520,WPS529,WPS531,WPS602,WPS604,WPS605,WPS608,WPS609,WPS613,WPS615
diff --git a/gpt4all-chat/CMakeLists.txt b/gpt4all-chat/CMakeLists.txt
index 06dd3237..93e37ed9 100644
--- a/gpt4all-chat/CMakeLists.txt
+++ b/gpt4all-chat/CMakeLists.txt
@@ -103,10 +103,30 @@ add_subdirectory(../gpt4all-backend llmodel)
 
 if (GPT4ALL_TEST)
     enable_testing()
+
+    # Llama-3.2-1B model
+    set(TEST_MODEL "Llama-3.2-1B-Instruct-Q4_0.gguf")
+    set(TEST_MODEL_MD5 "48ff0243978606fdba19d899b77802fc")
+    set(TEST_MODEL_PATH "${CMAKE_BINARY_DIR}/resources/${TEST_MODEL}")
+    set(TEST_MODEL_URL "https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/${TEST_MODEL}")
+
+    # Create a custom command to download the file if it does not exist or if the checksum does not match
+    add_custom_command(
+        OUTPUT "${TEST_MODEL_PATH}"
+        COMMAND ${CMAKE_COMMAND} -E echo "Downloading test model from ${TEST_MODEL_URL} ..."
+        COMMAND ${CMAKE_COMMAND} -DURL="${TEST_MODEL_URL}" -DOUTPUT_PATH="${TEST_MODEL_PATH}" -DEXPECTED_MD5="${TEST_MODEL_MD5}" -P "${CMAKE_SOURCE_DIR}/cmake/download_model.cmake"
+        DEPENDS "${CMAKE_SOURCE_DIR}/cmake/download_model.cmake"
+    )
+
+    # Define a custom target that depends on the downloaded model
+    add_custom_target(download_test_model
+        DEPENDS "${TEST_MODEL_PATH}"
+    )
+
     add_subdirectory(tests)
 
     # The 'check' target makes sure the tests and their dependencies are up-to-date before running them
-    add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure DEPENDS chat gpt4all_tests)
+    add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure DEPENDS download_test_model chat gpt4all_tests)
 endif()
 
 set(CHAT_EXE_RESOURCES)
diff --git a/gpt4all-chat/cmake/download_model.cmake b/gpt4all-chat/cmake/download_model.cmake
new file mode 100644
index 00000000..11e96ddb
--- /dev/null
+++ b/gpt4all-chat/cmake/download_model.cmake
@@ -0,0 +1,12 @@
+if(NOT DEFINED URL OR NOT DEFINED OUTPUT_PATH OR NOT DEFINED EXPECTED_MD5)
+    message(FATAL_ERROR "Usage: cmake -DURL= -DOUTPUT_PATH= -DEXPECTED_MD5= -P download_model.cmake")
+endif()
+
+message(STATUS "Downloading model from ${URL} to ${OUTPUT_PATH} ...")
+
+file(DOWNLOAD "${URL}" "${OUTPUT_PATH}" EXPECTED_MD5 "${EXPECTED_MD5}" STATUS status)
+
+list(GET status 0 status_code)
+if(NOT status_code EQUAL 0)
+    message(FATAL_ERROR "Failed to download model: ${status}")
+endif()
diff --git a/gpt4all-chat/tests/CMakeLists.txt b/gpt4all-chat/tests/CMakeLists.txt
index 1e20a4ec..76f3cfc6 100644
--- a/gpt4all-chat/tests/CMakeLists.txt
+++ b/gpt4all-chat/tests/CMakeLists.txt
@@ -15,7 +15,7 @@ add_test(NAME ChatPythonTests
     COMMAND ${Python3_EXECUTABLE} -m pytest --color=yes "${CMAKE_CURRENT_SOURCE_DIR}/python"
 )
 set_tests_properties(ChatPythonTests PROPERTIES
-    ENVIRONMENT "CHAT_EXECUTABLE=${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/chat"
+    ENVIRONMENT "CHAT_EXECUTABLE=${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/chat;TEST_MODEL_PATH=${TEST_MODEL_PATH}"
     TIMEOUT 60
 )
 
diff --git a/gpt4all-chat/tests/python/test_server_api.py b/gpt4all-chat/tests/python/test_server_api.py
index 95c49cac..e1b0e476 100644
--- a/gpt4all-chat/tests/python/test_server_api.py
+++ b/gpt4all-chat/tests/python/test_server_api.py
@@ -1,9 +1,11 @@
 import os
+import shutil
 import signal
 import subprocess
 import sys
 import tempfile
 import textwrap
+from contextlib import contextmanager
 from pathlib import Path
 from subprocess import CalledProcessError
 from typing import Any, Iterator
@@ -20,59 +22,82 @@ class Requestor:
         self.session = requests.Session()
         self.http_adapter = self.session.adapters['http://']
 
-    def get(self, path: str, *, wait: bool = False) -> Any:
-        return self._request('GET', path, wait)
+    def get(self, path: str, *, raise_for_status: bool = True, wait: bool = False) -> Any:
+        return self._request('GET', path, raise_for_status=raise_for_status, wait=wait)
 
-    def _request(self, method: str, path: str, wait: bool) -> Any:
+    def post(self, path: str, data: dict[str, Any] | None, *, raise_for_status: bool = True, wait: bool = False) -> Any:
+        return self._request('POST', path, data, raise_for_status=raise_for_status, wait=wait)
+
+    def _request(
+        self, method: str, path: str, data: dict[str, Any] | None = None, *, raise_for_status: bool, wait: bool,
+    ) -> Any:
         if wait:
             retry = Retry(total=None, connect=10, read=False, status=0, other=0, backoff_factor=.01)
         else:
             retry = Retry(total=False)
         self.http_adapter.max_retries = retry  # type: ignore[attr-defined]
-        resp = self.session.request(method, f'http://localhost:4891/v1/{path}')
-        resp.raise_for_status()
-        return resp.json()
+        resp = self.session.request(method, f'http://localhost:4891/v1/{path}', json=data)
+        if raise_for_status:
+            resp.raise_for_status()
+            return resp.json()
+
+        try:
+            json_data = resp.json()
+        except ValueError:
+            json_data = None
+        return resp.status_code, json_data
 
 
 request = Requestor()
 
 
-@pytest.fixture
-def chat_server_config() -> Iterator[dict[str, str]]:
+def create_chat_server_config(tmpdir: Path, model_copied: bool = False) -> dict[str, str]:
+    xdg_confdir = tmpdir / 'config'
+    app_confdir = xdg_confdir / 'nomic.ai'
+    app_confdir.mkdir(parents=True)
+    with open(app_confdir / 'GPT4All.ini', 'w') as conf:
+        conf.write(textwrap.dedent(f"""\
+            [General]
+            serverChat=true
+
+            [download]
+            lastVersionStarted={config.APP_VERSION}
+
+            [network]
+            isActive=false
+            usageStatsActive=false
+            """))
+
+    if model_copied:
+        app_data_dir = tmpdir / 'share' / 'nomic.ai' / 'GPT4All'
+        app_data_dir.mkdir(parents=True)
+        local_env_file_path = Path(os.environ['TEST_MODEL_PATH'])
+        shutil.copy(local_env_file_path, app_data_dir / local_env_file_path.name)
+
+    return dict(
+        os.environ,
+        XDG_CACHE_HOME=str(tmpdir / 'cache'),
+        XDG_DATA_HOME=str(tmpdir / 'share'),
+        XDG_CONFIG_HOME=str(xdg_confdir),
+        APPIMAGE=str(tmpdir),  # hack to bypass SingleApplication
+    )
+
+
+@contextmanager
+def prepare_chat_server(model_copied: bool = False) -> Iterator[dict[str, str]]:
     if os.name != 'posix' or sys.platform == 'darwin':
         pytest.skip('Need non-Apple Unix to use alternate config path')
 
     with tempfile.TemporaryDirectory(prefix='gpt4all-test') as td:
         tmpdir = Path(td)
-        xdg_confdir = tmpdir / 'config'
-        app_confdir = xdg_confdir / 'nomic.ai'
-        app_confdir.mkdir(parents=True)
-        with open(app_confdir / 'GPT4All.ini', 'w') as conf:
-            conf.write(textwrap.dedent(f"""\
-                [General]
-                serverChat=true
-
-                [download]
-                lastVersionStarted={config.APP_VERSION}
-
-                [network]
-                isActive=false
-                usageStatsActive=false
-                """))
 
-        yield dict(
-            os.environ,
-            XDG_CACHE_HOME=str(tmpdir / 'cache'),
-            XDG_DATA_HOME=str(tmpdir / 'share'),
-            XDG_CONFIG_HOME=str(xdg_confdir),
-            APPIMAGE=str(tmpdir),  # hack to bypass SingleApplication
-        )
+        config = create_chat_server_config(tmpdir, model_copied=model_copied)
+        yield config
 
 
-@pytest.fixture
-def chat_server(chat_server_config: dict[str, str]) -> Iterator[None]:
+def start_chat_server(config: dict[str, str]) -> Iterator[None]:
     chat_executable = Path(os.environ['CHAT_EXECUTABLE']).absolute()
-    with subprocess.Popen(chat_executable, env=chat_server_config) as process:
+    with subprocess.Popen(chat_executable, env=config) as process:
         try:
             yield
         except:
@@ -83,5 +108,161 @@
             raise CalledProcessError(retcode, process.args)
 
 
-def test_list_models_empty(chat_server: None) -> None:
-    assert request.get('models', wait=True) == {'object': 'list', 'data': []}
+@pytest.fixture
+def chat_server() -> Iterator[None]:
+    with prepare_chat_server(model_copied=False) as config:
+        yield from start_chat_server(config)
+
+
+@pytest.fixture
+def chat_server_with_model() -> Iterator[None]:
+    with prepare_chat_server(model_copied=True) as config:
+        yield from start_chat_server(config)
+
+
+def test_with_models_empty(chat_server: None) -> None:
+    # non-sense endpoint
+    status_code, response = request.get('foobarbaz', wait=True, raise_for_status=False)
+    assert status_code == 404
+    assert response is None
+
+    # empty model list
+    response = request.get('models')
+    assert response == {'object': 'list', 'data': []}
+
+    # empty model info
+    response = request.get('models/foo')
+    assert response == {}
+
+    # POST for model list
+    status_code, response = request.post('models', data=None, raise_for_status=False)
+    assert status_code == 405
+    assert response == {'error': {
+        'code': None,
+        'message': 'Not allowed to POST on /v1/models. (HINT: Perhaps you meant to use a different HTTP method?)',
+        'param': None,
+        'type': 'invalid_request_error',
+    }}
+
+    # POST for model info
+    status_code, response = request.post('models/foo', data=None, raise_for_status=False)
+    assert status_code == 405
+    assert response == {'error': {
+        'code': None,
+        'message': 'Not allowed to POST on /v1/models/*. (HINT: Perhaps you meant to use a different HTTP method?)',
+        'param': None,
+        'type': 'invalid_request_error',
+    }}
+
+    # GET for completions
+    status_code, response = request.get('completions', raise_for_status=False)
+    assert status_code == 405
+    assert response == {'error': {
+        'code': 'method_not_supported',
+        'message': 'Only POST requests are accepted.',
+        'param': None,
+        'type': 'invalid_request_error',
+    }}
+
+    # GET for chat completions
+    status_code, response = request.get('chat/completions', raise_for_status=False)
+    assert status_code == 405
+    assert response == {'error': {
+        'code': 'method_not_supported',
+        'message': 'Only POST requests are accepted.',
+        'param': None,
+        'type': 'invalid_request_error',
+    }}
+
+
+EXPECTED_MODEL_INFO = {
+    'created': 0,
+    'id': 'Llama 3.2 1B Instruct',
+    'object': 'model',
+    'owned_by': 'humanity',
+    'parent': None,
+    'permissions': [
+        {
+            'allow_create_engine': False,
+            'allow_fine_tuning': False,
+            'allow_logprobs': False,
+            'allow_sampling': False,
+            'allow_search_indices': False,
+            'allow_view': True,
+            'created': 0,
+            'group': None,
+            'id': 'placeholder',
+            'is_blocking': False,
+            'object': 'model_permission',
+            'organization': '*',
+        },
+    ],
+    'root': 'Llama 3.2 1B Instruct',
+}
+
+EXPECTED_COMPLETIONS_RESPONSE = {
+    'choices': [
+        {
+            'finish_reason': 'stop',
+            'index': 0,
+            'logprobs': None,
+            'references': None,
+            'text': ' jumps over the lazy dog.',
+        },
+    ],
+    'id': 'placeholder',
+    'model': 'Llama 3.2 1B Instruct',
+    'object': 'text_completion',
+    'usage': {
+        'completion_tokens': 6,
+        'prompt_tokens': 5,
+        'total_tokens': 11,
+    },
+}
+
+
+def test_with_models(chat_server_with_model: None) -> None:
+    response = request.get('models', wait=True)
+    assert response == {
+        'data': [EXPECTED_MODEL_INFO],
+        'object': 'list',
+    }
+
+    # Test the specific model endpoint
+    response = request.get('models/Llama 3.2 1B Instruct')
+    assert response == EXPECTED_MODEL_INFO
+
+    # Test the completions endpoint
+    status_code, response = request.post('completions', data=None, raise_for_status=False)
+    assert status_code == 400
+    assert response == {'error': {
+        'code': None,
+        'message': 'error parsing request JSON: illegal value',
+        'param': None,
+        'type': 'invalid_request_error',
+    }}
+
+    data = {
+        'model': 'Llama 3.2 1B Instruct',
+        'prompt': 'The quick brown fox',
+        'temperature': 0,
+    }
+
+    response = request.post('completions', data=data)
+    assert len(response['choices']) == 1
+    assert response['choices'][0].keys() == {'text', 'index', 'logprobs', 'references', 'finish_reason'}
+    assert response['choices'][0]['text'] == ' jumps over the lazy dog.'
+    assert 'created' in response
+    response.pop('created')  # Remove the dynamic field for comparison
+    assert response == EXPECTED_COMPLETIONS_RESPONSE
+
+
+@pytest.mark.xfail(reason='Assertion failure in GPT4All. See nomic-ai/gpt4all#3133')
+def test_with_models_temperature(chat_server_with_model: None) -> None:
+    data = {
+        'model': 'Llama 3.2 1B Instruct',
+        'prompt': 'The quick brown fox',
+        'temperature': 0.5,
+    }
+
+    request.post('completions', data=data, wait=True, raise_for_status=True)