Mirror of https://github.com/nomic-ai/gpt4all.git
expose n_gpu_layers parameter of llama.cpp (#1890)
Also dynamically limit the GPU layers and context length fields to the maximum supported by the model.

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
@@ -42,6 +42,8 @@ public:
         static const Implementation *implementation(const char *fname, const std::string& buildVariant);
         static LLModel *construct(const std::string &modelPath, std::string buildVariant = "auto", int n_ctx = 2048);
         static std::vector<GPUDevice> availableGPUDevices();
+        static int32_t maxContextLength(const std::string &modelPath);
+        static int32_t layerCount(const std::string &modelPath);
         static void setImplementationsSearchPath(const std::string& path);
         static const std::string& implementationsSearchPath();

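These two new statics are what lets a frontend clamp its settings fields to the limits of a given model file, as described in the commit message. A minimal sketch of how a caller might use them, assuming Implementation is the nested class shown here, that both functions return -1 when the value cannot be determined, and that the header include path is as named below (the helper itself is illustrative, not part of this commit):

#include <algorithm>
#include <cstdint>
#include <string>

#include "llmodel.h"   // assumed include path for the LLModel header

// Illustrative helper: clamp user-requested settings to what the model file
// supports, treating a non-positive result as "unknown, leave the value as is".
static void clampToModelLimits(const std::string &modelPath, int &nCtx, int &nGpuLayers)
{
    int32_t maxCtx    = LLModel::Implementation::maxContextLength(modelPath);
    int32_t maxLayers = LLModel::Implementation::layerCount(modelPath);
    if (maxCtx > 0)
        nCtx = std::min<int>(nCtx, maxCtx);
    if (maxLayers > 0)
        nGpuLayers = std::min<int>(nGpuLayers, maxLayers);
}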
@@ -77,9 +79,9 @@ public:

     virtual bool supportsEmbedding() const = 0;
     virtual bool supportsCompletion() const = 0;
-    virtual bool loadModel(const std::string &modelPath, int n_ctx) = 0;
+    virtual bool loadModel(const std::string &modelPath, int n_ctx, int ngl) = 0;
     virtual bool isModelLoaded() const = 0;
-    virtual size_t requiredMem(const std::string &modelPath, int n_ctx) = 0;
+    virtual size_t requiredMem(const std::string &modelPath, int n_ctx, int ngl) = 0;
     virtual size_t stateSize() const { return 0; }
     virtual size_t saveState(uint8_t */*dest*/) const { return 0; }
     virtual size_t restoreState(const uint8_t */*src*/) { return 0; }
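With this hunk, loadModel and requiredMem both receive the number of GPU layers to offload (ngl) alongside the context size. A hedged end-to-end sketch, using placeholder path and values, and assuming construct() is the Implementation factory from the first hunk, that it returns nullptr on failure, and that the caller owns the returned pointer (the real client's error handling may differ):

#include <cstddef>
#include <string>

#include "llmodel.h"   // assumed include path

int main() {
    const std::string modelPath = "/path/to/model.gguf";  // placeholder path
    int nCtx = 2048;  // requested context length
    int ngl  = 32;    // requested number of layers to offload to the GPU

    // construct() selects a backend; per this diff it still defaults
    // buildVariant to "auto" and n_ctx to 2048.
    LLModel *model = LLModel::Implementation::construct(modelPath, "auto", nCtx);
    if (!model)
        return 1;

    // After this change, both calls take n_ctx and ngl.
    size_t memNeeded = model->requiredMem(modelPath, nCtx, ngl);
    (void)memNeeded;  // a real caller would compare this against available memory

    bool ok = model->loadModel(modelPath, nCtx, ngl) && model->isModelLoaded();

    delete model;     // assuming caller-owned pointer with a virtual destructor
    return ok ? 0 : 1;
}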
@@ -101,18 +103,18 @@ public:
         return *m_implementation;
     }

-    virtual std::vector<GPUDevice> availableGPUDevices(size_t memoryRequired) {
+    virtual std::vector<GPUDevice> availableGPUDevices(size_t memoryRequired) const {
         (void)memoryRequired;
         return {};
     }

-    virtual bool initializeGPUDevice(size_t memoryRequired, const std::string& name) {
+    virtual bool initializeGPUDevice(size_t memoryRequired, const std::string& name) const {
         (void)memoryRequired;
         (void)name;
         return false;
     }

-    virtual bool initializeGPUDevice(const GPUDevice & device, std::string *unavail_reason = nullptr) {
+    virtual bool initializeGPUDevice(int device, std::string *unavail_reason = nullptr) const {
         (void)device;
         if (unavail_reason) {
             *unavail_reason = "model has no GPU support";
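Here the GPU initialization overloads become const, and the per-device overload takes a plain device index instead of a GPUDevice reference. A sketch of device selection under these signatures, assuming GPUDevice is nested in LLModel as the unqualified use suggests; its fields are not shown in this excerpt, so the example picks a device by index only, and the index 0 below is a placeholder:

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

#include "llmodel.h"   // assumed include path

// Illustrative: ask the backend which GPUs can fit the model, then try to
// initialize one by index before loading; fall back to CPU on failure.
static bool tryUseGPU(const LLModel &model, size_t memoryRequired)
{
    std::vector<LLModel::GPUDevice> devices = model.availableGPUDevices(memoryRequired);
    if (devices.empty())
        return false;                      // no usable GPU reported

    std::string reason;
    if (!model.initializeGPUDevice(/*device index*/ 0, &reason)) {
        std::cerr << "GPU unavailable: " << reason << '\n';
        return false;                      // caller keeps running on CPU
    }
    return true;
}

Because both calls are const after this change, the sketch can take the model by const reference.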
@@ -120,7 +122,6 @@ public:
         return false;
     }

-    virtual bool initializeGPUDevice(int /*device*/) { return false; }
     virtual bool hasGPUDevice() { return false; }
     virtual bool usingGPUDevice() { return false; }

@@ -134,6 +135,18 @@ protected:
     virtual int32_t contextLength() const = 0;
     virtual const std::vector<Token>& endTokens() const = 0;

+    virtual int32_t maxContextLength(std::string const &modelPath) const
+    {
+        (void)modelPath;
+        return -1;
+    }
+
+    virtual int32_t layerCount(std::string const &modelPath) const
+    {
+        (void)modelPath;
+        return -1;
+    }
+
     // This is a helper function called from the default implementation of 'prompt' but it can be
     // shared by all base classes so it isn't virtual
     void recalculateContext(PromptContext &promptCtx, std::function<bool(bool)> recalculate);
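These protected virtuals give each backend a hook for reporting a model's real limits; the base defaults return -1, which callers can treat as "unknown". A sketch of how a backend subclass might override them, with invented placeholder parsers standing in for whatever metadata reading the real llama.cpp backend performs:

#include <cstdint>
#include <string>

#include "llmodel.h"   // assumed include path

// Placeholder parsers: stand-ins for real model-file metadata reading;
// returning -1 preserves the base-class "unknown" default.
static int32_t readContextLengthFromFile(const std::string &) { return -1; }
static int32_t readLayerCountFromFile(const std::string &)    { return -1; }

// Hypothetical backend: only the two new hooks are shown, so the class stays
// abstract (the remaining pure-virtual members of LLModel are omitted).
class ExampleBackend : public LLModel {
protected:
    int32_t maxContextLength(std::string const &modelPath) const override
    {
        return readContextLengthFromFile(modelPath);
    }

    int32_t layerCount(std::string const &modelPath) const override
    {
        return readLayerCountFromFile(modelPath);
    }
};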