backend: rebase llama.cpp submodule on latest upstream (#2694)

* Adds support for GPT-NeoX, Gemma 2, OpenELM, ChatGLM, and Jais architectures (all with Kompute support)
* Also enables Kompute support for StarCoder2, XVERSE, Command R, and OLMo
* Includes a number of Kompute resource management fixes

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
Author:       Jared Van Bortel
Date:         2024-07-19 14:52:58 -04:00
Committed by: GitHub
Parent:       398ef34a87
Commit:       290c629442

4 changed files with 266 additions and 211 deletions

@@ -90,25 +90,25 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
     else()
         set(GPT4ALL_ALLOW_NON_AVX ON)
     endif()
-    set(LLAMA_AVX2 ${GPT4ALL_ALLOW_NON_AVX})
-    set(LLAMA_F16C ${GPT4ALL_ALLOW_NON_AVX})
-    set(LLAMA_FMA ${GPT4ALL_ALLOW_NON_AVX})
+    set(GGML_AVX2 ${GPT4ALL_ALLOW_NON_AVX})
+    set(GGML_F16C ${GPT4ALL_ALLOW_NON_AVX})
+    set(GGML_FMA ${GPT4ALL_ALLOW_NON_AVX})
-    set(LLAMA_METAL OFF)
-    set(LLAMA_KOMPUTE OFF)
-    set(LLAMA_VULKAN OFF)
-    set(LLAMA_CUDA OFF)
-    set(LLAMA_ROCM OFF)
+    set(GGML_METAL OFF)
+    set(GGML_KOMPUTE OFF)
+    set(GGML_VULKAN OFF)
+    set(GGML_CUDA OFF)
+    set(GGML_ROCM OFF)
     if (BUILD_VARIANT MATCHES metal)
-        set(LLAMA_METAL ON)
+        set(GGML_METAL ON)
     elseif (BUILD_VARIANT MATCHES kompute)
-        set(LLAMA_KOMPUTE ON)
+        set(GGML_KOMPUTE ON)
     elseif (BUILD_VARIANT MATCHES vulkan)
-        set(LLAMA_VULKAN ON)
+        set(GGML_VULKAN ON)
     elseif (BUILD_VARIANT MATCHES cuda)
-        set(LLAMA_CUDA ON)
+        set(GGML_CUDA ON)
     elseif (BUILD_VARIANT MATCHES rocm)
-        set(LLAMA_HIPBLAS ON)
+        set(GGML_HIPBLAS ON)
     endif()
     # Include GGML
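
Background on this hunk: upstream llama.cpp split its tensor library into the separate ggml subproject and renamed the backend build options from the LLAMA_ prefix to GGML_ (LLAMA_CUDA became GGML_CUDA, and so on), so the rebase has to update every toggle GPT4All sets. Note that for the rocm variant the effective toggle on both sides of the rename is the HIPBLAS option, not the *_ROCM default set above it. As a minimal, hypothetical sketch (not part of this commit), a build script could catch stale uses of the old option names like this:

    # Hypothetical guard, not in the GPT4All tree: warn if an old LLAMA_* backend
    # toggle is still set, since the rebased submodule only reads the GGML_* names.
    foreach(OLD_OPT LLAMA_METAL LLAMA_KOMPUTE LLAMA_VULKAN LLAMA_CUDA LLAMA_HIPBLAS)
        if (DEFINED ${OLD_OPT})
            string(REPLACE "LLAMA_" "GGML_" NEW_OPT "${OLD_OPT}")
            message(WARNING "${OLD_OPT} is ignored after the rebase; set ${NEW_OPT} instead.")
        endif()
    endforeach()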