Metal+LLama take two (#929)

Support latest llama with Metal
---------

Co-authored-by: Adam Treat <adam@nomic.ai>
Co-authored-by: niansa/tuxifan <tuxifan@posteo.de>
commit d3ba1295a7 (parent b162b5c64e)
Author: Aaron Miller
Date: 2023-06-09 13:48:46 -07:00
Committed by: GitHub
8 changed files with 141 additions and 66 deletions


@@ -121,20 +121,30 @@ LLModel *LLModel::construct(const std::string &modelPath, std::string buildVariant
     if (!has_at_least_minimal_hardware())
         return nullptr;
 
-    //TODO: Auto-detect CUDA/OpenCL
-    if (buildVariant == "auto") {
-        if (requires_avxonly()) {
-            buildVariant = "avxonly";
-        } else {
-            buildVariant = "default";
-        }
-    }
     // Read magic
     std::ifstream f(modelPath, std::ios::binary);
     if (!f) return nullptr;
     // Get correct implementation
-    auto impl = implementation(f, buildVariant);
-    if (!impl) return nullptr;
+    const LLModel::Implementation* impl = nullptr;
+
+#if defined(__APPLE__) && defined(__arm64__) // FIXME: See if metal works for intel macs
+    if (buildVariant == "auto") {
+        impl = implementation(f, "metal");
+    }
+#endif
+
+    if (!impl) {
+        //TODO: Auto-detect CUDA/OpenCL
+        if (buildVariant == "auto") {
+            if (requires_avxonly()) {
+                buildVariant = "avxonly";
+            } else {
+                buildVariant = "default";
+            }
+        }
+        impl = implementation(f, buildVariant);
+        if (!impl) return nullptr;
+    }
     f.close();
     // Construct and return llmodel implementation
     return impl->construct();
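
The net effect of the hunk: when buildVariant is "auto" on an arm64 Apple machine, construct() now tries the "metal" implementation first, and only falls back to the CPU variants ("avxonly" or "default", chosen by requires_avxonly()) if no Metal implementation matches the model file. A minimal caller-side sketch follows, assuming the llmodel.h header from this repo; the model path and error message are placeholders, not part of the commit.

// Caller-side sketch (assumption: llmodel.h as in this repo; the
// model path below is a placeholder). Passing "auto" lets construct()
// prefer Metal on arm64 macOS and fall back to a CPU variant elsewhere.
#include <iostream>
#include <string>
#include "llmodel.h"

int main() {
    const std::string modelPath = "/path/to/model.bin"; // placeholder

    // construct() reads the file's magic to match an implementation;
    // it returns nullptr if the hardware or model is unsupported.
    LLModel *model = LLModel::construct(modelPath, "auto");
    if (!model) {
        std::cerr << "no implementation found for this model/hardware\n";
        return 1;
    }

    // Real use would call model->loadModel(modelPath) and then prompt it.
    delete model;
    return 0;
}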