mirror of
https://github.com/imartinez/privateGPT.git
synced 2025-09-06 09:41:31 +00:00
feat(llm): adds several settings for llamacpp and ollama (#1703)
This commit is contained in:
@@ -39,16 +39,23 @@ class LLMComponent:
|
||||
) from e
|
||||
|
||||
prompt_style = get_prompt_style(settings.llamacpp.prompt_style)
|
||||
|
||||
settings_kwargs = {
|
||||
"tfs_z": settings.llamacpp.tfs_z, # ollama and llama-cpp
|
||||
"top_k": settings.llamacpp.top_k, # ollama and llama-cpp
|
||||
"top_p": settings.llamacpp.top_p, # ollama and llama-cpp
|
||||
"repeat_penalty": settings.llamacpp.repeat_penalty, # ollama llama-cpp
|
||||
"n_gpu_layers": -1,
|
||||
"offload_kqv": True,
|
||||
}
|
||||
self.llm = LlamaCPP(
|
||||
model_path=str(models_path / settings.llamacpp.llm_hf_model_file),
|
||||
temperature=0.1,
|
||||
temperature=settings.llm.temperature,
|
||||
max_new_tokens=settings.llm.max_new_tokens,
|
||||
context_window=settings.llm.context_window,
|
||||
generate_kwargs={},
|
||||
callback_manager=LlamaIndexSettings.callback_manager,
|
||||
# All to GPU
|
||||
model_kwargs={"n_gpu_layers": -1, "offload_kqv": True},
|
||||
model_kwargs=settings_kwargs,
|
||||
# transform inputs into Llama2 format
|
||||
messages_to_prompt=prompt_style.messages_to_prompt,
|
||||
completion_to_prompt=prompt_style.completion_to_prompt,
|
||||
@@ -108,8 +115,22 @@ class LLMComponent:
|
||||
) from e
|
||||
|
||||
ollama_settings = settings.ollama
|
||||
|
||||
settings_kwargs = {
|
||||
"tfs_z": ollama_settings.tfs_z, # ollama and llama-cpp
|
||||
"num_predict": ollama_settings.num_predict, # ollama only
|
||||
"top_k": ollama_settings.top_k, # ollama and llama-cpp
|
||||
"top_p": ollama_settings.top_p, # ollama and llama-cpp
|
||||
"repeat_last_n": ollama_settings.repeat_last_n, # ollama
|
||||
"repeat_penalty": ollama_settings.repeat_penalty, # ollama llama-cpp
|
||||
}
|
||||
|
||||
self.llm = Ollama(
|
||||
model=ollama_settings.llm_model, base_url=ollama_settings.api_base
|
||||
model=ollama_settings.llm_model,
|
||||
base_url=ollama_settings.api_base,
|
||||
temperature=settings.llm.temperature,
|
||||
context_window=settings.llm.context_window,
|
||||
additional_kwargs=settings_kwargs,
|
||||
)
|
||||
case "mock":
|
||||
self.llm = MockLLM()
|
||||
|
Reference in New Issue
Block a user