mirror of
https://github.com/imartinez/privateGPT.git
synced 2025-09-17 23:57:58 +00:00
fix(config): make tokenizer optional and include a troubleshooting doc (#1998)
* docs: add troubleshooting * fix: pass HF token to setup script and prevent downloading the tokenizer when it is empty * fix: improve log and disable specific tokenizer by default * chore: change HF_TOKEN environment variable to be aligned with default config * fix: mypy
This commit is contained in:
@@ -24,6 +24,7 @@ snapshot_download(
|
||||
repo_id=settings().huggingface.embedding_hf_model_name,
|
||||
cache_dir=models_cache_path,
|
||||
local_dir=embedding_path,
|
||||
token=settings().huggingface.access_token,
|
||||
)
|
||||
print("Embedding model downloaded!")
|
||||
|
||||
@@ -35,15 +36,18 @@ hf_hub_download(
|
||||
cache_dir=models_cache_path,
|
||||
local_dir=models_path,
|
||||
resume_download=resume_download,
|
||||
token=settings().huggingface.access_token,
|
||||
)
|
||||
print("LLM model downloaded!")
|
||||
|
||||
# Download Tokenizer
|
||||
print(f"Downloading tokenizer {settings().llm.tokenizer}")
|
||||
AutoTokenizer.from_pretrained(
|
||||
pretrained_model_name_or_path=settings().llm.tokenizer,
|
||||
cache_dir=models_cache_path,
|
||||
)
|
||||
print("Tokenizer downloaded!")
|
||||
if settings().llm.tokenizer:
|
||||
print(f"Downloading tokenizer {settings().llm.tokenizer}")
|
||||
AutoTokenizer.from_pretrained(
|
||||
pretrained_model_name_or_path=settings().llm.tokenizer,
|
||||
cache_dir=models_cache_path,
|
||||
token=settings().huggingface.access_token,
|
||||
)
|
||||
print("Tokenizer downloaded!")
|
||||
|
||||
print("Setup done")
|
||||
|
Reference in New Issue
Block a user