From 0f61cd8b42503785c258451a67751be6667b779f Mon Sep 17 00:00:00 2001
From: Zach Nussbaum
Date: Mon, 1 May 2023 21:39:40 +0000
Subject: [PATCH] fix: retrieval dataset only has train split

---
 gpt4all/index/prep_index_for_train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gpt4all/index/prep_index_for_train.py b/gpt4all/index/prep_index_for_train.py
index 64e306d8..b0d32711 100644
--- a/gpt4all/index/prep_index_for_train.py
+++ b/gpt4all/index/prep_index_for_train.py
@@ -38,7 +38,7 @@ def prep_index():
     if os.path.exists(config['index_database']):
         retrieval_dataset = Dataset.load_from_disk(config['index_database'])
     else:
-        retrieval_dataset = load_dataset(config['index_database'], split=args.split)
+        retrieval_dataset = load_dataset(config['index_database'], split="train")
 
     # vectorize queries
     query_vector_path = f"{ds_path}_queries_embedded/{ds_path}_embedded_{args.split}"