mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2025-09-17 16:28:20 +00:00
Merge pull request #174 from waybarrios/fixing_data_bug
Load a Dataset object instead of a DatasetDict.
This commit is contained in:
2
data.py
2
data.py
@@ -68,7 +68,7 @@ def load_data(config, tokenizer):
|
||||
dataset = load_dataset("json", data_files=files, split="train")
|
||||
|
||||
else:
|
||||
- dataset = load_dataset(dataset_path)
|
||||
+ dataset = load_dataset(dataset_path,split='train')
|
||||
|
||||
dataset = dataset.train_test_split(test_size=.05, seed=config["seed"])
|
||||
|
||||
|
Reference in New Issue
Block a user