Slight cleanup of superfluous comment and space after comma

2025-11-13 05:58:13 +00:00 · 2023-04-06 19:56:49 -04:00
parent dc08c43867
commit b3be94a0ef
1 changed file with 1 addition and 2 deletions
--- a/data.py
+++ b/data.py
@@ -57,7 +57,6 @@ def load_data(config, tokenizer):
    dataset_path = config["dataset_path"]
    if os.path.exists(dataset_path):
-        # check if path is a directory
        if os.path.isdir(dataset_path):
            files = glob.glob(os.path.join(dataset_path, "*_clean.jsonl"))
        else:
@@ -68,7 +67,7 @@ def load_data(config, tokenizer):
        dataset = load_dataset("json", data_files=files, split="train")
    else:
-        dataset = load_dataset(dataset_path,split='train')
+        dataset = load_dataset(dataset_path, split='train')
    dataset = dataset.train_test_split(test_size=.05, seed=config["seed"])