fix: stopgap to remove unused columns

This commit is contained in:
Zach Nussbaum 2023-04-19 21:16:22 +00:00
parent 405d8c1bbc
commit 7debf52fc2

View File

@@ -106,13 +106,13 @@ def load_data(config, tokenizer):
train_dataset = train_dataset.map( train_dataset = train_dataset.map(
lambda ele: tokenize_inputs(config, tokenizer, ele), lambda ele: tokenize_inputs(config, tokenizer, ele),
batched=True, batched=True,
remove_columns=["source", "prompt"], remove_columns=["source", "prompt", "id", "response"],
**kwargs **kwargs
) )
val_dataset = val_dataset.map( val_dataset = val_dataset.map(
lambda ele: tokenize_inputs(config, tokenizer, ele), lambda ele: tokenize_inputs(config, tokenizer, ele),
batched=True, batched=True,
remove_columns=["source", "prompt"], remove_columns=["source", "prompt", "id", "response"],
**kwargs **kwargs
) )