From 43ad0d9ef031c3c102ae5eee2378ee7fc3910090 Mon Sep 17 00:00:00 2001
From: Orion-Zheng <zheng_zian@u.nus.edu>
Date: Tue, 14 Nov 2023 09:58:00 +0800
Subject: [PATCH] Fix wrong LLaMA EOS token ("<\s>" -> "</s>") in ColossalChat examples

---
 applications/Chat/examples/community/peft/train_peft_prompts.py | 2 +-
 applications/Chat/examples/community/peft/train_peft_sft.py     | 2 +-
 applications/Chat/examples/inference.py                         | 2 +-
 applications/Chat/examples/train_prompts.py                     | 2 +-
 applications/Chat/examples/train_reward_model.py                | 2 +-
 applications/Chat/examples/train_sft.py                         | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/applications/Chat/examples/community/peft/train_peft_prompts.py b/applications/Chat/examples/community/peft/train_peft_prompts.py
index 99a024f14..1dd9ffcdf 100644
--- a/applications/Chat/examples/community/peft/train_peft_prompts.py
+++ b/applications/Chat/examples/community/peft/train_peft_prompts.py
@@ -118,7 +118,7 @@ def main(args):
         tokenizer.pad_token = tokenizer.eos_token
     elif args.model == "llama":
         tokenizer = LlamaTokenizer.from_pretrained(args.pretrain)
-        tokenizer.eos_token = "<\s>"
+        tokenizer.eos_token = "</s>"
         tokenizer.pad_token = tokenizer.unk_token
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
diff --git a/applications/Chat/examples/community/peft/train_peft_sft.py b/applications/Chat/examples/community/peft/train_peft_sft.py
index 3bbef7208..6d395dead 100644
--- a/applications/Chat/examples/community/peft/train_peft_sft.py
+++ b/applications/Chat/examples/community/peft/train_peft_sft.py
@@ -68,7 +68,7 @@ def train(args):
             padding_side="right",
             use_fast=False,
         )
-        tokenizer.eos_token = "<\s>"
+        tokenizer.eos_token = "</s>"
         tokenizer.pad_token = tokenizer.unk_token
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
diff --git a/applications/Chat/examples/inference.py b/applications/Chat/examples/inference.py
index 62e06bf7b..9df8649d9 100644
--- a/applications/Chat/examples/inference.py
+++ b/applications/Chat/examples/inference.py
@@ -39,7 +39,7 @@ def eval(args):
         tokenizer.pad_token = tokenizer.eos_token
     elif args.model == "llama":
         tokenizer = LlamaTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
-        tokenizer.eos_token = "<\s>"
+        tokenizer.eos_token = "</s>"
         tokenizer.pad_token = tokenizer.unk_token
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
diff --git a/applications/Chat/examples/train_prompts.py b/applications/Chat/examples/train_prompts.py
index 8868e278d..40e06043a 100644
--- a/applications/Chat/examples/train_prompts.py
+++ b/applications/Chat/examples/train_prompts.py
@@ -125,7 +125,7 @@ def main(args):
         tokenizer = LlamaTokenizer.from_pretrained(
             "hf-internal-testing/llama-tokenizer" if args.tokenizer is None else args.tokenizer
         )
-        tokenizer.eos_token = "<\s>"
+        tokenizer.eos_token = "</s>"
         tokenizer.pad_token = tokenizer.unk_token
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
diff --git a/applications/Chat/examples/train_reward_model.py b/applications/Chat/examples/train_reward_model.py
index df6e8b6bd..fcdd29b29 100644
--- a/applications/Chat/examples/train_reward_model.py
+++ b/applications/Chat/examples/train_reward_model.py
@@ -72,7 +72,7 @@ def train(args):
         tokenizer = LlamaTokenizer.from_pretrained(
             "hf-internal-testing/llama-tokenizer" if args.tokenizer is None else args.tokenizer
         )
-        tokenizer.eos_token = "<\s>"
+        tokenizer.eos_token = "</s>"
         tokenizer.pad_token = tokenizer.unk_token
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
diff --git a/applications/Chat/examples/train_sft.py b/applications/Chat/examples/train_sft.py
index 66d08da30..d00c04809 100644
--- a/applications/Chat/examples/train_sft.py
+++ b/applications/Chat/examples/train_sft.py
@@ -75,7 +75,7 @@ def train(args):
         tokenizer = LlamaTokenizer.from_pretrained(
             "hf-internal-testing/llama-tokenizer" if args.tokenizer is None else args.tokenizer
         )
-        tokenizer.eos_token = "<\s>"
+        tokenizer.eos_token = "</s>"
         tokenizer.pad_token = tokenizer.unk_token
     elif args.model == "chatglm":
         tokenizer = ChatGLMTokenizer.from_pretrained(