[pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
This commit is contained in:
pre-commit-ci[bot]
2025-09-01 17:28:49 +00:00
parent 2b062ed6f0
commit e694ff45e2
17 changed files with 126 additions and 126 deletions

View File

@@ -34,8 +34,8 @@ class PreTrainingDataset:
self.do_whole_word_mask = do_whole_word_mask
self.max_predictions_per_seq = max_predictions_per_seq
self.vocab_words = list(tokenizer.vocab.keys())
self.rec = re.compile("[\u4E00-\u9FA5]")
self.whole_rec = re.compile("##[\u4E00-\u9FA5]")
self.rec = re.compile("[\u4e00-\u9fa5]")
self.whole_rec = re.compile("##[\u4e00-\u9fa5]")
self.mlm_p = 0.15
self.mlm_mask_p = 0.8

View File

@@ -75,15 +75,15 @@ auto get_new_segment(
return new_segment;
}
bool startsWith(const std::string &s, const std::string &sub) {
// Returns true if `s` begins with `sub`; an empty `sub` always matches.
// Uses compare() restricted to the prefix position instead of find():
// find() searches the whole of `s` for `sub` anywhere before it can
// report a non-prefix miss, and the `== 0 ? true : false` ternary was
// redundant — the comparison is already a bool.
bool startsWith(const std::string& s, const std::string& sub) {
    return s.compare(0, sub.size(), sub) == 0;
}
auto create_whole_masked_lm_predictions(
std::vector<std::string> &tokens,
const std::vector<std::string> &original_tokens,
const std::vector<std::string> &vocab_words,
std::map<std::string, int> &vocab, const int max_predictions_per_seq,
std::vector<std::string>& tokens,
const std::vector<std::string>& original_tokens,
const std::vector<std::string>& vocab_words,
std::map<std::string, int>& vocab, const int max_predictions_per_seq,
const double masked_lm_prob) {
// for (auto item : vocab) {
// std::cout << "key=" << std::string(py::str(item.first)) << ", "

View File

@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" PyTorch DeBERTa-v2 model."""
"""PyTorch DeBERTa-v2 model."""
import math
from collections.abc import Sequence