Fix different behavior in status check pattern matching with double stars (#35474)

Drop the minimatch dependency, use our own glob compiler. Fix #35473
2025-09-14 06:33:03 +00:00 · 2025-09-13 11:53:27 +08:00
parent 866c636f52
commit 325e059a50
6 changed files with 325 additions and 6 deletions
--- a/web_src/js/features/repo-settings.ts
+++ b/web_src/js/features/repo-settings.ts
@@ -1,9 +1,9 @@
-import {minimatch} from 'minimatch';
 import {createMonaco} from './codeeditor.ts';
 import {onInputDebounce, queryElems, toggleElem} from '../utils/dom.ts';
 import {POST} from '../modules/fetch.ts';
 import {initRepoSettingsBranchesDrag} from './repo-settings-branches.ts';
 import {fomanticQuery} from '../modules/fomantic/base.ts';
+import {globMatch} from '../utils/glob.ts';

 const {appSubUrl, csrfToken} = window.config;

@@ -108,7 +108,7 @@ function initRepoSettingsBranches() {
      let matched = false;
      const statusCheck = el.getAttribute('data-status-check');
      for (const pattern of validPatterns) {
-        if (minimatch(statusCheck, pattern, {noext: true})) { // https://github.com/go-gitea/gitea/issues/33121 disable extended glob syntax
+        if (globMatch(statusCheck, pattern, '/')) {
          matched = true;
          break;
        }
--- a/web_src/js/utils/glob.test.ts
+++ b/web_src/js/utils/glob.test.ts
@@ -0,0 +1,129 @@
+import {readFile} from 'node:fs/promises';
+import * as path from 'node:path';
+import {globCompile} from './glob.ts';
+
+/**
+ * Loads the `key = value` test data from `glob.test.txt`, located next to this file.
+ *
+ * Blank lines and lines starting with `#` are skipped. The first and the last character of every
+ * value (the surrounding quote characters) are removed, then `\\` is unescaped to `\` and `\/` to `/`.
+ *
+ * @returns `caseDataMap` maps every key of the file to its unquoted value; `caseNames` lists the
+ *          distinct `<name>` parts of all `pattern_<name>` keys.
+ * @throws Error when a non-comment line contains no `=`.
+ */
+async function loadGlobTestData(): Promise<{caseNames: string[], caseDataMap: Record<string, string>}> {
+  const fileContent = await readFile(path.join(import.meta.dirname, 'glob.test.txt'), 'utf8');
+  const caseDataMap: Record<string, string> = {};
+  const caseNameMap: Record<string, boolean> = {};
+  for (const rawLine of fileContent.split('\n')) {
+    const line = rawLine.trim();
+    if (!line || line.startsWith('#')) continue;
+    // Split at the FIRST "=" only. `line.split('=', 2)` must not be used here: the JS limit drops
+    // everything after a second "=" instead of keeping the remainder, which would silently
+    // corrupt any value that itself contains "=".
+    const sepPos = line.indexOf('=');
+    if (sepPos === -1) throw new Error(`Invalid test case line: ${line}`);
+
+    const key = line.substring(0, sepPos).trim();
+    let value = line.substring(sepPos + 1).trim();
+    value = value.substring(1, value.length - 1); // remove quotes
+    value = value.replaceAll('\\\\', '\\').replaceAll('\\/', '/');
+    caseDataMap[key] = value;
+    if (key.startsWith('pattern_')) caseNameMap[key.substring('pattern_'.length)] = true;
+  }
+  return {caseNames: Object.keys(caseNameMap), caseDataMap};
+}
+
+/**
+ * Returns the match expectations ported from the Go test-suite of gobwas/glob:
+ * https://github.com/gobwas/glob/blob/master/glob_test.go
+ *
+ * Every table row is `[matched, pattern, input, separators?]`; a row without separators gets ''.
+ */
+function loadGlobGolangCases() {
+  type Row = [boolean, string, string, string?];
+  const rows: Row[] = [
+    [true, '* ?at * eyes', 'my cat has very bright eyes'],
+
+    [true, '', ''],
+    [false, '', 'b'],
+
+    [true, '*ä', 'åä'],
+    [true, 'abc', 'abc'],
+    [true, 'a*c', 'abc'],
+    [true, 'a*c', 'a12345c'],
+    [true, 'a?c', 'a1c'],
+    [true, 'a.b', 'a.b', '.'],
+    [true, 'a.*', 'a.b', '.'],
+    [true, 'a.**', 'a.b.c', '.'],
+    [true, 'a.?.c', 'a.b.c', '.'],
+    [true, 'a.?.?', 'a.b.c', '.'],
+    [true, '?at', 'cat'],
+    [true, '?at', 'fat'],
+    [true, '*', 'abc'],
+    [true, `\\*`, '*'],
+    [true, '**', 'a.b.c', '.'],
+
+    [false, '?at', 'at'],
+    [false, '?at', 'fat', 'f'],
+    [false, 'a.*', 'a.b.c', '.'],
+    [false, 'a.?.c', 'a.bb.c', '.'],
+    [false, '*', 'a.b.c', '.'],
+
+    [true, '*test', 'this is a test'],
+    [true, 'this*', 'this is a test'],
+    [true, '*is *', 'this is a test'],
+    [true, '*is*a*', 'this is a test'],
+    [true, '**test**', 'this is a test'],
+    [true, '**is**a***test*', 'this is a test'],
+
+    [false, '*is', 'this is a test'],
+    [false, '*no*', 'this is a test'],
+    [true, '[!a]*', 'this is a test3'],
+
+    [true, '*abc', 'abcabc'],
+    [true, '**abc', 'abcabc'],
+    [true, '???', 'abc'],
+    [true, '?*?', 'abc'],
+    [true, '?*?', 'ac'],
+    [false, 'sta', 'stagnation'],
+    [true, 'sta*', 'stagnation'],
+    [false, 'sta?', 'stagnation'],
+    [false, 'sta?n', 'stagnation'],
+
+    [true, '{abc,def}ghi', 'defghi'],
+    [true, '{abc,abcd}a', 'abcda'],
+    [true, '{a,ab}{bc,f}', 'abc'],
+    [true, '{*,**}{a,b}', 'ab'],
+    [false, '{*,**}{a,b}', 'ac'],
+
+    [true, '/{rate,[a-z][a-z][a-z]}*', '/rate'],
+    [true, '/{rate,[0-9][0-9][0-9]}*', '/rate'],
+    [true, '/{rate,[a-z][a-z][a-z]}*', '/usd'],
+
+    [true, '{*.google.*,*.yandex.*}', 'www.google.com', '.'],
+    [true, '{*.google.*,*.yandex.*}', 'www.yandex.com', '.'],
+    [false, '{*.google.*,*.yandex.*}', 'yandex.com', '.'],
+    [false, '{*.google.*,*.yandex.*}', 'google.com', '.'],
+
+    [true, '{*.google.*,yandex.*}', 'www.google.com', '.'],
+    [true, '{*.google.*,yandex.*}', 'yandex.com', '.'],
+    [false, '{*.google.*,yandex.*}', 'www.yandex.com', '.'],
+    [false, '{*.google.*,yandex.*}', 'google.com', '.'],
+
+    [true, '*//{,*.}example.com', 'https://www.example.com'],
+    [true, '*//{,*.}example.com', 'http://example.com'],
+    [false, '*//{,*.}example.com', 'http://example.com.net'],
+  ];
+  // expand each row into the object shape consumed by the test
+  return rows.map(([matched, pattern, input, separators = '']) => ({matched, pattern, input, separators}));
+}
+
+// Checks the glob compiler against three sources: the pattern/regexp pairs of glob.test.txt,
+// the match expectations ported from gobwas/glob, and a few cases of our own.
+test('GlobCompiler', async () => {
+  const testData = await loadGlobTestData();
+  expect(testData.caseNames.length).toBe(10); // should have 10 test cases
+  for (const name of testData.caseNames) {
+    const compiled = globCompile(testData.caseDataMap[`pattern_${name}`]);
+    expect(compiled.regexpPattern).toBe(testData.caseDataMap[`regexp_${name}`]);
+  }
+
+  const golangCases = loadGlobGolangCases();
+  expect(golangCases.length).toBe(60);
+  for (const {matched, pattern, input, separators} of golangCases) {
+    const compiled = globCompile(pattern, separators);
+    const msg = `pattern: ${pattern}, input: ${input}, separators: ${separators || '(none)'}, compiled: ${compiled.regexpPattern}`;
+    // eslint-disable-next-line @vitest/valid-expect -- Unlike Jest, Vitest supports a message as the second argument
+    expect(compiled.regexp.test(input), msg).toBe(matched);
+  }
+
+  // then our cases: separator handling, character classes and regexp special characters
+  const ownCases: Array<{pattern: string, separators?: string, regexp: string}> = [
+    {pattern: '*/**/x', regexp: '^.*/.*/x$'},
+    {pattern: '*/**/x', separators: '/', regexp: '^[^/]*/.*/x$'},
+    {pattern: '[a-b][^-\\]]', separators: '/', regexp: '^[a-b][^-\\]]$'},
+    {pattern: '.+^$()|', separators: '/', regexp: '^\\.\\+\\^\\$\\(\\)\\|$'},
+  ];
+  for (const {pattern, separators, regexp} of ownCases) {
+    expect(globCompile(pattern, separators).regexpPattern).toBe(regexp);
+  }
+});
--- a/web_src/js/utils/glob.test.txt
+++ b/web_src/js/utils/glob.test.txt
@@ -0,0 +1,44 @@
+# test cases are from https://github.com/gobwas/glob/blob/master/glob_test.go
+
+pattern_all          = "[a-z][!a-x]*cat*[h][!b]*eyes*"
+regexp_all           = `^[a-z][^a-x].*cat.*[h][^b].*eyes.*$`
+fixture_all_match    = "my cat has very bright eyes"
+fixture_all_mismatch = "my dog has very bright eyes"
+
+pattern_plain          = "google.com"
+regexp_plain           = `^google\.com$`
+fixture_plain_match    = "google.com"
+fixture_plain_mismatch = "gobwas.com"
+
+pattern_multiple          = "https://*.google.*"
+regexp_multiple           = `^https:\/\/.*\.google\..*$`
+fixture_multiple_match    = "https://account.google.com"
+fixture_multiple_mismatch = "https://google.com"
+
+pattern_alternatives          = "{https://*.google.*,*yandex.*,*yahoo.*,*mail.ru}"
+regexp_alternatives           = `^(https:\/\/.*\.google\..*|.*yandex\..*|.*yahoo\..*|.*mail\.ru)$`
+fixture_alternatives_match    = "http://yahoo.com"
+fixture_alternatives_mismatch = "http://google.com"
+
+pattern_alternatives_suffix                = "{https://*gobwas.com,http://exclude.gobwas.com}"
+regexp_alternatives_suffix                 = `^(https:\/\/.*gobwas\.com|http://exclude\.gobwas\.com)$`
+fixture_alternatives_suffix_first_match    = "https://safe.gobwas.com"
+fixture_alternatives_suffix_first_mismatch = "http://safe.gobwas.com"
+fixture_alternatives_suffix_second         = "http://exclude.gobwas.com"
+
+pattern_prefix                 = "abc*"
+regexp_prefix                  = `^abc.*$`
+pattern_suffix                 = "*def"
+regexp_suffix                  = `^.*def$`
+pattern_prefix_suffix          = "ab*ef"
+regexp_prefix_suffix           = `^ab.*ef$`
+fixture_prefix_suffix_match    = "abcdef"
+fixture_prefix_suffix_mismatch = "af"
+
+pattern_alternatives_combine_lite = "{abc*def,abc?def,abc[zte]def}"
+regexp_alternatives_combine_lite  = `^(abc.*def|abc.def|abc[zte]def)$`
+fixture_alternatives_combine_lite = "abczdef"
+
+pattern_alternatives_combine_hard = "{abc*[a-c]def,abc?[d-g]def,abc[zte]?def}"
+regexp_alternatives_combine_hard  = `^(abc.*[a-c]def|abc.[d-g]def|abc[zte].def)$`
+fixture_alternatives_combine_hard = "abczqdef"
--- a/web_src/js/utils/glob.ts
+++ b/web_src/js/utils/glob.ts
@@ -0,0 +1,127 @@
+// Reference: https://github.com/gobwas/glob/blob/master/glob.go
+//
+// globCompile creates a compiled glob for the given pattern; every character of the optional `separators` string is treated as a separator.
+// The pattern syntax is:
+//
+//    pattern:
+//        { term }
+//
+//    term:
+//        `*`         matches any sequence of non-separator characters
+//        `**`        matches any sequence of characters
+//        `?`         matches any single non-separator character
+//        `[` [ `!` ] { character-range } `]`
+//                    character class (must be non-empty)
+//        `{` pattern-list `}`
+//                    pattern alternatives
+//        c           matches character c (c != `*`, `**`, `?`, `\`, `[`, `{`, `}`)
+//        `\` c       matches character c
+//
+//    character-range:
+//        c           matches character c (c != `\`, `-`, `]`)
+//        `\` c       matches character c
+//        lo `-` hi   matches character c for lo <= c <= hi
+//
+//    pattern-list:
+//        pattern { `,` pattern }
+//                    comma-separated (without spaces) patterns
+//
+
+/**
+ * Compiles a glob pattern (see the syntax description at the top of this file) into a regular
+ * expression. All work happens in the constructor: afterwards `regexpPattern` holds the anchored
+ * regexp source and `regexp` the ready-to-use RegExp.
+ *
+ * The constructor throws an Error for malformed patterns: a trailing `\`, an unterminated
+ * character class (`[`), or an unterminated list of alternatives (`{`).
+ */
+class GlobCompiler {
+  nonSeparatorChars: string; // regexp fragment that matches exactly one non-separator character
+  globPattern: string; // the glob pattern being compiled
+  regexpPattern: string; // the compiled regexp source, anchored with ^ and $
+  regexp: RegExp; // RegExp built from regexpPattern
+  pos: number = 0; // index of the next unread character of globPattern
+
+  // Returns the regexp text that matches the glob-escaped character `c` literally.
+  // Only regexp meta characters keep their backslash: blindly emitting `\c` would turn e.g. the
+  // glob `\d` into the regexp digit class instead of a literal "d".
+  #escapeLiteral(c: string): string {
+    return /[\\^$.*+?()[\]{}|-]/.test(c) ? `\\${c}` : c;
+  }
+
+  // Compiles a character class. On entry `pos` points just after the opening "[", on return it
+  // points just after the closing "]". Throws if the class or an escape in it is unterminated.
+  #compileChars(): string {
+    let result = '';
+    if (this.globPattern[this.pos] === '!') {
+      this.pos++;
+      result += '^'; // glob negation "[!...]" is spelled "[^...]" in a regexp
+    }
+    while (this.pos < this.globPattern.length) {
+      const c = this.globPattern[this.pos];
+      this.pos++;
+      if (c === ']') {
+        return `[${result}]`;
+      }
+      if (c === '\\') {
+        if (this.pos >= this.globPattern.length) {
+          throw new Error('Unterminated character class escape');
+        }
+        // Read the escaped character BEFORE advancing. Advancing first would emit the character
+        // after it instead, and the next loop iteration would then consume that one a second time.
+        result += this.#escapeLiteral(this.globPattern[this.pos]);
+        this.pos++;
+      } else {
+        result += c;
+      }
+    }
+    throw new Error('Unterminated character class');
+  }
+
+  // Compiles the pattern starting at `pos`. With `subPattern` set, it compiles the inside of
+  // "{...}" (commas become regexp alternation) and returns at the matching "}"; otherwise it
+  // compiles up to the end of the pattern.
+  #compile(subPattern: boolean = false): string {
+    let result = '';
+    while (this.pos < this.globPattern.length) {
+      const c = this.globPattern[this.pos];
+      this.pos++;
+      if (subPattern && c === '}') {
+        return `(${result})`;
+      }
+      switch (c) {
+        case '*':
+          if (this.globPattern[this.pos] !== '*') {
+            result += `${this.nonSeparatorChars}*`; // match any sequence of non-separator characters
+          } else {
+            this.pos++;
+            result += '.*'; // match any sequence of characters
+          }
+          break;
+        case '?':
+          result += this.nonSeparatorChars; // match any single non-separator character
+          break;
+        case '[':
+          result += this.#compileChars();
+          break;
+        case '{':
+          result += this.#compile(true);
+          break;
+        case ',':
+          result += subPattern ? '|' : ','; // a comma only separates alternatives inside "{...}"
+          break;
+        case '\\':
+          if (this.pos >= this.globPattern.length) {
+            throw new Error('No character to escape');
+          }
+          result += this.#escapeLiteral(this.globPattern[this.pos]);
+          this.pos++;
+          break;
+        case '.': case '+': case '^': case '$': case '(': case ')': case '|':
+          result += `\\${c}`; // escape regexp special characters
+          break;
+        default:
+          result += c;
+      }
+    }
+    if (subPattern) {
+      // The pattern ended before the closing "}". Without this check "{a,b" would compile to the
+      // regexp "^a|b$", which matches far more than the author of the pattern intended.
+      throw new Error('Unterminated alternatives');
+    }
+    return result;
+  }
+
+  /**
+   * @param pattern the glob pattern to compile
+   * @param separators every character of this string is a separator that `*` and `?` do not
+   *                   match; when empty, `*` and `?` match any character
+   */
+  constructor(pattern: string, separators: string = '') {
+    // the separators become the content of a negated character class, so escape what is special there
+    const escapedSeparators = separators.replaceAll(/[\^\]\-\\]/g, '\\$&');
+    this.nonSeparatorChars = escapedSeparators ? `[^${escapedSeparators}]` : '.';
+    this.globPattern = pattern;
+    this.regexpPattern = `^${this.#compile()}$`;
+    this.regexp = new RegExp(this.regexpPattern); // regexpPattern is already anchored
+  }
+}
+
+/**
+ * Compiles a glob pattern into a GlobCompiler, which exposes the result as `regexp` and
+ * `regexpPattern`.
+ *
+ * @param pattern the glob pattern
+ * @param separators passed to the GlobCompiler constructor unchanged (defaults to '')
+ * @throws Error when the GlobCompiler constructor rejects the pattern
+ */
+export function globCompile(pattern: string, separators: string = ''): GlobCompiler {
+  const compiler = new GlobCompiler(pattern, separators);
+  return compiler;
+}
+
+/**
+ * Tests whether `str` matches the glob `pattern`.
+ *
+ * @param str the string to test
+ * @param pattern the glob pattern
+ * @param separators passed to globCompile unchanged (defaults to '')
+ * @returns the result of testing `str` against the compiled regexp; false when compiling or
+ *          testing throws (e.g. for a pattern the compiler rejects)
+ */
+export function globMatch(str: string, pattern: string, separators: string = ''): boolean {
+  let matched = false;
+  try {
+    const {regexp} = globCompile(pattern, separators);
+    matched = regexp.test(str);
+  } catch {
+    // deliberately ignored: a pattern that cannot be compiled matches nothing
+  }
+  return matched;
+}