ci: Select jobs by touched code

To allow selective testing as well as a selective list of required tests,
let's add a mapping of required jobs/tests in "skips.py" and a
"gatekeeper" workflow that will ensure the expected required jobs were
successful. Then we can mark only the "gatekeeper" as the required job
and modify the logic to suit our needs.

Fixes: #9237

Signed-off-by: Lukáš Doktor <ldoktor@redhat.com>
This commit is contained in:
Lukáš Doktor
2024-04-19 10:36:10 +02:00
parent 8412c09143
commit 5c1cea1601
9 changed files with 466 additions and 2 deletions

View File

@@ -0,0 +1,190 @@
#!/usr/bin/env python3
#
# Copyright (c) 2024 Red Hat Inc.
#
# SPDX-License-Identifier: Apache-2.0
"""
Keeps checking the current PR until all required jobs pass
Env variables:
* REQUIRED_JOBS: comma separated list of required jobs (in form of
"$workflow / $job")
* REQUIRED_REGEXPS: comma separated list of regexps for required jobs
* COMMIT_HASH: Full commit hash we want to be watching
* GITHUB_REPOSITORY: Github repository (user/repo)
Sample execution (GH token can be excluded):
GITHUB_TOKEN="..." REQUIRED_JOBS="skipper / skipper"
REQUIRED_REGEXPS=".*"
COMMIT_HASH=b8382cea886ad9a8f77d237bcfc0eba0c98775dd
GITHUB_REPOSITORY=kata-containers/kata-containers
python3 jobs.py
"""
import os
import re
import sys
import time
import requests
# Status codes used both as per-job states and as process exit codes
PASS = 0
FAIL = 1
# Sentinel for "job has not finished yet" (also the polling exit status)
RUNNING = 127
# Common headers for GitHub REST API v3 calls
_GH_HEADERS = {"Accept": "application/vnd.github.v3+json"}
if os.environ.get("GITHUB_TOKEN"):
    # Authenticated requests get a much higher API rate limit
    _GH_HEADERS["Authorization"] = f"token {os.environ['GITHUB_TOKEN']}"
# Workflow-runs endpoint of the watched repository
# (GITHUB_REPOSITORY must be set, otherwise import fails with KeyError)
_GH_RUNS_URL = ("https://api.github.com/repos/"
                f"{os.environ['GITHUB_REPOSITORY']}/actions/runs")
class Checker:
    """
    Object to keep watching required GH action workflows.

    Required jobs come from the REQUIRED_JOBS (exact names) and
    REQUIRED_REGEXPS (regular expressions) env variables.  Job states in
    ``self.results`` are PASS (0), RUNNING (127) or, on failure, the raw
    GH "conclusion" string (e.g. "failure", "cancelled").
    """
    def __init__(self):
        # Comma-separated list of explicitly required "$workflow / $job" names
        required_jobs = os.getenv("REQUIRED_JOBS")
        if required_jobs:
            required_jobs = required_jobs.split(",")
        else:
            required_jobs = []
        required_regexps = os.getenv("REQUIRED_REGEXPS")
        self.required_regexps = []
        # TODO: Add way to specify minimum amount of tests
        # (eg. via \d+: prefix) and check it in status
        if required_regexps:
            for regexp in required_regexps.split(","):
                self.required_regexps.append(re.compile(regexp))
        if not required_jobs and not self.required_regexps:
            raise RuntimeError("No REQUIRED_JOBS or REQUIRED_REGEXPS defined")
        # Set all required jobs as RUNNING to enforce waiting for them
        self.results = {job: RUNNING for job in required_jobs}
    def record(self, workflow, job):
        """
        Records a job run if it is required (by exact name or by regexp);
        jobs matching neither are silently ignored.

        :param workflow: name of the workflow the job belongs to
        :param job: GH API job dict (uses "name", "status", "conclusion")
        """
        job_name = f"{workflow} / {job['name']}"
        if job_name not in self.results:
            for re_job in self.required_regexps:
                # Required job via regexp
                if re_job.match(job_name):
                    break
            else:
                # Not a required job
                return
        if job["status"] != "completed":
            self.results[job_name] = RUNNING
            return
        if job["conclusion"] != "success":
            # Store the GH conclusion string so __str__ can report it
            self.results[job_name] = job['conclusion']
            return
        self.results[job_name] = PASS
    def status(self):
        """
        :returns: 0 - all tests passing; 127 - no failures but some
                  tests in progress; 1 - any failure (also when no
                  results were recorded at all)
        """
        running = False
        if not self.results:
            # No results reported so far
            return FAIL
        for status in self.results.values():
            if status == RUNNING:
                running |= True
            elif status != PASS:
                # Status not passed
                return FAIL
        if running:
            return RUNNING
        return PASS
    def __str__(self):
        """Summarize the current status (one line per job + verdict)"""
        good = []
        bad = []
        warn = []
        for job, status in self.results.items():
            if status == RUNNING:
                warn.append(f"WARN: {job} - Still running")
            elif status == PASS:
                good.append(f"PASS: {job} - success")
            else:
                bad.append(f"FAIL: {job} - Not passed - {status}")
        out = '\n'.join(sorted(good) + sorted(warn) + sorted(bad))
        stat = self.status()
        if stat == RUNNING:
            status = "Some jobs are still running."
        elif stat == PASS:
            status = "All required jobs passed"
        elif not self.results:
            # status() returned FAIL because nothing matched at all
            status = ("No required jobs for regexps: " +
                      ";".join([_.pattern for _ in self.required_regexps]))
        else:
            status = "Not all required jobs passed!"
        return f"{out}\n\n{status}"
    def get_jobs_for_workflow_run(self, run_id):
        """
        Get jobs from a workflow id, following GH pagination
        (100 jobs per page until "total_count" is reached).

        :param run_id: numeric GH workflow-run id
        :returns: list of GH API job dicts
        """
        total_count = -1
        jobs = []
        page = 1
        while True:
            response = requests.get(
                f"{_GH_RUNS_URL}/{run_id}/jobs?per_page=100&page={page}",
                headers=_GH_HEADERS,
                timeout=60
            )
            response.raise_for_status()
            output = response.json()
            jobs.extend(output["jobs"])
            total_count = max(total_count, output["total_count"])
            if len(jobs) >= total_count:
                break
            page += 1
        return jobs
    def check_workflow_runs_status(self):
        """
        Checks if all required jobs passed for the COMMIT_HASH commit;
        records every job of every workflow run and prints the summary.

        :returns: 0 - all passing; 1 - any failure; 127 some jobs running
        """
        # TODO: Check if we need pagination here as well
        latest_commit_sha = os.getenv("COMMIT_HASH")
        response = requests.get(
            _GH_RUNS_URL,
            params={"head_sha": latest_commit_sha},
            headers=_GH_HEADERS,
            timeout=60
        )
        response.raise_for_status()
        workflow_runs = response.json()["workflow_runs"]
        for run in workflow_runs:
            jobs = self.get_jobs_for_workflow_run(run["id"])
            for job in jobs:
                self.record(run["name"], job)
        print(self)
        return self.status()
    def run(self):
        """
        Keep checking the PR until all required jobs finish
        (never returns; terminates the process via sys.exit)
        :returns: 0 on success; 1 on failure
        """
        while True:
            ret = self.check_workflow_runs_status()
            if ret == RUNNING:
                running_jobs = len([job
                                    for job, status in self.results.items()
                                    if status == RUNNING])
                print(f"{running_jobs} jobs are still running...")
                # CI jobs take long; poll only every 3 minutes
                time.sleep(180)
                continue
            sys.exit(ret)
if __name__ == "__main__":
    # Poll until all required jobs finish; exits with the final status
    Checker().run()

View File

@@ -0,0 +1,36 @@
required_tests:
# Always required tests
- "Commit Message Check / Commit Message Check"
required_regexps:
# Always required regexps
paths:
# Mapping of path (python) regexps to set-of-tests (sort by order of importance)
# CI
- "^ci/openshift-ci/": []
- "^\\.github/workflows/": []
# TODO: Expand filters
# Documentation
#- "\\.rst$": ["build"]
#- "\\.md$": ["build"]
# Sources
#- "^src/": ["static", "build", "test"]
mapping:
# Mapping of set-of-tests to required test names and/or test name regexps
# TODO: Modify this according to actual required tests
test:
# Checks the basic functional tests work
regexps: "Kata Containers CI / .*run-basic-amd64-tests.*|Kata Containers CI / .*run-metrics-tests.*"
names:
- Kata Containers CI / kata-containers-ci-on-push / run-k8s-tests-on-ppc64le / run-k8s-tests (qemu, kubeadm)
- Kata Containers CI / kata-containers-ci-on-push / run-k8s-tests-on-aks / run-k8s-tests (ubuntu, qemu, small)
- Kata Containers CI / kata-containers-ci-on-push / run-k8s-tests-with-crio-on-garm / run-k8s-tests (qemu, k0s, garm-ubuntu-2204)
# TODO: Add support for "depends" to automatically add dependent set-of-tests
# (eg. "build" is required for "test")
build:
# Checks that the kata-containers static tarball is created
regexps: "Kata Containers CI / .*build-kata-static-tarball.*"
static:
# Checks that static checks are passing
regexps: "Static checks.*"

View File

@@ -0,0 +1,105 @@
#!/usr/bin/env python3
#
# Copyright (c) 2024 Red Hat Inc.
#
# SPDX-License-Identifier: Apache-2.0
"""
Gets changes of the current git to env variable TARGET_BRANCH
and reports feature skips in form of "skip_$feature=yes|no"
or list of required tests (based on argv[1])
"""
from collections import OrderedDict
import os
import re
import subprocess
import sys
import yaml
class Checks:
    """
    Maps files changed against a target branch to the set-of-tests that
    have to run, based on the "required-tests.yaml" config next to this
    script (keys: required_tests, required_regexps, paths, mapping).
    """
    def __init__(self):
        config_path = os.path.join(os.path.dirname(__file__),
                                   "required-tests.yaml")
        with open(config_path, "r", encoding="utf8") as config_fd:
            config = yaml.load(config_fd, Loader=yaml.SafeLoader)
        # Always-required test names and regexps
        self.required_tests = config.get('required_tests') or []
        self.required_regexps = config.get('required_regexps') or []
        # Ordered mapping of path-regexp -> list of set-of-tests; the
        # first matching regexp wins, so order in the config matters.
        # Bug fix: previously self.paths was only assigned when the
        # config contained a truthy "paths" key, so get_features would
        # crash with AttributeError on a config without paths.
        self.paths = OrderedDict(
            (re.compile(key), value)
            for items in config.get('paths') or []
            for key, value in items.items())
        # Mapping of set-of-tests -> {"names": [...], "regexps": "..."}
        self.mapping = config.get('mapping') or {}
        self.all_set_of_tests = set(self.mapping.keys())
    def run(self, tests, target_branch):
        """
        Find the required features/tests and print them.

        :param tests: report required tests+regexps (bool) instead of
                      the skip_$feature=yes|no lines
        :param target_branch: branch/commit to compare to
        :returns: 0 (success)
        """
        enabled_features = self.get_features(target_branch)
        if not tests:
            for feature in self.all_set_of_tests:
                # Print all features status in "$key=$value" format to allow
                # usage with $GITHUB_OUTPUT
                print(f"skip_{feature}=" +
                      ('no' if feature in enabled_features else 'yes'))
            return 0
        required_tests = set(self.required_tests)
        required_regexps = set(self.required_regexps)
        for feature in enabled_features:
            values = self.mapping.get(feature, {})
            if values.get("names"):
                required_tests.update(values["names"])
            if values.get("regexps"):
                required_regexps.add(values["regexps"])
        # Separators (";" for names, "|" for regexps) are what the
        # gatekeeper consumer expects
        print(';'.join(required_tests))
        print('|'.join(required_regexps))
        return 0
    def get_features(self, target_branch):
        """
        Get changed files relative to `target_branch` and map them to the
        to-be-tested set-of-tests.

        :param target_branch: branch/commit to compare to
        :returns: set of set-of-tests names
        """
        cmd = ["git", "diff", "--name-only", f"origin/{target_branch}"]
        changed_files = [_.decode("utf-8")
                         for _ in subprocess.check_output(cmd).split(b'\n')
                         if _.strip()]
        # Log the diff for debugging without polluting stdout (parsed output)
        print('\n'.join(changed_files), file=sys.stderr)
        return self._features_for_files(changed_files)
    def _features_for_files(self, changed_files):
        """
        Map changed file names to the set-of-tests that need to run.

        :param changed_files: list of changed file paths
        :returns: set of set-of-tests names (all of them when any file
                  matches no configured path regexp)
        """
        enabled_features = set()
        for changed_file in changed_files:
            for regexp, features in self.paths.items():
                if regexp.search(changed_file):
                    enabled_features.update(features)
                    # this changed_file was treated, ignore other regexps
                    break
            else:
                # Untreated changed_file, run all tests
                return self.all_set_of_tests
        return enabled_features
if __name__ == "__main__":
    # A single "-t" argument switches from skip_$feature reporting to
    # printing the required test names/regexps.
    _TESTS = len(sys.argv) == 2 and sys.argv[1] == '-t'
    sys.exit(Checks().run(_TESTS, os.getenv("TARGET_BRANCH", "main")))