ci: Select jobs by touched code

To allow selective testing as well as a selective list of required tests,
let's add a mapping of required jobs/tests in "skips.py" and a
"gatekeeper" workflow that will ensure the expected required jobs were
successful. Then we can mark only the "gatekeeper" as the required job
and modify the logic to suit our needs.

Fixes: #9237

Signed-off-by: Lukáš Doktor <ldoktor@redhat.com>
This commit is contained in:
Lukáš Doktor
2024-04-19 10:36:10 +02:00
parent 8412c09143
commit 5c1cea1601
9 changed files with 466 additions and 2 deletions

View File

@@ -0,0 +1,190 @@
#!/usr/bin/env python3
#
# Copyright (c) 2024 Red Hat Inc.
#
# SPDX-License-Identifier: Apache-2.0
"""
Keeps checking the current PR until all required jobs pass
Env variables:
* REQUIRED_JOBS: comma separated list of required jobs (in form of
"$workflow / $job")
* REQUIRED_REGEXPS: comma separated list of regexps for required jobs
* COMMIT_HASH: Full commit hash we want to be watching
* GITHUB_REPOSITORY: Github repository (user/repo)
Sample execution (GH token can be excluded):
GITHUB_TOKEN="..." REQUIRED_JOBS="skipper / skipper"
REQUIRED_REGEXPS=".*"
COMMIT_HASH=b8382cea886ad9a8f77d237bcfc0eba0c98775dd
GITHUB_REPOSITORY=kata-containers/kata-containers
python3 jobs.py
"""
import os
import re
import sys
import time
import requests
# Status codes used both as per-job states and as process exit codes
PASS = 0
FAIL = 1
# Sentinel for "job has not finished yet" (also the polling exit status)
RUNNING = 127
# Common headers for GitHub REST API v3 calls
_GH_HEADERS = {"Accept": "application/vnd.github.v3+json"}
if os.environ.get("GITHUB_TOKEN"):
    # Authenticated requests get a much higher API rate limit
    _GH_HEADERS["Authorization"] = f"token {os.environ['GITHUB_TOKEN']}"
# Workflow-runs endpoint of the watched repository
# (GITHUB_REPOSITORY must be set, otherwise import fails with KeyError)
_GH_RUNS_URL = ("https://api.github.com/repos/"
                f"{os.environ['GITHUB_REPOSITORY']}/actions/runs")
class Checker:
    """
    Object to keep watching required GH action workflows.

    Required jobs come from the REQUIRED_JOBS (exact names) and
    REQUIRED_REGEXPS (regular expressions) env variables.  Job states in
    ``self.results`` are PASS (0), RUNNING (127) or, on failure, the raw
    GH "conclusion" string (e.g. "failure", "cancelled").
    """
    def __init__(self):
        # Comma-separated list of explicitly required "$workflow / $job" names
        required_jobs = os.getenv("REQUIRED_JOBS")
        if required_jobs:
            required_jobs = required_jobs.split(",")
        else:
            required_jobs = []
        required_regexps = os.getenv("REQUIRED_REGEXPS")
        self.required_regexps = []
        # TODO: Add way to specify minimum amount of tests
        # (eg. via \d+: prefix) and check it in status
        if required_regexps:
            for regexp in required_regexps.split(","):
                self.required_regexps.append(re.compile(regexp))
        if not required_jobs and not self.required_regexps:
            raise RuntimeError("No REQUIRED_JOBS or REQUIRED_REGEXPS defined")
        # Set all required jobs as RUNNING to enforce waiting for them
        self.results = {job: RUNNING for job in required_jobs}
    def record(self, workflow, job):
        """
        Records a job run if it is required (by exact name or by regexp);
        jobs matching neither are silently ignored.

        :param workflow: name of the workflow the job belongs to
        :param job: GH API job dict (uses "name", "status", "conclusion")
        """
        job_name = f"{workflow} / {job['name']}"
        if job_name not in self.results:
            for re_job in self.required_regexps:
                # Required job via regexp
                if re_job.match(job_name):
                    break
            else:
                # Not a required job
                return
        if job["status"] != "completed":
            self.results[job_name] = RUNNING
            return
        if job["conclusion"] != "success":
            # Store the GH conclusion string so __str__ can report it
            self.results[job_name] = job['conclusion']
            return
        self.results[job_name] = PASS
    def status(self):
        """
        :returns: 0 - all tests passing; 127 - no failures but some
                  tests in progress; 1 - any failure (also when no
                  results were recorded at all)
        """
        running = False
        if not self.results:
            # No results reported so far
            return FAIL
        for status in self.results.values():
            if status == RUNNING:
                running |= True
            elif status != PASS:
                # Status not passed
                return FAIL
        if running:
            return RUNNING
        return PASS
    def __str__(self):
        """Summarize the current status (one line per job + verdict)"""
        good = []
        bad = []
        warn = []
        for job, status in self.results.items():
            if status == RUNNING:
                warn.append(f"WARN: {job} - Still running")
            elif status == PASS:
                good.append(f"PASS: {job} - success")
            else:
                bad.append(f"FAIL: {job} - Not passed - {status}")
        out = '\n'.join(sorted(good) + sorted(warn) + sorted(bad))
        stat = self.status()
        if stat == RUNNING:
            status = "Some jobs are still running."
        elif stat == PASS:
            status = "All required jobs passed"
        elif not self.results:
            # status() returned FAIL because nothing matched at all
            status = ("No required jobs for regexps: " +
                      ";".join([_.pattern for _ in self.required_regexps]))
        else:
            status = "Not all required jobs passed!"
        return f"{out}\n\n{status}"
    def get_jobs_for_workflow_run(self, run_id):
        """
        Get jobs from a workflow id, following GH pagination
        (100 jobs per page until "total_count" is reached).

        :param run_id: numeric GH workflow-run id
        :returns: list of GH API job dicts
        """
        total_count = -1
        jobs = []
        page = 1
        while True:
            response = requests.get(
                f"{_GH_RUNS_URL}/{run_id}/jobs?per_page=100&page={page}",
                headers=_GH_HEADERS,
                timeout=60
            )
            response.raise_for_status()
            output = response.json()
            jobs.extend(output["jobs"])
            total_count = max(total_count, output["total_count"])
            if len(jobs) >= total_count:
                break
            page += 1
        return jobs
    def check_workflow_runs_status(self):
        """
        Checks if all required jobs passed for the COMMIT_HASH commit;
        records every job of every workflow run and prints the summary.

        :returns: 0 - all passing; 1 - any failure; 127 some jobs running
        """
        # TODO: Check if we need pagination here as well
        latest_commit_sha = os.getenv("COMMIT_HASH")
        response = requests.get(
            _GH_RUNS_URL,
            params={"head_sha": latest_commit_sha},
            headers=_GH_HEADERS,
            timeout=60
        )
        response.raise_for_status()
        workflow_runs = response.json()["workflow_runs"]
        for run in workflow_runs:
            jobs = self.get_jobs_for_workflow_run(run["id"])
            for job in jobs:
                self.record(run["name"], job)
        print(self)
        return self.status()
    def run(self):
        """
        Keep checking the PR until all required jobs finish
        (never returns; terminates the process via sys.exit)
        :returns: 0 on success; 1 on failure
        """
        while True:
            ret = self.check_workflow_runs_status()
            if ret == RUNNING:
                running_jobs = len([job
                                    for job, status in self.results.items()
                                    if status == RUNNING])
                print(f"{running_jobs} jobs are still running...")
                # CI jobs take long; poll only every 3 minutes
                time.sleep(180)
                continue
            sys.exit(ret)
if __name__ == "__main__":
    # Poll until all required jobs finish; exits with the final status
    Checker().run()

View File

@@ -0,0 +1,36 @@
required_tests:
# Always required tests
- "Commit Message Check / Commit Message Check"
required_regexps:
# Always required regexps
paths:
# Mapping of path (python) regexps to set-of-tests (sort by order of importance)
# CI
- "^ci/openshift-ci/": []
- "^\\.github/workflows/": []
# TODO: Expand filters
# Documentation
#- "\\.rst$": ["build"]
#- "\\.md$": ["build"]
# Sources
#- "^src/": ["static", "build", "test"]
mapping:
# Mapping of set-of-tests to required test names and/or test name regexps
# TODO: Modify this according to actual required tests
test:
# Checks the basic functional tests work
regexps: "Kata Containers CI / .*run-basic-amd64-tests.*|Kata Containers CI / .*run-metrics-tests.*"
names:
- Kata Containers CI / kata-containers-ci-on-push / run-k8s-tests-on-ppc64le / run-k8s-tests (qemu, kubeadm)
- Kata Containers CI / kata-containers-ci-on-push / run-k8s-tests-on-aks / run-k8s-tests (ubuntu, qemu, small)
- Kata Containers CI / kata-containers-ci-on-push / run-k8s-tests-with-crio-on-garm / run-k8s-tests (qemu, k0s, garm-ubuntu-2204)
# TODO: Add support for "depends" to automatically add dependent set-of-tests
# (eg. "build" is required for "test")
build:
# Checks that the kata-containers static tarball is created
regexps: "Kata Containers CI / .*build-kata-static-tarball.*"
static:
# Checks that static checks are passing
regexps: "Static checks.*"

View File

@@ -0,0 +1,105 @@
#!/usr/bin/env python3
#
# Copyright (c) 2024 Red Hat Inc.
#
# SPDX-License-Identifier: Apache-2.0
"""
Gets changes of the current git to env variable TARGET_BRANCH
and reports feature skips in form of "skip_$feature=yes|no"
or list of required tests (based on argv[1])
"""
from collections import OrderedDict
import os
import re
import subprocess
import sys
import yaml
class Checks:
    """
    Maps files changed against a target branch to the set-of-tests that
    have to run, based on the "required-tests.yaml" config next to this
    script (keys: required_tests, required_regexps, paths, mapping).
    """
    def __init__(self):
        config_path = os.path.join(os.path.dirname(__file__),
                                   "required-tests.yaml")
        with open(config_path, "r", encoding="utf8") as config_fd:
            config = yaml.load(config_fd, Loader=yaml.SafeLoader)
        # Always-required test names and regexps
        self.required_tests = config.get('required_tests') or []
        self.required_regexps = config.get('required_regexps') or []
        # Ordered mapping of path-regexp -> list of set-of-tests; the
        # first matching regexp wins, so order in the config matters.
        # Bug fix: previously self.paths was only assigned when the
        # config contained a truthy "paths" key, so get_features would
        # crash with AttributeError on a config without paths.
        self.paths = OrderedDict(
            (re.compile(key), value)
            for items in config.get('paths') or []
            for key, value in items.items())
        # Mapping of set-of-tests -> {"names": [...], "regexps": "..."}
        self.mapping = config.get('mapping') or {}
        self.all_set_of_tests = set(self.mapping.keys())
    def run(self, tests, target_branch):
        """
        Find the required features/tests and print them.

        :param tests: report required tests+regexps (bool) instead of
                      the skip_$feature=yes|no lines
        :param target_branch: branch/commit to compare to
        :returns: 0 (success)
        """
        enabled_features = self.get_features(target_branch)
        if not tests:
            for feature in self.all_set_of_tests:
                # Print all features status in "$key=$value" format to allow
                # usage with $GITHUB_OUTPUT
                print(f"skip_{feature}=" +
                      ('no' if feature in enabled_features else 'yes'))
            return 0
        required_tests = set(self.required_tests)
        required_regexps = set(self.required_regexps)
        for feature in enabled_features:
            values = self.mapping.get(feature, {})
            if values.get("names"):
                required_tests.update(values["names"])
            if values.get("regexps"):
                required_regexps.add(values["regexps"])
        # Separators (";" for names, "|" for regexps) are what the
        # gatekeeper consumer expects
        print(';'.join(required_tests))
        print('|'.join(required_regexps))
        return 0
    def get_features(self, target_branch):
        """
        Get changed files relative to `target_branch` and map them to the
        to-be-tested set-of-tests.

        :param target_branch: branch/commit to compare to
        :returns: set of set-of-tests names
        """
        cmd = ["git", "diff", "--name-only", f"origin/{target_branch}"]
        changed_files = [_.decode("utf-8")
                         for _ in subprocess.check_output(cmd).split(b'\n')
                         if _.strip()]
        # Log the diff for debugging without polluting stdout (parsed output)
        print('\n'.join(changed_files), file=sys.stderr)
        return self._features_for_files(changed_files)
    def _features_for_files(self, changed_files):
        """
        Map changed file names to the set-of-tests that need to run.

        :param changed_files: list of changed file paths
        :returns: set of set-of-tests names (all of them when any file
                  matches no configured path regexp)
        """
        enabled_features = set()
        for changed_file in changed_files:
            for regexp, features in self.paths.items():
                if regexp.search(changed_file):
                    enabled_features.update(features)
                    # this changed_file was treated, ignore other regexps
                    break
            else:
                # Untreated changed_file, run all tests
                return self.all_set_of_tests
        return enabled_features
if __name__ == "__main__":
    # A single "-t" argument switches from skip_$feature reporting to
    # printing the required test names/regexps.
    _TESTS = len(sys.argv) == 2 and sys.argv[1] == '-t'
    sys.exit(Checks().run(_TESTS, os.getenv("TARGET_BRANCH", "main")))