diff --git a/ci/openshift-ci/cleanup.sh b/ci/openshift-ci/cleanup.sh
new file mode 100755
index 0000000000..587c431478
--- /dev/null
+++ b/ci/openshift-ci/cleanup.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+#
+# Copyright (c) 2024 Red Hat, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# This script tries to remove most of the resources added by the `test.sh`
+# script from the cluster.
+
+scripts_dir=$(dirname "$0")
+deployments_dir=${scripts_dir}/cluster/deployments
+configs_dir=${scripts_dir}/configs
+
+source "${scripts_dir}/lib.sh"
+
+# Set to 'yes' if SELinux was configured to permissive on the cluster
+# workers (the related machineconfig is then deleted as well).
+#
+SELINUX_PERMISSIVE=${SELINUX_PERMISSIVE:-no}
+
+# Enable workaround for OCP 4.13 https://github.com/kata-containers/kata-containers/pull/9206
+#
+WORKAROUND_9206_CRIO=${WORKAROUND_9206_CRIO:-no}
+
+# Ignore errors as we want best-effort-approach here
+trap - ERR
+
+# Delete potential smoke-test resources
+oc delete -f "${scripts_dir}/smoke/service.yaml"
+oc delete -f "${scripts_dir}/smoke/service_kubernetes.yaml"
+oc delete -f "${scripts_dir}/smoke/http-server.yaml"
+
+# Delete test.sh resources
+oc delete -f "${deployments_dir}/relabel_selinux.yaml"
+if [[ "$WORKAROUND_9206_CRIO" == "yes" ]]; then
+    oc delete -f "${deployments_dir}/workaround-9206-crio-ds.yaml"
+    oc delete -f "${deployments_dir}/workaround-9206-crio.yaml"
+fi
+if [[ "$SELINUX_PERMISSIVE" == "yes" ]]; then
+    oc delete -f "${deployments_dir}/machineconfig_selinux.yaml.in"
+fi
+
+# Delete kata-containers
+# The paths below are relative to the kata-deploy directory
+pushd "$katacontainers_repo_dir/tools/packaging/kata-deploy" || { echo "Unable to enter the kata-deploy directory"; exit 1; }
+oc delete -f kata-deploy/base/kata-deploy.yaml
+oc -n kube-system wait --timeout=10m --for=delete -l name=kata-deploy pod
+oc apply -f kata-cleanup/base/kata-cleanup.yaml
+echo "Wait for all related pods to be gone"
+# Succeed only after 5 consecutive checks that report no kata-cleanup pods.
+# The subshell exits explicitly as the status of a plain "for" loop would be
+# zero even when all iterations are used up.
+(
+    repeats=1
+    for _ in $(seq 1 600); do
+        if oc get pods -l name="kubelet-kata-cleanup" --no-headers=true -n kube-system 2>&1 | grep -q "No resources found"; then
+            ((repeats++))
+        else
+            repeats=1
+        fi
+        if [ "$repeats" -gt 5 ]; then
+            echo kata-cleanup finished
+            exit 0
+        fi
+        sleep 1
+    done
+    exit 1
+) || {
+    echo "There are still some kata-cleanup related pods after 600 iterations"
+    oc get all -n kube-system
+    exit 1
+}
+oc delete -f kata-cleanup/base/kata-cleanup.yaml
+oc delete -f kata-rbac/base/kata-rbac.yaml
+oc delete -f runtimeclasses/kata-runtimeClasses.yaml
+
diff --git a/ci/openshift-ci/cluster/deployments/relabel_selinux.yaml b/ci/openshift-ci/cluster/deployments/relabel_selinux.yaml
index ab6cdf8c3f..de814c9091 100644
--- a/ci/openshift-ci/cluster/deployments/relabel_selinux.yaml
+++ b/ci/openshift-ci/cluster/deployments/relabel_selinux.yaml
@@ -23,6 +23,7 @@ spec:
             set -e;
             echo Starting the relabel;
             nsenter --target 1 --mount bash -xc '
+              command -v semanage &>/dev/null || { echo Does not look like a SELINUX cluster, skipping; exit 0; };
               for ENTRY in \
                 \"/(.*/)?opt/kata/bin(/.*)?\" \
                 \"/(.*/)?opt/kata/runtime-rs/bin(/.*)?\" \
@@ -31,7 +32,7 @@ spec:
                 \"/(.*/)?opt/kata/share/tdvf(/.*)?\" \
                 \"/(.*/)?opt/kata/libexec(/.*)?\";
               do
-                semanage fcontext -a -t qemu_exec_t \"$ENTRY\" || { echo \"Error in semanage command\"; exit 1; }
+                semanage fcontext -a -t qemu_exec_t \"$ENTRY\" || semanage fcontext -m -t qemu_exec_t \"$ENTRY\" || { echo \"Error in semanage command\"; exit 1; }
               done;
               restorecon -v -R /opt/kata || { echo \"Error in restorecon command\"; exit 1; }
             ';
diff --git a/ci/openshift-ci/cluster/deployments/workaround-9206-crio-ds.yaml b/ci/openshift-ci/cluster/deployments/workaround-9206-crio-ds.yaml
new file mode 100644
index 0000000000..0a5cf8a5ee
--- /dev/null
+++ b/ci/openshift-ci/cluster/deployments/workaround-9206-crio-ds.yaml
@@ -0,0 +1,28 @@
+---
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: workaround-9206-crio-ds
+spec:
+  selector:
+    matchLabels:
+      app: workaround-9206-crio-ds
+  template:
+    metadata:
+      labels:
+        app: workaround-9206-crio-ds
+    spec:
+      containers:
+        - name: workaround-9206-crio-ds
+          image: alpine
+          volumeMounts:
+            - name: host-dir
+              mountPath: /tmp/config
+          securityContext:
+            runAsUser: 0
+            privileged: true
+          command: ["/bin/sh", "-c", "while [ ! -f '/tmp/config/10-workaround-9206-crio' ]; do sleep 1; done; echo 'Config file present'; sleep infinity"]
+      volumes:
+        - name: host-dir
+          hostPath:
+            path: /etc/crio/crio.conf.d/
diff --git a/ci/openshift-ci/cluster/deployments/workaround-9206-crio.yaml b/ci/openshift-ci/cluster/deployments/workaround-9206-crio.yaml
new file mode 100644
index 0000000000..18313a0b5e
--- /dev/null
+++ b/ci/openshift-ci/cluster/deployments/workaround-9206-crio.yaml
@@ -0,0 +1,18 @@
+---
+apiVersion: machineconfiguration.openshift.io/v1
+kind: MachineConfig
+metadata:
+  labels:
+    machineconfiguration.openshift.io/role: worker
+  name: 10-workaround-9206-crio
+spec:
+  config:
+    ignition:
+      version: 2.2.0
+    storage:
+      files:
+        - contents:
+            source: data:text/plain;charset=utf-8;base64,W2NyaW9dCnN0b3JhZ2Vfb3B0aW9uID0gWwoJIm92ZXJsYXkuc2tpcF9tb3VudF9ob21lPXRydWUiLApdCg==
+          filesystem: root
+          mode: 0644
+          path: /etc/crio/crio.conf.d/10-workaround-9206-crio
diff --git a/ci/openshift-ci/cluster/install_kata.sh b/ci/openshift-ci/cluster/install_kata.sh
index 82e80d6432..d544aa70c3 100755
--- a/ci/openshift-ci/cluster/install_kata.sh
+++ b/ci/openshift-ci/cluster/install_kata.sh
@@ -27,16 +27,21 @@ KATA_WITH_SYSTEM_QEMU=${KATA_WITH_SYSTEM_QEMU:-no}
 #
 KATA_WITH_HOST_KERNEL=${KATA_WITH_HOST_KERNEL:-no}
 
+# kata-deploy image to be used to deploy the kata (by default use CI image
+# that is built for each pull request)
+#
+KATA_DEPLOY_IMAGE=${KATA_DEPLOY_IMAGE:-quay.io/kata-containers/kata-deploy-ci:kata-containers-latest}
+
+# Enable workaround for OCP 4.13 https://github.com/kata-containers/kata-containers/pull/9206
+#
+WORKAROUND_9206_CRIO=${WORKAROUND_9206_CRIO:-no}
+
 # Leverage kata-deploy to install Kata Containers in the cluster.
 #
 apply_kata_deploy() {
     local deploy_file="tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml"
-    local old_img="quay.io/kata-containers/kata-deploy:latest"
-    # Use the kata-deploy CI image which is built for each pull request merged
-    local new_img="quay.io/kata-containers/kata-deploy-ci:kata-containers-latest"
-
     pushd "$katacontainers_repo_dir"
-    sed -i "s#${old_img}#${new_img}#" "$deploy_file"
+    sed -ri "s#(\s+image:) .*#\1 ${KATA_DEPLOY_IMAGE}#" "$deploy_file"
 
     info "Applying kata-deploy"
     oc apply -f tools/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml
@@ -91,7 +96,7 @@ wait_for_reboot() {
 }
 
 wait_mcp_update() {
-    local delta="${1:-900}"
+    local delta="${1:-1200}"
     local sleep_time=30
     # The machineconfigpool is fine when all the workers updated and are ready,
     # and none are degraded.
@@ -147,6 +152,61 @@ debug_pod() {
     oc logs "$pod"
 }
 
+# Wait for all pods of the app label to contain expected message
+#
+# Params:
+#   $1 - app label
+#   $2 - expected pods count (>=1)
+#   $3 - message to be present in the logs (used as a grep pattern)
+#   $4 - timeout in seconds (60), shared by both waiting phases
+#   $5 - namespace (the current one)
+# Returns:
+#   0 when all pods logged the message, 1 on timeout
+wait_for_app_pods_message() {
+    local app="$1"
+    local pod_count="${2:-1}"
+    local message="$3"
+    local timeout="${4:-60}"
+    local namespace="${5:-}"
+    local -a ns_args=()
+    local -a pods=()
+    local pod
+    local log
+    [ -n "$namespace" ] && ns_args=(-n "$namespace")
+    # Note this restarts the shell-wide SECONDS timer
+    SECONDS=0
+    while :; do
+        mapfile -t pods < <(oc get pods -l app="$app" --no-headers=true "${ns_args[@]}" | awk '{print $1}')
+        # Compare the number of pods; "${#pods}" would only be the
+        # length of the first pod's name
+        [ "${#pods[@]}" -ge "$pod_count" ] && break
+        if [ "$SECONDS" -gt "$timeout" ]; then
+            echo "Unable to find ${pod_count} pods for '-l app=\"$app\"' in ${SECONDS}s (${pods[*]})"
+            return 1
+        fi
+        # Do not query the API server in a busy loop
+        sleep 1
+    done
+    for pod in "${pods[@]}"; do
+        while :; do
+            # Logs might not be available yet, keep trying until the timeout
+            log=$(oc logs "${ns_args[@]}" "$pod") || true
+            if grep -q -- "$message" <<< "$log"; then
+                echo "Found $(grep -- "$message" <<< "$log") in $pod's log ($SECONDS)"
+                break
+            fi
+            if [ "$SECONDS" -gt "$timeout" ]; then
+                echo -n "Message '$message' not present in '${pod}' pod of the '-l app=\"$app\"' "
+                echo "pods after ${SECONDS}s (${pods[*]})"
+                echo "Pod $pod's output so far:"
+                echo "$log"
+                return 1
+            fi
+            sleep 1
+        done
+    done
+}
+
 oc config set-context --current --namespace=default
 
 worker_nodes=$(oc get nodes | awk '{if ($3 == "worker") { print $1 } }')
@@ -182,10 +242,14 @@ if [ ${SELINUX_PERMISSIVE} == "yes" ]; then
     wait_for_reboot
 fi
 
+if [[ "$WORKAROUND_9206_CRIO" == "yes" ]]; then
+    info "Applying workaround to enable skip_mount_home in crio on OCP 4.13"
+    oc apply -f "${deployments_dir}/workaround-9206-crio.yaml"
+    oc apply -f "${deployments_dir}/workaround-9206-crio-ds.yaml"
+    wait_for_app_pods_message workaround-9206-crio-ds "$num_nodes" "Config file present" 1200 || echo "Failed to apply the workaround, proceeding anyway..."
+fi
+
 # FIXME: Remove when https://github.com/kata-containers/kata-containers/pull/8417 is resolved
 # Selinux context is currently not handled by kata-deploy
 oc apply -f ${deployments_dir}/relabel_selinux.yaml
-( for I in $(seq 30); do
-    sleep 10
-    oc logs -n kube-system ds/relabel-selinux-daemonset | grep "NSENTER_FINISHED_WITH:" && exit
-done ) || { echo "Selinux relabel failed, check the logs"; exit -1; }
+wait_for_app_pods_message restorecon "$num_nodes" "NSENTER_FINISHED_WITH:" 120 "kube-system" || echo "Failed to treat selinux, proceeding anyway..."