mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-04-27 11:31:05 +00:00
> Can only exit with status 0-255. Other data should be written to stdout/stderr. Switch exit -1 to exit 1 Signed-off-by: stevenhorsman <steven@uk.ibm.com>
246 lines
7.7 KiB
Bash
Executable File
246 lines
7.7 KiB
Bash
Executable File
#!/bin/bash
|
|
#
|
|
# Copyright (c) 2020 Red Hat, Inc.
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
# This script installs the built kata-containers in the test cluster,
|
|
# and configures a runtime.
|
|
|
|
# Locate the directories shipped next to this script. Every expansion is
# quoted so that a checkout path containing spaces or glob characters does
# not get word-split.
scripts_dir=$(dirname -- "$0")
deployments_dir="${scripts_dir}/deployments"
configs_dir="${scripts_dir}/configs"

# Shared helpers. NOTE(review): info/warn/die used below are not defined in
# this script, so they are presumably provided by lib.sh - confirm.
# shellcheck source=/dev/null
source "${scripts_dir}/../lib.sh"
|
|
|
|
# User-tunable settings. Each one keeps the value found in the environment
# and falls back to the default shown when it is unset or empty.

# 'yes' switches SELinux to permissive mode on the cluster workers.
: "${SELINUX_PERMISSIVE:=no}"

# 'yes' makes Kata Containers use the system's QEMU (from the RHCOS
# extension).
: "${KATA_WITH_SYSTEM_QEMU:=no}"

# 'yes' makes Kata Containers use the host kernel.
: "${KATA_WITH_HOST_KERNEL:=no}"

# kata-deploy image used to deploy kata (defaults to the CI image that is
# built for each pull request).
: "${KATA_DEPLOY_IMAGE:=quay.io/kata-containers/kata-deploy-ci:kata-containers-latest}"

# 'yes' enables the workaround for OCP 4.13, see
# https://github.com/kata-containers/kata-containers/pull/9206
: "${WORKAROUND_9206_CRIO:=no}"
|
|
|
|
# Leverage kata-deploy to install Kata Containers in the cluster.
#
# Globals read:
#   katacontainers_repo_dir - directory the manifest paths below are relative
#                             to (not set in this script; presumably comes
#                             from lib.sh or the environment - confirm).
#   KATA_DEPLOY_IMAGE       - image reference written into kata-deploy.yaml.
#
# Side effects: edits kata-deploy.yaml in place and applies manifests with oc.
#
apply_kata_deploy() {
    local deploy_file="tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml"

    # Every path below is relative to the repository root. Without this
    # guard a failed pushd would let sed -i and oc apply run from whatever
    # directory we happen to be in, and the final popd would pop an
    # unrelated directory-stack entry.
    pushd "$katacontainers_repo_dir" || \
        die "Unable to enter the repository directory '${katacontainers_repo_dir}'"

    # Point every indented "image:" entry of the manifest at the requested image.
    sed -ri "s#(\s+image:) .*#\1 ${KATA_DEPLOY_IMAGE}#" "$deploy_file"

    info "Applying kata-deploy"
    oc apply -f tools/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml
    oc label --overwrite ns kube-system pod-security.kubernetes.io/enforce=privileged pod-security.kubernetes.io/warn=baseline pod-security.kubernetes.io/audit=baseline
    oc apply -f "$deploy_file"
    oc -n kube-system wait --timeout=10m --for=condition=Ready -l name=kata-deploy pod

    info "Adding the kata runtime classes"
    oc apply -f tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml
    popd
}
|
|
|
|
|
|
# Wait until all worker nodes have rebooted.
#
# A node counts as rebooted once the bootID reported by the cluster differs
# from the one recorded when this function started.
#
# Params:
#   $1 - timeout in seconds (default to 900).
#   $2 - seconds to sleep between checks (default to 60).
#
# Returns 0 when every worker rebooted (or no worker was found), 1 on timeout.
#
wait_for_reboot() {
    local delta="${1:-900}"
    local sleep_time="${2:-60}"
    local -A boot_ids=()
    local -a workers=()
    local node i now timer_start current_id

    # Worker nodes are the ones whose ROLES column is exactly "worker".
    mapfile -t workers < <(oc get nodes | \
        awk '{if ($3 == "worker") { print $1 } }')

    # Record the current boot ID of each worker to detect when it changes.
    for node in "${workers[@]}"; do
        boot_ids[$node]=$(oc get -o jsonpath='{.status.nodeInfo.bootID}' \
            "node/${node}")
        echo "Wait $node reboot"
    done

    echo "Set timeout to $delta seconds"
    timer_start=$(date +%s)
    while [[ "${#workers[@]}" -gt 0 ]]; do
        sleep "$sleep_time"
        now=$(date +%s)
        if (( timer_start + delta < now )); then
            echo "Timeout: not all workers rebooted"
            return 1
        fi
        echo "Checking after $(( now - timer_start )) seconds"
        for i in "${!workers[@]}"; do
            node="${workers[i]}"
            current_id=$(oc get \
                -o jsonpath='{.status.nodeInfo.bootID}' \
                "node/${node}")
            # An empty answer means the query itself failed (e.g. the API
            # was briefly unreachable); that is not evidence of a reboot,
            # so keep waiting for this node.
            if [[ -n "$current_id" && "$current_id" != "${boot_ids[$node]}" ]]; then
                echo "${node} rebooted"
                unset 'workers[i]'
            fi
        done
    done
    return 0
}
|
|
|
|
# Wait for the "worker" machineconfigpool to finish updating.
#
# The pool is considered fine when the ready machine count equals the
# machine count and no machine is degraded.
#
# Params:
#   $1 - timeout in seconds (default to 3600).
#   $2 - seconds to sleep between checks (default to 30).
#
# Returns 0 when all machines are ready and none is degraded; non-zero when
# the machine count cannot be obtained (empty or lower than 1), on timeout,
# or when the degraded machine count is anything other than 0.
#
wait_mcp_update() {
    local delta="${1:-3600}"
    local sleep_time="${2:-30}"
    local ready_count=0
    local degraded_count=0
    local machine_count deadline now

    machine_count=$(oc get mcp worker -o jsonpath='{.status.machineCount}')

    # Either condition alone makes the count unusable. With '&&' a count
    # of 0 slipped through and the function reported success right away.
    if [[ -z "$machine_count" || "$machine_count" -lt 1 ]]; then
        warn "Unabled to obtain the machine count"
        return 1
    fi

    echo "Set timeout to $delta seconds"
    deadline=$(( $(date +%s) + delta ))
    # The ready count might not have changed yet, so wait a little.
    while [[ "$ready_count" != "$machine_count" && \
        "$degraded_count" == 0 ]]; do
        # Let's check it hit the timeout (or not).
        now=$(date +%s)
        if (( deadline < now )); then
            echo "Timeout: not all workers updated" >&2
            return 1
        fi
        sleep "$sleep_time"
        ready_count=$(oc get mcp worker \
            -o jsonpath='{.status.readyMachineCount}')
        degraded_count=$(oc get mcp worker \
            -o jsonpath='{.status.degradedMachineCount}')
        echo "check machineconfigpool - ready_count: $ready_count degraded_count: $degraded_count"
    done
    # An empty degraded count (failed query) is also reported as a failure.
    [[ "$degraded_count" == 0 ]]
}
|
|
|
|
# Enable the RHCOS extension for the Sandboxed Containers.
#
# Applies the extension machineconfig found under ${deployments_dir},
# checks that the cluster knows about it and then waits for the
# machineconfigpool update. Calls die when the machineconfig is not found
# or the pool update fails.
#
enable_sandboxedcontainers_extension() {
    info "Enabling the RHCOS extension for Sandboxed Containers"
    local deployment_file="${deployments_dir}/machineconfig_sandboxedcontainers_extension.yaml"

    # Quoted so that a deployments path with spaces stays a single argument.
    oc apply -f "${deployment_file}"
    oc get -f "${deployment_file}" || \
        die "Sandboxed Containers extension machineconfig not found"
    wait_mcp_update || die "Failed to update the machineconfigpool"
}
|
|
|
|
# Dump information that helps debugging a pod: its description followed by
# its logs.
#
# Params:
#   $1 - the pod name
#
debug_pod() {
    local target_pod="${1}"

    info "Debug pod: ${target_pod}"
    oc describe pods "${target_pod}"
    oc logs "${target_pod}"
}
|
|
|
|
# Wait for all pods of the app label to contain the expected message.
#
# First waits until at least the expected number of pods carrying the
# "app=<label>" label exist, then polls the log of each of those pods until
# the message shows up. The timeout covers both phases together, as the
# SECONDS counter is reset only once at the start.
#
# Params:
#   $1 - app label
#   $2 - expected pods count (>=1, default to 1)
#   $3 - message to be present in the logs (used as a grep pattern)
#   $4 - timeout in seconds (default to 60)
#   $5 - namespace (default to the current one)
#
# Returns 0 when every pod logged the message, 1 on timeout.
#
wait_for_app_pods_message() {
    local app="$1"
    local pod_count="${2:-1}"
    local message="$3"
    local timeout="${4:-60}"
    local namespace="$5"
    local -a ns_args=()
    local -a pods=()
    local pod
    local log

    # Keep the namespace option as separate, properly quoted arguments.
    if [[ -n "$namespace" ]]; then
        ns_args=(-n "$namespace")
    fi

    SECONDS=0
    while :; do
        mapfile -t pods < <(oc get pods -l app="$app" --no-headers=true "${ns_args[@]}" | awk 'NF {print $1}')
        # Compare the number of pods; "${#pods}" would be the length of
        # the first pod's name and let a single pod satisfy any count.
        [[ "${#pods[@]}" -ge "$pod_count" ]] && break
        if [[ "$SECONDS" -gt "$timeout" ]]; then
            printf "Unable to find %s pods for '-l app=\"%s\"' in %ss (%s)\n" \
                "$pod_count" "$app" "$SECONDS" "${pods[*]}"
            return 1
        fi
        # Do not hammer the API server while waiting for the pods.
        sleep 1
    done

    for pod in "${pods[@]}"; do
        while :; do
            log=$(oc logs "${ns_args[@]}" "$pod")
            if grep -q -- "$message" <<<"$log"; then
                echo "Found $(grep -- "$message" <<<"$log") in $pod's log ($SECONDS)"
                break
            fi
            if [[ "$SECONDS" -gt "$timeout" ]]; then
                echo -n "Message '$message' not present in '${pod}' pod of the '-l app=\"$app\"' "
                printf "pods after %ss :(%s)\n" "$SECONDS" "${pods[*]}"
                echo "Pod $pod's output so far:"
                echo "$log"
                return 1
            fi
            sleep 1
        done
    done
    return 0
}
|
|
|
|
# Use the "default" namespace for the rest of the script.
oc config set-context --current --namespace=default

# Worker nodes are the ones whose ROLES column is exactly "worker".
worker_nodes=$(oc get nodes | awk '{if ($3 == "worker") { print $1 } }')
num_nodes=$(wc -w <<<"$worker_nodes")
[[ "$num_nodes" -ne 0 ]] || \
    die "No worker nodes detected. Something is wrong with the cluster"

if [[ "${KATA_WITH_SYSTEM_QEMU}" == "yes" ]]; then
    # QEMU is deployed on the workers via RHCOS extension.
    enable_sandboxedcontainers_extension
    oc apply -f "${deployments_dir}/configmap_installer_qemu.yaml"
fi

if [[ "${KATA_WITH_HOST_KERNEL}" == "yes" ]]; then
    oc apply -f "${deployments_dir}/configmap_installer_kernel.yaml"
fi

apply_kata_deploy

# Set SELinux to permissive mode
if [[ "${SELINUX_PERMISSIVE}" == "yes" ]]; then
    info "Configuring SELinux"
    if [[ -z "$SELINUX_CONF_BASE64" ]]; then
        # base64 may wrap its output; strip all whitespace to get a single
        # token. It is exported for envsubst below (the template presumably
        # references this variable - confirm).
        SELINUX_CONF_BASE64=$(base64 < "${configs_dir}/selinux.conf" | \
            tr -d '[:space:]')
        export SELINUX_CONF_BASE64
    fi
    envsubst < "${deployments_dir}/machineconfig_selinux.yaml.in" | \
        oc apply -f -
    oc get machineconfig/51-kata-selinux || \
        die "SELinux machineconfig not found"
    # The new SELinux configuration will trigger another reboot.
    wait_for_reboot
fi

if [[ "$WORKAROUND_9206_CRIO" == "yes" ]]; then
    info "Applying workaround to enable skip_mount_home in crio on OCP 4.13"
    oc apply -f "${deployments_dir}/workaround-9206-crio.yaml"
    oc apply -f "${deployments_dir}/workaround-9206-crio-ds.yaml"
    # Best effort: a failure here is reported but does not stop the script.
    wait_for_app_pods_message workaround-9206-crio-ds "$num_nodes" "Config file present" 1200 || echo "Failed to apply the workaround, proceeding anyway..."
fi

# FIXME: Remove when https://github.com/kata-containers/kata-containers/pull/8417 is resolved
# Selinux context is currently not handled by kata-deploy
oc apply -f "${deployments_dir}/relabel_selinux.yaml"
# Best effort as well: report the failure and carry on.
wait_for_app_pods_message restorecon "$num_nodes" "NSENTER_FINISHED_WITH:" 120 "kube-system" || echo "Failed to treat selinux, proceeding anyway..."
|