
Merge pull request from fidencio/topic/helm-add-post-delete-job

helm: Several fixes, including some reasonable re-work of the kata-deploy.sh script
Fabiano Fidêncio authored on 2024-09-04 09:34:57 +02:00, committed by GitHub
commit 13517cf9c1
6 changed files with 217 additions and 120 deletions
Changed files:
  tests/common.bash
  tests/integration/kubernetes/
  tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/
  tools/packaging/kata-deploy/scripts/
  versions.yaml
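
In short: the CI scripts stop patching and kubectl-applying the kata-deploy/kata-cleanup manifests and instead generate a Helm values file and install the kata-deploy chart; node cleanup moves from the DaemonSet's preStop hook to a Helm post-delete hook Job. A rough sketch of what the new flow boils down to (the keys below come from this commit; the image reference, tag, and shim values are illustrative):

    $ cat > /tmp/values.yaml <<EOF
    k8sDistribution: "k3s"
    image:
      reference: "quay.io/kata-containers/kata-deploy"
      tag: "latest"
    env:
      debug: "true"
      shims: "qemu"
      defaultShim: "qemu"
    EOF
    $ helm install kata-deploy tools/packaging/kata-deploy/helm-chart/kata-deploy \
          --values /tmp/values.yaml --namespace kube-system
    $ helm uninstall kata-deploy --namespace kube-system --wait   # runs the post-delete cleanup Job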

tests/common.bash

@@ -529,6 +529,29 @@ function ensure_yq() {
 	hash -d yq 2> /dev/null || true # yq is preinstalled on GHA Ubuntu 22.04 runners so we clear Bash's PATH cache.
 }
+
+function ensure_helm() {
+	ensure_yq
+	# The get-helm-3 script will take care of downloading and installing Helm
+	# properly on the system, respecting ARCH, OS and other configurations.
+	DESIRED_VERSION=$(get_from_kata_deps ".externals.helm.version")
+	export DESIRED_VERSION
+
+	# Check if helm is available in the system's PATH
+	if ! command -v helm &> /dev/null; then
+		echo "Helm is not installed. Installing Helm..."
+		curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
+		# Verify the installation
+		if command -v helm &> /dev/null; then
+			echo "Helm installed successfully."
+		else
+			echo "Failed to install Helm."
+			exit 1
+		fi
+	else
+		echo "Helm is already installed."
+	fi
+}
 
 # dependency: What we want to get the version from the versions.yaml file
 function get_from_kata_deps() {
 	versions_file="${repo_root_dir}/versions.yaml"
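
The get-helm-3 installer honors a DESIRED_VERSION environment variable, which is why ensure_helm exports it before piping the script to bash. A minimal standalone equivalent, with the version pinned by hand instead of read from versions.yaml:

    $ export DESIRED_VERSION=v3.15.2
    $ curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
    $ helm version --short    # should report v3.15.2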

tests/integration/kubernetes/gha-run.sh

@@ -17,6 +17,7 @@ source "${kubernetes_dir}/../../gha-run-k8s-common.sh"
 source "${kubernetes_dir}/confidential_kbs.sh"
 # shellcheck disable=2154
 tools_dir="${repo_root_dir}/tools"
+helm_chart_dir="${tools_dir}/packaging/kata-deploy/helm-chart/kata-deploy"
 kata_tarball_dir="${2:-kata-artifacts}"
 
 DOCKER_REGISTRY=${DOCKER_REGISTRY:-quay.io}
@@ -146,6 +147,7 @@ function deploy_coco_kbs() {
 function deploy_kata() {
 	platform="${1:-}"
+	ensure_helm
 	ensure_yq
 
 	[ "$platform" = "kcli" ] && \
@@ -157,84 +159,58 @@
 	set_default_cluster_namespace
 
-	sed -i -e "s|quay.io/kata-containers/kata-deploy:latest|${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}|g" "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml"
+	local values_yaml
+	values_yaml=$(mktemp /tmp/values_yaml.XXXXXX)
 
-	# Enable debug for Kata Containers
-	yq -i \
-		'.spec.template.spec.containers[0].env[1].value = "true"' \
-		"${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml"
-	# Create the runtime class only for the shim that's being tested
-	yq -i \
-		".spec.template.spec.containers[0].env[2].value = \"${KATA_HYPERVISOR}\"" \
-		"${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml"
-	# Set the tested hypervisor as the default `kata` shim
-	yq -i \
-		".spec.template.spec.containers[0].env[3].value = \"${KATA_HYPERVISOR}\"" \
-		"${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml"
-	# Let the `kata-deploy` script take care of the runtime class creation / removal
-	yq -i \
-		'.spec.template.spec.containers[0].env[4].value = "true"' \
-		"${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml"
-	# Let the `kata-deploy` create the default `kata` runtime class
-	yq -i \
-		'.spec.template.spec.containers[0].env[5].value = "true"' \
-		"${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml"
-	# Enable 'default_vcpus' hypervisor annotation
-	yq -i \
-		'.spec.template.spec.containers[0].env[6].value = "default_vcpus"' \
-		"${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml"
+	yq -i ".k8sDistribution = \"${KUBERNETES}\"" "${values_yaml}"
+	yq -i ".image.reference = \"${DOCKER_REGISTRY}/${DOCKER_REPO}\"" "${values_yaml}"
+	yq -i ".image.tag = \"${DOCKER_TAG}\"" "${values_yaml}"
+	yq -i ".env.debug = \"true\"" "${values_yaml}"
+	yq -i ".env.shims = \"${KATA_HYPERVISOR}\"" "${values_yaml}"
+	yq -i ".env.defaultShim = \"${KATA_HYPERVISOR}\"" "${values_yaml}"
+	yq -i ".env.createRuntimeClasses = \"true\"" "${values_yaml}"
+	yq -i ".env.createDefaultRuntimeClass = \"true\"" "${values_yaml}"
+	yq -i ".env.allowedHypervisorAnnotations = \"default_vcpus\"" "${values_yaml}"
+	yq -i ".env.snapshotterHandlerMapping = \"\"" "${values_yaml}"
+	yq -i ".env.agentHttpsProxy = \"\"" "${values_yaml}"
+	yq -i ".env.agentNoProxy = \"\"" "${values_yaml}"
+	yq -i ".env.pullTypeMapping = \"\"" "${values_yaml}"
+	yq -i ".env.hostOS = \"\"" "${values_yaml}"
 
 	if [ -n "${SNAPSHOTTER}" ]; then
-		yq -i \
-			".spec.template.spec.containers[0].env[7].value = \"${KATA_HYPERVISOR}:${SNAPSHOTTER}\"" \
-			"${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml"
+		yq -i ".env.snapshotterHandlerMapping = \"${KATA_HYPERVISOR}:${SNAPSHOTTER}\"" "${values_yaml}"
 	fi
 
 	if [ "${KATA_HOST_OS}" = "cbl-mariner" ]; then
-		yq -i \
-			'.spec.template.spec.containers[0].env[6].value = "initrd kernel default_vcpus"' \
-			"${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml"
-		yq -i \
-			".spec.template.spec.containers[0].env += [{\"name\": \"HOST_OS\", \"value\": \"${KATA_HOST_OS}\"}]" \
-			"${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml"
+		yq -i ".env.allowedHypervisorAnnotations = \"initrd kernel default_vcpus\"" "${values_yaml}"
+		yq -i ".env.hostOS = \"${KATA_HOST_OS}\"" "${values_yaml}"
 	fi
 
 	if [ "${KATA_HYPERVISOR}" = "qemu" ]; then
-		yq -i \
-			'.spec.template.spec.containers[0].env[6].value = "image initrd kernel default_vcpus"' \
-			"${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml"
+		yq -i ".env.allowedHypervisorAnnotations = \"image initrd kernel default_vcpus\"" "${values_yaml}"
 	fi
 
 	if [ "${KATA_HYPERVISOR}" = "qemu-tdx" ]; then
-		yq -i \
-			".spec.template.spec.containers[0].env[8].value = \"${HTTPS_PROXY}\"" \
-			"${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml"
-		yq -i \
-			".spec.template.spec.containers[0].env[9].value = \"${NO_PROXY}\"" \
-			"${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml"
+		yq -i ".env.agentHttpsProxy = \"${HTTPS_PROXY}\"" "${values_yaml}"
+		yq -i ".env.agentNoProxy = \"${NO_PROXY}\"" "${values_yaml}"
 	fi
 
 	# Set the PULL_TYPE_MAPPING
 	if [ "${PULL_TYPE}" != "default" ]; then
-		yq -i \
-			".spec.template.spec.containers[0].env[10].value = \"${KATA_HYPERVISOR}:${PULL_TYPE}\"" \
-			"${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml"
+		yq -i ".env.pullTypeMapping = \"${KATA_HYPERVISOR}:${PULL_TYPE}\"" "${values_yaml}"
 	fi
 
-	echo "::group::Final kata-deploy.yaml that is used in the test"
-	cat "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml"
-	grep "${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}" "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" || die "Failed to setup the tests image"
+	echo "::group::Final kata-deploy manifests used in the test"
+	cat "${values_yaml}"
+	helm template "${helm_chart_dir}" --values "${values_yaml}" --namespace kube-system
+	[ "$(yq .image.reference ${values_yaml})" = "${DOCKER_REGISTRY}/${DOCKER_REPO}" ] || die "Failed to set image reference"
+	[ "$(yq .image.tag ${values_yaml})" = "${DOCKER_TAG}" ] || die "Failed to set image tag"
 	echo "::endgroup::"
 
-	kubectl_retry apply -f "${tools_dir}/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml"
-	case "${KUBERNETES}" in
-		k0s) kubectl_retry apply -k "${tools_dir}/packaging/kata-deploy/kata-deploy/overlays/k0s" ;;
-		k3s) kubectl_retry apply -k "${tools_dir}/packaging/kata-deploy/kata-deploy/overlays/k3s" ;;
-		rke2) kubectl_retry apply -k "${tools_dir}/packaging/kata-deploy/kata-deploy/overlays/rke2" ;;
-		*) kubectl_retry apply -f "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml"
-	esac
+	helm install kata-deploy "${helm_chart_dir}" --values "${values_yaml}" --namespace kube-system --debug
 
+	# `helm install --wait` does not take effect on single-replica and maxUnavailable=1 DaemonSets
+	# like kata-deploy on CI. So wait for the pods to be Running in the "traditional" way.
 	local cmd="kubectl -n kube-system get -l name=kata-deploy pod 2>/dev/null | grep '\<Running\>'"
 	waitForProcess "${KATA_DEPLOY_WAIT_TIMEOUT}" 10 "$cmd"
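
Since `helm install --wait` cannot be relied on for this DaemonSet shape, readiness is just polled via kubectl; the equivalent one-off check by hand:

    $ kubectl -n kube-system get -l name=kata-deploy pod | grep '\<Running\>'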
@@ -406,58 +382,11 @@ function collect_artifacts() {
 }
 
 function cleanup_kata_deploy() {
-	ensure_yq
+	ensure_helm
 
-	case "${KUBERNETES}" in
-		k0s)
-			deploy_spec="-k "${tools_dir}/packaging/kata-deploy/kata-deploy/overlays/k0s""
-			cleanup_spec="-k "${tools_dir}/packaging/kata-deploy/kata-cleanup/overlays/k0s""
-			;;
-		k3s)
-			deploy_spec="-k "${tools_dir}/packaging/kata-deploy/kata-deploy/overlays/k3s""
-			cleanup_spec="-k "${tools_dir}/packaging/kata-deploy/kata-cleanup/overlays/k3s""
-			;;
-		rke2)
-			deploy_spec="-k "${tools_dir}/packaging/kata-deploy/kata-deploy/overlays/rke2""
-			cleanup_spec="-k "${tools_dir}/packaging/kata-deploy/kata-cleanup/overlays/rke2""
-			;;
-		*)
-			deploy_spec="-f "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml""
-			cleanup_spec="-f "${tools_dir}/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml""
-			;;
-	esac
-
-	# shellcheck disable=2086
-	kubectl_retry delete --ignore-not-found ${deploy_spec}
-	kubectl -n kube-system wait --timeout=10m --for=delete -l name=kata-deploy pod
-
-	# Let the `kata-deploy` script take care of the runtime class creation / removal
-	yq -i \
-		'.spec.template.spec.containers[0].env[4].value = "true"' \
-		"${tools_dir}/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml"
-	# Create the runtime class only for the shim that's being tested
-	yq -i \
-		".spec.template.spec.containers[0].env[2].value = \"${KATA_HYPERVISOR}\"" \
-		"${tools_dir}/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml"
-	# Set the tested hypervisor as the default `kata` shim
-	yq -i \
-		".spec.template.spec.containers[0].env[3].value = \"${KATA_HYPERVISOR}\"" \
-		"${tools_dir}/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml"
-	# Let the `kata-deploy` create the default `kata` runtime class
-	yq -i \
-		'.spec.template.spec.containers[0].env[5].value = "true"' \
-		"${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml"
-
-	sed -i -e "s|quay.io/kata-containers/kata-deploy:latest|${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}|g" "${tools_dir}/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml"
-	cat "${tools_dir}/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml"
-	grep "${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}" "${tools_dir}/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml" || die "Failed to setup the tests image"
-	# shellcheck disable=2086
-	kubectl_retry apply ${cleanup_spec}
-	sleep 180s
-	# shellcheck disable=2086
-	kubectl_retry delete --ignore-not-found ${cleanup_spec}
-	kubectl_retry delete --ignore-not-found -f "${tools_dir}/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml"
+	# Do not return after deleting only the parent object: cascade=foreground
+	# means we also wait for child/dependent object deletion.
+	helm uninstall kata-deploy --ignore-not-found --wait --cascade foreground --timeout 10m --namespace kube-system --debug
 }
 
 function cleanup() {
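
Note how the whole kustomize/kubectl dance collapses into a single helm uninstall: --cascade foreground waits for the DaemonSet's dependents to be deleted before the post-delete hook fires, and --wait --timeout 10m bounds the hook Job itself. The helm template call above lets CI dump the fully rendered manifests before installing; the same spot-check can be done by hand (values file path is illustrative):

    $ helm template tools/packaging/kata-deploy/helm-chart/kata-deploy \
          --values /tmp/values.yaml --namespace kube-system | grep -A1 'name: DEFAULT_SHIM'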

tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/kata-deploy.yaml

@@ -22,10 +22,6 @@ spec:
         - name: kube-kata
           image: {{ .Values.image.reference }}:{{ default .Chart.AppVersion .Values.image.tag }}
           imagePullPolicy: {{ .Values.imagePullPolicy }}
-          lifecycle:
-            preStop:
-              exec:
-                command: ["bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh cleanup"]
           command: ["bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh install"]
           env:
             - name: NODE_NAME

tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/ (new post-delete hook template)

@@ -0,0 +1,116 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: {{ .Chart.Name }}-sa-cleanup
+  namespace: {{ .Release.Namespace }}
+  annotations:
+    "helm.sh/hook": post-delete
+    "helm.sh/hook-weight": "-3"
+    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
+---
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: {{ .Chart.Name }}-role-cleanup
+  annotations:
+    "helm.sh/hook": post-delete
+    "helm.sh/hook-weight": "-2"
+    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
+rules:
+  - apiGroups: [""]
+    resources: ["nodes"]
+    verbs: ["get", "patch"]
+  - apiGroups: ["node.k8s.io"]
+    resources: ["runtimeclasses"]
+    verbs: ["create", "delete", "get", "list", "patch", "update", "watch"]
+---
+kind: ClusterRoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: {{ .Chart.Name }}-rb-cleanup
+  annotations:
+    "helm.sh/hook": post-delete
+    "helm.sh/hook-weight": "-1"
+    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: {{ .Chart.Name }}-role-cleanup
+subjects:
+  - kind: ServiceAccount
+    name: {{ .Chart.Name }}-sa-cleanup
+    namespace: {{ .Release.Namespace }}
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: {{ .Chart.Name }}-cleanup
+  namespace: {{ .Release.Namespace }}
+  annotations:
+    "helm.sh/hook": post-delete
+    "helm.sh/hook-weight": "0"
+    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
+spec:
+  template:
+    metadata:
+      labels:
+        role: cleanup
+    spec:
+      serviceAccountName: {{ .Chart.Name }}-sa-cleanup
+      hostPID: true
+      containers:
+        - name: kube-kata-cleanup
+          image: {{ .Values.image.reference }}:{{ default .Chart.AppVersion .Values.image.tag }}
+          imagePullPolicy: IfNotPresent
+          command: ["bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh cleanup"]
+          env:
+            - name: NODE_NAME
+              valueFrom:
+                fieldRef:
+                  fieldPath: spec.nodeName
+            - name: DEBUG
+              value: {{ .Values.env.debug | quote }}
+            - name: SHIMS
+              value: {{ .Values.env.shims | quote }}
+            - name: DEFAULT_SHIM
+              value: {{ .Values.env.defaultShim | quote }}
+            - name: CREATE_RUNTIMECLASSES
+              value: {{ .Values.env.createRuntimeClasses | quote }}
+            - name: CREATE_DEFAULT_RUNTIMECLASS
+              value: {{ .Values.env.createDefaultRuntimeClass | quote }}
+            - name: ALLOWED_HYPERVISOR_ANNOTATIONS
+              value: {{ .Values.env.allowedHypervisorAnnotations | quote }}
+            - name: SNAPSHOTTER_HANDLER_MAPPING
+              value: {{ .Values.env.snapshotterHandlerMapping | quote }}
+            - name: AGENT_HTTPS_PROXY
+              value: {{ .Values.env.agentHttpsProxy | quote }}
+            - name: AGENT_NO_PROXY
+              value: {{ .Values.env.agentNoProxy | quote }}
+            - name: PULL_TYPE_MAPPING
+              value: {{ .Values.env.pullTypeMapping | quote }}
+            - name: HELM_POST_DELETE_HOOK
+              value: "true"
+            {{- with .Values.env.hostOS }}
+            - name: HOST_OS
+              value: {{ . | quote }}
+            {{- end }}
+          securityContext:
+            privileged: true
+          volumeMounts:
+            - name: crio-conf
+              mountPath: /etc/crio/
+            - name: containerd-conf
+              mountPath: /etc/containerd/
+            - name: host
+              mountPath: /host/
+      volumes:
+        - name: crio-conf
+          hostPath:
+            path: /etc/crio/
+        - name: containerd-conf
+          hostPath:
+            path: '{{- template "containerdConfPath" .Values }}'
+        - name: host
+          hostPath:
+            path: /
+      restartPolicy: Never
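
When helm uninstall runs, Helm creates the ServiceAccount, ClusterRole, ClusterRoleBinding and Job above in hook-weight order (-3, -2, -1, 0) and waits for the Job to complete; because the hook-delete-policy includes hook-succeeded, the objects disappear again on success. While the hook is running it can be watched from another terminal (object names assume {{ .Chart.Name }} renders as kata-deploy):

    $ kubectl -n kube-system get job kata-deploy-cleanup
    $ kubectl -n kube-system logs -f -l role=cleanup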

tools/packaging/kata-deploy/scripts/kata-deploy.sh

@@ -48,6 +48,8 @@ fi
 # doubled here as well, as: `/host//opt/kata`
 host_install_dir="/host${dest_dir}"
 
+HELM_POST_DELETE_HOOK="${HELM_POST_DELETE_HOOK:-"false"}"
+
 # If we fail for any reason a message will be displayed
 die() {
 	msg="$*"
@@ -550,6 +552,18 @@ function remove_artifacts() {
 	fi
 }
 
+function restart_cri_runtime() {
+	local runtime="${1}"
+
+	if [ "${runtime}" == "k0s-worker" ] || [ "${runtime}" == "k0s-controller" ]; then
+		# do nothing, k0s will automatically unload the config on the fly
+		:
+	else
+		host_systemctl daemon-reload
+		host_systemctl restart "${runtime}"
+	fi
+}
+
 function cleanup_cri_runtime() {
 	case $1 in
 	crio)
@@ -560,6 +574,10 @@ function cleanup_cri_runtime() {
 		;;
 	esac
+
+	[ "${HELM_POST_DELETE_HOOK}" == "false" ] && return
+
+	# Only run this code in the HELM_POST_DELETE_HOOK case
+	restart_cri_runtime "$1"
 }
 
 function cleanup_crio() {
@@ -578,13 +596,7 @@ function cleanup_containerd() {
 function reset_runtime() {
 	kubectl label node "$NODE_NAME" katacontainers.io/kata-runtime-
-	if [ "$1" == "k0s-worker" ] || [ "$1" == "k0s-controller" ]; then
-		# do nothing, k0s will auto restart
-		:
-	else
-		host_systemctl daemon-reload
-		host_systemctl restart "$1"
-	fi
+	restart_cri_runtime "$1"
 
 	if [ "$1" == "crio" ] || [ "$1" == "containerd" ]; then
 		host_systemctl restart kubelet
@@ -659,6 +671,7 @@ function main() {
 	echo "* AGENT_NO_PROXY: ${AGENT_NO_PROXY}"
 	echo "* PULL_TYPE_MAPPING: ${PULL_TYPE_MAPPING}"
 	echo "* INSTALLATION_PREFIX: ${INSTALLATION_PREFIX}"
+	echo "* HELM_POST_DELETE_HOOK: ${HELM_POST_DELETE_HOOK}"
 
 	# script requires that user is root
 	euid=$(id -u)
@@ -716,9 +729,24 @@ function main() {
 			containerd_conf_file="${containerd_conf_tmpl_file}"
 		fi
 
+		if [ "${HELM_POST_DELETE_HOOK}" == "true" ]; then
+			# Remove the label as the first thing, so we ensure no more kata-containers
+			# pods will be scheduled here.
+			kubectl label node "$NODE_NAME" katacontainers.io/kata-runtime-
+		fi
+
 		cleanup_cri_runtime "$runtime"
-		kubectl label node "$NODE_NAME" --overwrite katacontainers.io/kata-runtime=cleanup
+		if [ "${HELM_POST_DELETE_HOOK}" == "false" ]; then
+			# The Confidential Containers operator relies on this label
+			kubectl label node "$NODE_NAME" --overwrite katacontainers.io/kata-runtime=cleanup
+		fi
 		remove_artifacts
+
+		if [ "${HELM_POST_DELETE_HOOK}" == "true" ]; then
+			# After everything has been cleaned up, there's no reason to continue
+			# and sleep forever. Let's just return success.
+			exit 0
+		fi
 		;;
 	reset)
 		reset_runtime $runtime
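
The same cleanup entry point now serves both flows: the legacy DaemonSet cleanup (label the node for the Confidential Containers operator, then keep the process alive) and the Helm post-delete hook (unlabel the node first, restart the CRI runtime, exit 0 so the Job completes). A sketch of the two invocations, using the same command the chart runs inside the container:

    $ HELM_POST_DELETE_HOOK=false /opt/kata-artifacts/scripts/kata-deploy.sh cleanup
    $ HELM_POST_DELETE_HOOK=true  /opt/kata-artifacts/scripts/kata-deploy.sh cleanup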

versions.yaml

@@ -218,6 +218,11 @@ externals:
     version: "1.36.1"
     url: "https://busybox.net/downloads"
 
+  helm:
+    description: "Kubernetes package manager"
+    url: "https://get.helm.sh/"
+    version: "v3.15.2"
+
   cni-plugins:
     description: "CNI network plugins"
     url: "https://github.com/containernetworking/plugins"