Merge pull request #6648 from fidencio/topic/gha-tdx-improvements-and-fixes

gha: tdx: Ensure kata-deploy is removed after the tests run
This commit is contained in:
Fabiano Fidêncio
2023-04-15 00:21:31 +02:00
committed by GitHub
13 changed files with 59 additions and 20 deletions

View File

@@ -20,6 +20,8 @@ jobs:
vmm:
- qemu-tdx
runs-on: tdx
env:
KUBECONFIG: /etc/rancher/k3s/k3s.yaml
steps:
- uses: actions/checkout@v3
with:
@@ -32,11 +34,9 @@ jobs:
cat tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml | grep "${{ inputs.registry }}/${{ inputs.repo }}:${{ inputs.tag }}" || die "Failed to setup the tests image"
kubectl apply -f tools/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml
kubectl apply -f tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml
kubectl apply -k tools/packaging/kata-deploy/kata-deploy/overlay/k3s
kubectl -n kube-system wait --timeout=10m --for=condition=Ready -l name=kata-deploy pod
kubectl apply -f tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml
env:
KUBECONFIG: /etc/rancher/k3s/k3s.yaml
- name: Run tests
timeout-minutes: 30
@@ -47,4 +47,19 @@ jobs:
popd
env:
KATA_HYPERVISOR: ${{ matrix.vmm }}
KUBECONFIG: /etc/rancher/k3s/k3s.yaml
# Tear-down step. `if: always()` makes it run even when an earlier step
# failed, so the runner is not left with kata-deploy installed.
- name: Delete kata-deploy
  if: always()
  run: |
    # Remove the kata-deploy overlay and wait until its pods are gone.
    kubectl delete -k tools/packaging/kata-deploy/kata-deploy/overlay/k3s
    kubectl -n kube-system wait --timeout=10m --for=delete -l name=kata-deploy pod

    # Point the cleanup manifest at the image under test, print it for the
    # job log, and verify the substitution took effect. `die` is not
    # available in a workflow `run` shell, so fail explicitly instead.
    sed -i -e "s|quay.io/kata-containers/kata-deploy:latest|${{ inputs.registry }}/${{ inputs.repo }}:${{ inputs.tag }}|g" tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml
    cat tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml
    grep "${{ inputs.registry }}/${{ inputs.repo }}:${{ inputs.tag }}" tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml || { echo "Failed to setup the tests image" >&2; exit 1; }

    # Run the cleanup workload, give it a fixed 180 seconds to do its work,
    # then remove it together with the RBAC objects and runtime classes.
    kubectl apply -f tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml
    sleep 180s
    kubectl delete -f tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml
    kubectl delete -f tools/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml
    kubectl delete -f tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml

View File

@@ -206,8 +206,8 @@ container_pipe_size=@PIPESIZE@
#debug_console_enabled = true
# Agent connection dialing timeout value in seconds
# (default: 30)
#dial_timeout = 30
# (default: 45)
dial_timeout = 45
[runtime]
# If enabled, the runtime will log additional debug messages to the

View File

@@ -154,8 +154,8 @@ disable_selinux=@DEFDISABLESELINUX@
#debug_console_enabled = true
# Agent connection dialing timeout value in seconds
# (default: 30)
#dial_timeout = 30
# (default: 45)
dial_timeout = 45
[runtime]
# If enabled, the runtime will log additional debug messages to the

View File

@@ -305,8 +305,8 @@ block_device_driver = "virtio-blk"
#debug_console_enabled = true
# Agent connection dialing timeout value in seconds
# (default: 30)
#dial_timeout = 30
# (default: 45)
dial_timeout = 45
[runtime]
# If enabled, the runtime will log additional debug messages to the

View File

@@ -284,8 +284,8 @@ kernel_modules=[]
#debug_console_enabled = true
# Agent connection dialing timeout value in seconds
# (default: 30)
#dial_timeout = 30
# (default: 45)
dial_timeout = 45
[runtime]
# If enabled, the runtime will log additional debug messages to the

View File

@@ -529,8 +529,8 @@ kernel_modules=[]
#debug_console_enabled = true
# Agent connection dialing timeout value in seconds
# (default: 30)
#dial_timeout = 30
# (default: 60)
dial_timeout = 60
[runtime]
# If enabled, the runtime will log additional debug messages to the

View File

@@ -535,8 +535,8 @@ kernel_modules=[]
#debug_console_enabled = true
# Agent connection dialing timeout value in seconds
# (default: 30)
#dial_timeout = 30
# (default: 45)
dial_timeout = 45
[runtime]
# If enabled, the runtime will log additional debug messages to the

View File

@@ -73,12 +73,12 @@ const (
// Values based on:
clhTimeout = 10
clhAPITimeout = 1
clhAPITimeoutConfidentialGuest = 10
clhAPITimeoutConfidentialGuest = 20
// Timeout for hot-plug - hotplug devices can take more time, than usual API calls
// Use longer time timeout for it.
clhHotPlugAPITimeout = 5
clhStopSandboxTimeout = 3
clhStopSandboxTimeoutConfidentialGuest = 5
clhStopSandboxTimeoutConfidentialGuest = 10
clhSocket = "clh.sock"
clhAPISocket = "clh-api.sock"
virtioFsSocket = "virtiofsd.sock"

View File

@@ -11,6 +11,7 @@ load "${BATS_TEST_DIRNAME}/tests_common.sh"
setup() {
[ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}"
[ "${KATA_HYPERVISOR}" == "dragonball" ] && skip "test not working see: ${dragonball_limitations}"
[ "${KATA_HYPERVISOR}" == "qemu-tdx" ] && skip "TEEs do not support memory / CPU hotplug"
pod_name="constraints-cpu-test"
container_name="first-cpu-container"
@@ -27,6 +28,7 @@ setup() {
@test "Check CPU constraints" {
[ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}"
[ "${KATA_HYPERVISOR}" == "dragonball" ] && skip "test not working see: ${dragonball_limitations}"
[ "${KATA_HYPERVISOR}" == "qemu-tdx" ] && skip "TEEs do not support memory / CPU hotplug"
# Create the pod
kubectl create -f "${pod_config_dir}/pod-cpu.yaml"
@@ -71,6 +73,7 @@ setup() {
teardown() {
[ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}"
[ "${KATA_HYPERVISOR}" == "dragonball" ] && skip "test not working see: ${dragonball_limitations}"
[ "${KATA_HYPERVISOR}" == "qemu-tdx" ] && skip "TEEs do not support memory / CPU hotplug"
# Debugging information
kubectl describe "pod/$pod_name"

View File

@@ -21,6 +21,9 @@ spec:
image: quay.io/kata-containers/kata-deploy:latest
imagePullPolicy: Always
command: [ "bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh reset" ]
readinessProbe:
  exec:
    # Ready only once the /opt/kata/kata-deployed marker file is absent.
    # Exec probe commands are not interpreted by a shell, so the whole
    # condition must live inside the single `bash -c` script string;
    # extra array items such as "&&" would only become unused positional
    # arguments of that script.
    command: [ "bash", "-c", "[ ! -f /opt/kata/kata-deployed ]" ]
env:
- name: NODE_NAME
valueFrom:

View File

@@ -18,6 +18,9 @@ spec:
- name: kube-kata
image: quay.io/kata-containers/kata-deploy:latest
imagePullPolicy: Always
readinessProbe:
  exec:
    # Ready once the /opt/kata/kata-deployed marker file exists.
    # Exec probe commands are not interpreted by a shell, so only the
    # `bash -c` script string is evaluated; the former trailing
    # "&&", "bash", "-c", "[ $? == 0 ]" items were inert positional
    # arguments and have been dropped without changing the result.
    command: [ "bash", "-c", "[ -f /opt/kata/kata-deployed ]" ]
lifecycle:
preStop:
exec:

View File

@@ -19,8 +19,8 @@ metadata:
handler: kata-qemu-tdx
overhead:
podFixed:
memory: "160Mi"
cpu: "250m"
memory: "2048Mi"
cpu: "1.0"
scheduling:
nodeSelector:
katacontainers.io/kata-runtime: "true"

View File

@@ -63,6 +63,15 @@ function install_artifacts() {
chmod +x /opt/kata/runtime-rs/bin/*
}
# Block until the Kubernetes node named by $NODE_NAME reports a "Ready"
# condition with status "True".
#
# Polls `kubectl get node` every 2 seconds. There is no upper bound on the
# wait: the function only returns once the node is Ready.
function wait_till_node_is_ready() {
	local ready="False"

	until [[ "${ready}" == "True" ]]; do
		sleep 2s
		# Quote the node name so it is always passed as a single argument.
		ready=$(kubectl get node "${NODE_NAME}" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
	done
}
function configure_cri_runtime() {
configure_different_shims_base
@@ -76,6 +85,8 @@ function configure_cri_runtime() {
esac
systemctl daemon-reload
systemctl restart "$1"
wait_till_node_is_ready
}
function configure_different_shims_base() {
@@ -266,6 +277,8 @@ function reset_runtime() {
if [ "$1" == "crio" ] || [ "$1" == "containerd" ]; then
systemctl restart kubelet
fi
wait_till_node_is_ready
}
function main() {
@@ -310,11 +323,13 @@ function main() {
install_artifacts
configure_cri_runtime "$runtime"
kubectl label node "$NODE_NAME" --overwrite katacontainers.io/kata-runtime=true
touch /opt/kata/kata-deployed
;;
cleanup)
cleanup_cri_runtime "$runtime"
kubectl label node "$NODE_NAME" --overwrite katacontainers.io/kata-runtime=cleanup
remove_artifacts
rm /opt/kata/kata-deployed
;;
reset)
reset_runtime $runtime