From 519860bbfdc8ef80f69425d9f46c42b24d8389fa Mon Sep 17 00:00:00 2001 From: dougbtv Date: Thu, 10 Apr 2025 14:02:06 -0400 Subject: [PATCH] If we want to fix upgrade issues, we should test upgrade issues. This includes a test for a node reboot and an "upgrade", which just deletes the Multus pods (so the DaemonSet recreates them) to see that a fresh binary is installed. --- .github/workflows/kind-e2e.yml | 22 +++++++++++++++ e2e/test-check-binaries.sh | 49 ++++++++++++++++++++++++++++++++++ e2e/test-reboot.sh | 28 +++++++++++++++++++ e2e/test-upgrade.sh | 33 +++++++++++++++++++++++ pkg/cmdutils/utils.go | 9 ++++++- 5 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 e2e/test-check-binaries.sh create mode 100755 e2e/test-reboot.sh create mode 100755 e2e/test-upgrade.sh diff --git a/.github/workflows/kind-e2e.yml b/.github/workflows/kind-e2e.yml index 02424e463..e34ffa9b5 100644 --- a/.github/workflows/kind-e2e.yml +++ b/.github/workflows/kind-e2e.yml @@ -9,15 +9,23 @@ jobs: - docker-file: images/Dockerfile.thick cni-version: "0.3.1" multus-manifest: multus-daemonset-thick.yml + expected-binaries: "/opt/cni/bin/multus-shim" + init-container-name: "install-multus-shim" - docker-file: images/Dockerfile cni-version: "0.3.1" multus-manifest: multus-daemonset.yml + expected-binaries: "/opt/cni/bin/multus" + init-container-name: "install-multus-binary" - docker-file: images/Dockerfile.thick cni-version: "0.4.0" multus-manifest: multus-daemonset-thick.yml + expected-binaries: "/opt/cni/bin/multus-shim" + init-container-name: "install-multus-shim" - docker-file: images/Dockerfile cni-version: "0.4.0" multus-manifest: multus-daemonset.yml + expected-binaries: "/opt/cni/bin/multus" + init-container-name: "install-multus-binary" # need to wait kind to support CNI 1.0.0 (now kind 0.11 supports up to 0.4.0) # - docker-file: images/Dockerfile.thick # cni-version: "1.0.0" @@ -95,6 +103,20 @@ jobs: mkdir -p /tmp/kind/logs kind export logs /tmp/kind/logs -v 2147483647 + - name: Test reboot node + 
working-directory: ./e2e + env: + EXPECTED_BINARIES: ${{ matrix.expected-binaries }} + INSTALL_INIT_CONTAINER: ${{ matrix.init-container-name }} + run: ./test-reboot.sh + + - name: Test upgrade simulation + working-directory: ./e2e + env: + EXPECTED_BINARIES: ${{ matrix.expected-binaries }} + INSTALL_INIT_CONTAINER: ${{ matrix.init-container-name }} + run: ./test-upgrade.sh + - name: Upload kind logs if: always() uses: actions/upload-artifact@v4 diff --git a/e2e/test-check-binaries.sh b/e2e/test-check-binaries.sh new file mode 100644 index 000000000..1ef80c7a7 --- /dev/null +++ b/e2e/test-check-binaries.sh @@ -0,0 +1,49 @@ +#!/bin/bash +set -o errexit + +# Wait for init containers... +for pod in $(kubectl get pods -n ${NAMESPACE} -l name=multus -o jsonpath='{.items[*].metadata.name}'); do + echo "Waiting for init container to complete in pod: ${pod}" + + # Timeout loop: 60 tries, 5 seconds sleep = 5 minutes max + for i in {1..60}; do + state=$(kubectl get pod ${pod} -n ${NAMESPACE} -o jsonpath="{.status.initContainerStatuses[?(@.name==\"${INSTALL_INIT_CONTAINER}\")].state.terminated.reason}" 2>/dev/null || true) + + if [ "$state" = "Completed" ]; then + echo "SUCCESS: Init container completed in pod ${pod}" + break + fi + + echo "Still waiting for init container in pod ${pod} (current state: ${state})..." + sleep 5 + done + + # After waiting, make sure it's done + state=$(kubectl get pod ${pod} -n ${NAMESPACE} -o jsonpath="{.status.initContainerStatuses[?(@.name==\"${INSTALL_INIT_CONTAINER}\")].state.terminated.reason}" 2>/dev/null || true) + if [ "$state" != "Completed" ]; then + echo "FAIL: Init container did not complete in pod ${pod} after timeout." + exit 1 + fi +done + +echo "Sleeping for 5 seconds (for fs sync, possibly)..." +sleep 5 + +# verify binaries +for bin in $EXPECTED_BINARIES; do + if ! 
docker exec "${NODE_NAME}" test -f "${bin}"; then + echo "FAIL: Expected binary ${bin} not found on node ${NODE_NAME}" + exit 1 + fi + echo "SUCCESS: Binary ${bin} found." + + after_ts=$(docker exec "${NODE_NAME}" stat -c %Y "${bin}") + echo "After restart: ${bin} mtime = ${after_ts}" + + if [ "${after_ts}" -le "${before_mtime[${bin}]}" ]; then + echo "FAIL: mtime for ${bin} did not update after restart (before: ${before_mtime[${bin}]}, after: ${after_ts})" + exit 1 + fi + + echo "SUCCESS: mtime for ${bin} updated correctly after restart." +done diff --git a/e2e/test-reboot.sh b/e2e/test-reboot.sh new file mode 100755 index 000000000..8685194cc --- /dev/null +++ b/e2e/test-reboot.sh @@ -0,0 +1,28 @@ +#!/bin/bash +set -o errexit + +NODE_NAME="kind-worker" +DAEMONSET_NAME="kube-multus-ds-amd64" +NAMESPACE="kube-system" +EXPECTED_BINARIES="${EXPECTED_BINARIES:-/opt/cni/bin/multus-shim}" +INSTALL_INIT_CONTAINER="${INSTALL_INIT_CONTAINER:-install-multus-shim}" + +declare -A before_mtime + +for bin in $EXPECTED_BINARIES; do + before_ts=$(docker exec "${NODE_NAME}" stat -c %Y "${bin}") + before_mtime["${bin}"]=$before_ts + echo "Before reboot: ${bin} mtime = ${before_ts}" +done + +echo "Rebooting node..." 
+docker restart "${NODE_NAME}" +sleep 2 +docker start "${NODE_NAME}" + +kubectl wait --for=condition=Ready node/${NODE_NAME} --timeout=300s +kubectl rollout status daemonset/${DAEMONSET_NAME} -n ${NAMESPACE} --timeout=300s + +source ./test-check-binaries.sh + +echo "SUCCESS: reboot test passed" diff --git a/e2e/test-upgrade.sh b/e2e/test-upgrade.sh new file mode 100755 index 000000000..50a9a35f1 --- /dev/null +++ b/e2e/test-upgrade.sh @@ -0,0 +1,33 @@ +#!/bin/bash +set -o errexit + +NODE_NAME="kind-worker" +DAEMONSET_NAME="kube-multus-ds-amd64" +NAMESPACE="kube-system" +EXPECTED_BINARIES="${EXPECTED_BINARIES:-/opt/cni/bin/multus-shim}" +INSTALL_INIT_CONTAINER="${INSTALL_INIT_CONTAINER:-install-multus-shim}" + +declare -A before_mtime + +# Capture the mtimes before upgrade +echo "Capturing binary mtimes before upgrade on node ${NODE_NAME}..." + +for bin in $EXPECTED_BINARIES; do + echo "Getting mtime for ${bin}..." + before_ts=$(docker exec "${NODE_NAME}" stat -c %Y "${bin}") + before_mtime["${bin}"]=$before_ts + echo "Before upgrade: ${bin} mtime = ${before_ts}" +done + + +# Delete all Multus DaemonSet pods to simulate an upgrade. +echo "Deleting all Multus DaemonSet pods to simulate upgrade..." +kubectl delete pods -n ${NAMESPACE} -l name=multus + +# Wait for the Multus DaemonSet pods to come back up. +echo "Waiting for Multus DaemonSet ${DAEMONSET_NAME} pods to be Ready after upgrade..." 
+kubectl rollout status daemonset/${DAEMONSET_NAME} -n ${NAMESPACE} --timeout=300s + +source ./test-check-binaries.sh + +echo "Upgrade test PASSED" diff --git a/pkg/cmdutils/utils.go b/pkg/cmdutils/utils.go index 90df8851c..eb4402287 100644 --- a/pkg/cmdutils/utils.go +++ b/pkg/cmdutils/utils.go @@ -20,6 +20,7 @@ import ( "io" "os" "path/filepath" + "time" ) // CopyFileAtomic does file copy atomically @@ -35,10 +36,10 @@ func CopyFileAtomic(srcFilePath, destDir, tempFileName, destFileName string) err // create temp file f, err := os.CreateTemp(destDir, tempFileName) - defer f.Close() if err != nil { return fmt.Errorf("cannot create temp file %q in %q: %v", tempFileName, destDir, err) } + defer f.Close() srcFile, err := os.Open(srcFilePath) if err != nil { @@ -80,5 +81,11 @@ func CopyFileAtomic(srcFilePath, destDir, tempFileName, destFileName string) err return fmt.Errorf("cannot replace %q with temp file %q: %v", destFilePath, tempFilePath, err) } + // touch the file + now := time.Now() + if err := os.Chtimes(destFilePath, now, now); err != nil { + return fmt.Errorf("failed to update timestamp on %q: %v", destFilePath, err) + } + return nil }