If we want to fix upgrade issues, we should test upgrade issues.

This adds e2e tests for a node reboot and for a simulated "upgrade", which simply deletes the Multus pods so they are recreated, and then checks that a fresh binary has been installed.
This commit is contained in:
dougbtv
2025-04-10 14:02:06 -04:00
parent 55ef3b1f0b
commit 519860bbfd
5 changed files with 140 additions and 1 deletions

View File

@@ -9,15 +9,23 @@ jobs:
- docker-file: images/Dockerfile.thick
cni-version: "0.3.1"
multus-manifest: multus-daemonset-thick.yml
expected-binaries: "/opt/cni/bin/multus-shim"
init-container-name: "install-multus-shim"
- docker-file: images/Dockerfile
cni-version: "0.3.1"
multus-manifest: multus-daemonset.yml
expected-binaries: "/opt/cni/bin/multus"
init-container-name: "install-multus-binary"
- docker-file: images/Dockerfile.thick
cni-version: "0.4.0"
multus-manifest: multus-daemonset-thick.yml
expected-binaries: "/opt/cni/bin/multus-shim"
init-container-name: "install-multus-shim"
- docker-file: images/Dockerfile
cni-version: "0.4.0"
multus-manifest: multus-daemonset.yml
expected-binaries: "/opt/cni/bin/multus"
init-container-name: "install-multus-binary"
# need to wait for kind to support CNI 1.0.0 (kind 0.11 currently supports only up to 0.4.0)
# - docker-file: images/Dockerfile.thick
# cni-version: "1.0.0"
@@ -95,6 +103,20 @@ jobs:
mkdir -p /tmp/kind/logs
kind export logs /tmp/kind/logs -v 2147483647
- name: Test reboot node
working-directory: ./e2e
env:
EXPECTED_BINARIES: ${{ matrix.expected-binaries }}
INSTALL_INIT_CONTAINER: ${{ matrix.init-container-name }}
run: ./test-reboot.sh
- name: Test upgrade simulation
working-directory: ./e2e
env:
EXPECTED_BINARIES: ${{ matrix.expected-binaries }}
INSTALL_INIT_CONTAINER: ${{ matrix.init-container-name }}
run: ./test-upgrade.sh
- name: Upload kind logs
if: always()
uses: actions/upload-artifact@v4

View File

@@ -0,0 +1,49 @@
#!/bin/bash
# Shared verification step for the reboot and upgrade e2e tests.
#
# This file is meant to be sourced (not executed) by a script that has already
# set the following:
#   NAMESPACE               namespace passed to kubectl when looking up pods
#   NODE_NAME               container name passed to `docker exec`
#   INSTALL_INIT_CONTAINER  name of the init container whose completion is awaited
#   EXPECTED_BINARIES       whitespace-separated list of binary paths to check
#   before_mtime            associative array mapping each binary path to the
#                           mtime (epoch seconds) recorded before the disruption
#
# It exits non-zero if an init container does not complete in time, if a
# binary is missing, or if a binary's mtime did not move forward.
set -o errexit

# Fail early with a clear message instead of running kubectl/docker with
# empty arguments when the sourcing script forgot to set something.
: "${NAMESPACE:?NAMESPACE must be set by the sourcing script}"
: "${NODE_NAME:?NODE_NAME must be set by the sourcing script}"
: "${INSTALL_INIT_CONTAINER:?INSTALL_INIT_CONTAINER must be set by the sourcing script}"
: "${EXPECTED_BINARIES:?EXPECTED_BINARIES must be set by the sourcing script}"

# jsonpath that extracts the termination reason of the install init container.
# It yields an empty string while the container has not terminated yet.
init_state_jsonpath="{.status.initContainerStatuses[?(@.name==\"${INSTALL_INIT_CONTAINER}\")].state.terminated.reason}"

# Wait for init containers...
for pod in $(kubectl get pods -n "${NAMESPACE}" -l name=multus -o jsonpath='{.items[*].metadata.name}'); do
  echo "Waiting for init container to complete in pod: ${pod}"

  # Timeout loop: 60 tries, 5 seconds sleep = 5 minutes max
  for _ in {1..60}; do
    # `|| true` keeps errexit from aborting on a transient kubectl failure.
    state=$(kubectl get pod "${pod}" -n "${NAMESPACE}" -o jsonpath="${init_state_jsonpath}" 2>/dev/null || true)
    if [ "$state" = "Completed" ]; then
      echo "SUCCESS: Init container completed in pod ${pod}"
      break
    fi
    echo "Still waiting for init container in pod ${pod} (current state: ${state})..."
    sleep 5
  done

  # After waiting, make sure it's done (this also gives one last chance after
  # the final sleep of the loop above).
  state=$(kubectl get pod "${pod}" -n "${NAMESPACE}" -o jsonpath="${init_state_jsonpath}" 2>/dev/null || true)
  if [ "$state" != "Completed" ]; then
    echo "FAIL: Init container did not complete in pod ${pod} after timeout."
    exit 1
  fi
done

echo "Sleeping for 5 seconds (for fs sync, possibly)..."
sleep 5

# verify binaries
# EXPECTED_BINARIES is intentionally unquoted so it splits into one path per word.
for bin in $EXPECTED_BINARIES; do
  if ! docker exec "${NODE_NAME}" test -f "${bin}"; then
    echo "FAIL: Expected binary ${bin} not found on node ${NODE_NAME}"
    exit 1
  fi
  echo "SUCCESS: Binary ${bin} found."

  after_ts=$(docker exec "${NODE_NAME}" stat -c %Y "${bin}")
  echo "After reboot: ${bin} mtime = ${after_ts}"

  # The binary must have been rewritten: its mtime has to be strictly newer
  # than the value captured by the sourcing script.
  if [ "${after_ts}" -le "${before_mtime[${bin}]}" ]; then
    echo "FAIL: mtime for ${bin} did not update after reboot (before: ${before_mtime[${bin}]}, after: ${after_ts})"
    exit 1
  fi
  echo "SUCCESS: mtime for ${bin} updated correctly after reboot."
done

28
e2e/test-reboot.sh Executable file
View File

@@ -0,0 +1,28 @@
#!/bin/bash
# e2e test: restart the node container and confirm, via the sourced
# test-check-binaries.sh, that the expected binaries were reinstalled.
set -o errexit

# Settings read both here and by the sourced check script.
NAMESPACE="kube-system"
DAEMONSET_NAME="kube-multus-ds-amd64"
NODE_NAME="kind-worker"
INSTALL_INIT_CONTAINER="${INSTALL_INIT_CONTAINER:-install-multus-shim}"
EXPECTED_BINARIES="${EXPECTED_BINARIES:-/opt/cni/bin/multus-shim}"

# Declared at top level so the array stays global for the sourced script.
declare -A before_mtime

# record_mtime PATH: store PATH's current mtime (epoch seconds, as reported by
# stat inside the node container) in before_mtime and log it.
record_mtime() {
  local path="$1"
  local stamp
  stamp=$(docker exec "${NODE_NAME}" stat -c %Y "${path}")
  before_mtime["${path}"]=$stamp
  echo "Before reboot: ${path} mtime = ${stamp}"
}

# EXPECTED_BINARIES is left unquoted on purpose: one path per word.
for bin in $EXPECTED_BINARIES; do
  record_mtime "${bin}"
done

echo "Rebooting node..."
docker restart "${NODE_NAME}"
sleep 2
# NOTE(review): `docker restart` presumably leaves the container running
# already, which would make this start a no-op; kept to preserve the flow.
docker start "${NODE_NAME}"

# Block until the node reports Ready and the DaemonSet rollout has finished.
kubectl wait --for=condition=Ready "node/${NODE_NAME}" --timeout=300s
kubectl rollout status "daemonset/${DAEMONSET_NAME}" -n "${NAMESPACE}" --timeout=300s

source ./test-check-binaries.sh

echo "SUCCESS: reboot test passed"

33
e2e/test-upgrade.sh Executable file
View File

@@ -0,0 +1,33 @@
#!/bin/bash
# e2e test: simulate an upgrade by deleting the Multus pods, wait for the
# DaemonSet to bring them back, then confirm via the sourced
# test-check-binaries.sh that the expected binaries were reinstalled.
set -o errexit

# Settings read both here and by the sourced check script.
NODE_NAME="kind-worker"
DAEMONSET_NAME="kube-multus-ds-amd64"
NAMESPACE="kube-system"
EXPECTED_BINARIES="${EXPECTED_BINARIES:-/opt/cni/bin/multus-shim}"
INSTALL_INIT_CONTAINER="${INSTALL_INIT_CONTAINER:-install-multus-shim}"

# Maps each binary path to its mtime (epoch seconds) before the upgrade; the
# sourced check script compares against these values.
declare -A before_mtime

# Capture the mtimes before upgrade
echo "Capturing binary mtimes before upgrade on node ${NODE_NAME}..."
# EXPECTED_BINARIES is left unquoted on purpose: one path per word.
for bin in $EXPECTED_BINARIES; do
  echo "Getting mtime for ${bin}..."
  before_ts=$(docker exec "${NODE_NAME}" stat -c %Y "${bin}")
  before_mtime["${bin}"]=$before_ts
  # This script exercises the upgrade path, so the log line says "upgrade"
  # (it previously said "reboot", copied from the reboot test).
  echo "Before upgrade: ${bin} mtime = ${before_ts}"
done

# Delete all Multus DaemonSet pods to simulate an upgrade.
echo "Deleting all Multus DaemonSet pods to simulate upgrade..."
kubectl delete pods -n "${NAMESPACE}" -l name=multus

# Wait for the Multus DaemonSet pods to come back up.
echo "Waiting for Multus DaemonSet ${DAEMONSET_NAME} pods to be Ready after upgrade..."
kubectl rollout status "daemonset/${DAEMONSET_NAME}" -n "${NAMESPACE}" --timeout=300s

source ./test-check-binaries.sh

echo "Upgrade test PASSED"

View File

@@ -20,6 +20,7 @@ import (
"io"
"os"
"path/filepath"
"time"
)
// CopyFileAtomic does file copy atomically
@@ -35,10 +36,10 @@ func CopyFileAtomic(srcFilePath, destDir, tempFileName, destFileName string) err
// create temp file
f, err := os.CreateTemp(destDir, tempFileName)
defer f.Close()
if err != nil {
return fmt.Errorf("cannot create temp file %q in %q: %v", tempFileName, destDir, err)
}
defer f.Close()
srcFile, err := os.Open(srcFilePath)
if err != nil {
@@ -80,5 +81,11 @@ func CopyFileAtomic(srcFilePath, destDir, tempFileName, destFileName string) err
return fmt.Errorf("cannot replace %q with temp file %q: %v", destFilePath, tempFilePath, err)
}
// touch the file
now := time.Now()
if err := os.Chtimes(destFilePath, now, now); err != nil {
return fmt.Errorf("failed to update timestamp on %q: %v", destFilePath, err)
}
return nil
}