From b97b20295bc8dc3e2ea696f5fdfd7a52b37eb781 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Doktor?= Date: Fri, 2 May 2025 11:03:35 +0200 Subject: [PATCH 1/5] ci.ocp: Make peer-pods setup executable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set the permissions of the peer-pods-azure.sh script to executable. Signed-off-by: Lukáš Doktor --- ci/openshift-ci/peer-pods-azure.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 ci/openshift-ci/peer-pods-azure.sh diff --git a/ci/openshift-ci/peer-pods-azure.sh b/ci/openshift-ci/peer-pods-azure.sh old mode 100644 new mode 100755 From c203d7eba618c678a64a6d7e154fc1961a346765 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Doktor?= Date: Mon, 12 May 2025 09:50:30 +0200 Subject: [PATCH 2/5] ci.ocp: Set peer-pods-azure license MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We forgot to add the license header when introducing this test. Signed-off-by: Lukáš Doktor --- ci/openshift-ci/peer-pods-azure.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ci/openshift-ci/peer-pods-azure.sh b/ci/openshift-ci/peer-pods-azure.sh index 40bf583a8d..77f457b5ce 100755 --- a/ci/openshift-ci/peer-pods-azure.sh +++ b/ci/openshift-ci/peer-pods-azure.sh @@ -1,4 +1,9 @@ #!/bin/bash -e +# +# Copyright (c) 2025 Red Hat, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 +# # Setup peer-pods using cloud-api-adaptor on azure # # WARNING: When running outside "eastus" region this script creates a new From 0e4fb62bb4465277e95fe3a84e67e9545eee34a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Doktor?= Date: Tue, 13 May 2025 09:29:01 +0200 Subject: [PATCH 3/5] ci.ocp: Retry first az command as login takes time to propagate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In CI we hit a problem where, just after `az login`, the first `az network vnet list` command fails due to permissions. We see "insufficient permissions" or "pending permissions", suggesting we should retry later. Manual tests and successful runs indicate we do have the permissions, but not immediately after login. Azure docs suggest using an extra `az account set`, but the propagation might still take some time. Add a loop retrying the first command a few times before declaring failure. Signed-off-by: Lukáš Doktor --- ci/openshift-ci/peer-pods-azure.sh | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/ci/openshift-ci/peer-pods-azure.sh b/ci/openshift-ci/peer-pods-azure.sh index 77f457b5ce..3dbad0d618 100755 --- a/ci/openshift-ci/peer-pods-azure.sh +++ b/ci/openshift-ci/peer-pods-azure.sh @@ -32,8 +32,21 @@ AZURE_SUBSCRIPTION_ID="$(jq -r .data.azure_subscription_id azure_credentials.jso rm -f azure_credentials.json AZURE_RESOURCE_GROUP=$(oc get infrastructure/cluster -o jsonpath='{.status.platformStatus.azure.resourceGroupName}') az login --service-principal -u "${AZURE_CLIENT_ID}" -p "${AZURE_CLIENT_SECRET}" --tenant "${AZURE_TENANT_ID}" - -AZURE_VNET_NAME=$(az network vnet list --resource-group "${AZURE_RESOURCE_GROUP}" --query "[].{Name:name}" --output tsv) +# Recommended on az sites to refresh the subscription +az account set --subscription "${AZURE_SUBSCRIPTION_ID}" +# This command still sometimes fails directly after login +for I in {1..30}; do 
+ AZURE_VNET_NAME=$(az network vnet list --resource-group "${AZURE_RESOURCE_GROUP}" --query "[].{Name:name}" --output tsv ||:) + if [[ -z "${AZURE_VNET_NAME}" ]]; then + sleep "${I}" + else # VNET set, we are done + break + fi +done +if [[ -z "${AZURE_VNET_NAME}" ]]; then + echo "Failed to get AZURE_VNET_NAME in 30 iterations" + exit 1 +fi AZURE_SUBNET_NAME=$(az network vnet subnet list --resource-group "${AZURE_RESOURCE_GROUP}" --vnet-name "${AZURE_VNET_NAME}" --query "[].{Id:name} | [? contains(Id, 'worker')]" --output tsv) AZURE_SUBNET_ID=$(az network vnet subnet list --resource-group "${AZURE_RESOURCE_GROUP}" --vnet-name "${AZURE_VNET_NAME}" --query "[].{Id:id} | [? contains(Id, 'worker')]" --output tsv) AZURE_REGION=$(az group show --resource-group "${AZURE_RESOURCE_GROUP}" --query "{Location:location}" --output tsv) From 32dbc5d2a9e0811d3d3309b01dd7d39519e28cc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Doktor?= Date: Tue, 13 May 2025 09:30:25 +0200 Subject: [PATCH 4/5] ci.ocp: Use SCRIPT_DIR to allow execution from any folder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We used a hardcoded "ci/openshift-ci/cluster" location, which expects this script to be executed only from the root. Let's use SCRIPT_DIR instead to allow execution from elsewhere, e.g. by a user bisecting a failed CI run. Signed-off-by: Lukáš Doktor --- ci/openshift-ci/peer-pods-azure.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ci/openshift-ci/peer-pods-azure.sh b/ci/openshift-ci/peer-pods-azure.sh index 3dbad0d618..d86863fb3c 100755 --- a/ci/openshift-ci/peer-pods-azure.sh +++ b/ci/openshift-ci/peer-pods-azure.sh @@ -10,6 +10,8 @@ # resource group in "eastus" region and peers the network. 
You # have to remove these manually (or use temporary accounts) +SCRIPT_DIR=$(dirname "$0") + ############################### # Disable security to allow e2e ############################### @@ -228,7 +230,7 @@ done; exit 1 ) || { echo "kata-remote runtimeclass not initialized in 60s"; kube ################ # Deploy webhook ################ -pushd ci/openshift-ci/cluster/ +pushd "${SCRIPT_DIR}/cluster/" kubectl create ns default || true kubectl config set-context --current --namespace=default KATA_RUNTIME=kata-remote ./deploy_webhook.sh From 67ee9f3425c78005c70bc6d088b7dc84f002ba8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Doktor?= Date: Tue, 13 May 2025 09:31:55 +0200 Subject: [PATCH 5/5] ci.ocp: Improve logging of extra new resources MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This script relies on temporary subscriptions and won't clean up any resources. Let's improve the logging to better describe what resources were created and how to clean them up, if the user needs to do so. 
Signed-off-by: Lukáš Doktor --- ci/openshift-ci/peer-pods-azure.sh | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/ci/openshift-ci/peer-pods-azure.sh b/ci/openshift-ci/peer-pods-azure.sh index d86863fb3c..8177db609f 100755 --- a/ci/openshift-ci/peer-pods-azure.sh +++ b/ci/openshift-ci/peer-pods-azure.sh @@ -66,16 +66,19 @@ USER_ASSIGNED_CLIENT_ID="$(az identity show --resource-group "${AZURE_RESOURCE_G PP_REGION=eastus if [[ "${AZURE_REGION}" == "${PP_REGION}" ]]; then echo "Using the current region ${AZURE_REGION}" + PEERING=0 PP_RESOURCE_GROUP="${AZURE_RESOURCE_GROUP}" PP_VNET_NAME="${AZURE_VNET_NAME}" PP_SUBNET_NAME="${AZURE_SUBNET_NAME}" PP_SUBNET_ID="${AZURE_SUBNET_ID}" else echo "Creating peering between ${AZURE_REGION} and ${PP_REGION}" + PEERING=1 PP_RESOURCE_GROUP="${AZURE_RESOURCE_GROUP}-eastus" PP_VNET_NAME="${AZURE_VNET_NAME}-eastus" PP_SUBNET_NAME="${AZURE_SUBNET_NAME}-eastus" PP_NSG_NAME="${AZURE_VNET_NAME}-nsg-eastus" + echo " creating new PP_RESOURCE_GROUP=${PP_RESOURCE_GROUP}" az group create --name "${PP_RESOURCE_GROUP}" --location "${PP_REGION}" az network vnet create --resource-group "${PP_RESOURCE_GROUP}" --name "${PP_VNET_NAME}" --location "${PP_REGION}" --address-prefixes 10.2.0.0/16 --subnet-name "${PP_SUBNET_NAME}" --subnet-prefixes 10.2.1.0/24 az network nsg create --resource-group "${PP_RESOURCE_GROUP}" --name "${PP_NSG_NAME}" --location "${PP_REGION}" @@ -235,3 +238,17 @@ kubectl create ns default || true kubectl config set-context --current --namespace=default KATA_RUNTIME=kata-remote ./deploy_webhook.sh popd + + +################################## +# Log warning when peering created +################################## +if [[ ${PEERING} -ne 0 ]]; then + echo "This script created additional resources to create peering between ${AZURE_REGION} and ${PP_REGION}. 
Ensure you release those resources after the testing (or use temporary subscription)" + PP_VARS=("PP_RESOURCE_GROUP" "PP_VNET_NAME" "PP_SUBNET_NAME" "PP_NSG_NAME" "AZURE_VNET_ID" "PP_VNET_ID" "PP_SUBNET_ID") + for PP_VAR in "${PP_VARS[@]}"; do + echo "${PP_VAR}=${!PP_VAR}" + done + echo + echo "by running 'az group delete --name ${PP_RESOURCE_GROUP}'" +fi