From ad63ad7d4065ab0fd8b0bc6bd3242cfdb00455d8 Mon Sep 17 00:00:00 2001
From: "Lubomir I. Ivanov"
Date: Wed, 22 Apr 2020 03:43:04 +0300
Subject: [PATCH] cluster: cleanup unused scripts

Verified with k/k and k/test-infra.

- restore-from-backup.sh
  Performs restoration of etcd snapshots. The logic seems valid at a
  quick glance, but the script doesn't appear to be used anywhere in
  our infra. I suspect it is run on demand for local testing, but the
  /cluster root doesn't seem like the right place for it at this point.

- test-*.sh
  Ginkgo-related calls, but these bash scripts seem unused in the
  infra.
---
 cluster/restore-from-backup.sh | 251 ---------------------------------
 cluster/test-e2e.sh            |  31 ----
 cluster/test-network.sh        |  29 ----
 cluster/test-smoke.sh          |  30 ----
 4 files changed, 341 deletions(-)
 delete mode 100755 cluster/restore-from-backup.sh
 delete mode 100755 cluster/test-e2e.sh
 delete mode 100755 cluster/test-network.sh
 delete mode 100755 cluster/test-smoke.sh

diff --git a/cluster/restore-from-backup.sh b/cluster/restore-from-backup.sh
deleted file mode 100755
index 811fd0e8af2..00000000000
--- a/cluster/restore-from-backup.sh
+++ /dev/null
@@ -1,251 +0,0 @@
-#!/usr/bin/env bash
-
-# Copyright 2016 The Kubernetes Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# This script performs disaster recovery of etcd from the backup data.
-# Assumptions:
-# - backup was done using etcdctl command:
-#   a) in case of etcd2
-#      $ etcdctl backup --data-dir=<data-dir>
-#      produced .snap and .wal files
-#   b) in case of etcd3
-#      $ etcdctl --endpoints=<address> snapshot save <backup-file>
-#      produced .db file
-# - version.txt file is in the current directory (if it isn't it will be
-#   defaulted to "3.0.17/etcd3"). Based on this file, the script will
-#   decide to which version we are restoring (procedures are different
-#   for etcd2 and etcd3).
-# - in case of etcd2 - *.snap and *.wal files are in current directory
-# - in case of etcd3 - *.db file is in the current directory
-# - the script is run as root
-# - for event etcd, we only support clearing it - to do it, you need to
-#   set RESET_EVENT_ETCD=true env var.
-
-set -o errexit
-set -o nounset
-set -o pipefail
-
-# Version file contains information about current version in the format:
-# <etcd-version>/<etcd-mode> (e.g. "3.0.12/etcd3").
-#
-# If the file doesn't exist we assume "3.0.17/etcd3" configuration is
-# the current one and create a file with such configuration.
-# The restore procedure is chosen based on this information.
-VERSION_FILE="version.txt"
-
-# Make it possible to overwrite version file (or default version)
-# with VERSION_CONTENTS env var.
-if [ -n "${VERSION_CONTENTS:-}" ]; then
-  echo "${VERSION_CONTENTS}" > "${VERSION_FILE}"
-fi
-if [ ! -f "${VERSION_FILE}" ]; then
-  echo "3.0.17/etcd3" > "${VERSION_FILE}"
-fi
-VERSION_CONTENTS="$(cat ${VERSION_FILE})"
-ETCD_VERSION="$(echo "$VERSION_CONTENTS" | cut -d '/' -f 1)"
-ETCD_API="$(echo "$VERSION_CONTENTS" | cut -d '/' -f 2)"
-
-# Name is used only in case of etcd3 mode, to appropriate set the metadata
-# for the etcd data.
-# NOTE: NAME HAS TO BE EQUAL TO WHAT WE USE IN --name flag when starting etcd.
-NAME="${NAME:-etcd-$(hostname)}"
-
-INITIAL_CLUSTER="${INITIAL_CLUSTER:-${NAME}=http://localhost:2380}"
-INITIAL_ADVERTISE_PEER_URLS="${INITIAL_ADVERTISE_PEER_URLS:-http://localhost:2380}"
-
-# Port on which etcd is exposed.
-etcd_port=2379
-event_etcd_port=4002
-
-# Wait until both etcd instances are up
-wait_for_etcd_up() {
-  port=$1
-  # TODO: As of 3.0.x etcd versions, all 2.* and 3.* versions return
-  # {"health": "true"} on /health endpoint in healthy case.
-  # However, we should come with a regex for it to avoid future break.
-  health_ok="{\"health\": \"true\"}"
-  for _ in $(seq 120); do
-    # TODO: Is it enough to look into /health endpoint?
-    health=$(curl --silent "http://127.0.0.1:${port}/health")
-    if [ "${health}" == "${health_ok}" ]; then
-      return 0
-    fi
-    sleep 1
-  done
-  return 1
-}
-
-# Wait until apiserver is up.
-wait_for_cluster_healthy() {
-  for _ in $(seq 120); do
-    cs_status=$(kubectl get componentstatuses -o template --template='{{range .items}}{{with index .conditions 0}}{{.type}}:{{.status}}{{end}}{{"\n"}}{{end}}') || true
-    componentstatuses=$(echo "${cs_status}" | grep -c 'Healthy:') || true
-    healthy=$(echo "${cs_status}" | grep -c 'Healthy:True') || true
-    if [ "${componentstatuses}" -eq "${healthy}" ]; then
-      return 0
-    fi
-    sleep 1
-  done
-  return 1
-}
-
-# Wait until etcd and apiserver pods are down.
-wait_for_etcd_and_apiserver_down() {
-  for _ in $(seq 120); do
-    etcd=$(docker ps | grep -c etcd-server)
-    apiserver=$(docker ps | grep -c apiserver)
-    # TODO: Theoretically it is possible, that apiserver and or etcd
-    # are currently down, but Kubelet is now restarting them and they
-    # will reappear again. We should avoid it.
-    if [ "${etcd}" -eq "0" ] && [ "${apiserver}" -eq "0" ]; then
-      return 0
-    fi
-    sleep 1
-  done
-  return 1
-}
-
-# Move the manifest files to stop etcd and kube-apiserver
-# while we swap the data out from under them.
-MANIFEST_DIR="/etc/kubernetes/manifests"
-MANIFEST_BACKUP_DIR="/etc/kubernetes/manifests-backups"
-mkdir -p "${MANIFEST_BACKUP_DIR}"
-echo "Moving etcd(s) & apiserver manifest files to ${MANIFEST_BACKUP_DIR}"
-# If those files were already moved (e.g. during previous
-# try of backup) don't fail on it.
-mv "${MANIFEST_DIR}/kube-apiserver.manifest" "${MANIFEST_BACKUP_DIR}" || true
-mv "${MANIFEST_DIR}/etcd.manifest" "${MANIFEST_BACKUP_DIR}" || true
-mv "${MANIFEST_DIR}/etcd-events.manifest" "${MANIFEST_BACKUP_DIR}" || true
-
-# Wait for the pods to be stopped
-echo "Waiting for etcd and kube-apiserver to be down"
-if ! wait_for_etcd_and_apiserver_down; then
-  # Couldn't kill etcd and apiserver.
-  echo "Downing etcd and apiserver failed"
-  exit 1
-fi
-
-read -rsp $'Press enter when all etcd instances are down...\n'
-
-# Create the sort of directory structure that etcd expects.
-# If this directory already exists, remove it.
-BACKUP_DIR="/var/tmp/backup"
-rm -rf "${BACKUP_DIR}"
-if [ "${ETCD_API}" == "etcd2" ]; then
-  echo "Preparing etcd backup data for restore"
-  # In v2 mode, we simply copy both snap and wal files to a newly created
-  # directory. After that, we start etcd with --force-new-cluster option
-  # that (according to the etcd documentation) is required to recover from
-  # a backup.
-  echo "Copying data to ${BACKUP_DIR} and restoring there"
-  mkdir -p "${BACKUP_DIR}/member/snap"
-  mkdir -p "${BACKUP_DIR}/member/wal"
-  # If the cluster is relatively new, there can be no .snap file.
-  mv ./*.snap "${BACKUP_DIR}/member/snap/" || true
-  mv ./*.wal "${BACKUP_DIR}/member/wal/"
-
-  # TODO(jsz): This won't work with HA setups (e.g. do we need to set --name flag)?
-  echo "Starting etcd ${ETCD_VERSION} to restore data"
-  if ! image=$(docker run -d -v ${BACKUP_DIR}:/var/etcd/data \
-    --net=host -p ${etcd_port}:${etcd_port} \
-    "k8s.gcr.io/etcd:${ETCD_VERSION}" /bin/sh -c \
-    "/usr/local/bin/etcd --data-dir /var/etcd/data --force-new-cluster"); then
-    echo "Docker container didn't started correctly"
-    exit 1
-  fi
-  echo "Container ${image} created, waiting for etcd to report as healthy"
-
-  if ! wait_for_etcd_up "${etcd_port}"; then
-    echo "Etcd didn't come back correctly"
-    exit 1
-  fi
-
-  # Kill that etcd instance.
-  echo "Etcd healthy - killing ${image} container"
-  docker kill "${image}"
-elif [ "${ETCD_API}" == "etcd3" ]; then
-  echo "Preparing etcd snapshot for restore"
-  mkdir -p "${BACKUP_DIR}"
-  echo "Copying data to ${BACKUP_DIR} and restoring there"
-  number_files=$(find . -maxdepth 1 -type f -name "*.db" | wc -l)
-  if [ "${number_files}" -ne "1" ]; then
-    echo "Incorrect number of *.db files - expected 1"
-    exit 1
-  fi
-  mv ./*.db "${BACKUP_DIR}/"
-  snapshot="$(ls ${BACKUP_DIR})"
-
-  # Run etcdctl snapshot restore command and wait until it is finished.
-  # setting with --name in the etcd manifest file and then it seems to work.
-  if ! docker run -v ${BACKUP_DIR}:/var/tmp/backup --env ETCDCTL_API=3 \
-    "k8s.gcr.io/etcd:${ETCD_VERSION}" /bin/sh -c \
-    "/usr/local/bin/etcdctl snapshot restore ${BACKUP_DIR}/${snapshot} --name ${NAME} --initial-cluster ${INITIAL_CLUSTER} --initial-advertise-peer-urls ${INITIAL_ADVERTISE_PEER_URLS}; mv /${NAME}.etcd/member /var/tmp/backup/"; then
-    echo "Docker container didn't started correctly"
-    exit 1
-  fi
-
-  rm -f "${BACKUP_DIR}/${snapshot}"
-fi
-# Also copy version.txt file.
-cp "${VERSION_FILE}" "${BACKUP_DIR}"
-
-export MNT_DISK="/mnt/disks/master-pd"
-
-# Save the corrupted data (clean directory if it is already non-empty).
-rm -rf "${MNT_DISK}/var/etcd-corrupted"
-mkdir -p "${MNT_DISK}/var/etcd-corrupted"
-echo "Saving corrupted data to ${MNT_DISK}/var/etcd-corrupted"
-mv /var/etcd/data "${MNT_DISK}/var/etcd-corrupted"
-
-# Replace the corrupted data dir with the restored data.
-echo "Copying restored data to /var/etcd/data"
-mv "${BACKUP_DIR}" /var/etcd/data
-
-if [ "${RESET_EVENT_ETCD:-}" == "true" ]; then
-  echo "Removing event-etcd corrupted data"
-  EVENTS_CORRUPTED_DIR="${MNT_DISK}/var/etcd-events-corrupted"
-  # Save the corrupted data (clean directory if it is already non-empty).
-  rm -rf "${EVENTS_CORRUPTED_DIR}"
-  mkdir -p "${EVENTS_CORRUPTED_DIR}"
-  mv /var/etcd/data-events "${EVENTS_CORRUPTED_DIR}"
-fi
-
-# Start etcd and kube-apiserver again.
-echo "Restarting etcd and apiserver from restored snapshot"
-mv "${MANIFEST_BACKUP_DIR}"/* "${MANIFEST_DIR}/"
-rm -rf "${MANIFEST_BACKUP_DIR}"
-
-# Verify that etcd is back.
-echo "Waiting for etcd to come back"
-if ! wait_for_etcd_up "${etcd_port}"; then
-  echo "Etcd didn't come back correctly"
-  exit 1
-fi
-
-# Verify that event etcd is back.
-echo "Waiting for event etcd to come back"
-if ! wait_for_etcd_up "${event_etcd_port}"; then
-  echo "Event etcd didn't come back correctly"
-  exit 1
-fi
-
-# Verify that kube-apiserver is back and cluster is healthy.
-echo "Waiting for apiserver to come back"
-if ! wait_for_cluster_healthy; then
-  echo "Apiserver didn't come back correctly"
-  exit 1
-fi
-
-echo "Cluster successfully restored!"
diff --git a/cluster/test-e2e.sh b/cluster/test-e2e.sh
deleted file mode 100755
index 1c0c0c134c1..00000000000
--- a/cluster/test-e2e.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/usr/bin/env bash
-
-# Copyright 2014 The Kubernetes Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Tests a running Kubernetes cluster.
-# TODO: move code from hack/ginkgo-e2e.sh to here
-
-set -o errexit
-set -o nounset
-set -o pipefail
-
-KUBE_ROOT=$(dirname "${BASH_SOURCE[0]}")/..
-source "${KUBE_ROOT}/cluster/kube-util.sh"
-
-echo "Testing cluster with provider: ${KUBERNETES_PROVIDER}" 1>&2
-
-echo "Running e2e tests:" 1>&2
-echo "./hack/ginkgo-e2e.sh $*" 1>&2
-exec "${KUBE_ROOT}/hack/ginkgo-e2e.sh" "$@"
diff --git a/cluster/test-network.sh b/cluster/test-network.sh
deleted file mode 100755
index acc51742844..00000000000
--- a/cluster/test-network.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/usr/bin/env bash
-
-# Copyright 2014 The Kubernetes Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Bring up a Kubernetes cluster.
-#
-# If the full release name (gs://<bucket>/<release>) is passed in then we take
-# that directly. If not then we assume we are doing development stuff and take
-# the defaults in the release config.

-set -o errexit
-set -o nounset
-set -o pipefail
-
-KUBE_ROOT=$(dirname "${BASH_SOURCE[0]}")/..
-
-"${KUBE_ROOT}/hack/ginkgo-e2e.sh" --ginkgo.focus=Networking
diff --git a/cluster/test-smoke.sh b/cluster/test-smoke.sh
deleted file mode 100755
index fdedf37a165..00000000000
--- a/cluster/test-smoke.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/usr/bin/env bash
-
-# Copyright 2014 The Kubernetes Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Smoke Tests a running Kubernetes cluster.
-# Validates that the cluster was deployed, is accessible, and at least
-# satisfies minimal functional requirements.
-# Emphasis on speed and being non-destructive over thoroughness.
-
-set -o errexit
-set -o nounset
-set -o pipefail
-
-KUBE_ROOT=$(dirname "${BASH_SOURCE[0]}")/..
-
-SMOKE_TEST_FOCUS_REGEX="Guestbook.application"
-
-exec "${KUBE_ROOT}/cluster/test-e2e.sh" -ginkgo.focus="${SMOKE_TEST_FOCUS_REGEX}" "$@"