From dc070254704d4aafad0cf7a04c84c90a8bc896f4 Mon Sep 17 00:00:00 2001 From: Mike Spreitzer Date: Fri, 5 Nov 2021 19:58:48 +0000 Subject: [PATCH 1/2] Add periodic etcd scraping to integration tests .. to help understand where and why things go bad. --- hack/lib/etcd.sh | 28 ++++++++++++++++++++++++++++ hack/make-rules/test-integration.sh | 3 +++ 2 files changed, 31 insertions(+) diff --git a/hack/lib/etcd.sh b/hack/lib/etcd.sh index e7a62cab15d..84518695f1c 100755 --- a/hack/lib/etcd.sh +++ b/hack/lib/etcd.sh @@ -91,7 +91,35 @@ kube::etcd::start() { curl -fs -X POST "${KUBE_INTEGRATION_ETCD_URL}/v3/kv/put" -d '{"key": "X3Rlc3Q=", "value": ""}' } +kube::etcd::start_scraping() { + if [[ -d "${ARTIFACTS:-}" ]]; then + ETCD_SCRAPE_DIR="${ARTIFACTS}/etcd-scrapes" + else + ETCD_SCRAPE_DIR=$(mktemp -d -t test-etcd-scrapes.XXXXXX) + fi + kube::log::info "Periodically scraping etcd to ${ETCD_SCRAPE_DIR} ." + mkdir -p "${ETCD_SCRAPE_DIR}" + ( + while sleep 30; do + kube::etcd::scrape + done + ) & + ETCD_SCRAPE_PID=$! +} + +kube::etcd::scrape() { + curl -s -S "${KUBE_INTEGRATION_ETCD_URL}/metrics" > "${ETCD_SCRAPE_DIR}/next" && mv "${ETCD_SCRAPE_DIR}/next" "${ETCD_SCRAPE_DIR}/$(date +%H%M%S).scrape" +} + + kube::etcd::stop() { + if [[ -n "${ETCD_SCRAPE_PID:-}" ]]; then + kill "${ETCD_SCRAPE_PID}" &>/dev/null || : + wait "${ETCD_SCRAPE_PID}" &>/dev/null || : + kube::etcd::scrape || : + # shellcheck disable=SC2015 + tar czf "${ETCD_SCRAPE_DIR}/scrapes.tgz" "${ETCD_SCRAPE_DIR}"/*.scrape && rm "${ETCD_SCRAPE_DIR}"/*.scrape || : + fi if [[ -n "${ETCD_PID-}" ]]; then kill "${ETCD_PID}" &>/dev/null || : wait "${ETCD_PID}" &>/dev/null || : diff --git a/hack/make-rules/test-integration.sh b/hack/make-rules/test-integration.sh index 9707fefd37d..68789119280 100755 --- a/hack/make-rules/test-integration.sh +++ b/hack/make-rules/test-integration.sh @@ -64,6 +64,9 @@ runTests() { kube::log::status "Starting etcd instance" CLEANUP_REQUIRED=1 kube::etcd::start + # shellcheck disable=SC2034 + local ETCD_SCRAPE_PID # Set in kube::etcd::start_scraping, used in cleanup + kube::etcd::start_scraping kube::log::status "Running integration test cases" make -C "${KUBE_ROOT}" test \ From 4ca4ccdcc21963fd57732c87993760dc85508cd9 Mon Sep 17 00:00:00 2001 From: Mike Spreitzer Date: Sat, 6 Nov 2021 06:10:05 +0000 Subject: [PATCH 2/2] Add serving of scrapes as Prometheus metrics --- hack/lib/etcd.sh | 14 ++-- hack/run-prometheus-on-etcd-scrapes.sh | 98 ++++++++++++++++++++++++++ hack/serve-prom-scrapes.sh | 67 ++++++++++++++++++ 3 files changed, 174 insertions(+), 5 deletions(-) create mode 100755 hack/run-prometheus-on-etcd-scrapes.sh create mode 100755 hack/serve-prom-scrapes.sh diff --git a/hack/lib/etcd.sh b/hack/lib/etcd.sh index 84518695f1c..9a9cd8cf18a 100755 --- a/hack/lib/etcd.sh +++ b/hack/lib/etcd.sh @@ -95,7 +95,7 @@ kube::etcd::start_scraping() { if [[ -d "${ARTIFACTS:-}" ]]; then ETCD_SCRAPE_DIR="${ARTIFACTS}/etcd-scrapes" else - ETCD_SCRAPE_DIR=$(mktemp -d -t test-etcd-scrapes.XXXXXX) + ETCD_SCRAPE_DIR=$(mktemp -d -t test.XXXXXX)/etcd-scrapes fi kube::log::info "Periodically scraping etcd to ${ETCD_SCRAPE_DIR} ." 
mkdir -p "${ETCD_SCRAPE_DIR}" @@ -108,17 +108,21 @@ kube::etcd::start_scraping() { } kube::etcd::scrape() { - curl -s -S "${KUBE_INTEGRATION_ETCD_URL}/metrics" > "${ETCD_SCRAPE_DIR}/next" && mv "${ETCD_SCRAPE_DIR}/next" "${ETCD_SCRAPE_DIR}/$(date +%H%M%S).scrape" + curl -s -S "${KUBE_INTEGRATION_ETCD_URL}/metrics" > "${ETCD_SCRAPE_DIR}/next" && mv "${ETCD_SCRAPE_DIR}/next" "${ETCD_SCRAPE_DIR}/$(date +%s).scrape" } kube::etcd::stop() { - if [[ -n "${ETCD_SCRAPE_PID:-}" ]]; then + if [[ -n "${ETCD_SCRAPE_PID:-}" ]] && [[ -n "${ETCD_SCRAPE_DIR:-}" ]] ; then kill "${ETCD_SCRAPE_PID}" &>/dev/null || : wait "${ETCD_SCRAPE_PID}" &>/dev/null || : kube::etcd::scrape || : - # shellcheck disable=SC2015 - tar czf "${ETCD_SCRAPE_DIR}/scrapes.tgz" "${ETCD_SCRAPE_DIR}"/*.scrape && rm "${ETCD_SCRAPE_DIR}"/*.scrape || : + ( + # shellcheck disable=SC2015 + cd "${ETCD_SCRAPE_DIR}"/.. && \ + tar czf etcd-scrapes.tgz etcd-scrapes && \ + rm -rf etcd-scrapes || : + ) fi if [[ -n "${ETCD_PID-}" ]]; then kill "${ETCD_PID}" &>/dev/null || : diff --git a/hack/run-prometheus-on-etcd-scrapes.sh b/hack/run-prometheus-on-etcd-scrapes.sh new file mode 100755 index 00000000000..e1edd582864 --- /dev/null +++ b/hack/run-prometheus-on-etcd-scrapes.sh @@ -0,0 +1,98 @@ +#!/usr/bin/env bash + +# Copyright 2021 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Unpacks a tarfile of etcd scrapes and runs a simple web server exposing it +# and a Prometheus server scraping that simple web server. +# The simple web server listens on port 9091. +# The Prometheus server is run in a container and looks for the +# simple web server at the host's first global IPv4 address. + +# Usage: $0 scrapes_tar_pathname +# +# Where scrapes_tar_pathname is a gzipped tar archive containing +# files whose name is of the form +# .scrape +# where is seconds since Jan 1, 1970 UTC. +# Each such file is taken to be a scrape that lacks timestamps, +# and the timestamp from the filename is multiplied by the necessary 1000 +# and added to the data in that file. + +# This requires a: +# - `docker run` command +# - an `ip` or `ifconfig` command that this script knows how to wrangle +# - an `nc` command that serve-prom-scrapes.sh knows how to wrangle + +if (( $# != 1 )); then + echo "Usage: $0 \$scrapes_tar_pathname" >&2 + exit 1 +fi + +scrapes_file="$1" + +if ! 
[[ -r "$scrapes_file" ]]; then + echo "$0: $scrapes_file is not a readable file" >&2 + exit 2 +fi + +SCRIPT_ROOT=$(dirname "${BASH_SOURCE[0]}") + +CONFIG="/tmp/$(cd /tmp && mktemp config.XXXXXX)" +UNPACKDIR="/tmp/$(cd /tmp && mktemp -d unpack.XXXXXX)" +SERVER_PID="" + +cleanup_prom() { + rm -f "$CONFIG" + rm -rf "$UNPACKDIR" + if [[ -n "$SERVER_PID" ]]; then + kill "$SERVER_PID" + fi +} + +trap cleanup_prom EXIT + +chmod +r "$CONFIG" "$UNPACKDIR" + +tar xzf "$scrapes_file" -C "$UNPACKDIR" + +if which ip > /dev/null; then + IPADDR=$(ip addr show scope global up | + grep -w inet | head -1 | + awk '{ print $2 }' | awk -F/ '{ print $1 }') +else + IPADDR=$(ifconfig | grep -w inet | grep -Fv 127.0.0. | head -1 | + awk '{ print $2 }' | awk -F/ '{ print $1 }') +fi + +echo +echo "Historic metrics will be at http://\${any_local_address}:9091/\${any_path}" +echo "Prometheus will listen on port 9090 and scrape historic metrics from http://${IPADDR}:9091/metrics" +sleep 1 +echo + +cat > "$CONFIG" <.scrape +# where is seconds since Jan 1, 1970 UTC. +# Each such file is taken to be a scrape that lacks timestamps, +# and the timestamp from the filename is multiplied by the necessary 1000 +# and added to the data in that file. + +# This requires an `nc` comment that this script knows how to wrangle. + +if (( $# != 2 )); then + echo "Usage: $0 port_num scrapes_dir" >&2 + exit 1 +fi + +port_num="$1" +scrapes_dir="$2" +response_file="/tmp/$(cd /tmp && mktemp response.XXXXXX)" + +cleanup_serve() { + rm -rf "$response_file" +} + +trap cleanup_serve EXIT + +chmod +r "$response_file" + +transform() { + path="$1" + base="$(basename "$path")" + seconds="${base%.scrape}" + sed 's/^\([^#].*\)$/\1 '"${seconds}000/" "$path" +} + +find_and_transform() { + echo -n $'HTTP/1.0 200 OK\r\nContent-Type: text/plain\r\n\r\n' > "$response_file" + find "$scrapes_dir" -name "*.scrape" -print0 | sort -z | while read -d '' -r scrapename; do transform "$scrapename" >> "$response_file"; done +} + +find_and_transform + +if man nc | grep -wq -e -N +then dashen=-N +else dashen= +fi + +# shellcheck disable=SC2086 +while true; do nc -l $dashen 0.0.0.0 "$port_num" < "$response_file" > /dev/null; sleep 10; done