diff --git a/hack/jenkins/metadata-cache/README.md b/hack/jenkins/metadata-cache/README.md new file mode 100644 index 00000000000..3b3f89c86e3 --- /dev/null +++ b/hack/jenkins/metadata-cache/README.md @@ -0,0 +1,65 @@ +# Metadata server cache + +Simple utility to cache requests sent to the metadata server. + +The utility is composed of the following pieces: + +* An HTTP server which listens for metadata requests + * If it is a token request, it caches the token + - Automatically refreshes + - Returns correct expiration time for each request + * Otherwise it caches the response forever +* A script with commands to prepare the machine for the cache + * Installs necessary packages + * Starts/stops the cache + * Updates /etc/hosts to control the resolution of `metadata.google.internal` + - Resolves to the internal IP when cache is on. + - Resolves to the real metadata server on `169.254.169.254` when off. +* The script can also run commands without SSHing into the remote machine: + * Creates/deletes an instance + * Copies files to the instance + * Runs script commands on the instance + * Grabs diagnostic information + +## Instructions + +### Quick setup + +This is the ultimate lazy version, which does everything for you: + +```sh +# Create a new instance +hack/jenkins/metadata-cache/metadata-cache-control.sh remote_create $INSTANCE +# Update that instance +hack/jenkins/metadata-cache/metadata-cache-control.sh remote_update $INSTANCE +``` + + +### Detailed instructions + +Please get help from the command itself for the most up-to-date info: + +```sh +# Command list +hack/jenkins/metadata-cache/metadata-cache-control.sh help +# Remote command list +hack/jenkins/metadata-cache/metadata-cache-control.sh remote_help +``` + +### Debugging info + +```sh +# Run basic tests +hack/jenkins/metadata-cache/metadata-cache-control.sh remote_ssh $INSTANCE test +# Print the configuration +hack/jenkins/metadata-cache/metadata-cache-control.sh remote_ssh $INSTANCE cat +# Get logs 
+hack/jenkins/metadata-cache/metadata-cache-control.sh remote_logs $INSTANCE +# Connect to the instance +hack/jenkins/metadata-cache/metadata-cache-control.sh remote_ssh $INSTANCE connect +# Disable cache +hack/jenkins/metadata-cache/metadata-cache-control.sh remote_ssh $INSTANCE off +``` + + +[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/hack/jenkins/metadata-cache/README.md?pixel)]() diff --git a/hack/jenkins/metadata-cache/metadata-cache-control.sh b/hack/jenkins/metadata-cache/metadata-cache-control.sh new file mode 100755 index 00000000000..4158e3b971a --- /dev/null +++ b/hack/jenkins/metadata-cache/metadata-cache-control.sh @@ -0,0 +1,183 @@ +#!/bin/bash +# Copyright 2016 The Kubernetes Authors All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# See README.md for system usage instructions. + +set -o errexit +set -o nounset + +BASENAME="$(basename "${0}")" +DIRNAME="$(dirname "${0}")" + +fail() { + echo "${@}" + exit 1 +} + +usage() { + fail "Usage: ${BASENAME} " +} + +if [[ -z "${1:-}" ]]; then + usage +fi + +MGI='metadata.google.internal' +METADATA_SERVER="169.254.169.254" +TARGET="/etc/hosts" +IP_URL="http://${MGI}/computeMetadata/v1/instance/network-interfaces/0/ip" + +remote_ip() { + curl -s -f -H Metadata-Flavor:Google "${IP_URL}" +} + +drop_metadata_server() { + sed -i -e "/${MGI}/d" "${TARGET}" || fail "Could not drop metadata entries" +} + +print_metadata_cache() { + internal_ip="$(remote_ip)" + + if [[ ! 
"${internal_ip}" =~ 10(\.[0-9]{1,3}){3} ]]; then + fail "Could not find local 10. address at ${IP_URL}: ${internal_ip}" + fi + echo + echo "# Metadata server configuration" + echo "# Route most requests to the cache at 10." + echo "# However testOnGCE requires the real entry to exist." + for i in {1..10}; do + echo "${internal_ip} metadata.google.internal # Metadata cache" + done + echo "${METADATA_SERVER} metadata.google.internal # Real metadata server" +} + +configure_metadata_server() { + new_info="$(print_metadata_cache)" + echo "${new_info}" >> "${TARGET}" || fail "Could not add metadata entries" +} + +do_local() { + case $1 in + on) + echo -n "Adding metadata cache to configuration at ${TARGET}: " + drop_metadata_server + configure_metadata_server + echo "updated." + ;; + off) + echo -n "Removing metadata cache from configuration at ${TARGET}: " + drop_metadata_server + echo "removed." + ;; + stop) + echo -n "Stopping metadata-cache: " + pids="$(ps ax | grep 'python metadata-cache.py' | grep -v grep | grep -o -E '^[ 0-9]+' || echo)" + if [[ -z "${pids}" ]]; then + echo 'Not running' + elif [[ -n "${pids}" ]]; then + echo "Killing ${pids}" + kill ${pids} || fail "Could not kill ${pids}" + fi + echo "stopped" + ;; + start) + echo -n "Starting metadata-cache session: " + screen -d -m -S metadata-cache python metadata-cache.py + echo "started" + ps ax | grep metadata + ps ax | grep screen + ;; + connect) + echo "Connecting to metadata-cache session (press C-a a to detach, C-c to kill)..." + sleep 1 + screen -r -S metadata-cache + "${0}" test + ;; + bootstrap) + echo "Installing package prerequisites:" + apt-get install -y python-pip screen curl + pip install flask requests + ;; + cat) + cat /etc/hosts + ;; + test) + echo "Ping metadata server:" + ping -c 1 "${MGI}" + echo "Download local ip from metadata server:" + remote_ip || fail "Could not find internal ip address from metadata server." 
+ echo + ;; + update) + "${0}" bootstrap + "${0}" off + "${0}" stop + "${0}" start + "${0}" on + "${0}" test + ;; + *) + usage + ;; + esac +} + +do_remote() { + cmd="${1}" + instance="${2:-}" + if [[ -z "${instance}" ]]; then + cmd="" + fi + shift + shift + case "${cmd}" in + remote_create) + echo "Creating ${instance}" + gcloud compute instances create "${instance}" + ;; + remote_delete) + echo "Deleting ${instance}" + gcloud compute instances delete "${instance}" + ;; + remote_logs) + echo "Grabbing logs from ${instance}" + gcloud compute instances get-serial-port-output "${instance}" + ;; + remote_copy) + echo "Copy files to ${instance}" + gcloud compute copy-files "${DIRNAME}"/* "${instance}:/home/${USER}/" + ;; + remote_ssh) + echo "Running ${BASENAME} on ${instance}" + gcloud compute ssh "${instance}" -t -- sudo "/home/${USER}/${BASENAME}" "${@}" + ;; + remote_update) + "${0}" remote_copy "${instance}" + "${0}" remote_ssh "${instance}" update + ;; + *) + fail "Remote usage: ${BASENAME} remote_ [args ...]" + ;; + esac +} + +case "${1}" in + remote_*) + do_remote "$@" + ;; + *) + do_local "$@" + ;; +esac diff --git a/hack/jenkins/metadata-cache/metadata-cache.py b/hack/jenkins/metadata-cache/metadata-cache.py new file mode 100644 index 00000000000..9b553bd1220 --- /dev/null +++ b/hack/jenkins/metadata-cache/metadata-cache.py @@ -0,0 +1,171 @@ +#!/usr/bin/env python + +# Copyright 2016 The Kubernetes Authors All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.

"""Caches requests to the GCE metadata server.

Reduces load on metadata server to once for most requests and once per
~10m for access tokens.

See README.md for instructions for the whole system.

Usage:
  screen python metadata-cache.py
"""
import collections
import json
import logging
import logging.handlers
import socket
import threading
import time

import flask
import requests

app = flask.Flask(__name__)

LOCK = threading.Lock()
SESSION = requests
URL = 'http://169.254.169.254/computeMetadata/v1/instance/service-accounts/default/token'
# Bound at import time so the request handlers can log even when this file is
# loaded as a module; setup_logger() attaches handlers to this same logger.
logger = logging.getLogger('metadata-cache')
_cached_tokens = collections.defaultdict(dict)
_global_cache = {}

# Upstream headers that describe the wire encoding of the upstream body.
# requests has already decoded the body by the time we read resp.content, so
# forwarding these would describe bytes we are not actually sending.
_HEADERS_NOT_FORWARDED = frozenset([
    'connection',
    'content-encoding',
    'content-length',
    'transfer-encoding',
])


def _redacted(token):
    """Return a copy of token that is safe to log (access_token masked)."""
    if not isinstance(token, dict):
        return token
    safe = dict(token)
    if 'access_token' in safe:
        safe['access_token'] = '<redacted>'
    return safe


def fetch_token():
    """Fetch a new token from the metadata server, retrying any errors.

    Loops until a well-formed bearer token is received, sleeping one second
    between attempts.

    Returns:
      A (seconds, data) tuple: the time.time() value captured just before the
      successful request was sent, and the decoded JSON token dict.
    """
    first_attempt = True
    while True:
        if not first_attempt:
            time.sleep(1)
        first_attempt = False
        seconds = time.time()
        try:
            logger.info('GET: %s', URL)
            resp = SESSION.get(
                URL,
                headers={
                    'Metadata-Flavor': 'Google',
                    'Host': 'metadata.google.internal',
                },
                allow_redirects=False,
            )
            logger.info('GET: %d %s', resp.status_code, URL)
            resp.raise_for_status()
        except IOError:
            logger.exception('Error reading response from metadata server')
            continue
        try:
            content = resp.content
        except IOError:
            logger.exception('Error reading response')
            continue
        try:
            # UnicodeDecodeError is a ValueError, so undecodable bytes are
            # handled by the same branch as malformed JSON.
            data = json.loads(content.decode('utf-8'))
        except ValueError:
            logger.exception(
                'Could not decode response: %s',
                content.decode('utf-8', errors='ignore'))
            continue
        if not isinstance(data, dict) or data.get('token_type') != 'Bearer':
            logger.error(
                'Not a bearer token: %s',
                json.dumps(_redacted(data), indent=1))
            continue
        if not isinstance(data.get('expires_in'), (int, float)):
            # cached_token() needs expires_in to compute the expiration.
            logger.error(
                'Token has no usable expires_in: %s',
                json.dumps(_redacted(data), indent=1))
            continue
        return seconds, data


def cached_token(uri):
    """Return the access token, adjusting expires_in and potentially fetching.

    A token is refreshed once it is within 10 seconds of its expiration. The
    returned JSON string omits the internal 'expiration' key and reports
    expires_in relative to the current time.

    NOTE(review): tokens are cached per uri, but fetch_token() always reads
    the default service account URL -- confirm that is intended.
    """
    # Work on a local snapshot and publish new tokens with a single rebind so
    # a concurrent reader never observes a half-updated dict.
    entry = _cached_tokens[uri]
    while time.time() + 10 > entry.get('expiration', 0):
        logger.info('Refreshing expired token: %s', str(_redacted(entry)))
        seconds, token = fetch_token()
        logger.info('New token: %s', json.dumps(_redacted(token), indent=1))
        token['expiration'] = seconds + token['expires_in']
        _cached_tokens[uri] = entry = token
    this_token = {k: v for (k, v) in entry.items() if k != 'expiration'}
    this_token['expires_in'] = int(entry['expiration'] - time.time())
    return json.dumps(this_token)


def cache_request(uri):
    """Return the response for uri, caching successful responses forever.

    Failed upstream responses are returned to the caller but not cached, so a
    later request retries the metadata server.
    """
    if uri not in _global_cache:
        with LOCK:
            # Re-check under the lock: another request may have filled it.
            if uri not in _global_cache:
                r2, ok = proxy_request(uri)
                if not ok:
                    logger.warning('Request failed: %s %s', uri, r2)
                    return r2
                _global_cache[uri] = r2
    return _global_cache[uri]


def proxy_request(uri):
    """Proxy a request to uri using a connection to 169.254.169.254.

    Returns:
      A (response, ok) tuple: the flask response mirroring the upstream
      status, body and headers, and the upstream resp.ok flag.

    NOTE(review): only the path is forwarded; any query string on the
    incoming request is dropped -- confirm that is intended.
    """
    logger.info('GET: %s', uri)
    headers = dict(flask.request.headers)
    headers['Host'] = 'metadata.google.internal'
    resp = SESSION.get(
        'http://169.254.169.254/%s' % uri,
        headers=headers,
        allow_redirects=False,
    )
    logger.info('GET: %d %s', resp.status_code, uri)
    r2 = flask.make_response(resp.content, resp.status_code)
    for k, v in resp.headers.items():
        if k.lower() in _HEADERS_NOT_FORWARDED:
            continue
        r2.headers.set(k, v)
    return r2, resp.ok


@app.route('/')
def get_root_response():
    """Return the cached response for the metadata server root."""
    return cache_request('')


@app.route('/<path:uri>')
def get_path_response(uri):
    """Return the response for any non-root path.

    Paths ending in '/token' are served from the refreshing token cache;
    everything else is served from the permanent response cache.
    """
    if uri.endswith('/token'):
        return cached_token(uri)
    return cache_request(uri)


def listen_address():
    """Return the ip address to bind, which should be an internal one.

    Raises:
      ValueError: if this host's name does not resolve to a 10.x address.
    """
    ip = socket.gethostbyname(socket.gethostname())
    if not ip.startswith('10.'):
        raise ValueError('Not a private ip', ip)
    return ip


def setup_logger():
    """Configure to log everything to the screen and /var/log/syslog.

    Returns:
      The 'metadata-cache' logger with a syslog and a stream handler attached.
    """
    logs = logging.getLogger('metadata-cache')
    logs.setLevel(logging.DEBUG)
    formatter = logging.Formatter('metadata-cache: %(levelname)s %(message)s')
    syslog_handler = logging.handlers.SysLogHandler(
        address='/dev/log',
        facility=logging.handlers.SysLogHandler.LOG_SYSLOG)
    stream_handler = logging.StreamHandler()
    for handler in (syslog_handler, stream_handler):
        handler.setFormatter(formatter)
        handler.setLevel(logging.DEBUG)
        logs.addHandler(handler)
    return logs


if __name__ == '__main__':
    logger = setup_logger()
    app.run(host=listen_address(), port=80)