diff --git a/cluster/gce/gci/configure-helper.sh b/cluster/gce/gci/configure-helper.sh index fdfe68be13c..7d8e8cb530d 100644 --- a/cluster/gce/gci/configure-helper.sh +++ b/cluster/gce/gci/configure-helper.sh @@ -2310,19 +2310,34 @@ function download-extra-addons { } # A function that fetches a GCE metadata value and echoes it out. +# Args: +# $1 : URL path after /computeMetadata/v1/ (without heading slash). +# $2 : An optional default value to echo out if the fetch fails. # -# $1: URL path after /computeMetadata/v1/ (without heading slash). +# NOTE: this function is duplicated in configure.sh, any changes here should be +# duplicated there as well. function get-metadata-value { - # We do not want quotes for CURL_RETRY_CONNREFUSED - # shellcheck disable=SC2086 - curl \ - --retry 5 \ - --retry-delay 3 \ - ${CURL_RETRY_CONNREFUSED} \ - --fail \ - --silent \ - -H 'Metadata-Flavor: Google' \ - "http://metadata/computeMetadata/v1/${1}" + local default="${2:-}" + + local status + # We do not want quotes for CURL_RETRY_CONNREFUSED + # shellcheck disable=SC2086 + curl \ + --retry 5 \ + --retry-delay 3 \ + ${CURL_RETRY_CONNREFUSED} \ + --fail \ + --silent \ + -H 'Metadata-Flavor: Google' \ + "http://metadata/computeMetadata/v1/${1}" \ + || status="$?" + status="${status:-0}" + + if [[ "${status}" -eq 0 || -z "${default}" ]]; then + return "${status}" + else + echo "${default}" + fi } # A helper function for copying manifests and setting dir/files @@ -3098,10 +3113,164 @@ function detect_host_info() { esac } +# Initializes variables used by the log-* functions. +# +# get-metadata-value must be defined before calling this function. +# +# NOTE: this function is duplicated in configure.sh, any changes here should be +# duplicated there as well. +function log-init { + # CURL_RETRY_CONNREFUSED needs to be defined before calling get-metadata-value. + # Use --retry-connrefused opt only if it's supported by curl. + CURL_RETRY_CONNREFUSED="" + if curl --help | grep -q -- '--retry-connrefused'; then + CURL_RETRY_CONNREFUSED='--retry-connrefused' + fi + + # Used by log-* functions. + LOG_CLUSTER_ID=$(get-metadata-value 'instance/attributes/cluster-uid' 'get-metadata-value-error') + LOG_INSTANCE_NAME=$(hostname) + LOG_BOOT_ID=$(journalctl --list-boots | grep -E '^ *0' | awk '{print $2}') + declare -Ag LOG_START_TIMES + declare -ag LOG_TRAP_STACK + + LOG_STATUS_STARTED='STARTED' + LOG_STATUS_COMPLETED='COMPLETED' + LOG_STATUS_ERROR='ERROR' +} + +# Sets an EXIT trap. +# Args: +# $1:... : the trap command. +# +# NOTE: this function is duplicated in configure.sh, any changes here should be +# duplicated there as well. +function log-trap-push { + local t="${*:1}" + LOG_TRAP_STACK+=("${t}") + # shellcheck disable=2064 + trap "${t}" EXIT +} + +# Removes and restores an EXIT trap. +# +# NOTE: this function is duplicated in configure.sh, any changes here should be +# duplicated there as well. +function log-trap-pop { + # Remove current trap. + unset 'LOG_TRAP_STACK[-1]' + + # Restore previous trap. + if [ ${#LOG_TRAP_STACK[@]} -ne 0 ]; then + local t="${LOG_TRAP_STACK[-1]}" + # shellcheck disable=2064 + trap "${t}" EXIT + else + # If no traps in stack, clear. + trap EXIT + fi +} + +# Logs the end of a bootstrap step that errored. +# Args: +# $1 : bootstrap step name. +# +# NOTE: this function is duplicated in configure.sh, any changes here should be +# duplicated there as well. +function log-error { + local bootstep="$1" + + log-proto "${bootstep}" "${LOG_STATUS_ERROR}" "error calling '${BASH_COMMAND}'" +} + +# Wraps a command with bootstrap logging. +# Args: +# $1 : bootstrap step name. +# $2... : the command to run. +# +# NOTE: this function is duplicated in configure.sh, any changes here should be +# duplicated there as well. +function log-wrap { + local bootstep="$1" + local command="${*:2}" + + log-trap-push "log-error ${bootstep}" + log-proto "${bootstep}" "${LOG_STATUS_STARTED}" + $command + log-proto "${bootstep}" "${LOG_STATUS_COMPLETED}" + log-trap-pop +} + +# Logs a bootstrap step start. Prefer log-wrap. +# Args: +# $1 : bootstrap step name. +# +# NOTE: this function is duplicated in configure.sh, any changes here should be +# duplicated there as well. +function log-start { + local bootstep="$1" + + log-trap-push "log-error ${bootstep}" + log-proto "${bootstep}" "${LOG_STATUS_STARTED}" +} + +# Logs a bootstrap step end. Prefer log-wrap. +# Args: +# $1 : bootstrap step name. +# +# NOTE: this function is duplicated in configure.sh, any changes here should be +# duplicated there as well. +function log-end { + local bootstep="$1" + + log-proto "${bootstep}" "${LOG_STATUS_COMPLETED}" + log-trap-pop +} + +# Writes a log proto to stdout. +# Args: +# $1: bootstrap step name. +# $2: status. Either 'STARTED', 'COMPLETED', or 'ERROR'. +# $3: optional status reason. +# +# NOTE: this function is duplicated in configure.sh, any changes here should be +# duplicated there as well. +function log-proto { + local bootstep="$1" + local status="$2" + local status_reason="${3:-}" + + # Get current time. + local current_time + current_time="$(date --utc '+%s.%N')" + # ...formatted as UTC RFC 3339. + local timestamp + timestamp="$(date --utc --date="@${current_time}" '+%FT%T.%NZ')" + + # Calculate latency. + local latency='null' + if [ "${status}" == "${LOG_STATUS_STARTED}" ]; then + LOG_START_TIMES["${bootstep}"]="${current_time}" + else + local start_time="${LOG_START_TIMES["${bootstep}"]}" + unset 'LOG_START_TIMES['"${bootstep}"']' + + # Bash cannot do non-integer math, shell out to awk. + latency="$(echo "${current_time} ${start_time}" | awk '{print $1 - $2}')s" + + # The default latency is null which cannot be wrapped as a string so we must + # do it here instead of the printf. + latency="\"${latency}\"" + fi + + printf '[cloud.kubernetes.monitoring.proto.SerialportLog] {"cluster_hash":"%s","vm_instance_name":"%s","boot_id":"%s","timestamp":"%s","bootstrap_status":{"step_name":"%s","status":"%s","status_reason":"%s","latency":%s}}\n' \ + "${LOG_CLUSTER_ID}" "${LOG_INSTANCE_NAME}" "${LOG_BOOT_ID}" "${timestamp}" "${bootstep}" "${status}" "${status_reason}" "${latency}" +} + ########### Main Function ########### function main() { echo "Start to configure instance for kubernetes" - detect_host_info + log-wrap 'DetectHostInfo' detect_host_info readonly UUID_MNT_PREFIX="/mnt/disks/by-uuid/google-local-ssds" readonly UUID_BLOCK_PREFIX="/dev/disk/by-uuid/google-local-ssds" @@ -3112,17 +3281,12 @@ function main() { KUBE_CONTROLLER_MANAGER_CPU_REQUEST="${KUBE_CONTROLLER_MANAGER_CPU_REQUEST:-200m}" KUBE_SCHEDULER_CPU_REQUEST="${KUBE_SCHEDULER_CPU_REQUEST:-75m}" - # Use --retry-connrefused opt only if it's supported by curl. - CURL_RETRY_CONNREFUSED="" - if curl --help | grep -q -- '--retry-connrefused'; then - CURL_RETRY_CONNREFUSED='--retry-connrefused' - fi - KUBE_HOME="/home/kubernetes" KUBE_BIN=${KUBE_HOME}/bin CONTAINERIZED_MOUNTER_HOME="${KUBE_HOME}/containerized_mounter" PV_RECYCLER_OVERRIDE_TEMPLATE="${KUBE_HOME}/kube-manifests/kubernetes/pv-recycler-template.yaml" + log-start 'SetPythonVersion' if [[ "$(python -V 2>&1)" =~ "Python 2" ]]; then # found python2, just use that PYTHON="python" @@ -3138,13 +3302,15 @@ function main() { fi fi echo "Version : $(${PYTHON} -V 2>&1)" + log-end 'SetPythonVersion' + log-start 'SourceKubeEnv' if [[ ! -e "${KUBE_HOME}/kube-env" ]]; then echo "The ${KUBE_HOME}/kube-env file does not exist!! Terminate cluster initialization." exit 1 fi - source "${KUBE_HOME}/kube-env" + log-end 'SourceKubeEnv' if [[ -f "${KUBE_HOME}/kubelet-config.yaml" ]]; then echo "Found Kubelet config file at ${KUBE_HOME}/kubelet-config.yaml" @@ -3152,16 +3318,19 @@ function main() { fi if [[ -e "${KUBE_HOME}/kube-master-certs" ]]; then - source "${KUBE_HOME}/kube-master-certs" + log-wrap 'SourceKubeMasterCerts' source "${KUBE_HOME}/kube-master-certs" fi + log-start 'VerifyKubeUser' if [[ -n "${KUBE_USER:-}" ]]; then if ! [[ "${KUBE_USER}" =~ ^[-._@a-zA-Z0-9]+$ ]]; then echo "Bad KUBE_USER format." exit 1 fi fi + log-end 'VerifyKubeUser' + log-start 'GenerateTokens' KUBE_CONTROLLER_MANAGER_TOKEN="$(secure_random 32)" KUBE_SCHEDULER_TOKEN="$(secure_random 32)" KUBE_CLUSTER_AUTOSCALER_TOKEN="$(secure_random 32)" @@ -3178,35 +3347,36 @@ function main() { if [[ "${ENABLE_MONITORING_TOKEN:-false}" == "true" ]]; then MONITORING_TOKEN="$(secure_random 32)" fi + log-end 'GenerateTokens' - setup-os-params - config-ip-firewall - create-dirs - ensure-local-ssds - setup-kubelet-dir - setup-logrotate + log-wrap 'SetupOSParams' setup-os-params + log-wrap 'ConfigIPFirewall' config-ip-firewall + log-wrap 'CreateDirs' create-dirs + log-wrap 'EnsureLocalSSDs' ensure-local-ssds + log-wrap 'SetupKubeletDir' setup-kubelet-dir + log-wrap 'SetupLogrotate' setup-logrotate if [[ "${KUBERNETES_MASTER:-}" == "true" ]]; then - mount-master-pd - create-node-pki - create-master-pki - create-master-auth - ensure-master-bootstrap-kubectl-auth - create-master-kubelet-auth - create-master-etcd-auth - create-master-etcd-apiserver-auth - override-pv-recycler - gke-master-start + log-wrap 'MountMasterPD' mount-master-pd + log-wrap 'CreateNodePKI' create-node-pki + log-wrap 'CreateMasterPKI' create-master-pki + log-wrap 'CreateMasterAuth' create-master-auth + log-wrap 'EnsureMasterBootstrapKubectlAuth' ensure-master-bootstrap-kubectl-auth + log-wrap 'CreateMasterKubeletAuth' create-master-kubelet-auth + log-wrap 'CreateMasterEtcdAuth' create-master-etcd-auth + log-wrap 'CreateMasterEtcdApiserverAuth' create-master-etcd-apiserver-auth + log-wrap 'OverridePVRecycler' override-pv-recycler + log-wrap 'GKEMasterStart' gke-master-start else - create-node-pki - create-kubelet-kubeconfig "${KUBERNETES_MASTER_NAME}" + log-wrap 'CreateNodePKI' create-node-pki + log-wrap 'CreateKubeletKubeconfig' create-kubelet-kubeconfig "${KUBERNETES_MASTER_NAME}" if [[ "${KUBE_PROXY_DAEMONSET:-}" != "true" ]] && [[ "${KUBE_PROXY_DISABLE:-}" != "true" ]]; then - create-kubeproxy-user-kubeconfig + log-wrap 'CreateKubeproxyUserKubeconfig' create-kubeproxy-user-kubeconfig fi if [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "standalone" ]]; then if [[ -n "${NODE_PROBLEM_DETECTOR_TOKEN:-}" ]]; then - create-node-problem-detector-kubeconfig "${KUBERNETES_MASTER_NAME}" + log-wrap 'CreateNodeProblemDetectorKubeconfig' create-node-problem-detector-kubeconfig "${KUBERNETES_MASTER_NAME}" elif [[ -f "/var/lib/kubelet/kubeconfig" ]]; then - create-node-problem-detector-kubeconfig-from-kubelet + log-wrap 'CreateNodeProblemDetectorKubeconfigFromKubelet' create-node-problem-detector-kubeconfig-from-kubelet else echo "Either NODE_PROBLEM_DETECTOR_TOKEN or /var/lib/kubelet/kubeconfig must be set" exit 1 @@ -3214,60 +3384,71 @@ function main() { fi fi - override-kubectl + log-wrap 'OverrideKubectl' override-kubectl container_runtime="${CONTAINER_RUNTIME:-docker}" # Run the containerized mounter once to pre-cache the container image. if [[ "${container_runtime}" == "docker" ]]; then - assemble-docker-flags + log-wrap 'AssembleDockerFlags' assemble-docker-flags elif [[ "${container_runtime}" == "containerd" ]]; then if docker-installed; then # We still need to configure docker so it wouldn't reserver the 172.17.0/16 subnet # And if somebody will start docker to build or pull something, logging will also be set up - assemble-docker-flags + log-wrap 'AssembleDockerFlags' assemble-docker-flags # stop docker if it is present as we want to use just containerd - systemctl stop docker || echo "unable to stop docker" + log-wrap 'StopDocker' systemctl stop docker || echo "unable to stop docker" fi - setup-containerd + log-wrap 'SetupContainerd' setup-containerd fi + log-start 'SetupKubePodLogReadersGroupDir' if [[ -n "${KUBE_POD_LOG_READERS_GROUP:-}" ]]; then mkdir -p /var/log/pods/ chgrp -R "${KUBE_POD_LOG_READERS_GROUP:-}" /var/log/pods/ chmod -R g+s /var/log/pods/ fi + log-end 'SetupKubePodLogReadersGroupDir' - start-kubelet + log-wrap 'StartKubelet' start-kubelet if [[ "${KUBERNETES_MASTER:-}" == "true" ]]; then - compute-master-manifest-variables + log-wrap 'ComputeMasterManifestVariables' compute-master-manifest-variables if [[ -z "${ETCD_SERVERS:-}" ]]; then - start-etcd-servers + log-wrap 'StartEtcdServers' start-etcd-servers fi - source ${KUBE_BIN}/configure-kubeapiserver.sh - start-kube-apiserver + log-wrap 'SourceConfigureKubeApiserver' source ${KUBE_BIN}/configure-kubeapiserver.sh + log-wrap 'StartKubeApiserver' start-kube-apiserver if [[ "${RUN_KONNECTIVITY_PODS:-false}" == "true" ]]; then - start-konnectivity-server + log-wrap 'StartKonnectivityServer' start-konnectivity-server fi - start-kube-controller-manager - start-kube-scheduler - wait-till-apiserver-ready - start-kube-addons - start-cluster-autoscaler - start-lb-controller - update-legacy-addon-node-labels & + log-wrap 'StartKubeControllerManager' start-kube-controller-manager + log-wrap 'StartKubeScheduler' start-kube-scheduler + log-wrap 'WaitTillApiserverReady' wait-till-apiserver-ready + log-wrap 'StartKubeAddons' start-kube-addons + log-wrap 'StartClusterAutoscaler' start-cluster-autoscaler + log-wrap 'StartLBController' start-lb-controller + log-wrap 'UpdateLegacyAddonNodeLabels' update-legacy-addon-node-labels & else if [[ "${KUBE_PROXY_DAEMONSET:-}" != "true" ]] && [[ "${KUBE_PROXY_DISABLE:-}" != "true" ]]; then - start-kube-proxy + log-wrap 'StartKubeProxy' start-kube-proxy fi if [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "standalone" ]]; then - start-node-problem-detector + log-wrap 'StartNodeProblemDetector' start-node-problem-detector fi fi - reset-motd - prepare-mounter-rootfs + log-wrap 'ResetMotd' reset-motd + log-wrap 'PrepareMounterRootfs' prepare-mounter-rootfs + + # Wait for all background jobs to finish. + wait echo "Done for the configuration for kubernetes" } if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then - main "${@}" + log-init + log-wrap 'ConfigureHelperMain' main "${@}" + + if [[ "${KUBERNETES_MASTER:-}" == "true" ]]; then + # Give kube-bootstrap-logs-forwarder.service some time to write all logs. + sleep 3 + fi fi diff --git a/cluster/gce/gci/configure.sh b/cluster/gce/gci/configure.sh index 049075e9abc..8ddac716d39 100644 --- a/cluster/gce/gci/configure.sh +++ b/cluster/gce/gci/configure.sh @@ -33,12 +33,6 @@ DEFAULT_CRICTL_HASH='e4fb9822cb5f71ab8f85021c66170613aae972f4b32030e42868fb36a3b DEFAULT_MOUNTER_TAR_SHA='7956fd42523de6b3107ddc3ce0e75233d2fcb78436ff07a1389b6eaac91fb2b1b72a08f7a219eaf96ba1ca4da8d45271002e0d60e0644e796c665f99bb356516' ### -# Use --retry-connrefused opt only if it's supported by curl. -CURL_RETRY_CONNREFUSED="" -if curl --help | grep -q -- '--retry-connrefused'; then - CURL_RETRY_CONNREFUSED='--retry-connrefused' -fi - function set-broken-motd { cat > /etc/motd <&1)" =~ "Python 2" ]]; then # found python2, just use that PYTHON="python" @@ -692,22 +894,24 @@ else fi fi echo "Version : $(${PYTHON} -V 2>&1)" +log-end 'SetPythonVersion' # download and source kube-env -download-kube-env -source "${KUBE_HOME}/kube-env" +log-wrap 'DownloadKubeEnv' retry-forever 30 download-kube-env +log-wrap 'SourceKubeEnv' source "${KUBE_HOME}/kube-env" -download-kubelet-config "${KUBE_HOME}/kubelet-config.yaml" +log-wrap 'DownloadKubeletConfig' retry-forever 10 download-kubelet-config "${KUBE_HOME}/kubelet-config.yaml" # master certs if [[ "${KUBERNETES_MASTER:-}" == "true" ]]; then - download-kube-master-certs + log-wrap 'DownloadKubeMasterCerts' retry-forever 10 download-kube-master-certs fi # ensure chosen container runtime is present -ensure-container-runtime +log-wrap 'EnsureContainerRuntime' ensure-container-runtime # binaries and kube-system manifests -install-kube-binary-config +log-wrap 'InstallKubeBinaryConfig' install-kube-binary-config echo "Done for installing kubernetes files" +log-end 'ConfigureMain' diff --git a/cluster/gce/gci/master.yaml b/cluster/gce/gci/master.yaml index a178e42c247..55523766d05 100644 --- a/cluster/gce/gci/master.yaml +++ b/cluster/gce/gci/master.yaml @@ -6,7 +6,31 @@ users: lock_passwd: true ssh_redirect_user: true +- name: kube-bootstrap-logs-forwarder + gecos: User the kube-bootstrap-logs-forwarder.service runs as. + system: true + sudo: false + write_files: + - path: /etc/systemd/system/kube-bootstrap-logs-forwarder.service + permissions: 0644 + owner: root + content: | + [Unit] + Description=Forwards Kubernetes bootstrap logs to serial port. + Before=kube-master-installation.service + + [Service] + User=kube-bootstrap-logs-forwarder + Group=systemd-journal + SupplementaryGroups=serial + ExecStart=journalctl --no-tail --no-pager --follow --utc --output short-iso --unit kube-master-installation --unit kube-master-configuration --unit kubelet + StandardOutput=tty + TTYPath=/dev/ttyS2 + + [Install] + WantedBy=kubernetes.target + - path: /etc/systemd/system/kube-master-installation.service permissions: 0644 owner: root @@ -60,6 +84,7 @@ write_files: RemainAfterExit=yes ExecStartPre=/bin/chmod 544 /home/kubernetes/bin/configure-helper.sh ExecStart=/home/kubernetes/bin/configure-helper.sh + ExecStartPost=systemctl stop kube-bootstrap-logs-forwarder.service [Install] WantedBy=kubernetes.target @@ -142,6 +167,7 @@ write_files: runcmd: - systemctl daemon-reload + - systemctl enable kube-bootstrap-logs-forwarder.service - systemctl enable kube-master-installation.service - systemctl enable kube-master-internal-route.service - systemctl enable kube-master-configuration.service