From d5c2cdcbbbd16bafeb7477fe2f33663e6789e534 Mon Sep 17 00:00:00 2001 From: Zhen Wang Date: Thu, 22 Mar 2018 12:02:55 -0700 Subject: [PATCH] Update GCP fluentd configmap for GKE node journal logging --- .../fluentd-gcp/fluentd-gcp-configmap.yaml | 19 +++++++++++++++++++ cluster/gce/config-default.sh | 3 +++ cluster/gce/config-test.sh | 3 +++ cluster/gce/gci/configure-helper.sh | 10 ++++++++++ cluster/gce/util.sh | 7 ++++--- 5 files changed, 39 insertions(+), 3 deletions(-) diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-configmap.yaml b/cluster/addons/fluentd-gcp/fluentd-gcp-configmap.yaml index c9c47692558..115011c0fa0 100644 --- a/cluster/addons/fluentd-gcp/fluentd-gcp-configmap.yaml +++ b/cluster/addons/fluentd-gcp/fluentd-gcp-configmap.yaml @@ -282,6 +282,25 @@ data: read_from_head true tag node-problem-detector + + # BEGIN_NODE_JOURNAL + # Whether to include node-journal or not is determined when starting the + # cluster. It is not changed when the cluster is already running. + + @type systemd + pos_file /var/log/gcp-journald.pos + read_from_head true + tag node-journal + + + + @type grep + + key _SYSTEMD_UNIT + pattern ^(docker|{{ container_runtime }}|kubelet|node-problem-detector)\.service$ + + + # END_NODE_JOURNAL monitoring.conf: |- # This source is used to acquire approximate process start timestamp, # which purpose is explained before the corresponding output plugin. diff --git a/cluster/gce/config-default.sh b/cluster/gce/config-default.sh index c10c38bad5e..b7d180444cf 100755 --- a/cluster/gce/config-default.sh +++ b/cluster/gce/config-default.sh @@ -376,6 +376,9 @@ HEAPSTER_GCP_CPU_PER_NODE="${HEAPSTER_GCP_CPU_PER_NODE:-0.5}" # Adding to PROVIDER_VARS, since this is GCP-specific. PROVIDER_VARS="${PROVIDER_VARS:-} FLUENTD_GCP_VERSION FLUENTD_GCP_MEMORY_LIMIT FLUENTD_GCP_CPU_REQUEST FLUENTD_GCP_MEMORY_REQUEST HEAPSTER_GCP_BASE_MEMORY HEAPSTER_GCP_MEMORY_PER_NODE HEAPSTER_GCP_BASE_CPU HEAPSTER_GCP_CPU_PER_NODE CUSTOM_KUBE_DASHBOARD_BANNER" +# Fluentd configuration for node-journal +ENABLE_NODE_JOURNAL="${ENABLE_NODE_JOURNAL:-false}" + # prometheus-to-sd configuration PROMETHEUS_TO_SD_ENDPOINT="${PROMETHEUS_TO_SD_ENDPOINT:-https://monitoring.googleapis.com/}" PROMETHEUS_TO_SD_PREFIX="${PROMETHEUS_TO_SD_PREFIX:-custom.googleapis.com}" diff --git a/cluster/gce/config-test.sh b/cluster/gce/config-test.sh index 5b60f560cdb..4b3bd08538e 100755 --- a/cluster/gce/config-test.sh +++ b/cluster/gce/config-test.sh @@ -407,6 +407,9 @@ HEAPSTER_GCP_CPU_PER_NODE="${HEAPSTER_GCP_CPU_PER_NODE:-0.5}" # Adding to PROVIDER_VARS, since this is GCP-specific. PROVIDER_VARS="${PROVIDER_VARS:-} FLUENTD_GCP_VERSION FLUENTD_GCP_MEMORY_LIMIT FLUENTD_GCP_CPU_REQUEST FLUENTD_GCP_MEMORY_REQUEST HEAPSTER_GCP_BASE_MEMORY HEAPSTER_GCP_MEMORY_PER_NODE HEAPSTER_GCP_BASE_CPU HEAPSTER_GCP_CPU_PER_NODE CUSTOM_KUBE_DASHBOARD_BANNER" +# Fluentd configuration for node-journal +ENABLE_NODE_JOURNAL="${ENABLE_NODE_JOURNAL:-false}" + # prometheus-to-sd configuration PROMETHEUS_TO_SD_ENDPOINT="${PROMETHEUS_TO_SD_ENDPOINT:-https://monitoring.googleapis.com/}" PROMETHEUS_TO_SD_PREFIX="${PROMETHEUS_TO_SD_PREFIX:-custom.googleapis.com}" diff --git a/cluster/gce/gci/configure-helper.sh b/cluster/gce/gci/configure-helper.sh index e22392ff021..9bfc6054180 100644 --- a/cluster/gce/gci/configure-helper.sh +++ b/cluster/gce/gci/configure-helper.sh @@ -2008,6 +2008,15 @@ function update-container-runtime { sed -i -e "s@{{ *container_runtime *}}@${CONTAINER_RUNTIME_NAME:-docker}@g" "${configmap_yaml}" } +# Remove configuration in yaml file if node journal is not enabled. +function update-node-journal { + local -r configmap_yaml="$1" + if [[ "${ENABLE_NODE_JOURNAL:-}" != "true" ]]; then + # Removes all lines between two patterns (throws away node-journal) + sed -i -e "/# BEGIN_NODE_JOURNAL/,/# END_NODE_JOURNAL/d" "${configmap_yaml}" + fi +} + # Updates parameters in yaml file for prometheus-to-sd configuration, or # removes component if it is disabled. function update-prometheus-to-sd-parameters { @@ -2187,6 +2196,7 @@ EOF update-prometheus-to-sd-parameters ${fluentd_gcp_yaml} start-fluentd-resource-update ${fluentd_gcp_yaml} update-container-runtime ${fluentd_gcp_configmap_yaml} + update-node-journal ${fluentd_gcp_configmap_yaml} fi if [[ "${ENABLE_CLUSTER_UI:-}" == "true" ]]; then setup-addon-manifests "addons" "dashboard" diff --git a/cluster/gce/util.sh b/cluster/gce/util.sh index 21b7a236905..5ad79f48939 100755 --- a/cluster/gce/util.sh +++ b/cluster/gce/util.sh @@ -732,6 +732,7 @@ ADVANCED_AUDIT_WEBHOOK_THROTTLE_BURST: $(yaml-quote ${ADVANCED_AUDIT_WEBHOOK_THR ADVANCED_AUDIT_WEBHOOK_INITIAL_BACKOFF: $(yaml-quote ${ADVANCED_AUDIT_WEBHOOK_INITIAL_BACKOFF:-}) GCE_API_ENDPOINT: $(yaml-quote ${GCE_API_ENDPOINT:-}) GCE_GLBC_IMAGE: $(yaml-quote ${GCE_GLBC_IMAGE:-}) +ENABLE_NODE_JOURNAL: $(yaml-quote ${ENABLE_NODE_JOURNAL:-false}) PROMETHEUS_TO_SD_ENDPOINT: $(yaml-quote ${PROMETHEUS_TO_SD_ENDPOINT:-}) PROMETHEUS_TO_SD_PREFIX: $(yaml-quote ${PROMETHEUS_TO_SD_PREFIX:-}) ENABLE_PROMETHEUS_TO_SD: $(yaml-quote ${ENABLE_PROMETHEUS_TO_SD:-false}) @@ -1400,7 +1401,7 @@ function get-template-name-from-version() { echo "${NODE_INSTANCE_PREFIX}-template-${1}" | cut -c 1-63 | sed 's/[\.\+]/-/g;s/-*$//g' } -# validates the NODE_LOCAL_SSDS_EXT variable +# validates the NODE_LOCAL_SSDS_EXT variable function validate-node-local-ssds-ext(){ ssdopts="${1}" @@ -1476,7 +1477,7 @@ function create-node-template() { done done fi - + if [[ ! -z ${NODE_LOCAL_SSDS+x} ]]; then # The NODE_LOCAL_SSDS check below fixes issue #49171 # Some versions of seq will count down from 1 if "seq 0" is specified @@ -1486,7 +1487,7 @@ function create-node-template() { done fi fi - + local network=$(make-gcloud-network-argument \ "${NETWORK_PROJECT}" \