From 73916e5a9b08f0d0656af66c0405b7059ba6c6da Mon Sep 17 00:00:00 2001
From: Barni S
Date: Mon, 26 Oct 2020 22:40:46 -0400
Subject: [PATCH] If image has stackdriver agent installed, use it.

Check if stackdriver agent is preinstalled in the image.
If so configure and use it.
Else check for fluentbit agent
---
 cluster/gce/windows/k8s-node-setup.psm1 | 292 ++++++++++++++++++++++++
 1 file changed, 292 insertions(+)

diff --git a/cluster/gce/windows/k8s-node-setup.psm1 b/cluster/gce/windows/k8s-node-setup.psm1
index e93298aeb57..ead44148cd6 100644
--- a/cluster/gce/windows/k8s-node-setup.psm1
+++ b/cluster/gce/windows/k8s-node-setup.psm1
@@ -1544,6 +1544,11 @@ $LOGGINGEXPORTER_CMDLINE = '*flb-exporter.exe*'
 
 # Restart Logging agent or starts it if it is not currently running
 function Restart-LoggingAgent {
+  if (IsStackdriverAgentInstalled) {
+    Restart-StackdriverAgent
+    return
+  }
+
   Restart-LogService $LOGGINGEXPORTER_SERVICE $LOGGINGEXPORTER_CMDLINE
   Restart-LogService $LOGGINGAGENT_SERVICE $LOGGINGAGENT_CMDLINE
 }
@@ -1599,6 +1604,19 @@ function IsLoggingAgentInstalled {
 # Installs the logging agent according to https://docs.fluentbit.io/manual/installation/windows#
 # Also installs fluent bit stackdriver exporter
 function Install-LoggingAgent {
+  if (IsStackdriverAgentInstalled) {
+    # Remove the existing storage.json file if it exists. This is a workaround
+    # for the bug where the logging agent cannot start up if the file is
+    # corrupted.
+    Remove-Item `
+        -Force `
+        -ErrorAction Ignore `
+        ("$STACKDRIVER_ROOT\LoggingAgent\Main\pos\winevtlog.pos\worker0\" +
+         "storage.json")
+    Log-Output ("Skip: Stackdriver logging agent is already installed")
+    return
+  }
+
   if (IsLoggingAgentInstalled) {
     # Note: we should reinstall the agent if $REDO_STEPS is true
     # here, but we don't know how to run the installer without it prompting
@@ -1658,6 +1676,11 @@ function Create-LoggingAgentServices {
 # Writes the logging configuration file for Logging agent. Restart-LoggingAgent
 # should then be called to pick up the new configuration.
 function Configure-LoggingAgent {
+  if (IsStackdriverAgentInstalled) {
+    Configure-StackdriverAgent
+    return
+  }
+
   $fluentbit_config_file = "$LOGGINGAGENT_ROOT\conf\fluent-bit.conf"
   $FLUENTBIT_CONFIG | Out-File -FilePath $fluentbit_config_file -Encoding ASCII
   Log-Output "Wrote logging config to $fluentbit_config_file"
@@ -1944,5 +1967,274 @@ $PARSERS_CONFIG = @'
     Regex   (?<tag>[^.]+)?\.?(?<pod_name>[a-z0-9](?:[-a-z0-9]*[a-z0-9])?(?:\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?<namespace_name>[^_]+)_(?<container_name>.+)-(?<docker_id>[a-z0-9]{64})\.log$
 '@
+
+# ----------- Stackdriver logging setup --------------------------
+# This section will be deprecated soon.
+#
+
+$STACKDRIVER_VERSION = 'v1-11'
+$STACKDRIVER_ROOT = 'C:\Program Files (x86)\Stackdriver'
+
+# Restarts the Stackdriver logging agent, or starts it if it is not currently
+# running. A standard `Restart-Service StackdriverLogging` may fail because
+# StackdriverLogging sometimes is unstoppable, so this function works around it
+# by killing the processes.
+#
+# Throws if the service still has not reached the 'Stopped' state after
+# waiting up to 60 seconds following the force kill.
+function Restart-StackdriverAgent {
+  Stop-Service -NoWait -ErrorAction Ignore StackdriverLogging
+
+  # Wait (if necessary) for service to stop.
+  $timeout = 10
+  $stopped = (Get-service StackdriverLogging).Status -eq 'Stopped'
+  for ($i = 0; $i -lt $timeout -and !($stopped); $i++) {
+    Start-Sleep 1
+    $stopped = (Get-service StackdriverLogging).Status -eq 'Stopped'
+  }
+
+  if ((Get-service StackdriverLogging).Status -ne 'Stopped') {
+    # Force kill the processes. Collect the process IDs first: when nothing
+    # matches, handing $null to Stop-Process -Id fails parameter binding.
+    $agent_pids = @(Get-WmiObject win32_process |
+        Where CommandLine -Like '*Stackdriver/logging*' |
+        ForEach-Object { $_.ProcessId })
+    if ($agent_pids.Count -gt 0) {
+      Stop-Process -Force -PassThru -Id $agent_pids
+    }
+
+    # Wait until process has stopped.
+    $waited = 0
+    $log_period = 10
+    $timeout = 60
+    while ((Get-service StackdriverLogging).Status -ne 'Stopped' -and $waited -lt $timeout) {
+      Start-Sleep 1
+      $waited++
+
+      if ($waited % $log_period -eq 0) {
+        Log-Output "Waiting for StackdriverLogging service to stop"
+      }
+    }
+
+    # Timeout occurred. Test the service state instead of the counter so that a
+    # service which stops during the final second is not reported as a timeout.
+    if ((Get-service StackdriverLogging).Status -ne 'Stopped') {
+      Throw ("Timeout while waiting for StackdriverLogging service to stop")
+    }
+  }
+
+  Start-Service StackdriverLogging
+}
+
+# Check whether the logging agent is installed by whether it's registered as
+# service. Returns $true when Get-Service finds a StackdriverLogging service
+# and $false otherwise.
+function IsStackdriverAgentInstalled {
+  $stackdriver_status = (Get-Service StackdriverLogging -ErrorAction Ignore).Status
+  return -not [string]::IsNullOrEmpty($stackdriver_status)
+}
+
+# Writes the logging configuration file for Stackdriver. Restart-LoggingAgent
+# should then be called to pick up the new configuration.
+#
+# The file is $STACKDRIVER_ROOT\LoggingAgent\config.d\k8s_containers.conf and
+# holds $FLUENTD_CONFIG with NODE_NAME replaced by the output of `hostname`.
+function Configure-StackdriverAgent {
+  $fluentd_config_dir = "$STACKDRIVER_ROOT\LoggingAgent\config.d"
+  $fluentd_config_file = "$fluentd_config_dir\k8s_containers.conf"
+
+  # Create a configuration file for kubernetes containers.
+  # The config.d directory should have already been created automatically, but
+  # try creating again just in case.
+  New-Item $fluentd_config_dir -ItemType 'directory' -Force | Out-Null
+
+  $config = $FLUENTD_CONFIG.replace('NODE_NAME', (hostname))
+  $config | Out-File -FilePath $fluentd_config_file -Encoding ASCII
+  Log-Output "Wrote fluentd logging config to $fluentd_config_file"
+}
+
+# The NODE_NAME placeholder must be replaced with the node's name (hostname).
+$FLUENTD_CONFIG = @'
+# This configuration file for Fluentd is used to watch changes to kubernetes
+# container logs in the directory /var/lib/docker/containers/ and submit the
+# log records to Google Cloud Logging using the cloud-logging plugin.
+#
+# Example
+# =======
+# A line in the Docker log file might look like this JSON:
+#
+# {"log":"2014/09/25 21:15:03 Got request with path wombat\\n",
+# "stream":"stderr",
+# "time":"2014-09-25T21:15:03.499185026Z"}
+#
+# The original tag is derived from the log file's location.
+# For example a Docker container's logs might be in the directory:
+# /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b
+# and in the file:
+# 997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log
+# where 997599971ee6... is the Docker ID of the running container.
+# The Kubernetes kubelet makes a symbolic link to this file on the host
+# machine in the /var/log/containers directory which includes the pod name,
+# the namespace name and the Kubernetes container name:
+# synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
+# ->
+# /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log
+# The /var/log directory on the host is mapped to the /var/log directory in the container
+# running this instance of Fluentd and we end up collecting the file:
+# /var/log/containers/synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
+# This results in the tag:
+# var.log.containers.synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
+# where 'synthetic-logger-0.25lps-pod' is the pod name, 'default' is the
+# namespace name, 'synth-lgr' is the container name and '997599971ee6..' is
+# the container ID.
+# The record reformer is used to extract pod_name, namespace_name and
+# container_name from the tag and set them in a local_resource_id in the
+# format of:
+# 'k8s_container.<NAMESPACE_NAME>.<POD_NAME>.<CONTAINER_NAME>'.
+# The reformer also changes the tags to 'stderr' or 'stdout' based on the +# value of 'stream'. +# local_resource_id is later used by google_cloud plugin to determine the +# monitored resource to ingest logs against. +# Json Log Example: +# {"log":"[info:2016-02-16T16:04:05.930-08:00] Some log text here\n","stream":"stdout","time":"2016-02-17T00:04:05.931087621Z"} +# CRI Log Example: +# 2016-02-17T00:04:05.931087621Z stdout F [info:2016-02-16T16:04:05.930-08:00] Some log text here + + @type tail + path /var/log/containers/*.log + pos_file /var/log/gcp-containers.log.pos + # Tags at this point are in the format of: + # reform.var.log.containers.__-.log + tag reform.* + read_from_head true + + @type multi_format + + format json + time_key time + time_format %Y-%m-%dT%H:%M:%S.%NZ + + + format /^(? + + +# Example: +# I0204 07:32:30.020537 3368 server.go:1048] POST /stats/container/: (13.972191ms) 200 [[Go-http-client/1.1] 10.244.1.3:40537] + + @type tail + format multiline + multiline_flush_interval 5s + format_firstline /^\w\d{4}/ + format1 /^(?\w)(?