From 26cdcde3d555c2c874dfd3e0998cf5c27af13171 Mon Sep 17 00:00:00 2001 From: Jeremy Edwards Date: Thu, 6 Aug 2020 15:56:15 -0700 Subject: [PATCH] Configure StackdriverLogging Windows service to restart on failure. --- cluster/gce/windows/k8s-node-setup.psm1 | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/cluster/gce/windows/k8s-node-setup.psm1 b/cluster/gce/windows/k8s-node-setup.psm1 index ead44148cd6..45eebc51b35 100644 --- a/cluster/gce/windows/k8s-node-setup.psm1 +++ b/cluster/gce/windows/k8s-node-setup.psm1 @@ -1180,7 +1180,7 @@ function Start-WorkerServices { "--cluster-cidr=$(${kube_env}['CLUSTER_IP_RANGE'])", "--hostname-override=${instance_name}" ) - + $kubeproxy_args = ${default_kubeproxy_args} + ${kubeproxy_args} Log-Output "Final kubeproxy_args: ${kubeproxy_args}" @@ -1626,7 +1626,7 @@ function Install-LoggingAgent { Log-Output ("Skip: Fluentbit logging agent is already installed") return } - + DownloadAndInstall-LoggingAgents Create-LoggingAgentServices } @@ -1699,7 +1699,7 @@ $FLUENTBIT_CONFIG = @' Log_File /var/log/fluentbit.log Daemon off Parsers_File parsers.conf - HTTP_Server off + HTTP_Server off HTTP_Listen 0.0.0.0 HTTP_PORT 2020 plugins_file plugins.conf @@ -1753,7 +1753,7 @@ $FLUENTBIT_CONFIG = @' # Channels Setup,Windows PowerShell Channels application,system,security Tag winevent.raw - DB winlog.sqlite # + DB winlog.sqlite # # Json Log Example: @@ -1767,9 +1767,9 @@ $FLUENTBIT_CONFIG = @' Mem_Buf_Limit 5MB Skip_Long_Lines On Refresh_Interval 5 - DB flb_kube.db + DB flb_kube.db - # Settings from fluentd missing here. + # Settings from fluentd missing here. 
# tag reform.* # format json # time_key time @@ -2037,6 +2037,11 @@ function Configure-StackdriverAgent { $config = $FLUENTD_CONFIG.replace('NODE_NAME', (hostname)) $config | Out-File -FilePath $fluentd_config_file -Encoding ASCII Log-Output "Wrote fluentd logging config to $fluentd_config_file" + + # Configure StackdriverLogging to automatically restart on failure after 10 + # seconds. The logging agent may die due to various disruptions but can be + # resumed. + sc.exe failure StackdriverLogging reset= 0 actions= restart/1000/restart/10000 } # The NODE_NAME placeholder must be replaced with the node's name (hostname).