diff --git a/cluster/gce/windows/configure.ps1 b/cluster/gce/windows/configure.ps1
index eefb60dd8da..8edabaf2f7f 100644
--- a/cluster/gce/windows/configure.ps1
+++ b/cluster/gce/windows/configure.ps1
@@ -111,6 +111,7 @@ try {
   Set-EnvironmentVars
   Create-Directories
   Download-HelperScripts
+  InstallAndStart-LoggingAgent
   Create-DockerRegistryKey
   DownloadAndInstall-KubernetesBinaries
 
diff --git a/cluster/gce/windows/k8s-node-setup.psm1 b/cluster/gce/windows/k8s-node-setup.psm1
index f156c34578c..9254f754847 100644
--- a/cluster/gce/windows/k8s-node-setup.psm1
+++ b/cluster/gce/windows/k8s-node-setup.psm1
@@ -1080,5 +1080,154 @@ function Create-DockerRegistryKey {
   Remove-Item -Force -Recurse ${tmp_dir}
 }
 
+# TODO(pjh): move the Stackdriver logging agent code below into a separate
+# module; it was put here temporarily to avoid disrupting the file layout in
+# the K8s release machinery.
+$STACKDRIVER_VERSION = 'v1-8'
+$STACKDRIVER_ROOT = 'C:\Program Files (x86)\Stackdriver'
+
+# Install and start the Stackdriver logging agent according to
+# https://cloud.google.com/logging/docs/agent/installation.
+# TODO(yujuhong): Update to a newer Stackdriver agent once it is released to
+# support kubernetes metadata properly. The current version does not recognize
+# the local resource key "logging.googleapis.com/local_resource_id", and fails
+# to label namespace, pod and container names on the logs.
+function InstallAndStart-LoggingAgent {
+  # Remove the existing storage.json file if it exists. This is a workaround
+  # for the bug where the logging agent cannot start up if the file is
+  # corrupted.
+  Remove-Item `
+      -Force `
+      -ErrorAction Ignore `
+      ("$STACKDRIVER_ROOT\LoggingAgent\Main\pos\winevtlog.pos\worker0\" +
+       "storage.json")
+
+  if (Test-Path $STACKDRIVER_ROOT) {
+    # Note: we should reinstall the Stackdriver agent if $REDO_STEPS is true
+    # here, but we don't know how to run the installer without it prompting
+    # when Stackdriver is already installed. We dumped the strings in the
+    # installer binary and searched for flags to do this but found nothing. Oh
+    # well.
+    Log-Output ("Skip: $STACKDRIVER_ROOT is already present, assuming that " +
+                "Stackdriver logging agent is already installed")
+    # Restart-Service restarts a running service or starts a not-running
+    # service.
+    Restart-Service StackdriverLogging
+    return
+  }
+
+  $url = ("https://dl.google.com/cloudagents/windows/" +
+          "StackdriverLogging-${STACKDRIVER_VERSION}.exe")
+  $tmp_dir = 'C:\stackdriver_tmp'
+  New-Item $tmp_dir -ItemType 'directory' -Force | Out-Null
+  $installer_file = "${tmp_dir}\StackdriverLogging-${STACKDRIVER_VERSION}.exe"
+  MustDownload-File -OutFile $installer_file -URLs $url
+
+  # Start the installer silently. This automatically starts the
+  # "StackdriverLogging" service.
+  Log-Output 'Invoking Stackdriver installer'
+  Start-Process $installer_file -ArgumentList "/S" -Wait
+
+  Start-Process "$STACKDRIVER_ROOT\LoggingAgent\Main\bin\fluent-gem" `
+      -ArgumentList "install","fluent-plugin-record-reformer" `
+      -Wait
+
+  # Create a configuration file for kubernetes containers.
+  # The config.d directory should have already been created automatically, but
+  # try creating again just in case.
+  New-Item "$STACKDRIVER_ROOT\LoggingAgent\config.d" `
+      -ItemType 'directory' `
+      -Force | Out-Null
+  $FLUENTD_CONFIG | Out-File `
+      -FilePath "$STACKDRIVER_ROOT\LoggingAgent\config.d\k8s_containers.conf" `
+      -Encoding ASCII
+
+  # Restart the service to pick up the new configurations.
+  Restart-Service StackdriverLogging
+  Remove-Item -Force -Recurse $tmp_dir
+}
+
+# TODO(yujuhong):
+# - Collect kubelet/kube-proxy logs.
+# - Add tag for kubernetes node name.
+$FLUENTD_CONFIG = @'
+# This configuration file for Fluentd is used to watch changes to kubernetes
+# container logs in the directory /var/lib/docker/containers/ and submit the
+# log records to Google Cloud Logging using the cloud-logging plugin.
+#
+# Example
+# =======
+# A line in the Docker log file might look like this JSON:
+#
+# {"log":"2014/09/25 21:15:03 Got request with path wombat\\n",
+# "stream":"stderr",
+# "time":"2014-09-25T21:15:03.499185026Z"}
+#
+# The original tag is derived from the log file's location.
+# For example a Docker container's logs might be in the directory:
+# /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b
+# and in the file:
+# 997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log
+# where 997599971ee6... is the Docker ID of the running container.
+# The Kubernetes kubelet makes a symbolic link to this file on the host
+# machine in the /var/log/containers directory which includes the pod name,
+# the namespace name and the Kubernetes container name:
+# synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
+# ->
+# /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log
+# The /var/log directory on the host is mapped to the /var/log directory in the container
+# running this instance of Fluentd and we end up collecting the file:
+# /var/log/containers/synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
+# This results in the tag:
+# var.log.containers.synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
+# where 'synthetic-logger-0.25lps-pod' is the pod name, 'default' is the
+# namespace name, 'synth-lgr' is the container name and '997599971ee6..' is
+# the container ID.
+# The record reformer is used to extract pod_name, namespace_name and
+# container_name from the tag and set them in a local_resource_id in the
+# format of:
+# 'k8s_container.<namespace_name>.<pod_name>.<container_name>'.
+# The reformer also changes the tags to 'stderr' or 'stdout' based on the
+# value of 'stream'.
+# local_resource_id is later used by google_cloud plugin to determine the
+# monitored resource to ingest logs against.
+
+# Json Log Example:
+# {"log":"[info:2016-02-16T16:04:05.930-08:00] Some log text here\n","stream":"stdout","time":"2016-02-17T00:04:05.931087621Z"}
+# TODO: Support CRI log format, which requires the multi_format plugin.
+<source>
+  @type tail
+  path /var/log/containers/*.log
+  pos_file /var/log/gcp-containers.log.pos
+  # Tags at this point are in the format of:
+  # reform.var.log.containers.<POD_NAME>_<NAMESPACE_NAME>_<CONTAINER_NAME>-<CONTAINER_ID>.log
+  tag reform.*
+  format json
+  time_key time
+  time_format %Y-%m-%dT%H:%M:%S.%NZ
+  read_from_head true
+</source>
+
+<match reform.**>
+  @type record_reformer
+  enable_ruby true
+  <record>
+    # Extract local_resource_id from tag for 'k8s_container' monitored
+    # resource. The format is:
+    # 'k8s_container.<namespace_name>.<pod_name>.<container_name>'.
+    "logging.googleapis.com/local_resource_id" ${"k8s_container.#{tag_suffix[4].rpartition('.')[0].split('_')[1]}.#{tag_suffix[4].rpartition('.')[0].split('_')[0]}.#{tag_suffix[4].rpartition('.')[0].split('_')[2].rpartition('-')[0]}"}
+    # Rename the field 'log' to a more generic field 'message'. This way the
+    # fluent-plugin-google-cloud knows to flatten the field as textPayload
+    # instead of jsonPayload after extracting 'time', 'severity' and
+    # 'stream' from the record.
+    message ${record['log']}
+    # If 'severity' is not set, assume stderr is ERROR and stdout is INFO.
+    severity ${record['severity'] || if record['stream'] == 'stderr' then 'ERROR' else 'INFO' end}
+  </record>
+  tag ${if record['stream'] == 'stderr' then 'raw.stderr' else 'raw.stdout' end}
+  remove_keys stream,log
+</match>
+'@
+
 # Export all public functions:
 Export-ModuleMember -Function *-*