From 614b1cf3c15ff9490b076971af1a6c1d19c84d18 Mon Sep 17 00:00:00 2001 From: Alex Robinson Date: Fri, 11 Sep 2015 16:32:48 -0700 Subject: [PATCH 1/3] Replace /varlog with /var/log in the fluentd-gcp config to match the recently updated fluent-es config. --- .../fluentd-gcp-image/google-fluentd.conf | 51 +++++++++---------- .../salt/fluentd-gcp/fluentd-gcp.yaml | 6 +-- docs/getting-started-guides/logging.md | 6 +-- 3 files changed, 30 insertions(+), 33 deletions(-) diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf b/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf index 9d481ec54a0..89ea437e0f1 100644 --- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf +++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf @@ -1,7 +1,7 @@ # This configuration file for Fluentd / td-agent is used # to watch changes to Docker log files that live in the # directory /var/lib/docker/containers/ and are symbolically -# linked to from the /varlog directory using names that capture the +# linked to from the /var/log directory using names that capture the # pod name and container name. These logs are then submitted to # Google Cloud Logging which assumes the installation of the cloud-logging plug-in. 
# @@ -29,28 +29,25 @@ # synthetic-logger-0.25lps-pod_default-synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log # -> # /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log -# The /var/log directory on the host is mapped to the /varlog directory in the container +# The /var/log directory on the host is mapped to the /var/log directory in the container # running this instance of Fluentd and we end up collecting the file: -# /varlog/containers/synthetic-logger-0.25lps-pod_default-synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log +# /var/log/containers/synthetic-logger-0.25lps-pod_default-synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log # This results in the tag: -# varlog.containers.synthetic-logger-0.25lps-pod_default-synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log -# The record reformer is used is discard the varlog.containers prefix and +# var.log.containers.synthetic-logger-0.25lps-pod_default-synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log +# The record reformer is used to discard the var.log.containers prefix and # the Docker container ID suffix and "kubernetes." is pre-pended giving the # final tag which is ingested into Elasticsearch: # kubernetes.synthetic-logger-0.25lps-pod_default-synth-lgr # This makes it easier for users to search for logs by pod name or by # the name of the Kubernetes container regardless of how many times the # Kubernetes pod has been restarted (resulting in a several Docker container IDs). -# TODO: Propagate the labels associated with a container along with its logs -# so users can query logs using labels as well as or instead of the pod name -# and container name. 
type tail format json time_key time - path /varlog/containers/*.log - pos_file /varlog/gcp-containers.log.pos + path /var/log/containers/*.log + pos_file /var/log/gcp-containers.log.pos time_format %Y-%m-%dT%H:%M:%S tag reform.* read_from_head true @@ -59,70 +56,70 @@ type record_reformer enable_ruby true - tag kubernetes.${tag_suffix[3].split('-')[0..-2].join('-')} + tag kubernetes.${tag_suffix[4].split('-')[0..-2].join('-')} type tail format none - path /varlog/salt/minion - pos_file /varlog/gcp-salt.pos + path /var/log/salt/minion + pos_file /var/log/gcp-salt.pos tag salt type tail format none - path /varlog/startupscript.log - pos_file /varlog/gcp-startupscript.log.pos + path /var/log/startupscript.log + pos_file /var/log/gcp-startupscript.log.pos tag startupscript type tail format none - path /varlog/docker.log - pos_file /varlog/gcp-docker.log.pos + path /var/log/docker.log + pos_file /var/log/gcp-docker.log.pos tag docker type tail format none - path /varlog/etcd.log - pos_file /varlog/gcp-etcd.log.pos + path /var/log/etcd.log + pos_file /var/log/gcp-etcd.log.pos tag etcd type tail format none - path /varlog/kubelet.log - pos_file /varlog/gcp-kubelet.log.pos + path /var/log/kubelet.log + pos_file /var/log/gcp-kubelet.log.pos tag kubelet type tail format none - path /varlog/kube-apiserver.log - pos_file /varlog/gcp-kube-apiserver.log.pos + path /var/log/kube-apiserver.log + pos_file /var/log/gcp-kube-apiserver.log.pos tag kube-apiserver type tail format none - path /varlog/kube-controller-manager.log - pos_file /varlog/gcp-kube-controller-manager.log.pos + path /var/log/kube-controller-manager.log + pos_file /var/log/gcp-kube-controller-manager.log.pos tag kube-controller-manager type tail format none - path /varlog/kube-scheduler.log - pos_file /varlog/gcp-kube-scheduler.log.pos + path /var/log/kube-scheduler.log + pos_file /var/log/gcp-kube-scheduler.log.pos tag kube-scheduler diff --git a/cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.yaml 
b/cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.yaml index 6eae4c8f699..14fb2f8f045 100644 --- a/cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.yaml +++ b/cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.yaml @@ -16,15 +16,15 @@ spec: value: -q volumeMounts: - name: varlog - mountPath: /varlog - - name: containers + mountPath: /var/log + - name: varlibdockercontainers mountPath: /var/lib/docker/containers terminationGracePeriodSeconds: 30 volumes: - name: varlog hostPath: path: /var/log - - name: containers + - name: varlibdockercontainers hostPath: path: /var/lib/docker/containers diff --git a/docs/getting-started-guides/logging.md b/docs/getting-started-guides/logging.md index 95460f9de1d..967fe9caaf3 100644 --- a/docs/getting-started-guides/logging.md +++ b/docs/getting-started-guides/logging.md @@ -179,15 +179,15 @@ spec: value: -q volumeMounts: - name: varlog - mountPath: /varlog - - name: containers + mountPath: /var/log + - name: varlibdockercontainers mountPath: /var/lib/docker/containers terminationGracePeriodSeconds: 30 volumes: - name: varlog hostPath: path: /var/log - - name: containers + - name: varlibdockercontainers hostPath: path: /var/lib/docker/containers ``` From f691dca96e6fdab62329478980836e81616f1874 Mon Sep 17 00:00:00 2001 From: Alex Robinson Date: Fri, 11 Sep 2015 16:50:34 -0700 Subject: [PATCH 2/3] Don't install unused configs in the fluentd-gcp Dockerfile. --- cluster/addons/fluentd-gcp/fluentd-gcp-image/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Dockerfile b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Dockerfile index ff6836ba9a7..ef9c58e2467 100644 --- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Dockerfile +++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Dockerfile @@ -11,6 +11,8 @@ MAINTAINER Alex Robinson "arob@google.com" # Disable prompts from apt. ENV DEBIAN_FRONTEND noninteractive +# Keeps unneeded configs from being installed along with fluentd. 
+ENV DO_NOT_INSTALL_CATCH_ALL_CONFIG true RUN apt-get -q update && \ apt-get install -y curl && \ From 8040f75594292484db0fa0ee521211ba7ef84082 Mon Sep 17 00:00:00 2001 From: Alex Robinson Date: Fri, 11 Sep 2015 16:53:18 -0700 Subject: [PATCH 3/3] Update fluentd-gcp to use a new google-fluentd build with my recent changes to fluent-plugin-google-cloud to attach Kubernetes metadata to logs. Along with this, separate logs from containers in the cluster out from logs from the daemons running on the node by instantiating two instances of the output plugin, one which uses the new metadata (for containers) and one which doesn't (for things like docker and the kubelet). --- .../fluentd-gcp/fluentd-gcp-image/Makefile | 2 +- .../fluentd-gcp-image/google-fluentd.conf | 24 ++++++++++++++++--- .../salt/fluentd-gcp/fluentd-gcp.yaml | 2 +- docs/getting-started-guides/logging.md | 2 +- 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile index cfbe2def814..a0a808b37ea 100644 --- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile +++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile @@ -14,7 +14,7 @@ .PHONY: kbuild kpush -TAG = 1.12 +TAG = 1.13 # Rules for building the test image for deployment to Dockerhub with user kubernetes. 
diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf b/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf index 89ea437e0f1..25a49850f01 100644 --- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf +++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf @@ -123,16 +123,34 @@ tag kube-scheduler - +# We use 2 output stanzas - one to handle the container logs and one to handle +# the node daemon logs, the latter of which explicitly sends its logs to the +# compute.googleapis.com service rather than container.googleapis.com to keep +# them separate since most users don't care about the node logs. + type google_cloud # Set the chunk limit conservatively to avoid exceeding the GCL limit # of 2MB per write request. buffer_chunk_limit 512K - # Cap buffer memory usage to 512KB/chunk * 128 chunks = 65 MB - buffer_queue_limit 128 + # Cap the combined memory usage of this buffer and the one below to + # 512KB/chunk * (96 + 32) chunks = 65 MB + buffer_queue_limit 96 + # Never wait more than 5 seconds before flushing logs in the non-error case. flush_interval 5s # Never wait longer than 5 minutes between retries. max_retry_wait 300 # Disable the limit on the number of retries (retry forever). disable_retry_limit + +# Keep a smaller buffer here since these logs are less important than the user's +# container logs. 
+ + type google_cloud + detect_subservice false + buffer_chunk_limit 512K + buffer_queue_limit 32 + flush_interval 5s + max_retry_wait 300 + disable_retry_limit + diff --git a/cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.yaml b/cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.yaml index 14fb2f8f045..510425b6ca3 100644 --- a/cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.yaml +++ b/cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.yaml @@ -6,7 +6,7 @@ metadata: spec: containers: - name: fluentd-cloud-logging - image: gcr.io/google_containers/fluentd-gcp:1.12 + image: gcr.io/google_containers/fluentd-gcp:1.13 resources: limits: cpu: 100m diff --git a/docs/getting-started-guides/logging.md b/docs/getting-started-guides/logging.md index 967fe9caaf3..dacc0fc21a9 100644 --- a/docs/getting-started-guides/logging.md +++ b/docs/getting-started-guides/logging.md @@ -169,7 +169,7 @@ metadata: spec: containers: - name: fluentd-cloud-logging - image: gcr.io/google_containers/fluentd-gcp:1.12 + image: gcr.io/google_containers/fluentd-gcp:1.13 resources: limits: cpu: 100m