From e07da97447e86d5a3f5fcb802b062c098421cb4f Mon Sep 17 00:00:00 2001 From: Jimmi Dyson Date: Thu, 21 May 2015 20:04:01 +0100 Subject: [PATCH] Switch to fluentd kubernetes metadata plugin to enrich logs with more searchable kubernetes metadata --- .../fluentd-es-image/Dockerfile | 16 +-- .../fluentd-es-image/Makefile | 2 +- .../fluentd-es-image/td-agent.conf | 131 +++++++++++------- .../saltbase/salt/fluentd-es/fluentd-es.yaml | 14 +- 4 files changed, 99 insertions(+), 64 deletions(-) diff --git a/cluster/addons/fluentd-elasticsearch/fluentd-es-image/Dockerfile b/cluster/addons/fluentd-elasticsearch/fluentd-es-image/Dockerfile index 0ba7f117245..aac23b526cb 100644 --- a/cluster/addons/fluentd-elasticsearch/fluentd-es-image/Dockerfile +++ b/cluster/addons/fluentd-elasticsearch/fluentd-es-image/Dockerfile @@ -4,12 +4,13 @@ # TODO(a-robinson): Use a lighter base image, e.g. some form of busybox. # The image acts as an executable for the binary /usr/sbin/td-agent. # Note that fluentd is run with root permssion to allow access to -# log files with root only access under /var/lib/docker/containers/* +# log files with root only access under /var/log/containers/* # Please see http://docs.fluentd.org/articles/install-by-deb for more # information about installing fluentd using deb package. FROM ubuntu:14.04 MAINTAINER Alex Robinson "arob@google.com" +MAINTAINER Jimmi Dyson "jimmidyson@gmail.com" # Ensure there are enough file descriptors for running Fluentd. RUN ulimit -n 65536 @@ -19,9 +20,9 @@ ENV DEBIAN_FRONTEND noninteractive # Install prerequisites. RUN apt-get update && \ - apt-get install -y curl && \ - apt-get install -y -q libcurl4-openssl-dev make && \ - apt-get clean + apt-get install -y -q curl make g++ && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* # Install Fluentd. 
RUN /usr/bin/curl -L https://td-toolbelt.herokuapp.com/sh/install-ubuntu-trusty-td-agent2.sh | sh @@ -31,13 +32,10 @@ RUN /usr/bin/curl -L https://td-toolbelt.herokuapp.com/sh/install-ubuntu-trusty- RUN sed -i -e "s/USER=td-agent/USER=root/" -e "s/GROUP=td-agent/GROUP=root/" /etc/init.d/td-agent # Install the Elasticsearch Fluentd plug-in. -RUN /usr/sbin/td-agent-gem install fluent-plugin-elasticsearch - -# Install the record reformer plugin. -RUN /usr/sbin/td-agent-gem install fluent-plugin-record-reformer +RUN td-agent-gem install fluent-plugin-kubernetes_metadata_filter fluent-plugin-elasticsearch # Copy the Fluentd configuration file. COPY td-agent.conf /etc/td-agent/td-agent.conf # Run the Fluentd service. -CMD /usr/sbin/td-agent "$FLUENTD_ARGS" +ENTRYPOINT ["td-agent"] diff --git a/cluster/addons/fluentd-elasticsearch/fluentd-es-image/Makefile b/cluster/addons/fluentd-elasticsearch/fluentd-es-image/Makefile index 7a364d6e114..bf80e2aa8e8 100644 --- a/cluster/addons/fluentd-elasticsearch/fluentd-es-image/Makefile +++ b/cluster/addons/fluentd-elasticsearch/fluentd-es-image/Makefile @@ -1,7 +1,7 @@ .PHONY: build push IMAGE = fluentd-elasticsearch -TAG = 1.10 +TAG = 1.11 build: docker build -t gcr.io/google_containers/$(IMAGE):$(TAG) . diff --git a/cluster/addons/fluentd-elasticsearch/fluentd-es-image/td-agent.conf b/cluster/addons/fluentd-elasticsearch/fluentd-es-image/td-agent.conf index ef21e3e2449..4a155ac4296 100644 --- a/cluster/addons/fluentd-elasticsearch/fluentd-es-image/td-agent.conf +++ b/cluster/addons/fluentd-elasticsearch/fluentd-es-image/td-agent.conf @@ -1,11 +1,17 @@ # This configuration file for Fluentd / td-agent is used -# to watch changes to Docker log files that live in the -# directory /var/lib/docker/containers/ and are symbolically -# linked to from the /varlog directory using names that capture the -# pod name and container name. 
These logs are then submitted to -# Elasticsearch which assumes the installation of the fluentd-elasticsearch plug-in. -# See https://github.com/uken/fluent-plugin-elasticsearch for -# more information about the plug-in. +# to watch changes to Docker log files. The kubelet creates symlinks that +# capture the pod name, namespace, container name & Docker container ID +# to the docker logs for pods in the /var/log/containers directory on the host. +# If running this fluentd configuration in a Docker container, the /var/log +# directory should be mounted in the container. +# +# These logs are then submitted to Elasticsearch which assumes the +# installation of the fluent-plugin-elasticsearch & the +# fluent-plugin-kubernetes_metadata_filter plugins. +# See https://github.com/uken/fluent-plugin-elasticsearch & +# https://github.com/fabric8io/fluent-plugin-kubernetes_metadata_filter for +# more information about the plugins. +# Maintainer: Jimmi Dyson # # Example # ======= @@ -31,116 +37,147 @@ # }, # ... # -# The record reformer is used to write the tag to focus on the pod name -# and the Kubernetes container name. For example a Docker container's logs -# might be in the directory: +# The Kubernetes fluentd plugin is used to write the Kubernetes metadata to the log +# record & add labels to the log record if properly configured. This enables users +# to filter & search logs on any metadata. +# For example a Docker container's logs might be in the directory: +# # /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b +# # and in the file: +# # 997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log +# # where 997599971ee6... is the Docker ID of the running container. 
# The Kubernetes kubelet makes a symbolic link to this file on the host machine # in the /var/log/containers directory which includes the pod name and the Kubernetes # container name: -# synthetic-logger-0.25lps-pod_default-synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log -# -> +# +# synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log +# -> # /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log -# The /var/log directory on the host is mapped to the /varlog directory in the container +# +# The /var/log directory on the host is mapped to the /var/log directory in the container # running this instance of Fluentd and we end up collecting the file: -# /varlog/containers/synthetic-logger-0.25lps-pod_default-synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log +# +# /var/log/containers/synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log +# # This results in the tag: -# varlog.containers.synthetic-logger-0.25lps-pod_default-synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log -# The record reformer is used is discard the varlog.containers prefix and -# the Docker container ID suffix and "kubernetes." is pre-pended giving the -# final tag which is ingested into Elasticsearch: -# kubernetes.synthetic-logger-0.25lps-pod_default-synth-lgr +# +# var.log.containers.synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log +# +# The Kubernetes fluentd plugin is used to extract the namespace, pod name & container name +# which are added to the log message as a kubernetes field object & the Docker container ID +# is also added under the docker field object. 
+# The final tag is: +# +# kubernetes.var.log.containers.synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log +# +# And the final log record looks like: +# +# { +# "log":"2014/09/25 21:15:03 Got request with path wombat\n", +# "stream":"stderr", +# "time":"2014-09-25T21:15:03.499185026Z", +# "kubernetes": { +# "namespace": "default", +# "pod_name": "synthetic-logger-0.25lps-pod", +# "container_name": "synth-lgr" +# }, +# "docker": { +# "container_id": "997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b" +# } +# } +# # This makes it easier for users to search for logs by pod name or by # the name of the Kubernetes container regardless of how many times the # Kubernetes pod has been restarted (resulting in a several Docker container IDs). +# # TODO: Propagate the labels associated with a container along with its logs # so users can query logs using labels as well as or instead of the pod name -# and container name. +# and container name. This is simply done via configuration of the Kubernetes +# fluentd plugin but requires secrets to be enabled in the fluentd pod. This is a +# problem yet to be solved as secrets are not usable in static pods which the fluentd +# pod must be until a per-node controller is available in Kubernetes. 
type tail - format json - time_key time - path /varlog/containers/*.log - pos_file /varlog/es-containers.log.pos + path /var/log/containers/*.log + pos_file /var/log/es-containers.log.pos time_format %Y-%m-%dT%H:%M:%S - tag reform.* + tag kubernetes.* + format json read_from_head true - - type record_reformer - enable_ruby true - tag kubernetes.${tag_suffix[3].split('-')[0..-2].join('-')} - - type tail format none - path /varlog/salt/minion - pos_file /varlog/gcp-salt.pos + path /var/log/salt/minion + pos_file /var/log/gcp-salt.pos tag salt type tail format none - path /varlog/startupscript.log - pos_file /varlog/es-startupscript.log.pos + path /var/log/startupscript.log + pos_file /var/log/es-startupscript.log.pos tag startupscript type tail format none - path /varlog/docker.log - pos_file /varlog/es-docker.log.pos + path /var/log/docker.log + pos_file /var/log/es-docker.log.pos tag docker type tail format none - path /varlog/etcd.log - pos_file /varlog/es-etcd.log.pos + path /var/log/etcd.log + pos_file /var/log/es-etcd.log.pos tag etcd type tail format none - path /varlog/kubelet.log - pos_file /varlog/es-kubelet.log.pos + path /var/log/kubelet.log + pos_file /var/log/es-kubelet.log.pos tag kubelet type tail format none - path /varlog/kube-apiserver.log - pos_file /varlog/es-kube-apiserver.log.pos + path /var/log/kube-apiserver.log + pos_file /var/log/es-kube-apiserver.log.pos tag kube-apiserver type tail format none - path /varlog/kube-controller-manager.log - pos_file /varlog/es-kube-controller-manager.log.pos + path /var/log/kube-controller-manager.log + pos_file /var/log/es-kube-controller-manager.log.pos tag kube-controller-manager type tail format none - path /varlog/kube-scheduler.log - pos_file /varlog/es-kube-scheduler.log.pos + path /var/log/kube-scheduler.log + pos_file /var/log/es-kube-scheduler.log.pos tag kube-scheduler + + type kubernetes_metadata + + type elasticsearch log_level info diff --git a/cluster/saltbase/salt/fluentd-es/fluentd-es.yaml 
b/cluster/saltbase/salt/fluentd-es/fluentd-es.yaml index e1fda84eacd..caf90526c26 100644 --- a/cluster/saltbase/salt/fluentd-es/fluentd-es.yaml +++ b/cluster/saltbase/salt/fluentd-es/fluentd-es.yaml @@ -6,24 +6,24 @@ metadata: spec: containers: - name: fluentd-elasticsearch - image: gcr.io/google_containers/fluentd-elasticsearch:1.9 + image: gcr.io/google_containers/fluentd-elasticsearch:1.11 resources: limits: cpu: 100m - env: - - name: "FLUENTD_ARGS" - value: "-q" + args: + - -qq volumeMounts: - name: varlog - mountPath: /varlog - - name: containers + mountPath: /var/log + - name: varlibdockercontainers mountPath: /var/lib/docker/containers + readOnly: true terminationGracePeriodSeconds: 30 volumes: - name: varlog hostPath: path: /var/log - - name: containers + - name: varlibdockercontainers hostPath: path: /var/lib/docker/containers