Switch to fluentd kubernetes metadata plugin to enrich logs with more searchable kubernetes metadata

This commit is contained in:
Jimmi Dyson 2015-05-21 20:04:01 +01:00
parent 3dd3aa2730
commit e07da97447
4 changed files with 99 additions and 64 deletions

View File

@ -4,12 +4,13 @@
# TODO(a-robinson): Use a lighter base image, e.g. some form of busybox. # TODO(a-robinson): Use a lighter base image, e.g. some form of busybox.
# The image acts as an executable for the binary /usr/sbin/td-agent. # The image acts as an executable for the binary /usr/sbin/td-agent.
# Note that fluentd is run with root permission to allow access to # Note that fluentd is run with root permission to allow access to
# log files with root only access under /var/lib/docker/containers/* # log files with root only access under /var/log/containers/*
# Please see http://docs.fluentd.org/articles/install-by-deb for more # Please see http://docs.fluentd.org/articles/install-by-deb for more
# information about installing fluentd using deb package. # information about installing fluentd using deb package.
FROM ubuntu:14.04 FROM ubuntu:14.04
MAINTAINER Alex Robinson "arob@google.com" MAINTAINER Alex Robinson "arob@google.com"
MAINTAINER Jimmi Dyson "jimmidyson@gmail.com"
# Ensure there are enough file descriptors for running Fluentd. # Ensure there are enough file descriptors for running Fluentd.
RUN ulimit -n 65536 RUN ulimit -n 65536
@ -19,9 +20,9 @@ ENV DEBIAN_FRONTEND noninteractive
# Install prerequisites. # Install prerequisites.
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y curl && \ apt-get install -y -q curl make g++ && \
apt-get install -y -q libcurl4-openssl-dev make && \ apt-get clean && \
apt-get clean rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Install Fluentd. # Install Fluentd.
RUN /usr/bin/curl -L https://td-toolbelt.herokuapp.com/sh/install-ubuntu-trusty-td-agent2.sh | sh RUN /usr/bin/curl -L https://td-toolbelt.herokuapp.com/sh/install-ubuntu-trusty-td-agent2.sh | sh
@ -31,13 +32,10 @@ RUN /usr/bin/curl -L https://td-toolbelt.herokuapp.com/sh/install-ubuntu-trusty-
RUN sed -i -e "s/USER=td-agent/USER=root/" -e "s/GROUP=td-agent/GROUP=root/" /etc/init.d/td-agent RUN sed -i -e "s/USER=td-agent/USER=root/" -e "s/GROUP=td-agent/GROUP=root/" /etc/init.d/td-agent
# Install the Elasticsearch Fluentd plug-in. # Install the Elasticsearch Fluentd plug-in.
RUN /usr/sbin/td-agent-gem install fluent-plugin-elasticsearch RUN td-agent-gem install fluent-plugin-kubernetes_metadata_filter fluent-plugin-elasticsearch
# Install the record reformer plugin.
RUN /usr/sbin/td-agent-gem install fluent-plugin-record-reformer
# Copy the Fluentd configuration file. # Copy the Fluentd configuration file.
COPY td-agent.conf /etc/td-agent/td-agent.conf COPY td-agent.conf /etc/td-agent/td-agent.conf
# Run the Fluentd service. # Run the Fluentd service.
CMD /usr/sbin/td-agent "$FLUENTD_ARGS" ENTRYPOINT ["td-agent"]

View File

@ -1,7 +1,7 @@
.PHONY: build push .PHONY: build push
IMAGE = fluentd-elasticsearch IMAGE = fluentd-elasticsearch
TAG = 1.10 TAG = 1.11
build: build:
docker build -t gcr.io/google_containers/$(IMAGE):$(TAG) . docker build -t gcr.io/google_containers/$(IMAGE):$(TAG) .

View File

@ -1,11 +1,17 @@
# This configuration file for Fluentd / td-agent is used # This configuration file for Fluentd / td-agent is used
# to watch changes to Docker log files that live in the # to watch changes to Docker log files. The kubelet creates symlinks that
# directory /var/lib/docker/containers/ and are symbolically # capture the pod name, namespace, container name & Docker container ID
# linked to from the /varlog directory using names that capture the # to the docker logs for pods in the /var/log/containers directory on the host.
# pod name and container name. These logs are then submitted to # If running this fluentd configuration in a Docker container, the /var/log
# Elasticsearch which assumes the installation of the fluentd-elasticsearch plug-in. # directory should be mounted in the container.
# See https://github.com/uken/fluent-plugin-elasticsearch for #
# more information about the plug-in. # These logs are then submitted to Elasticsearch which assumes the
# installation of the fluent-plugin-elasticsearch & the
# fluent-plugin-kubernetes_metadata_filter plugins.
# See https://github.com/uken/fluent-plugin-elasticsearch &
# https://github.com/fabric8io/fluent-plugin-kubernetes_metadata_filter for
# more information about the plugins.
# Maintainer: Jimmi Dyson <jimmidyson@gmail.com>
# #
# Example # Example
# ======= # =======
@ -31,116 +37,147 @@
# }, # },
# ... # ...
# #
# The record reformer is used to write the tag to focus on the pod name # The Kubernetes fluentd plugin is used to write the Kubernetes metadata to the log
# and the Kubernetes container name. For example a Docker container's logs # record & add labels to the log record if properly configured. This enables users
# might be in the directory: # to filter & search logs on any metadata.
# For example a Docker container's logs might be in the directory:
#
# /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b # /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b
#
# and in the file: # and in the file:
#
# 997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log # 997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log
#
# where 997599971ee6... is the Docker ID of the running container. # where 997599971ee6... is the Docker ID of the running container.
# The Kubernetes kubelet makes a symbolic link to this file on the host machine # The Kubernetes kubelet makes a symbolic link to this file on the host machine
# in the /var/log/containers directory which includes the pod name and the Kubernetes # in the /var/log/containers directory which includes the pod name and the Kubernetes
# container name: # container name:
# synthetic-logger-0.25lps-pod_default-synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log #
# synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
# -> # ->
# /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log # /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log
# The /var/log directory on the host is mapped to the /varlog directory in the container #
# The /var/log directory on the host is mapped to the /var/log directory in the container
# running this instance of Fluentd and we end up collecting the file: # running this instance of Fluentd and we end up collecting the file:
# /varlog/containers/synthetic-logger-0.25lps-pod_default-synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log #
# /var/log/containers/synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
#
# This results in the tag: # This results in the tag:
# varlog.containers.synthetic-logger-0.25lps-pod_default-synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log #
# The record reformer is used to discard the varlog.containers prefix and # var.log.containers.synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
# the Docker container ID suffix and "kubernetes." is pre-pended giving the #
# final tag which is ingested into Elasticsearch: # The Kubernetes fluentd plugin is used to extract the namespace, pod name & container name
# kubernetes.synthetic-logger-0.25lps-pod_default-synth-lgr # which are added to the log message as a kubernetes field object & the Docker container ID
# is also added under the docker field object.
# The final tag is:
#
# kubernetes.var.log.containers.synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
#
# And the final log record looks like:
#
# {
# "log":"2014/09/25 21:15:03 Got request with path wombat\n",
# "stream":"stderr",
# "time":"2014-09-25T21:15:03.499185026Z",
# "kubernetes": {
# "namespace": "default",
# "pod_name": "synthetic-logger-0.25lps-pod",
# "container_name": "synth-lgr"
# },
# "docker": {
# "container_id": "997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b"
# }
# }
#
# This makes it easier for users to search for logs by pod name or by # This makes it easier for users to search for logs by pod name or by
# the name of the Kubernetes container regardless of how many times the # the name of the Kubernetes container regardless of how many times the
# Kubernetes pod has been restarted (resulting in several Docker container IDs). # Kubernetes pod has been restarted (resulting in several Docker container IDs).
#
# TODO: Propagate the labels associated with a container along with its logs # TODO: Propagate the labels associated with a container along with its logs
# so users can query logs using labels as well as or instead of the pod name # so users can query logs using labels as well as or instead of the pod name
# and container name. # and container name. This is simply done via configuration of the Kubernetes
# fluentd plugin but requires secrets to be enabled in the fluent pod. This is a
# problem yet to be solved as secrets are not usable in static pods which the fluentd
# pod must be until a per-node controller is available in Kubernetes.
<source> <source>
type tail type tail
format json path /var/log/containers/*.log
time_key time pos_file /var/log/es-containers.log.pos
path /varlog/containers/*.log
pos_file /varlog/es-containers.log.pos
time_format %Y-%m-%dT%H:%M:%S time_format %Y-%m-%dT%H:%M:%S
tag reform.* tag kubernetes.*
format json
read_from_head true read_from_head true
</source> </source>
<match reform.**>
type record_reformer
enable_ruby true
tag kubernetes.${tag_suffix[3].split('-')[0..-2].join('-')}
</match>
<source> <source>
type tail type tail
format none format none
path /varlog/salt/minion path /var/log/salt/minion
pos_file /varlog/gcp-salt.pos pos_file /var/log/gcp-salt.pos
tag salt tag salt
</source> </source>
<source> <source>
type tail type tail
format none format none
path /varlog/startupscript.log path /var/log/startupscript.log
pos_file /varlog/es-startupscript.log.pos pos_file /var/log/es-startupscript.log.pos
tag startupscript tag startupscript
</source> </source>
<source> <source>
type tail type tail
format none format none
path /varlog/docker.log path /var/log/docker.log
pos_file /varlog/es-docker.log.pos pos_file /var/log/es-docker.log.pos
tag docker tag docker
</source> </source>
<source> <source>
type tail type tail
format none format none
path /varlog/etcd.log path /var/log/etcd.log
pos_file /varlog/es-etcd.log.pos pos_file /var/log/es-etcd.log.pos
tag etcd tag etcd
</source> </source>
<source> <source>
type tail type tail
format none format none
path /varlog/kubelet.log path /var/log/kubelet.log
pos_file /varlog/es-kubelet.log.pos pos_file /var/log/es-kubelet.log.pos
tag kubelet tag kubelet
</source> </source>
<source> <source>
type tail type tail
format none format none
path /varlog/kube-apiserver.log path /var/log/kube-apiserver.log
pos_file /varlog/es-kube-apiserver.log.pos pos_file /var/log/es-kube-apiserver.log.pos
tag kube-apiserver tag kube-apiserver
</source> </source>
<source> <source>
type tail type tail
format none format none
path /varlog/kube-controller-manager.log path /var/log/kube-controller-manager.log
pos_file /varlog/es-kube-controller-manager.log.pos pos_file /var/log/es-kube-controller-manager.log.pos
tag kube-controller-manager tag kube-controller-manager
</source> </source>
<source> <source>
type tail type tail
format none format none
path /varlog/kube-scheduler.log path /var/log/kube-scheduler.log
pos_file /varlog/es-kube-scheduler.log.pos pos_file /var/log/es-kube-scheduler.log.pos
tag kube-scheduler tag kube-scheduler
</source> </source>
<filter kubernetes.**>
type kubernetes_metadata
</filter>
<match **> <match **>
type elasticsearch type elasticsearch
log_level info log_level info

View File

@ -6,24 +6,24 @@ metadata:
spec: spec:
containers: containers:
- name: fluentd-elasticsearch - name: fluentd-elasticsearch
image: gcr.io/google_containers/fluentd-elasticsearch:1.9 image: gcr.io/google_containers/fluentd-elasticsearch:1.11
resources: resources:
limits: limits:
cpu: 100m cpu: 100m
env: args:
- name: "FLUENTD_ARGS" - -qq
value: "-q"
volumeMounts: volumeMounts:
- name: varlog - name: varlog
mountPath: /varlog mountPath: /var/log
- name: containers - name: varlibdockercontainers
mountPath: /var/lib/docker/containers mountPath: /var/lib/docker/containers
readOnly: true
terminationGracePeriodSeconds: 30 terminationGracePeriodSeconds: 30
volumes: volumes:
- name: varlog - name: varlog
hostPath: hostPath:
path: /var/log path: /var/log
- name: containers - name: varlibdockercontainers
hostPath: hostPath:
path: /var/lib/docker/containers path: /var/lib/docker/containers