diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Dockerfile b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Dockerfile index f2244a57bd9..59316c69729 100644 --- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Dockerfile +++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Dockerfile @@ -21,36 +21,38 @@ # in the Google Developer Console. FROM gcr.io/google_containers/ubuntu-slim:0.4 -MAINTAINER Alex Robinson "arob@google.com" -# Disable prompts from apt. +MAINTAINER Mik Vyatskov "vmik@google.com" + +# Disable prompts from apt ENV DEBIAN_FRONTEND noninteractive -# Keeps unneeded configs from being installed along with fluentd. -ENV DO_NOT_INSTALL_CATCH_ALL_CONFIG true -RUN apt-get -q update && \ - apt-get install -y curl ca-certificates gcc make bash && \ - apt-get install -y --reinstall lsb-base lsb-release && \ - echo "Installing logging agent" && \ - curl -sSL https://dl.google.com/cloudagents/install-logging-agent.sh | bash && \ - /usr/sbin/google-fluentd-gem install fluent-plugin-record-reformer -v 0.8.1 && \ - /usr/sbin/google-fluentd-gem install fluent-plugin-systemd -v 0.0.3 && \ - apt-get remove -y gcc make && \ - apt-get autoremove -y && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \ - /opt/google-fluentd/embedded/share/doc \ - /opt/google-fluentd/embedded/share/gtk-doc \ - /opt/google-fluentd/embedded/lib/postgresql \ - /opt/google-fluentd/embedded/bin/postgres \ - /opt/google-fluentd/embedded/share/postgresql \ - /var/log/google-fluentd +# Install build tools +RUN apt-get -qq update && \ + apt-get install -y -qq curl ca-certificates gcc make bash sudo && \ + apt-get install -y -qq --reinstall lsb-base lsb-release + +# Install logging agent and required gems +RUN /usr/bin/curl -sSL https://toolbelt.treasuredata.com/sh/install-ubuntu-xenial-td-agent2.sh | sh && \ + sed -i -e "s/USER=td-agent/USER=root/" -e "s/GROUP=td-agent/GROUP=root/" /etc/init.d/td-agent && \ + td-agent-gem install --no-document fluent-plugin-record-reformer -v 0.8.2 && \ + td-agent-gem install --no-document fluent-plugin-systemd -v 0.0.5 && \ + td-agent-gem install --no-document fluent-plugin-google-cloud -v 0.5.2 + +# Remove build tools +RUN apt-get remove -y -qq gcc make && \ + apt-get autoremove -y -qq && \ + apt-get clean -qq + +# Remove unnecessary files +RUN rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* # Copy the Fluentd configuration files for logging Docker container logs. # Either configuration file can be used by specifying `-c ` as a command # line argument. -COPY google-fluentd.conf /etc/google-fluentd/google-fluentd.conf -COPY google-fluentd-journal.conf /etc/google-fluentd/google-fluentd-journal.conf +RUN rm /etc/td-agent/td-agent.conf +COPY google-fluentd.conf /etc/td-agent/google-fluentd.conf +COPY google-fluentd-journal.conf /etc/td-agent/google-fluentd-journal.conf # Start Fluentd to pick up our config that watches Docker container logs. -CMD /usr/sbin/google-fluentd "$FLUENTD_ARGS" +CMD /usr/sbin/td-agent "$FLUENTD_ARGS" \ No newline at end of file diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile index 696d74dada8..053f3933c1c 100644 --- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile +++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile @@ -26,25 +26,14 @@ # 6. When PR is approved make the gcr.io version of the image: make build push # 7. Revert the referendes to kubernetes/fluentd-gcp:$(TAG) to gcr.io/google_containers/fluentd-gcp:$(TAG) -.PHONY: kbuild kpush +.PHONY: build push -TAG = 1.25 - -# Rules for building the test image for deployment to Dockerhub with user kubernetes. - -kbuild: - docker build -t kubernetes/fluentd-gcp:$(TAG) . - - -kpush: - docker push kubernetes/fluentd-gcp:$(TAG) - - -# Rules for building the real image for deployment to gcr.io +PREFIX=gcr.io/google_containers +TAG = 1.26 build: - docker build -t gcr.io/google_containers/fluentd-gcp:$(TAG) . + docker build -t $(PREFIX)/fluentd-gcp:$(TAG) . push: - gcloud docker -- push gcr.io/google_containers/fluentd-gcp:$(TAG) + gcloud docker -- push $(PREFIX)/fluentd-gcp:$(TAG) diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd-journal.conf b/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd-journal.conf index d02a7a22780..9b844a25c49 100644 --- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd-journal.conf +++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd-journal.conf @@ -222,12 +222,18 @@ # them separate since most users don't care about the node logs. type google_cloud + # Set the buffer type to file to improve the reliability and reduce the memory consumption + buffer_type file + buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer + # Set queue_full action to block because we want to pause gracefully + # in case of the off-the-limits load instead of throwing an exception + buffer_queue_full_action block # Set the chunk limit conservatively to avoid exceeding the GCL limit # of 10MiB per write request. buffer_chunk_limit 2M # Cap the combined memory usage of this buffer and the one below to - # 2MiB/chunk * (24 + 8) chunks = 64 MiB - buffer_queue_limit 24 + # 2MiB/chunk * (6 + 2) chunks = 16 MiB + buffer_queue_limit 6 # Never wait more than 5 seconds before flushing logs in the non-error case. flush_interval 5s # Never wait longer than 30 seconds between retries. @@ -235,7 +241,7 @@ # Disable the limit on the number of retries (retry forever). disable_retry_limit # Use multiple threads for processing. - num_threads 8 + num_threads 2 # Keep a smaller buffer here since these logs are less important than the user's @@ -243,10 +249,13 @@ type google_cloud detect_subservice false + buffer_type file + buffer_path /var/log/fluentd-buffers/kubernetes.system.buffer + buffer_queue_full_action block buffer_chunk_limit 2M - buffer_queue_limit 8 + buffer_queue_limit 2 flush_interval 5s max_retry_wait 30 disable_retry_limit - num_threads 8 + num_threads 2 diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf b/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf index bce71ae584d..37135026192 100644 --- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf +++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf @@ -206,12 +206,18 @@ # them separate since most users don't care about the node logs. type google_cloud + # Set the buffer type to file to improve the reliability and reduce the memory consumption + buffer_type file + buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer + # Set queue_full action to block because we want to pause gracefully + # in case of the off-the-limits load instead of throwing an exception + buffer_queue_full_action block # Set the chunk limit conservatively to avoid exceeding the GCL limit # of 10MiB per write request. buffer_chunk_limit 2M # Cap the combined memory usage of this buffer and the one below to - # 2MiB/chunk * (24 + 8) chunks = 64 MiB - buffer_queue_limit 24 + # 2MiB/chunk * (6 + 2) chunks = 16 MiB + buffer_queue_limit 6 # Never wait more than 5 seconds before flushing logs in the non-error case. flush_interval 5s # Never wait longer than 30 seconds between retries. @@ -219,7 +225,7 @@ # Disable the limit on the number of retries (retry forever). disable_retry_limit # Use multiple threads for processing. - num_threads 8 + num_threads 2 # Keep a smaller buffer here since these logs are less important than the user's @@ -227,10 +233,13 @@ type google_cloud detect_subservice false + buffer_type file + buffer_path /var/log/fluentd-buffers/kubernetes.system.buffer + buffer_queue_full_action block buffer_chunk_limit 2M - buffer_queue_limit 8 + buffer_queue_limit 2 flush_interval 5s max_retry_wait 30 disable_retry_limit - num_threads 8 + num_threads 2