Add fluentd monitoring to fluentd-gcp image

2025-07-27 05:27:21 +00:00 · 2017-02-16 17:04:13 +01:00 · 2017-02-16 17:04:13 +01:00 · 8d2d91070a
commit 8d2d91070a
parent 8ecc256e88
3 changed files with 110 additions and 41 deletions
--- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Dockerfile
+++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Dockerfile
@ -22,7 +22,6 @@
 FROM gcr.io/google_containers/ubuntu-slim:0.6
 # Disable prompts from apt
 ENV DEBIAN_FRONTEND noninteractive
@ -37,6 +36,7 @@ RUN apt-get -qq update && \
    td-agent-gem install --no-document fluent-plugin-systemd -v 0.0.5 && \
    td-agent-gem install --no-document fluent-plugin-google-cloud -v 0.5.2 && \
    td-agent-gem install --no-document fluent-plugin-detect-exceptions -v 0.0.4 && \
    td-agent-gem install --no-document fluent-plugin-prometheus -v 0.2.1 && \
    # Remove build tools
    apt-get remove -y -qq gcc make && \
    apt-get autoremove -y -qq && \
@ -56,5 +56,7 @@ COPY fluent.conf /etc/td-agent/td-agent.conf
 # Copy the entrypoint for the container
 COPY run.sh /run.sh
 # Port on which the fluentd configuration's prometheus source serves metrics
 # (fluent.conf sets "port 80" for that source).
 EXPOSE 80
 # Start Fluentd to pick up our config that watches Docker container logs.
 CMD /run.sh $FLUENTD_ARGS
--- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile
+++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile
@ -26,7 +26,7 @@
 .PHONY:	build push
 PREFIX=gcr.io/google_containers
-TAG = 1.38
+TAG = 1.40
 # Build the image locally and tag it $(PREFIX)/fluentd-gcp:$(TAG).
 # --pull asks docker to fetch the current base image instead of reusing a
 # possibly stale local copy.
 build:
 	docker build --pull -t $(PREFIX)/fluentd-gcp:$(TAG) .
--- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/fluent.conf
+++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/fluent.conf
@ -75,13 +75,30 @@
 # Detect exceptions in the log output and forward them as one log entry.
 <match raw.kubernetes.**>
-  type detect_exceptions
+  @type copy
-  remove_tag_prefix raw
+
-  message log
+  <store>
-  stream stream
+    @type prometheus
-  multiline_flush_interval 5
+
-  max_bytes 500000
+    <metric>
-  max_lines 1000
+      type counter
      name logging_line_count
      desc Total number of lines generated by application containers
      <labels>
        tag ${tag}
      </labels>
    </metric>
  </store>
  <store>
    # Second copy target: merge multi-line exception stack traces into a
    # single record before they are re-emitted.
    @type detect_exceptions
    # Strip the leading "raw" component from the tag of emitted records, so
    # raw.kubernetes.* becomes kubernetes.* (presumably so the kubernetes.**
    # output stanza picks them up — confirm).
    remove_tag_prefix raw
    # Record field holding the log text to scan for exceptions.
    message log
    # Record field identifying the originating stream; per the plugin docs,
    # traces are tracked separately per value of this field.
    stream stream
    # Flush a partially collected trace after this many seconds.
    multiline_flush_interval 5
    # Upper bounds on a single buffered trace (bytes / lines).
    max_bytes 500000
    max_lines 1000
  </store>
 </match>
 # Example:
@ -283,46 +300,96 @@
  tag kubelet
 </source>
 # Prometheus monitoring
 # Serve the metrics registered by the prometheus plugin over HTTP on port 80
 # (the same port the image's Dockerfile exposes).
 <source>
  @type prometheus
  port 80
 </source>
 # Additionally export fluentd's own internal metrics (per the plugin docs:
 # buffer and retry statistics of the output plugins).
 <source>
  @type prometheus_monitor
 </source>
 # Discard fluentd's internal events (tagged fluent.*) here, so they never
 # reach the catch-all <match **> output stanza further down.
 <match fluent.**>
  @type null
 </match>
 # We use 2 output stanzas - one to handle the container logs and one to handle
 # the node daemon logs, the latter of which explicitly sends its logs to the
 # compute.googleapis.com service rather than container.googleapis.com to keep
 # them separate since most users don't care about the node logs.
 <match kubernetes.**>
-  type google_cloud
+  @type copy
-  # Set the buffer type to file to improve the reliability and reduce the memory consumption
+
-  buffer_type file
+  <store>
-  buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer
+    @type google_cloud
-  # Set queue_full action to block because we want to pause gracefully
+
-  # in case of the off-the-limits load instead of throwing an exception
+    # Set the buffer type to file to improve the reliability and reduce the memory consumption
-  buffer_queue_full_action block
+    buffer_type file
-  # Set the chunk limit conservatively to avoid exceeding the GCL limit
+    buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer
-  # of 10MiB per write request.
+    # Set queue_full action to block because we want to pause gracefully
-  buffer_chunk_limit 2M
+    # in case of the off-the-limits load instead of throwing an exception
-  # Cap the combined memory usage of this buffer and the one below to
+    buffer_queue_full_action block
-  # 2MiB/chunk * (6 + 2) chunks = 16 MiB
+    # Set the chunk limit conservatively to avoid exceeding the GCL limit
-  buffer_queue_limit 6
+    # of 10MiB per write request.
-  # Never wait more than 5 seconds before flushing logs in the non-error case.
+    buffer_chunk_limit 2M
-  flush_interval 5s
+    # Cap the combined memory usage of this buffer and the one below to
-  # Never wait longer than 30 seconds between retries.
+    # 2MiB/chunk * (6 + 2) chunks = 16 MiB
-  max_retry_wait 30
+    buffer_queue_limit 6
-  # Disable the limit on the number of retries (retry forever).
+    # Never wait more than 5 seconds before flushing logs in the non-error case.
-  disable_retry_limit
+    flush_interval 5s
-  # Use multiple threads for processing.
+    # Never wait longer than 30 seconds between retries.
-  num_threads 2
+    max_retry_wait 30
    # Disable the limit on the number of retries (retry forever).
    disable_retry_limit
    # Use multiple threads for processing.
    num_threads 2
  </store>
  <store>
    # Second copy target: count the records handled by this stanza.
    @type prometheus
    <metric>
      # No "key" is configured, so per the plugin docs the counter is
      # incremented by one for every record.
      type counter
      name logging_entry_count
      desc Total number of log entries generated by application containers
      <labels>
        # ${tag} is replaced with the fluentd tag of the record.
        # NOTE(review): if tags embed per-container file names, this label
        # can grow without bound — confirm the cardinality is acceptable.
        tag ${tag}
        # Distinguishes these series from the "system" ones that share the
        # same metric name in the catch-all stanza.
        component container
      </labels>
    </metric>
  </store>
 </match>
 # Keep a smaller buffer here since these logs are less important than the user's
 # container logs.
 <match **>
-  type google_cloud
+  @type copy
-  detect_subservice false
+
-  buffer_type file
+  <store>
-  buffer_path /var/log/fluentd-buffers/kubernetes.system.buffer
+    @type google_cloud
-  buffer_queue_full_action block
+
-  buffer_chunk_limit 2M
+    detect_subservice false
-  buffer_queue_limit 2
+    buffer_type file
-  flush_interval 5s
+    buffer_path /var/log/fluentd-buffers/kubernetes.system.buffer
-  max_retry_wait 30
+    buffer_queue_full_action block
-  disable_retry_limit
+    buffer_chunk_limit 2M
-  num_threads 2
+    buffer_queue_limit 2
    flush_interval 5s
    max_retry_wait 30
    disable_retry_limit
    num_threads 2
  </store>
  <store>
    # Second copy target: count the records handled by this catch-all stanza.
    @type prometheus
    <metric>
      # No "key" is configured, so per the plugin docs the counter is
      # incremented by one for every record.
      type counter
      name logging_entry_count
      desc Total number of log entries generated by system components
      <labels>
        # ${tag} is replaced with the fluentd tag of the record.
        tag ${tag}
        # Distinguishes these series from the "container" ones that share the
        # same metric name in the kubernetes.** stanza.
        component system
      </labels>
    </metric>
  </store>
 </match>