diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Dockerfile b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Dockerfile index 2b38e4ae8c0..f7ed180d190 100644 --- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Dockerfile +++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Dockerfile @@ -22,7 +22,6 @@ FROM gcr.io/google_containers/ubuntu-slim:0.6 - # Disable prompts from apt ENV DEBIAN_FRONTEND noninteractive @@ -37,6 +36,7 @@ RUN apt-get -qq update && \ td-agent-gem install --no-document fluent-plugin-systemd -v 0.0.5 && \ td-agent-gem install --no-document fluent-plugin-google-cloud -v 0.5.2 && \ td-agent-gem install --no-document fluent-plugin-detect-exceptions -v 0.0.4 && \ + td-agent-gem install --no-document fluent-plugin-prometheus -v 0.2.1 && \ # Remove build tools apt-get remove -y -qq gcc make && \ apt-get autoremove -y -qq && \ @@ -56,5 +56,7 @@ COPY fluent.conf /etc/td-agent/td-agent.conf # Copy the entrypoint for the container COPY run.sh /run.sh +EXPOSE 80 + # Start Fluentd to pick up our config that watches Docker container logs. CMD /run.sh $FLUENTD_ARGS diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile index b154b824fcd..cdf36cc31a9 100644 --- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile +++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile @@ -26,7 +26,7 @@ .PHONY: build push PREFIX=gcr.io/google_containers -TAG = 1.38 +TAG = 1.40 build: docker build --pull -t $(PREFIX)/fluentd-gcp:$(TAG) . diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/fluent.conf b/cluster/addons/fluentd-gcp/fluentd-gcp-image/fluent.conf index 775c78ccd15..3ca289232bf 100644 --- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/fluent.conf +++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/fluent.conf @@ -75,13 +75,30 @@ # Detect exceptions in the log output and forward them as one log entry. - type detect_exceptions - remove_tag_prefix raw - message log - stream stream - multiline_flush_interval 5 - max_bytes 500000 - max_lines 1000 + @type copy + + + @type prometheus + + + type counter + name logging_line_count + desc Total number of lines generated by application containers + + tag ${tag} + + + + + @type detect_exceptions + + remove_tag_prefix raw + message log + stream stream + multiline_flush_interval 5 + max_bytes 500000 + max_lines 1000 + # Example: @@ -283,46 +300,96 @@ tag kubelet +# Prometheus monitoring + + @type prometheus + port 80 + + + + @type prometheus_monitor + + + + @type null + + # We use 2 output stanzas - one to handle the container logs and one to handle # the node daemon logs, the latter of which explicitly sends its logs to the # compute.googleapis.com service rather than container.googleapis.com to keep # them separate since most users don't care about the node logs. - type google_cloud - # Set the buffer type to file to improve the reliability and reduce the memory consumption - buffer_type file - buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer - # Set queue_full action to block because we want to pause gracefully - # in case of the off-the-limits load instead of throwing an exception - buffer_queue_full_action block - # Set the chunk limit conservatively to avoid exceeding the GCL limit - # of 10MiB per write request. - buffer_chunk_limit 2M - # Cap the combined memory usage of this buffer and the one below to - # 2MiB/chunk * (6 + 2) chunks = 16 MiB - buffer_queue_limit 6 - # Never wait more than 5 seconds before flushing logs in the non-error case. - flush_interval 5s - # Never wait longer than 30 seconds between retries. - max_retry_wait 30 - # Disable the limit on the number of retries (retry forever). - disable_retry_limit - # Use multiple threads for processing. - num_threads 2 + @type copy + + + @type google_cloud + + # Set the buffer type to file to improve the reliability and reduce the memory consumption + buffer_type file + buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer + # Set queue_full action to block because we want to pause gracefully + # in case of the off-the-limits load instead of throwing an exception + buffer_queue_full_action block + # Set the chunk limit conservatively to avoid exceeding the GCL limit + # of 10MiB per write request. + buffer_chunk_limit 2M + # Cap the combined memory usage of this buffer and the one below to + # 2MiB/chunk * (6 + 2) chunks = 16 MiB + buffer_queue_limit 6 + # Never wait more than 5 seconds before flushing logs in the non-error case. + flush_interval 5s + # Never wait longer than 30 seconds between retries. + max_retry_wait 30 + # Disable the limit on the number of retries (retry forever). + disable_retry_limit + # Use multiple threads for processing. + num_threads 2 + + + @type prometheus + + + type counter + name logging_entry_count + desc Total number of log entries generated by application containers + + tag ${tag} + component container + + + # Keep a smaller buffer here since these logs are less important than the user's # container logs. - type google_cloud - detect_subservice false - buffer_type file - buffer_path /var/log/fluentd-buffers/kubernetes.system.buffer - buffer_queue_full_action block - buffer_chunk_limit 2M - buffer_queue_limit 2 - flush_interval 5s - max_retry_wait 30 - disable_retry_limit - num_threads 2 + @type copy + + + @type google_cloud + + detect_subservice false + buffer_type file + buffer_path /var/log/fluentd-buffers/kubernetes.system.buffer + buffer_queue_full_action block + buffer_chunk_limit 2M + buffer_queue_limit 2 + flush_interval 5s + max_retry_wait 30 + disable_retry_limit + num_threads 2 + + + @type prometheus + + + type counter + name logging_entry_count + desc Total number of log entries generated by system components + + tag ${tag} + component system + + +