diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Dockerfile b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Dockerfile
index 2b38e4ae8c0..f7ed180d190 100644
--- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Dockerfile
+++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Dockerfile
@@ -22,7 +22,6 @@
FROM gcr.io/google_containers/ubuntu-slim:0.6
-
# Disable prompts from apt
ENV DEBIAN_FRONTEND noninteractive
@@ -37,6 +36,7 @@ RUN apt-get -qq update && \
td-agent-gem install --no-document fluent-plugin-systemd -v 0.0.5 && \
td-agent-gem install --no-document fluent-plugin-google-cloud -v 0.5.2 && \
td-agent-gem install --no-document fluent-plugin-detect-exceptions -v 0.0.4 && \
+ td-agent-gem install --no-document fluent-plugin-prometheus -v 0.2.1 && \
# Remove build tools
apt-get remove -y -qq gcc make && \
apt-get autoremove -y -qq && \
@@ -56,5 +56,7 @@ COPY fluent.conf /etc/td-agent/td-agent.conf
# Copy the entrypoint for the container
COPY run.sh /run.sh
+EXPOSE 80
+
# Start Fluentd to pick up our config that watches Docker container logs.
CMD /run.sh $FLUENTD_ARGS
diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile
index b154b824fcd..cdf36cc31a9 100644
--- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile
+++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile
@@ -26,7 +26,7 @@
.PHONY: build push
PREFIX=gcr.io/google_containers
-TAG = 1.38
+TAG = 1.40
build:
docker build --pull -t $(PREFIX)/fluentd-gcp:$(TAG) .
diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/fluent.conf b/cluster/addons/fluentd-gcp/fluentd-gcp-image/fluent.conf
index 775c78ccd15..3ca289232bf 100644
--- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/fluent.conf
+++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/fluent.conf
@@ -75,13 +75,30 @@
# Detect exceptions in the log output and forward them as one log entry.
- type detect_exceptions
- remove_tag_prefix raw
- message log
- stream stream
- multiline_flush_interval 5
- max_bytes 500000
- max_lines 1000
+ @type copy
+
+
+ @type prometheus
+
+
+ type counter
+ name logging_line_count
+ desc Total number of lines generated by application containers
+
+ tag ${tag}
+
+
+
+
+ @type detect_exceptions
+
+ remove_tag_prefix raw
+ message log
+ stream stream
+ multiline_flush_interval 5
+ max_bytes 500000
+ max_lines 1000
+
# Example:
@@ -283,46 +300,96 @@
tag kubelet
+# Prometheus monitoring: expose a metrics endpoint on port 80 and export Fluentd's internal metrics.
+
+ @type prometheus
+ port 80
+
+
+
+ @type prometheus_monitor
+
+
+
+ @type null
+
+
# We use 2 output stanzas - one to handle the container logs and one to handle
# the node daemon logs, the latter of which explicitly sends its logs to the
# compute.googleapis.com service rather than container.googleapis.com to keep
# them separate since most users don't care about the node logs.
- type google_cloud
- # Set the buffer type to file to improve the reliability and reduce the memory consumption
- buffer_type file
- buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer
- # Set queue_full action to block because we want to pause gracefully
- # in case of the off-the-limits load instead of throwing an exception
- buffer_queue_full_action block
- # Set the chunk limit conservatively to avoid exceeding the GCL limit
- # of 10MiB per write request.
- buffer_chunk_limit 2M
- # Cap the combined memory usage of this buffer and the one below to
- # 2MiB/chunk * (6 + 2) chunks = 16 MiB
- buffer_queue_limit 6
- # Never wait more than 5 seconds before flushing logs in the non-error case.
- flush_interval 5s
- # Never wait longer than 30 seconds between retries.
- max_retry_wait 30
- # Disable the limit on the number of retries (retry forever).
- disable_retry_limit
- # Use multiple threads for processing.
- num_threads 2
+ @type copy
+
+
+ @type google_cloud
+
+ # Set the buffer type to file to improve reliability and reduce memory consumption
+ buffer_type file
+ buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer
+ # Set the queue_full action to block so that ingestion pauses gracefully
+ # under excessive load instead of throwing an exception
+ buffer_queue_full_action block
+ # Set the chunk limit conservatively to avoid exceeding the GCL limit
+ # of 10MiB per write request.
+ buffer_chunk_limit 2M
+ # Cap the combined memory usage of this buffer and the one below to
+ # 2MiB/chunk * (6 + 2) chunks = 16 MiB
+ buffer_queue_limit 6
+ # Never wait more than 5 seconds before flushing logs in the non-error case.
+ flush_interval 5s
+ # Never wait longer than 30 seconds between retries.
+ max_retry_wait 30
+ # Disable the limit on the number of retries (retry forever).
+ disable_retry_limit
+ # Use multiple threads for processing.
+ num_threads 2
+
+
+ @type prometheus
+
+
+ type counter
+ name logging_entry_count
+ desc Total number of log entries generated by application containers
+
+ tag ${tag}
+ component container
+
+
+
# Keep a smaller buffer here since these logs are less important than the user's
# container logs.
- type google_cloud
- detect_subservice false
- buffer_type file
- buffer_path /var/log/fluentd-buffers/kubernetes.system.buffer
- buffer_queue_full_action block
- buffer_chunk_limit 2M
- buffer_queue_limit 2
- flush_interval 5s
- max_retry_wait 30
- disable_retry_limit
- num_threads 2
+ @type copy
+
+
+ @type google_cloud
+
+ detect_subservice false
+ buffer_type file
+ buffer_path /var/log/fluentd-buffers/kubernetes.system.buffer
+ buffer_queue_full_action block
+ buffer_chunk_limit 2M
+ buffer_queue_limit 2
+ flush_interval 5s
+ max_retry_wait 30
+ disable_retry_limit
+ num_threads 2
+
+
+ @type prometheus
+
+
+ type counter
+ name logging_entry_count
+ desc Total number of log entries generated by system components
+
+ tag ${tag}
+ component system
+
+
+