From 95a4ea3fc87dc49e8337ecbe404ca5c6a968ee7f Mon Sep 17 00:00:00 2001 From: Alex Robinson Date: Tue, 26 Jan 2016 23:57:02 +0000 Subject: [PATCH 1/3] Increase the fluentd buffer chunk size to improve write throughput. Also reduce the max wait between retries, 30 seconds should be more than enough backoff. --- .../fluentd-es-image/td-agent.conf | 10 +++++----- .../fluentd-gcp-image/google-fluentd.conf | 18 +++++++++--------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/cluster/addons/fluentd-elasticsearch/fluentd-es-image/td-agent.conf b/cluster/addons/fluentd-elasticsearch/fluentd-es-image/td-agent.conf index 4a155ac4296..ddfe37c979f 100644 --- a/cluster/addons/fluentd-elasticsearch/fluentd-es-image/td-agent.conf +++ b/cluster/addons/fluentd-elasticsearch/fluentd-es-image/td-agent.conf @@ -186,12 +186,12 @@ port 9200 logstash_format true # Set the chunk limit the same as for fluentd-gcp. - buffer_chunk_limit 512K - # Cap buffer memory usage to 512KB/chunk * 128 chunks = 65 MB - buffer_queue_limit 128 + buffer_chunk_limit 2M + # Cap buffer memory usage to 2MiB/chunk * 32 chunks = 64 MiB + buffer_queue_limit 32 flush_interval 5s - # Never wait longer than 5 minutes between retries. - max_retry_wait 300 + # Never wait longer than 30 seconds between retries. + max_retry_wait 30 # Disable the limit on the number of retries (retry forever). disable_retry_limit diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf b/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf index 25a49850f01..9b0b7ff4933 100644 --- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf +++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf @@ -130,15 +130,15 @@ type google_cloud # Set the chunk limit conservatively to avoid exceeding the GCL limit - # of 2MB per write request. - buffer_chunk_limit 512K + # of 10MiB per write request. 
+ buffer_chunk_limit 2M # Cap the combined memory usage of this buffer and the one below to - # 512KB/chunk * (96 + 32) chunks = 65 MB - buffer_queue_limit 96 + # 2MiB/chunk * (24 + 8) chunks = 64 MiB + buffer_queue_limit 24 # Never wait more than 5 seconds before flushing logs in the non-error case. flush_interval 5s - # Never wait longer than 5 minutes between retries. - max_retry_wait 300 + # Never wait longer than 30 seconds between retries. + max_retry_wait 30 # Disable the limit on the number of retries (retry forever). disable_retry_limit @@ -148,9 +148,9 @@ type google_cloud detect_subservice false - buffer_chunk_limit 512K - buffer_queue_limit 32 + buffer_chunk_limit 2M + buffer_queue_limit 8 flush_interval 5s - max_retry_wait 300 + max_retry_wait 30 disable_retry_limit From ac13e851e2acb53179278eb2e818249563c92a15 Mon Sep 17 00:00:00 2001 From: Alex Robinson Date: Tue, 26 Jan 2016 23:59:27 +0000 Subject: [PATCH 2/3] Don't let fluentd pipe its own logs directly back into itself. --- .../fluentd-elasticsearch/fluentd-es-image/td-agent.conf | 5 +++++ .../addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/cluster/addons/fluentd-elasticsearch/fluentd-es-image/td-agent.conf b/cluster/addons/fluentd-elasticsearch/fluentd-es-image/td-agent.conf index ddfe37c979f..bbe39854181 100644 --- a/cluster/addons/fluentd-elasticsearch/fluentd-es-image/td-agent.conf +++ b/cluster/addons/fluentd-elasticsearch/fluentd-es-image/td-agent.conf @@ -100,6 +100,11 @@ # problem yet to be solved as secrets are not usable in static pods which the fluentd # pod must be until a per-node controller is available in Kubernetes. +# Do not directly collect fluentd's own logs to avoid infinite loops. 
+<match fluent.**> + type null +</match> + <source> type tail path /var/log/containers/*.log diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf b/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf index 9b0b7ff4933..98caf02fb60 100644 --- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf +++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf @@ -42,6 +42,11 @@ # the name of the Kubernetes container regardless of how many times the # Kubernetes pod has been restarted (resulting in a several Docker container IDs). +# Do not directly collect fluentd's own logs to avoid infinite loops. +<match fluent.**> + type null +</match> + <source> type tail format json From 6acf2972319e64dc851c5feaac29a4d55944f0a8 Mon Sep 17 00:00:00 2001 From: Alex Robinson Date: Wed, 27 Jan 2016 00:07:46 +0000 Subject: [PATCH 3/3] Update the fluentd versions to include fixes for #19405. --- cluster/addons/fluentd-elasticsearch/fluentd-es-image/Makefile | 2 +- cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile | 2 +- cluster/saltbase/salt/fluentd-es/fluentd-es.yaml | 2 +- cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.yaml | 2 +- docs/getting-started-guides/logging.md | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cluster/addons/fluentd-elasticsearch/fluentd-es-image/Makefile b/cluster/addons/fluentd-elasticsearch/fluentd-es-image/Makefile index fdae1c06f3c..f20c9c99e2c 100644 --- a/cluster/addons/fluentd-elasticsearch/fluentd-es-image/Makefile +++ b/cluster/addons/fluentd-elasticsearch/fluentd-es-image/Makefile @@ -1,7 +1,7 @@ .PHONY: build push IMAGE = fluentd-elasticsearch -TAG = 1.12 +TAG = 1.13 build: docker build -t gcr.io/google_containers/$(IMAGE):$(TAG) . 
diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile index ff9a86c4c96..e7a506f3aa6 100644 --- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile +++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile @@ -14,7 +14,7 @@ .PHONY: kbuild kpush -TAG = 1.14 +TAG = 1.15 # Rules for building the test image for deployment to Dockerhub with user kubernetes. diff --git a/cluster/saltbase/salt/fluentd-es/fluentd-es.yaml b/cluster/saltbase/salt/fluentd-es/fluentd-es.yaml index 6784dd13b33..af662a74811 100644 --- a/cluster/saltbase/salt/fluentd-es/fluentd-es.yaml +++ b/cluster/saltbase/salt/fluentd-es/fluentd-es.yaml @@ -8,7 +8,7 @@ metadata: spec: containers: - name: fluentd-elasticsearch - image: gcr.io/google_containers/fluentd-elasticsearch:1.12 + image: gcr.io/google_containers/fluentd-elasticsearch:1.13 resources: limits: cpu: 100m diff --git a/cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.yaml b/cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.yaml index 90a7d86a019..61869685fbd 100644 --- a/cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.yaml +++ b/cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.yaml @@ -8,7 +8,7 @@ metadata: spec: containers: - name: fluentd-cloud-logging - image: gcr.io/google_containers/fluentd-gcp:1.14 + image: gcr.io/google_containers/fluentd-gcp:1.15 resources: limits: cpu: 100m diff --git a/docs/getting-started-guides/logging.md b/docs/getting-started-guides/logging.md index c0632b8f220..273065f6764 100644 --- a/docs/getting-started-guides/logging.md +++ b/docs/getting-started-guides/logging.md @@ -172,7 +172,7 @@ metadata: spec: containers: - name: fluentd-cloud-logging - image: gcr.io/google_containers/fluentd-gcp:1.14 + image: gcr.io/google_containers/fluentd-gcp:1.15 resources: limits: cpu: 100m