diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile
index 696d74dada8..053f3933c1c 100644
--- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile
+++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile
@@ -26,25 +26,14 @@
# 6. When PR is approved make the gcr.io version of the image: make build push
# 7. Revert the references to kubernetes/fluentd-gcp:$(TAG) back to gcr.io/google_containers/fluentd-gcp:$(TAG)
-.PHONY: kbuild kpush
+.PHONY: build push
-TAG = 1.25
-
-# Rules for building the test image for deployment to Dockerhub with user kubernetes.
-
-kbuild:
- docker build -t kubernetes/fluentd-gcp:$(TAG) .
-
-
-kpush:
- docker push kubernetes/fluentd-gcp:$(TAG)
-
-
-# Rules for building the real image for deployment to gcr.io
+PREFIX=gcr.io/google_containers
+TAG = 1.26
build:
- docker build -t gcr.io/google_containers/fluentd-gcp:$(TAG) .
+ docker build -t $(PREFIX)/fluentd-gcp:$(TAG) .
push:
- gcloud docker -- push gcr.io/google_containers/fluentd-gcp:$(TAG)
+ gcloud docker -- push $(PREFIX)/fluentd-gcp:$(TAG)
diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd-journal.conf b/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd-journal.conf
index d02a7a22780..9b844a25c49 100644
--- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd-journal.conf
+++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd-journal.conf
@@ -222,12 +222,18 @@
# them separate since most users don't care about the node logs.
type google_cloud
+ # Set the buffer type to file to improve the reliability and reduce the memory consumption
+ buffer_type file
+ buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer
+ # Set the queue_full action to block so that fluentd pauses gracefully
+ # under excessive load instead of throwing an exception
+ buffer_queue_full_action block
# Set the chunk limit conservatively to avoid exceeding the GCL limit
# of 10MiB per write request.
buffer_chunk_limit 2M
# Cap the combined size of this buffer and the one below to
- # 2MiB/chunk * (24 + 8) chunks = 64 MiB
- buffer_queue_limit 24
+ # 2MiB/chunk * (6 + 2) chunks = 16 MiB
+ buffer_queue_limit 6
# Never wait more than 5 seconds before flushing logs in the non-error case.
flush_interval 5s
# Never wait longer than 30 seconds between retries.
@@ -235,7 +241,7 @@
# Disable the limit on the number of retries (retry forever).
disable_retry_limit
# Use multiple threads for processing.
- num_threads 8
+ num_threads 2
# Keep a smaller buffer here since these logs are less important than the user's
@@ -243,10 +249,13 @@
type google_cloud
detect_subservice false
+ buffer_type file
+ buffer_path /var/log/fluentd-buffers/kubernetes.system.buffer
+ buffer_queue_full_action block
buffer_chunk_limit 2M
- buffer_queue_limit 8
+ buffer_queue_limit 2
flush_interval 5s
max_retry_wait 30
disable_retry_limit
- num_threads 8
+ num_threads 2
diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf b/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf
index bce71ae584d..37135026192 100644
--- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf
+++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf
@@ -206,12 +206,18 @@
# them separate since most users don't care about the node logs.
type google_cloud
+ # Set the buffer type to file to improve the reliability and reduce the memory consumption
+ buffer_type file
+ buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer
+ # Set the queue_full action to block so that fluentd pauses gracefully
+ # under excessive load instead of throwing an exception
+ buffer_queue_full_action block
# Set the chunk limit conservatively to avoid exceeding the GCL limit
# of 10MiB per write request.
buffer_chunk_limit 2M
# Cap the combined size of this buffer and the one below to
- # 2MiB/chunk * (24 + 8) chunks = 64 MiB
- buffer_queue_limit 24
+ # 2MiB/chunk * (6 + 2) chunks = 16 MiB
+ buffer_queue_limit 6
# Never wait more than 5 seconds before flushing logs in the non-error case.
flush_interval 5s
# Never wait longer than 30 seconds between retries.
@@ -219,7 +225,7 @@
# Disable the limit on the number of retries (retry forever).
disable_retry_limit
# Use multiple threads for processing.
- num_threads 8
+ num_threads 2
# Keep a smaller buffer here since these logs are less important than the user's
@@ -227,10 +233,13 @@
type google_cloud
detect_subservice false
+ buffer_type file
+ buffer_path /var/log/fluentd-buffers/kubernetes.system.buffer
+ buffer_queue_full_action block
buffer_chunk_limit 2M
- buffer_queue_limit 8
+ buffer_queue_limit 2
flush_interval 5s
max_retry_wait 30
disable_retry_limit
- num_threads 8
+ num_threads 2