diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile index 696d74dada8..053f3933c1c 100644 --- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile +++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile @@ -26,25 +26,14 @@ # 6. When PR is approved make the gcr.io version of the image: make build push # 7. Revert the referendes to kubernetes/fluentd-gcp:$(TAG) to gcr.io/google_containers/fluentd-gcp:$(TAG) -.PHONY: kbuild kpush +.PHONY: build push -TAG = 1.25 - -# Rules for building the test image for deployment to Dockerhub with user kubernetes. - -kbuild: - docker build -t kubernetes/fluentd-gcp:$(TAG) . - - -kpush: - docker push kubernetes/fluentd-gcp:$(TAG) - - -# Rules for building the real image for deployment to gcr.io +PREFIX=gcr.io/google_containers +TAG = 1.26 build: - docker build -t gcr.io/google_containers/fluentd-gcp:$(TAG) . + docker build -t $(PREFIX)/fluentd-gcp:$(TAG) . push: - gcloud docker -- push gcr.io/google_containers/fluentd-gcp:$(TAG) + gcloud docker -- push $(PREFIX)/fluentd-gcp:$(TAG) diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd-journal.conf b/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd-journal.conf index d02a7a22780..9b844a25c49 100644 --- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd-journal.conf +++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd-journal.conf @@ -222,12 +222,18 @@ # them separate since most users don't care about the node logs. 
type google_cloud + # Set the buffer type to file to improve reliability and reduce memory consumption + buffer_type file + buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer + # Set queue_full action to block because we want to pause gracefully + # under excessive load instead of throwing an exception + buffer_queue_full_action block # Set the chunk limit conservatively to avoid exceeding the GCL limit # of 10MiB per write request. buffer_chunk_limit 2M # Cap the combined memory usage of this buffer and the one below to - # 2MiB/chunk * (24 + 8) chunks = 64 MiB - buffer_queue_limit 24 + # 2MiB/chunk * (6 + 2) chunks = 16 MiB + buffer_queue_limit 6 # Never wait more than 5 seconds before flushing logs in the non-error case. flush_interval 5s # Never wait longer than 30 seconds between retries. @@ -235,7 +241,7 @@ # Disable the limit on the number of retries (retry forever). disable_retry_limit # Use multiple threads for processing. - num_threads 8 + num_threads 2 # Keep a smaller buffer here since these logs are less important than the user's @@ -243,10 +249,13 @@ type google_cloud detect_subservice false + buffer_type file + buffer_path /var/log/fluentd-buffers/kubernetes.system.buffer + buffer_queue_full_action block buffer_chunk_limit 2M - buffer_queue_limit 8 + buffer_queue_limit 2 flush_interval 5s max_retry_wait 30 disable_retry_limit - num_threads 8 + num_threads 2 diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf b/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf index bce71ae584d..37135026192 100644 --- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf +++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf @@ -206,12 +206,18 @@ # them separate since most users don't care about the node logs. 
type google_cloud + # Set the buffer type to file to improve reliability and reduce memory consumption + buffer_type file + buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer + # Set queue_full action to block because we want to pause gracefully + # under excessive load instead of throwing an exception + buffer_queue_full_action block # Set the chunk limit conservatively to avoid exceeding the GCL limit # of 10MiB per write request. buffer_chunk_limit 2M # Cap the combined memory usage of this buffer and the one below to - # 2MiB/chunk * (24 + 8) chunks = 64 MiB - buffer_queue_limit 24 + # 2MiB/chunk * (6 + 2) chunks = 16 MiB + buffer_queue_limit 6 # Never wait more than 5 seconds before flushing logs in the non-error case. flush_interval 5s # Never wait longer than 30 seconds between retries. @@ -219,7 +225,7 @@ # Disable the limit on the number of retries (retry forever). disable_retry_limit # Use multiple threads for processing. - num_threads 8 + num_threads 2 # Keep a smaller buffer here since these logs are less important than the user's @@ -227,10 +233,13 @@ type google_cloud detect_subservice false + buffer_type file + buffer_path /var/log/fluentd-buffers/kubernetes.system.buffer + buffer_queue_full_action block buffer_chunk_limit 2M - buffer_queue_limit 8 + buffer_queue_limit 2 flush_interval 5s max_retry_wait 30 disable_retry_limit - num_threads 8 + num_threads 2