Mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-07-30 15:05:27 +00:00)
Merge pull request #47462 from derekwaynecarr/strip-container-id-events
Automatic merge from submit-queue

Strip container id from events

**What this PR does / why we need it**: Reduces event spam from the kubelet in bad pod scenarios. Because the container ID appeared verbatim in event messages, every retry produced a distinct event that could not be deduplicated.

**Which issue this PR fixes**: relates to https://github.com/kubernetes/kubernetes/issues/47366

**Special notes for your reviewer**: Pods in permanent failure states created a unique event per failure.

**Release note**:

```release-note
None
```
This commit is contained in: commit 562e721ece
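The core trick is small enough to show standalone. A minimal sketch of the idea, assuming a bare message string (the helper name `sanitizeEventMessage` is ours, not the PR's):

```go
package main

import (
	"fmt"
	"strings"
)

// sanitizeEventMessage shows the idea behind this PR: replace every
// occurrence of the unique container ID in an event message with the
// stable container name, so repeated failures produce byte-identical
// messages that event deduplication can collapse.
func sanitizeEventMessage(message, containerID, containerName string) string {
	if containerID == "" {
		return message
	}
	return strings.Replace(message, containerID, containerName, -1)
}

func main() {
	// Hypothetical runtime error that embeds a container ID.
	msg := `failed to start container "4a1f2b9c": executable file not found`
	fmt.Println(sanitizeEventMessage(msg, "4a1f2b9c", "nginx"))
	// Output: failed to start container "nginx": executable file not found
}
```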
```diff
@@ -233,6 +233,7 @@ func (ds *dockerService) removeContainerLogSymlink(containerID string) error {
 func (ds *dockerService) StartContainer(containerID string) error {
 	err := ds.client.StartContainer(containerID)
 	if err != nil {
+		err = transformStartContainerError(err)
 		return fmt.Errorf("failed to start container %q: %v", containerID, err)
 	}
 	// Create container log symlink.
```
```diff
@@ -55,6 +55,10 @@ const (
 var (
 	conflictRE = regexp.MustCompile(`Conflict. (?:.)+ is already in use by container ([0-9a-z]+)`)
 
+	// this is hacky, but extremely common.
+	// if a container starts but the executable file is not found, runc gives a message that matches
+	startRE = regexp.MustCompile(`\\\\\\\"(.*)\\\\\\\": executable file not found`)
+
 	// Docker changes the security option separator from ':' to '=' in the 1.23
 	// API version.
 	optsSeparatorChangeVersion = semver.MustParse(securityOptSeparatorChangeVersion)
```
```diff
@@ -359,6 +363,19 @@ func recoverFromCreationConflictIfNeeded(client libdocker.Interface, createConfi
 	return client.CreateContainer(createConfig)
 }
 
+// transformStartContainerError does regex parsing on returned error
+// for where container runtimes are giving less than ideal error messages.
+func transformStartContainerError(err error) error {
+	if err == nil {
+		return nil
+	}
+	matches := startRE.FindStringSubmatch(err.Error())
+	if len(matches) > 0 {
+		return fmt.Errorf("executable not found in $PATH")
+	}
+	return err
+}
+
 // getSecurityOptSeparator returns the security option separator based on the
 // docker API version.
 // TODO: Remove this function along with the relevant code when we no longer
```
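To see what the new helper actually catches, here is a runnable sketch; the regex and function body are copied from the hunk above, while the sample error string is fabricated to imitate a docker/runc message (real messages carry additional prefixes and escaping):

```go
package main

import (
	"errors"
	"fmt"
	"regexp"
)

// startRE and transformStartContainerError are copied from the diff above.
// The pattern matches the triple-backslash-escaped quotes that runc's
// "executable file not found" message carries once docker has wrapped it.
var startRE = regexp.MustCompile(`\\\\\\\"(.*)\\\\\\\": executable file not found`)

func transformStartContainerError(err error) error {
	if err == nil {
		return nil
	}
	matches := startRE.FindStringSubmatch(err.Error())
	if len(matches) > 0 {
		return fmt.Errorf("executable not found in $PATH")
	}
	return err
}

func main() {
	// Fabricated example of the escaped form docker returns.
	raw := errors.New(`oci runtime error: exec: \\\"foo\\\": executable file not found`)
	fmt.Println(transformStartContainerError(raw)) // executable not found in $PATH

	other := errors.New("some unrelated failure")
	fmt.Println(transformStartContainerError(other)) // passed through unchanged
}
```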
```diff
@@ -55,6 +55,7 @@ go_library(
         "//vendor/github.com/fsnotify/fsnotify:go_default_library",
         "//vendor/github.com/golang/glog:go_default_library",
         "//vendor/github.com/google/cadvisor/info/v1:go_default_library",
+        "//vendor/google.golang.org/grpc:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/util/errors:go_default_library",
```
```diff
@@ -24,9 +24,12 @@ import (
 	"os"
 	"path/filepath"
 	"sort"
+	"strings"
 	"sync"
 	"time"
 
+	"google.golang.org/grpc"
+
 	"github.com/armon/circbuf"
 	"github.com/golang/glog"
 
```
```diff
@@ -45,6 +48,30 @@ import (
 	"k8s.io/kubernetes/pkg/util/tail"
 )
 
+// recordContainerEvent should be used by the runtime manager for all container related events.
+// it has sanity checks to ensure that we do not write events that can abuse our masters.
+// in particular, it ensures that a containerID never appears in an event message as that
+// is prone to causing a lot of distinct events that do not count well.
+// it replaces any reference to a containerID with the containerName which is stable, and is what users know.
+func (m *kubeGenericRuntimeManager) recordContainerEvent(pod *v1.Pod, container *v1.Container, containerID, eventType, reason, message string, args ...interface{}) {
+	ref, err := kubecontainer.GenerateContainerRef(pod, container)
+	if err != nil {
+		glog.Errorf("Can't make a ref to pod %q, container %v: %v", format.Pod(pod), container.Name, err)
+		return
+	}
+	eventMessage := message
+	if len(args) > 0 {
+		eventMessage = fmt.Sprintf(message, args...)
+	}
+	// this is a hack, but often the error from the runtime includes the containerID
+	// which kills our ability to deduplicate events. this protection makes a huge
+	// difference in the number of unique events
+	if containerID != "" {
+		eventMessage = strings.Replace(eventMessage, containerID, container.Name, -1)
+	}
+	m.recorder.Event(events.ToObjectReference(ref), eventType, reason, eventMessage)
+}
+
 // startContainer starts a container and returns a message indicates why it is failed on error.
 // It starts the container through the following steps:
 // * pull the image
```
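A quick sanity check on the message handling above; `formatEventMessage` is our own extraction of just the format-then-substitute steps from `recordContainerEvent`, so it can be exercised without a runtime manager (e.g. in a scratch test package):

```go
package kuberuntime

import (
	"fmt"
	"strings"
	"testing"
)

// formatEventMessage reproduces the message handling in recordContainerEvent:
// format only when args are supplied, then replace the container ID with the
// stable container name.
func formatEventMessage(message, containerID, containerName string, args ...interface{}) string {
	eventMessage := message
	if len(args) > 0 {
		eventMessage = fmt.Sprintf(message, args...)
	}
	if containerID != "" {
		eventMessage = strings.Replace(eventMessage, containerID, containerName, -1)
	}
	return eventMessage
}

func TestFormatEventMessage(t *testing.T) {
	err := fmt.Errorf("cannot start container 4a1f2b9c: executable not found in $PATH")
	got := formatEventMessage("Error: %v", "4a1f2b9c", "nginx", err)
	want := "Error: cannot start container nginx: executable not found in $PATH"
	if got != want {
		t.Fatalf("got %q, want %q", got, want)
	}
}
```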
```diff
@@ -74,15 +101,16 @@ func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandb
 
 	containerConfig, err := m.generateContainerConfig(container, pod, restartCount, podIP, imageRef)
 	if err != nil {
-		m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedToCreateContainer, "Failed to create container with error: %v", err)
+		m.recordContainerEvent(pod, container, "", v1.EventTypeWarning, events.FailedToCreateContainer, "Error: %v", grpc.ErrorDesc(err))
 		return "Generate Container Config Failed", err
 	}
 	containerID, err := m.runtimeService.CreateContainer(podSandboxID, containerConfig, podSandboxConfig)
 	if err != nil {
-		m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedToCreateContainer, "Failed to create container with error: %v", err)
+		m.recordContainerEvent(pod, container, containerID, v1.EventTypeWarning, events.FailedToCreateContainer, "Error: %v", grpc.ErrorDesc(err))
 		return "Create Container Failed", err
 	}
-	m.recorder.Eventf(ref, v1.EventTypeNormal, events.CreatedContainer, "Created container with id %v", containerID)
+	m.recordContainerEvent(pod, container, containerID, v1.EventTypeNormal, events.CreatedContainer, "Created container")
+
 	if ref != nil {
 		m.containerRefManager.SetRef(kubecontainer.ContainerID{
 			Type: m.runtimeName,
```
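A note on the switch to `grpc.ErrorDesc(err)` at these call sites: it extracts just the description from a gRPC status error, so the recorded event omits the `rpc error: code = ... desc = ...` framing that would otherwise vary the message. A small illustration (`status.Errorf` is used here only to fabricate a gRPC-shaped error):

```go
package main

import (
	"fmt"

	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
)

func main() {
	err := status.Errorf(codes.Unknown, "failed to create container: image not found")

	// The raw error string carries gRPC framing.
	fmt.Println(err)
	// rpc error: code = Unknown desc = failed to create container: image not found

	// ErrorDesc strips the framing down to the description.
	fmt.Println(grpc.ErrorDesc(err))
	// failed to create container: image not found
}
```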
```diff
@@ -93,11 +121,10 @@ func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandb
 	// Step 3: start the container.
 	err = m.runtimeService.StartContainer(containerID)
 	if err != nil {
-		m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedToStartContainer,
-			"Failed to start container with id %v with error: %v", containerID, err)
+		m.recordContainerEvent(pod, container, containerID, v1.EventTypeWarning, events.FailedToStartContainer, "Error: %v", grpc.ErrorDesc(err))
 		return "Start Container Failed", err
 	}
-	m.recorder.Eventf(ref, v1.EventTypeNormal, events.StartedContainer, "Started container with id %v", containerID)
+	m.recordContainerEvent(pod, container, containerID, v1.EventTypeNormal, events.StartedContainer, "Started container")
 
 	// Symlink container logs to the legacy container log location for cluster logging
 	// support.
```
```diff
@@ -120,8 +147,7 @@ func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandb
 		}
 		msg, handlerErr := m.runner.Run(kubeContainerID, pod, container, container.Lifecycle.PostStart)
 		if handlerErr != nil {
-			err := fmt.Errorf("PostStart handler: %v", handlerErr)
-			m.generateContainerEvent(kubeContainerID, v1.EventTypeWarning, events.FailedPostStartHook, msg)
+			m.recordContainerEvent(pod, container, kubeContainerID.ID, v1.EventTypeWarning, events.FailedPostStartHook, msg)
 			m.killContainer(pod, kubeContainerID, container.Name, "FailedPostStartHook", nil)
 			return "PostStart Hook Failed", err
 		}
```
```diff
@@ -439,16 +465,6 @@ func toKubeContainerStatus(status *runtimeapi.ContainerStatus, runtimeName strin
 	return cStatus
 }
 
-// generateContainerEvent generates an event for the container.
-func (m *kubeGenericRuntimeManager) generateContainerEvent(containerID kubecontainer.ContainerID, eventType, reason, message string) {
-	ref, ok := m.containerRefManager.GetRef(containerID)
-	if !ok {
-		glog.Warningf("No ref for container %q", containerID)
-		return
-	}
-	m.recorder.Event(events.ToObjectReference(ref), eventType, reason, message)
-}
-
 // executePreStopHook runs the pre-stop lifecycle hooks if applicable and returns the duration it takes.
 func (m *kubeGenericRuntimeManager) executePreStopHook(pod *v1.Pod, containerID kubecontainer.ContainerID, containerSpec *v1.Container, gracePeriod int64) int64 {
 	glog.V(3).Infof("Running preStop hook for container %q", containerID.String())
```
```diff
@@ -460,7 +476,7 @@ func (m *kubeGenericRuntimeManager) executePreStopHook(pod *v1.Pod, containerID
 		defer utilruntime.HandleCrash()
 		if msg, err := m.runner.Run(containerID, pod, containerSpec, containerSpec.Lifecycle.PreStop); err != nil {
 			glog.Errorf("preStop hook for container %q failed: %v", containerSpec.Name, err)
-			m.generateContainerEvent(containerID, v1.EventTypeWarning, events.FailedPreStopHook, msg)
+			m.recordContainerEvent(pod, containerSpec, containerID.ID, v1.EventTypeWarning, events.FailedPreStopHook, msg)
 		}
 	}()
 
```
```diff
@@ -568,7 +584,7 @@ func (m *kubeGenericRuntimeManager) killContainer(pod *v1.Pod, containerID kubec
 	if reason != "" {
 		message = fmt.Sprint(message, ":", reason)
 	}
-	m.generateContainerEvent(containerID, v1.EventTypeNormal, events.KillingContainer, message)
+	m.recordContainerEvent(pod, containerSpec, containerID.ID, v1.EventTypeNormal, events.KillingContainer, message)
 	m.containerRefManager.ClearRef(containerID)
 
 	return err
```
```diff
@@ -180,7 +180,13 @@ func (p *podWorkers) managePodLoop(podUpdates <-chan UpdatePodOptions) {
 		}
 		if err != nil {
 			glog.Errorf("Error syncing pod %s (%q), skipping: %v", update.Pod.UID, format.Pod(update.Pod), err)
-			p.recorder.Eventf(update.Pod, v1.EventTypeWarning, events.FailedSync, "Error syncing pod, skipping: %v", err)
+			// if we failed sync, we throw more specific events for why it happened.
+			// as a result, i question the value of this event.
+			// TODO: determine if we can remove this in a future release.
+			// do not include descriptive text that can vary on why it failed so in a pathological
+			// scenario, kubelet does not create enough discrete events that miss default aggregation
+			// window.
+			p.recorder.Eventf(update.Pod, v1.EventTypeWarning, events.FailedSync, "Error syncing pod")
 		}
 		p.wrapUp(update.Pod.UID, err)
 	}
```
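The fixed `"Error syncing pod"` message matters because the event machinery deduplicates only byte-identical events (bumping a count on one Event object) and aggregates merely similar ones only within a default window; a message that embeds `err` defeats both. A toy illustration of the effect, not the real correlator logic:

```go
package main

import "fmt"

// countDistinct groups messages the way identical-event deduplication
// would: one entry per distinct message, with an occurrence count.
func countDistinct(messages []string) map[string]int {
	counts := make(map[string]int)
	for _, m := range messages {
		counts[m]++
	}
	return counts
}

func main() {
	// Before: each failure embeds a varying error, so nothing collapses.
	before := []string{
		"Error syncing pod, skipping: failed to start container 4a1f2b9c",
		"Error syncing pod, skipping: failed to start container 9c2e7d10",
	}
	// After: one fixed message, one event with an incremented count.
	after := []string{"Error syncing pod", "Error syncing pod"}

	fmt.Println(len(countDistinct(before)), "distinct events before") // 2
	fmt.Println(len(countDistinct(after)), "distinct event after")    // 1
}
```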