migrate pkg/kubelet/preemption to contextual logging

This commit is contained in:
Ed Bartosh
2025-02-02 17:36:18 +02:00
parent b96e3cac74
commit 4bc2ad6eea
7 changed files with 31 additions and 17 deletions

View File

@@ -215,6 +215,7 @@ linters:
contextual k8s.io/kubernetes/pkg/kubelet/kubeletconfig/.*
contextual k8s.io/kubernetes/pkg/kubelet/nodeshutdown/.*
contextual k8s.io/kubernetes/pkg/kubelet/pod/.*
contextual k8s.io/kubernetes/pkg/kubelet/preemption/.*
# As long as contextual logging is alpha or beta, all WithName, WithValues,
# NewContext calls have to go through klog. Once it is GA, we can lift

View File

@@ -229,6 +229,7 @@ linters:
contextual k8s.io/kubernetes/pkg/kubelet/kubeletconfig/.*
contextual k8s.io/kubernetes/pkg/kubelet/nodeshutdown/.*
contextual k8s.io/kubernetes/pkg/kubelet/pod/.*
contextual k8s.io/kubernetes/pkg/kubelet/preemption/.*
# As long as contextual logging is alpha or beta, all WithName, WithValues,
# NewContext calls have to go through klog. Once it is GA, we can lift

View File

@@ -61,6 +61,7 @@ contextual k8s.io/kubernetes/pkg/kubelet/apis/.*
contextual k8s.io/kubernetes/pkg/kubelet/kubeletconfig/.*
contextual k8s.io/kubernetes/pkg/kubelet/nodeshutdown/.*
contextual k8s.io/kubernetes/pkg/kubelet/pod/.*
contextual k8s.io/kubernetes/pkg/kubelet/preemption/.*
# As long as contextual logging is alpha or beta, all WithName, WithValues,
# NewContext calls have to go through klog. Once it is GA, we can lift

View File

@@ -17,7 +17,9 @@ limitations under the License.
package lifecycle
import (
"k8s.io/api/core/v1"
"context"
v1 "k8s.io/api/core/v1"
)
// AdmissionFailureHandlerStub is an AdmissionFailureHandler that does not perform any handling of admission failure.
@@ -32,6 +34,6 @@ func NewAdmissionFailureHandlerStub() *AdmissionFailureHandlerStub {
}
// HandleAdmissionFailure simply passes admission rejection on, with no special handling.
func (n *AdmissionFailureHandlerStub) HandleAdmissionFailure(admitPod *v1.Pod, failureReasons []PredicateFailureReason) ([]PredicateFailureReason, error) {
func (n *AdmissionFailureHandlerStub) HandleAdmissionFailure(ctx context.Context, admitPod *v1.Pod, failureReasons []PredicateFailureReason) ([]PredicateFailureReason, error) {
return failureReasons, nil
}

View File

@@ -17,6 +17,7 @@ limitations under the License.
package lifecycle
import (
"context"
"fmt"
"runtime"
@@ -92,7 +93,7 @@ type pluginResourceUpdateFuncType func(*schedulerframework.NodeInfo, *PodAdmitAt
// AdmissionFailureHandler is an interface which defines how to deal with a failure to admit a pod.
// This allows for the graceful handling of pod admission failure.
type AdmissionFailureHandler interface {
HandleAdmissionFailure(admitPod *v1.Pod, failureReasons []PredicateFailureReason) ([]PredicateFailureReason, error)
HandleAdmissionFailure(ctx context.Context, admitPod *v1.Pod, failureReasons []PredicateFailureReason) ([]PredicateFailureReason, error)
}
type predicateAdmitHandler struct {
@@ -114,6 +115,10 @@ func NewPredicateAdmitHandler(getNodeAnyWayFunc getNodeAnyWayFuncType, admission
}
func (w *predicateAdmitHandler) Admit(attrs *PodAdmitAttributes) PodAdmitResult {
// TODO: pass context to Admit when migrating this component to
// contextual logging
ctx := context.TODO()
logger := klog.FromContext(ctx)
node, err := w.getNodeAnyWayFunc()
if err != nil {
klog.ErrorS(err, "Cannot get Node info")
@@ -158,7 +163,7 @@ func (w *predicateAdmitHandler) Admit(attrs *PodAdmitAttributes) PodAdmitResult
// ensure the node has enough plugin resources for that required in pods
if err = w.pluginResourceUpdateFunc(nodeInfo, attrs); err != nil {
message := fmt.Sprintf("Update plugin resources failed due to %v, which is unexpected.", err)
klog.InfoS("Failed to admit pod", "pod", klog.KObj(admitPod), "message", message)
logger.Info("Failed to admit pod", "pod", klog.KObj(admitPod), "message", message)
return PodAdmitResult{
Admit: false,
Reason: UnexpectedAdmissionError,
@@ -179,11 +184,11 @@ func (w *predicateAdmitHandler) Admit(attrs *PodAdmitAttributes) PodAdmitResult
reasons := generalFilter(podWithoutMissingExtendedResources, nodeInfo)
fit := len(reasons) == 0
if !fit {
reasons, err = w.admissionFailureHandler.HandleAdmissionFailure(admitPod, reasons)
reasons, err = w.admissionFailureHandler.HandleAdmissionFailure(ctx, admitPod, reasons)
fit = len(reasons) == 0 && err == nil
if err != nil {
message := fmt.Sprintf("Unexpected error while attempting to recover from admission failure: %v", err)
klog.InfoS("Failed to admit pod, unexpected error while attempting to recover from admission failure", "pod", klog.KObj(admitPod), "err", err)
logger.Info("Failed to admit pod, unexpected error while attempting to recover from admission failure", "pod", klog.KObj(admitPod), "err", err)
return PodAdmitResult{
Admit: fit,
Reason: UnexpectedAdmissionError,
@@ -196,7 +201,7 @@ func (w *predicateAdmitHandler) Admit(attrs *PodAdmitAttributes) PodAdmitResult
var message string
if len(reasons) == 0 {
message = fmt.Sprint("GeneralPredicates failed due to unknown reason, which is unexpected.")
klog.InfoS("Failed to admit pod: GeneralPredicates failed due to unknown reason, which is unexpected", "pod", klog.KObj(admitPod))
logger.Info("Failed to admit pod: GeneralPredicates failed due to unknown reason, which is unexpected", "pod", klog.KObj(admitPod))
return PodAdmitResult{
Admit: fit,
Reason: UnknownReason,
@@ -209,7 +214,7 @@ func (w *predicateAdmitHandler) Admit(attrs *PodAdmitAttributes) PodAdmitResult
case *PredicateFailureError:
reason = re.PredicateName
message = re.Error()
klog.V(2).InfoS("Predicate failed on Pod", "pod", klog.KObj(admitPod), "err", message)
logger.V(2).Info("Predicate failed on Pod", "pod", klog.KObj(admitPod), "err", message)
case *InsufficientResourceError:
switch re.ResourceName {
case v1.ResourceCPU:
@@ -224,11 +229,11 @@ func (w *predicateAdmitHandler) Admit(attrs *PodAdmitAttributes) PodAdmitResult
reason = fmt.Sprintf("%s%s", InsufficientResourcePrefix, re.ResourceName)
}
message = re.Error()
klog.V(2).InfoS("Predicate failed on Pod", "pod", klog.KObj(admitPod), "err", message)
logger.V(2).Info("Predicate failed on Pod", "pod", klog.KObj(admitPod), "err", message)
default:
reason = UnexpectedPredicateFailureType
message = fmt.Sprintf("GeneralPredicates failed due to %v, which is unexpected.", r)
klog.InfoS("Failed to admit pod", "pod", klog.KObj(admitPod), "err", message)
logger.Info("Failed to admit pod", "pod", klog.KObj(admitPod), "err", message)
}
return PodAdmitResult{
Admit: fit,

View File

@@ -17,6 +17,7 @@ limitations under the License.
package preemption
import (
"context"
"fmt"
"math"
@@ -60,7 +61,7 @@ func NewCriticalPodAdmissionHandler(getPodsFunc eviction.ActivePodsFunc, killPod
// HandleAdmissionFailure gracefully handles admission rejection, and, in some cases,
// to allow admission of the pod despite its previous failure.
func (c *CriticalPodAdmissionHandler) HandleAdmissionFailure(admitPod *v1.Pod, failureReasons []lifecycle.PredicateFailureReason) ([]lifecycle.PredicateFailureReason, error) {
func (c *CriticalPodAdmissionHandler) HandleAdmissionFailure(ctx context.Context, admitPod *v1.Pod, failureReasons []lifecycle.PredicateFailureReason) ([]lifecycle.PredicateFailureReason, error) {
if !kubetypes.IsCriticalPod(admitPod) {
return failureReasons, nil
}
@@ -82,7 +83,7 @@ func (c *CriticalPodAdmissionHandler) HandleAdmissionFailure(admitPod *v1.Pod, f
// Return only reasons that are not resource related, since critical pods cannot fail admission for resource reasons.
return nonResourceReasons, nil
}
err := c.evictPodsToFreeRequests(admitPod, admissionRequirementList(resourceReasons))
err := c.evictPodsToFreeRequests(ctx, admitPod, admissionRequirementList(resourceReasons))
// if no error is returned, preemption succeeded and the pod is safe to admit.
return nil, err
}
@@ -90,7 +91,8 @@ func (c *CriticalPodAdmissionHandler) HandleAdmissionFailure(admitPod *v1.Pod, f
// evictPodsToFreeRequests takes a list of insufficient resources, and attempts to free them by evicting pods
// based on requests. For example, if the only insufficient resource is 200Mb of memory, this function could
// evict a pod with request=250Mb.
func (c *CriticalPodAdmissionHandler) evictPodsToFreeRequests(admitPod *v1.Pod, insufficientResources admissionRequirementList) error {
func (c *CriticalPodAdmissionHandler) evictPodsToFreeRequests(ctx context.Context, admitPod *v1.Pod, insufficientResources admissionRequirementList) error {
logger := klog.FromContext(ctx)
podsToPreempt, err := getPodsToPreempt(admitPod, c.getPodsFunc(), insufficientResources)
if err != nil {
return fmt.Errorf("preemption: error finding a set of pods to preempt: %v", err)
@@ -99,7 +101,7 @@ func (c *CriticalPodAdmissionHandler) evictPodsToFreeRequests(admitPod *v1.Pod,
// record that we are evicting the pod
c.recorder.Eventf(pod, v1.EventTypeWarning, events.PreemptContainer, message)
// this is a blocking call and should only return when the pod and its containers are killed.
klog.V(2).InfoS("Preempting pod to free up resources", "pod", klog.KObj(pod), "podUID", pod.UID, "insufficientResources", insufficientResources.toString(), "requestingPod", klog.KObj(admitPod))
logger.V(2).Info("Preempting pod to free up resources", "pod", klog.KObj(pod), "podUID", pod.UID, "insufficientResources", insufficientResources.toString(), "requestingPod", klog.KObj(admitPod))
err := c.killPodFunc(pod, true, nil, func(status *v1.PodStatus) {
status.Phase = v1.PodFailed
status.Reason = events.PreemptContainer
@@ -113,7 +115,7 @@ func (c *CriticalPodAdmissionHandler) evictPodsToFreeRequests(admitPod *v1.Pod,
})
})
if err != nil {
klog.ErrorS(err, "Failed to evict pod", "pod", klog.KObj(pod))
logger.Error(err, "Failed to evict pod", "pod", klog.KObj(pod))
// In future syncPod loops, the kubelet will retry the pod deletion steps that it was stuck on.
continue
}
@@ -122,7 +124,7 @@ func (c *CriticalPodAdmissionHandler) evictPodsToFreeRequests(admitPod *v1.Pod,
} else {
metrics.Preemptions.WithLabelValues("").Inc()
}
klog.InfoS("Pod evicted successfully", "pod", klog.KObj(pod))
logger.Info("Pod evicted successfully", "pod", klog.KObj(pod))
}
return nil
}

View File

@@ -26,6 +26,7 @@ import (
"k8s.io/client-go/tools/record"
kubeapi "k8s.io/kubernetes/pkg/apis/core"
"k8s.io/kubernetes/pkg/apis/scheduling"
"k8s.io/kubernetes/test/utils/ktesting"
)
const (
@@ -91,6 +92,7 @@ func getTestCriticalPodAdmissionHandler(podProvider *fakePodProvider, podKiller
}
func TestEvictPodsToFreeRequests(t *testing.T) {
tCtx := ktesting.Init(t)
type testRun struct {
testName string
isPodKillerWithError bool
@@ -144,7 +146,7 @@ func TestEvictPodsToFreeRequests(t *testing.T) {
podKiller := newFakePodKiller(r.isPodKillerWithError)
criticalPodAdmissionHandler := getTestCriticalPodAdmissionHandler(podProvider, podKiller)
podProvider.setPods(r.inputPods)
outErr := criticalPodAdmissionHandler.evictPodsToFreeRequests(allPods[clusterCritical], r.insufficientResources)
outErr := criticalPodAdmissionHandler.evictPodsToFreeRequests(tCtx, allPods[clusterCritical], r.insufficientResources)
outputPods := podKiller.getKilledPods()
if !r.expectErr && outErr != nil {
t.Errorf("evictPodsToFreeRequests returned an unexpected error during the %s test. Err: %v", r.testName, outErr)