mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-08 19:47:56 +00:00
Merge pull request #116690 from smarterclayton/handle_twice
kubelet: HandlePodCleanups takes an extra sync to restart pods
This commit is contained in:
commit
d48c883372
@ -1034,14 +1034,6 @@ func (kl *Kubelet) HandlePodCleanups(ctx context.Context) error {
|
||||
}
|
||||
|
||||
allPods, mirrorPods := kl.podManager.GetPodsAndMirrorPods()
|
||||
activePods := kl.filterOutInactivePods(allPods)
|
||||
allRegularPods, allStaticPods := splitPodsByStatic(allPods)
|
||||
activeRegularPods, activeStaticPods := splitPodsByStatic(activePods)
|
||||
metrics.DesiredPodCount.WithLabelValues("").Set(float64(len(allRegularPods)))
|
||||
metrics.DesiredPodCount.WithLabelValues("true").Set(float64(len(allStaticPods)))
|
||||
metrics.ActivePodCount.WithLabelValues("").Set(float64(len(activeRegularPods)))
|
||||
metrics.ActivePodCount.WithLabelValues("true").Set(float64(len(activeStaticPods)))
|
||||
metrics.MirrorPodCount.Set(float64(len(mirrorPods)))
|
||||
|
||||
// Pod phase progresses monotonically. Once a pod has reached a final state,
|
||||
// it should never leave regardless of the restart policy. The statuses
|
||||
@ -1139,6 +1131,17 @@ func (kl *Kubelet) HandlePodCleanups(ctx context.Context) error {
|
||||
klog.V(3).InfoS("Clean up orphaned mirror pods")
|
||||
kl.deleteOrphanedMirrorPods()
|
||||
|
||||
// After pruning pod workers for terminated pods get the list of active pods for
|
||||
// metrics and to determine restarts.
|
||||
activePods := kl.filterOutInactivePods(allPods)
|
||||
allRegularPods, allStaticPods := splitPodsByStatic(allPods)
|
||||
activeRegularPods, activeStaticPods := splitPodsByStatic(activePods)
|
||||
metrics.DesiredPodCount.WithLabelValues("").Set(float64(len(allRegularPods)))
|
||||
metrics.DesiredPodCount.WithLabelValues("true").Set(float64(len(allStaticPods)))
|
||||
metrics.ActivePodCount.WithLabelValues("").Set(float64(len(activeRegularPods)))
|
||||
metrics.ActivePodCount.WithLabelValues("true").Set(float64(len(activeStaticPods)))
|
||||
metrics.MirrorPodCount.Set(float64(len(mirrorPods)))
|
||||
|
||||
// At this point, the pod worker is aware of which pods are not desired (SyncKnownPods).
|
||||
// We now look through the set of active pods for those that the pod worker is not aware of
|
||||
// and deliver an update. The most common reason a pod is not known is because the pod was
|
||||
|
@ -5363,6 +5363,100 @@ func TestKubelet_HandlePodCleanups(t *testing.T) {
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "terminated pod is restarted in the same invocation that it is detected",
|
||||
wantErr: false,
|
||||
pods: []*v1.Pod{
|
||||
func() *v1.Pod {
|
||||
pod := staticPod()
|
||||
pod.Annotations = map[string]string{"version": "2"}
|
||||
return pod
|
||||
}(),
|
||||
},
|
||||
prepareWorker: func(t *testing.T, w *podWorkers, records map[types.UID][]syncPodRecord) {
|
||||
// simulate a delete and recreate of the static pod
|
||||
pod := simplePod()
|
||||
w.UpdatePod(UpdatePodOptions{
|
||||
UpdateType: kubetypes.SyncPodCreate,
|
||||
StartTime: time.Unix(1, 0).UTC(),
|
||||
Pod: pod,
|
||||
})
|
||||
drainAllWorkers(w)
|
||||
w.UpdatePod(UpdatePodOptions{
|
||||
UpdateType: kubetypes.SyncPodKill,
|
||||
Pod: pod,
|
||||
})
|
||||
pod2 := simplePod()
|
||||
pod2.Annotations = map[string]string{"version": "2"}
|
||||
w.UpdatePod(UpdatePodOptions{
|
||||
UpdateType: kubetypes.SyncPodCreate,
|
||||
Pod: pod2,
|
||||
})
|
||||
drainAllWorkers(w)
|
||||
},
|
||||
wantWorker: func(t *testing.T, w *podWorkers, records map[types.UID][]syncPodRecord) {
|
||||
uid := types.UID("1")
|
||||
if len(w.podSyncStatuses) != 1 {
|
||||
t.Fatalf("unexpected sync statuses: %#v", w.podSyncStatuses)
|
||||
}
|
||||
s, ok := w.podSyncStatuses[uid]
|
||||
if !ok || s.IsTerminationRequested() || s.IsTerminationStarted() || s.IsFinished() || s.IsWorking() || s.IsDeleted() {
|
||||
t.Fatalf("unexpected requested pod termination: %#v", s)
|
||||
}
|
||||
if s.pendingUpdate != nil || s.activeUpdate == nil || s.activeUpdate.Pod == nil || s.activeUpdate.Pod.Annotations["version"] != "2" {
|
||||
t.Fatalf("unexpected restarted pod: %#v", s.activeUpdate.Pod)
|
||||
}
|
||||
// expect we get a pod sync record for kill that should have the same grace period as before (2), but no
|
||||
// running pod because the SyncKnownPods method killed it
|
||||
if actual, expected := records[uid], []syncPodRecord{
|
||||
{name: "pod1", updateType: kubetypes.SyncPodCreate},
|
||||
{name: "pod1", updateType: kubetypes.SyncPodKill, gracePeriod: &one},
|
||||
{name: "pod1", terminated: true},
|
||||
{name: "pod1", updateType: kubetypes.SyncPodCreate},
|
||||
}; !reflect.DeepEqual(expected, actual) {
|
||||
t.Fatalf("unexpected pod sync records: %s", cmp.Diff(expected, actual, cmp.AllowUnexported(syncPodRecord{})))
|
||||
}
|
||||
},
|
||||
expectMetrics: map[string]string{
|
||||
metrics.DesiredPodCount.FQName(): `# HELP kubelet_desired_pods [ALPHA] The number of pods the kubelet is being instructed to run. static is true if the pod is not from the apiserver.
|
||||
# TYPE kubelet_desired_pods gauge
|
||||
kubelet_desired_pods{static=""} 1
|
||||
kubelet_desired_pods{static="true"} 0
|
||||
`,
|
||||
metrics.ActivePodCount.FQName(): `# HELP kubelet_active_pods [ALPHA] The number of pods the kubelet considers active and which are being considered when admitting new pods. static is true if the pod is not from the apiserver.
|
||||
# TYPE kubelet_active_pods gauge
|
||||
kubelet_active_pods{static=""} 1
|
||||
kubelet_active_pods{static="true"} 0
|
||||
`,
|
||||
metrics.OrphanedRuntimePodTotal.FQName(): `# HELP kubelet_orphaned_runtime_pods_total [ALPHA] Number of pods that have been detected in the container runtime without being already known to the pod worker. This typically indicates the kubelet was restarted while a pod was force deleted in the API or in the local configuration, which is unusual.
|
||||
# TYPE kubelet_orphaned_runtime_pods_total counter
|
||||
kubelet_orphaned_runtime_pods_total 0
|
||||
`,
|
||||
metrics.RestartedPodTotal.FQName(): `# HELP kubelet_restarted_pods_total [ALPHA] Number of pods that have been restarted because they were deleted and recreated with the same UID while the kubelet was watching them (common for static pods, extremely uncommon for API pods)
|
||||
# TYPE kubelet_restarted_pods_total counter
|
||||
kubelet_restarted_pods_total{static=""} 1
|
||||
kubelet_restarted_pods_total{static="true"} 0
|
||||
`,
|
||||
metrics.WorkingPodCount.FQName(): `# HELP kubelet_working_pods [ALPHA] Number of pods the kubelet is actually running, broken down by lifecycle phase, whether the pod is desired, orphaned, or runtime only (also orphaned), and whether the pod is static. An orphaned pod has been removed from local configuration or force deleted in the API and consumes resources that are not otherwise visible.
|
||||
# TYPE kubelet_working_pods gauge
|
||||
kubelet_working_pods{config="desired",lifecycle="sync",static=""} 1
|
||||
kubelet_working_pods{config="desired",lifecycle="sync",static="true"} 0
|
||||
kubelet_working_pods{config="desired",lifecycle="terminated",static=""} 0
|
||||
kubelet_working_pods{config="desired",lifecycle="terminated",static="true"} 0
|
||||
kubelet_working_pods{config="desired",lifecycle="terminating",static=""} 0
|
||||
kubelet_working_pods{config="desired",lifecycle="terminating",static="true"} 0
|
||||
kubelet_working_pods{config="orphan",lifecycle="sync",static=""} 0
|
||||
kubelet_working_pods{config="orphan",lifecycle="sync",static="true"} 0
|
||||
kubelet_working_pods{config="orphan",lifecycle="terminated",static=""} 0
|
||||
kubelet_working_pods{config="orphan",lifecycle="terminated",static="true"} 0
|
||||
kubelet_working_pods{config="orphan",lifecycle="terminating",static=""} 0
|
||||
kubelet_working_pods{config="orphan",lifecycle="terminating",static="true"} 0
|
||||
kubelet_working_pods{config="runtime_only",lifecycle="sync",static="unknown"} 0
|
||||
kubelet_working_pods{config="runtime_only",lifecycle="terminated",static="unknown"} 0
|
||||
kubelet_working_pods{config="runtime_only",lifecycle="terminating",static="unknown"} 0
|
||||
`,
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
|
Loading…
Reference in New Issue
Block a user