Dont generatePodStatus twice for new pods

2025-09-13 13:14:05 +00:00 · 2015-06-09 17:50:15 -07:00
parent 5a02fc07d8
commit b5ed0e9b13
8 changed files with 163 additions and 81 deletions
--- a/pkg/kubelet/kubelet.go
+++ b/pkg/kubelet/kubelet.go
@@ -88,7 +88,7 @@ type SyncHandler interface {
 	// Syncs current state to match the specified pods. SyncPodType specified what
 	// type of sync is occuring per pod. StartTime specifies the time at which
 	// syncing began (for use in monitoring).
-	SyncPods(pods []*api.Pod, podSyncTypes map[types.UID]metrics.SyncPodType, mirrorPods map[string]*api.Pod,
+	SyncPods(pods []*api.Pod, podSyncTypes map[types.UID]SyncPodType, mirrorPods map[string]*api.Pod,
 		startTime time.Time) error
 }

@@ -1078,7 +1078,7 @@ func (kl *Kubelet) makePodDataDirs(pod *api.Pod) error {
 	return nil
 }

-func (kl *Kubelet) syncPod(pod *api.Pod, mirrorPod *api.Pod, runningPod kubecontainer.Pod) error {
+func (kl *Kubelet) syncPod(pod *api.Pod, mirrorPod *api.Pod, runningPod kubecontainer.Pod, updateType SyncPodType) error {
 	podFullName := kubecontainer.GetPodFullName(pod)
 	uid := pod.UID

@@ -1130,10 +1130,38 @@ func (kl *Kubelet) syncPod(pod *api.Pod, mirrorPod *api.Pod, runningPod kubecont
 	}
 	kl.volumeManager.SetVolumes(pod.UID, podVolumes)

-	podStatus, err := kl.generatePodStatus(pod)
-	if err != nil {
-		glog.Errorf("Unable to get status for pod %q (uid %q): %v", podFullName, uid, err)
-		return err
+	// The kubelet is the source of truth for pod status. It ignores the status sent from
+	// the apiserver and regenerates status for every pod update, incrementally updating
+	// the status it received at pod creation time.
+	//
+	// The container runtime needs 2 pieces of information from the status to sync a pod:
+	// The terminated state of containers (to restart them) and the podIp (for liveness probes).
+	// New pods don't have either, so we skip the expensive status generation step.
+	//
+	// If we end up here with a create event for an already running pod, it could result in a
+	// restart of its containers. This cannot happen unless the kubelet restarts, because the
+	// delete before the second create is processed by SyncPods, which cancels this pod worker.
+	//
+	// If the kubelet restarts, we have a bunch of running containers for which we get create
+	// events. This is ok, because the pod status for these will include the podIp and terminated
+	// status. Any race conditions here effectively boils down to -- the pod worker didn't sync
+	// state of a newly started container with the apiserver before the kubelet restarted, so
+	// it's OK to pretend like the kubelet started them after it restarted.
+	//
+	// Also note that deletes currently have an updateType of `create` set in UpdatePods.
+	// This, again, does not matter because deletes are not processed by this method.
+
+	var podStatus api.PodStatus
+	if updateType == SyncPodCreate {
+		podStatus = pod.Status
+		glog.V(3).Infof("Not generating pod status for new pod %v", podFullName)
+	} else {
+		var err error
+		podStatus, err = kl.generatePodStatus(pod)
+		if err != nil {
+			glog.Errorf("Unable to get status for pod %q (uid %q): %v", podFullName, uid, err)
+			return err
+		}
 	}

 	pullSecrets, err := kl.getPullSecretsForPod(pod)
@@ -1306,7 +1334,7 @@ func (kl *Kubelet) filterOutTerminatedPods(allPods []*api.Pod) []*api.Pod {
 }

 // SyncPods synchronizes the configured list of pods (desired state) with the host current state.
-func (kl *Kubelet) SyncPods(allPods []*api.Pod, podSyncTypes map[types.UID]metrics.SyncPodType,
+func (kl *Kubelet) SyncPods(allPods []*api.Pod, podSyncTypes map[types.UID]SyncPodType,
 	mirrorPods map[string]*api.Pod, start time.Time) error {
 	defer func() {
 		metrics.SyncPodsLatency.Observe(metrics.SinceInMicroseconds(start))
@@ -1344,7 +1372,7 @@ func (kl *Kubelet) SyncPods(allPods []*api.Pod, podSyncTypes map[types.UID]metri
 		})

 		// Note the number of containers for new pods.
-		if val, ok := podSyncTypes[pod.UID]; ok && (val == metrics.SyncPodCreate) {
+		if val, ok := podSyncTypes[pod.UID]; ok && (val == SyncPodCreate) {
 			metrics.ContainersPerPodCount.Observe(float64(len(pod.Spec.Containers)))
 		}
 	}
@@ -1486,7 +1514,7 @@ func (kl *Kubelet) checkCapacityExceeded(pods []*api.Pod) (fitting []*api.Pod, n
 }

 // handleOutOfDisk detects if pods can't fit due to lack of disk space.
-func (kl *Kubelet) handleOutOfDisk(pods []*api.Pod, podSyncTypes map[types.UID]metrics.SyncPodType) []*api.Pod {
+func (kl *Kubelet) handleOutOfDisk(pods []*api.Pod, podSyncTypes map[types.UID]SyncPodType) []*api.Pod {
 	if len(podSyncTypes) == 0 {
 		// regular sync. no new pods
 		return pods
@@ -1519,7 +1547,7 @@ func (kl *Kubelet) handleOutOfDisk(pods []*api.Pod, podSyncTypes map[types.UID]m
 	for i := range pods {
 		pod := pods[i]
 		// Only reject pods that didn't start yet.
-		if podSyncTypes[pod.UID] == metrics.SyncPodCreate {
+		if podSyncTypes[pod.UID] == SyncPodCreate {
 			kl.recorder.Eventf(pod, "OutOfDisk", "Cannot start the pod due to lack of disk space.")
 			kl.statusManager.SetPodStatus(pod, api.PodStatus{
 				Phase:   api.PodFailed,
@@ -1578,7 +1606,7 @@ func (kl *Kubelet) handleNotFittingPods(pods []*api.Pod) []*api.Pod {

 // admitPods handles pod admission. It filters out terminated pods, and pods
 // that don't fit on the node, and may reject pods if node is overcommitted.
-func (kl *Kubelet) admitPods(allPods []*api.Pod, podSyncTypes map[types.UID]metrics.SyncPodType) []*api.Pod {
+func (kl *Kubelet) admitPods(allPods []*api.Pod, podSyncTypes map[types.UID]SyncPodType) []*api.Pod {
 	// Pod phase progresses monotonically. Once a pod has reached a final state,
 	// it should never leave irregardless of the restart policy. The statuses
 	// of such pods should not be changed, and there is no need to sync them.
@@ -1616,7 +1644,7 @@ func (kl *Kubelet) syncLoop(updates <-chan PodUpdate, handler SyncHandler) {
 	glog.Info("Starting kubelet main sync loop.")
 	for {
 		unsyncedPod := false
-		podSyncTypes := make(map[types.UID]metrics.SyncPodType)
+		podSyncTypes := make(map[types.UID]SyncPodType)
 		select {
 		case u, ok := <-updates:
 			if !ok {