mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 11:50:44 +00:00
Merge pull request #42768 from yujuhong/fix_sandbox_listing
Automatic merge from submit-queue dockershim: Fix the race condition in ListPodSandbox In ListPodSandbox(), we 1. List all sandbox docker containers 2. List all sandbox checkpoints. If the checkpoint does not have a corresponding container in (1), we return partial result based on the checkpoint. The problem is that new PodSandboxes can be created between step (1) and (2). In those cases, we will see the checkpoints, but not the sandbox containers. This leads to strange behavior because the partial result from the checkpoint does not include some critical information. For example, the creation timestamp'd be zero, and that would cause kubelet's garbage collector to immediately remove the sandbox. This change fixes that by getting the list of checkpoints before listing all the containers (since in RunPodSandbox we create them in the reverse order).
This commit is contained in:
commit
6fac75c80a
@ -341,6 +341,18 @@ func (ds *dockerService) ListPodSandbox(filter *runtimeapi.PodSandboxFilter) ([]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Make sure we get the list of checkpoints first so that we don't include
|
||||
// new PodSandboxes that are being created right now.
|
||||
var err error
|
||||
checkpoints := []string{}
|
||||
if filter == nil {
|
||||
checkpoints, err = ds.checkpointHandler.ListCheckpoints()
|
||||
if err != nil {
|
||||
glog.Errorf("Failed to list checkpoints: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
containers, err := ds.client.ListContainers(opts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@ -367,27 +379,21 @@ func (ds *dockerService) ListPodSandbox(filter *runtimeapi.PodSandboxFilter) ([]
|
||||
// Include sandbox that could only be found with its checkpoint if no filter is applied
|
||||
// These PodSandbox will only include PodSandboxID, Name, Namespace.
|
||||
// These PodSandbox will be in PodSandboxState_SANDBOX_NOTREADY state.
|
||||
if filter == nil {
|
||||
checkpoints, err := ds.checkpointHandler.ListCheckpoints()
|
||||
for _, id := range checkpoints {
|
||||
if _, ok := sandboxIDs[id]; ok {
|
||||
continue
|
||||
}
|
||||
checkpoint, err := ds.checkpointHandler.GetCheckpoint(id)
|
||||
if err != nil {
|
||||
glog.Errorf("Failed to list checkpoints: %v", err)
|
||||
}
|
||||
for _, id := range checkpoints {
|
||||
if _, ok := sandboxIDs[id]; ok {
|
||||
continue
|
||||
}
|
||||
checkpoint, err := ds.checkpointHandler.GetCheckpoint(id)
|
||||
if err != nil {
|
||||
glog.Errorf("Failed to retrieve checkpoint for sandbox %q: %v", id, err)
|
||||
glog.Errorf("Failed to retrieve checkpoint for sandbox %q: %v", id, err)
|
||||
|
||||
if err == errors.CorruptCheckpointError {
|
||||
glog.V(2).Info("Removing corrupted checkpoint %q: %+v", id, *checkpoint)
|
||||
ds.checkpointHandler.RemoveCheckpoint(id)
|
||||
}
|
||||
continue
|
||||
if err == errors.CorruptCheckpointError {
|
||||
glog.V(2).Info("Removing corrupted checkpoint %q: %+v", id, *checkpoint)
|
||||
ds.checkpointHandler.RemoveCheckpoint(id)
|
||||
}
|
||||
result = append(result, checkpointToRuntimeAPISandbox(id, checkpoint))
|
||||
continue
|
||||
}
|
||||
result = append(result, checkpointToRuntimeAPISandbox(id, checkpoint))
|
||||
}
|
||||
|
||||
// Include legacy sandboxes if there are still legacy sandboxes not cleaned up yet.
|
||||
|
Loading…
Reference in New Issue
Block a user