diff --git a/src/cmd/linuxkit/pkglib/docker.go b/src/cmd/linuxkit/pkglib/docker.go
index c88e85fe3..de9316662 100644
--- a/src/cmd/linuxkit/pkglib/docker.go
+++ b/src/cmd/linuxkit/pkglib/docker.go
@@ -263,79 +263,100 @@ func (dr *dockerRunnerImpl) builderEnsureContainer(ctx context.Context, name, im
 		// stop existing one
 		stop   = false
 		remove = false
-		b      bytes.Buffer
+		found  = false
 	)
 
-	if err := dr.command(nil, &b, io.Discard, "--context", dockerContext, "container", "inspect", name); err == nil {
-		// we already have a container named "linuxkit-builder" in the provided context.
-		// get its state and config
-		var containerJSON []dockercontainertypes.InspectResponse
-		if err := json.Unmarshal(b.Bytes(), &containerJSON); err != nil || len(containerJSON) < 1 {
-			return nil, fmt.Errorf("unable to read results of 'container inspect %s': %v", name, err)
-		}
+	const (
 
-		existingImage := containerJSON[0].Config.Image
-		isRunning := containerJSON[0].State.Status == "running"
-
-		switch {
-		case forceRestart:
-			// if forceRestart==true, we always recreate, else we check if it matches our requirements
-			fmt.Printf("told to force restart, replacing existing container %s\n", name)
-			recreate = true
-			stop = isRunning
-			remove = true
-		case existingImage != image:
-			// if image mismatches, recreate
-			fmt.Printf("existing container %s is running image %s instead of target %s, replacing\n", name, existingImage, image)
-			recreate = true
-			stop = isRunning
-			remove = true
-		case !containerJSON[0].HostConfig.Privileged:
-			// if unprivileged, we need to remove it and start a new container with the right permissions
-			fmt.Printf("existing container %s is unprivileged, replacing\n", name)
-			recreate = true
-			stop = isRunning
-			remove = true
-		case isRunning:
-			// if already running with the right image and permissions, just use it
-			fmt.Printf("using existing container %s\n", name)
-			return buildkitClient.New(ctx, fmt.Sprintf("docker-container://%s?context=%s", name, dockerContext))
-		default:
-			// we have an existing container, but it isn't running, so start it
-			fmt.Printf("starting existing container %s\n", name)
-			if err := dr.command(nil, io.Discard, io.Discard, "--context", dockerContext, "container", "start", name); err != nil {
-				return nil, fmt.Errorf("failed to start existing container %s", name)
+		// we attempt the whole inspect/stop/remove/create sequence up to 3 times, waiting 1 second after each failed create.
+		// this is to allow for race conditions, where we inspected, didn't find it,
+		// some other process created it, and we are now trying to create it.
+		buildkitCheckInterval   = 1 * time.Second
+		buildKitCheckRetryCount = 3
+	)
+	for range buildKitCheckRetryCount {
+		var b bytes.Buffer
+		if err := dr.command(nil, &b, io.Discard, "--context", dockerContext, "container", "inspect", name); err == nil {
+			// we already have a container named "linuxkit-builder" in the provided context.
+			// get its state and config
+			var containerJSON []dockercontainertypes.InspectResponse
+			if err := json.Unmarshal(b.Bytes(), &containerJSON); err != nil || len(containerJSON) < 1 {
+				return nil, fmt.Errorf("unable to read results of 'container inspect %s': %v", name, err)
+			}
+
+			existingImage := containerJSON[0].Config.Image
+			isRunning := containerJSON[0].State.Status == "running"
+
+			switch {
+			case forceRestart:
+				// if forceRestart==true, we always recreate, else we check if it matches our requirements
+				fmt.Printf("told to force restart, replacing existing container %s\n", name)
+				recreate = true
+				stop = isRunning
+				remove = true
+			case existingImage != image:
+				// if image mismatches, recreate
+				fmt.Printf("existing container %s is running image %s instead of target %s, replacing\n", name, existingImage, image)
+				recreate = true
+				stop = isRunning
+				remove = true
+			case !containerJSON[0].HostConfig.Privileged:
+				// if unprivileged, we need to remove it and start a new container with the right permissions
+				fmt.Printf("existing container %s is unprivileged, replacing\n", name)
+				recreate = true
+				stop = isRunning
+				remove = true
+			case isRunning:
+				// if already running with the right image and permissions, just use it
+				fmt.Printf("using existing container %s\n", name)
+				return buildkitClient.New(ctx, fmt.Sprintf("docker-container://%s?context=%s", name, dockerContext))
+			default:
+				// we have an existing container, but it isn't running, so start it
+				// note that if it somehow got started in a parallel process or thread,
+				// `container start` is a no-op, so we will get no errors; this just works.
+				fmt.Printf("starting existing container %s\n", name)
+				if err := dr.command(nil, io.Discard, io.Discard, "--context", dockerContext, "container", "start", name); err != nil {
+					return nil, fmt.Errorf("failed to start existing container %s", name)
+				}
+				recreate = false
+				stop = false
+				remove = false
 			}
-			recreate = false
-			stop = false
-			remove = false
 		}
-	}
-	// if we made it here, we need to stop and remove the container, either because of a config mismatch,
-	// or because we received the CLI option
-	if stop {
-		if err := dr.command(nil, io.Discard, io.Discard, "--context", dockerContext, "container", "stop", name); err != nil {
-			return nil, fmt.Errorf("failed to stop existing container %s", name)
+		// if we made it here, stop and remove the existing container only when flagged above,
+		// either because of a config mismatch, or because we received the CLI option
+		if stop {
+			if err := dr.command(nil, io.Discard, io.Discard, "--context", dockerContext, "container", "stop", name); err != nil {
+				return nil, fmt.Errorf("failed to stop existing container %s", name)
+			}
 		}
-	}
-	if remove {
-		if err := dr.command(nil, io.Discard, io.Discard, "--context", dockerContext, "container", "rm", name); err != nil {
-			return nil, fmt.Errorf("failed to remove existing container %s", name)
+		if remove {
+			if err := dr.command(nil, io.Discard, io.Discard, "--context", dockerContext, "container", "rm", name); err != nil {
+				return nil, fmt.Errorf("failed to remove existing container %s", name)
+			}
 		}
-	}
-	if recreate {
-		// create the builder
-		args := []string{"--context", dockerContext, "container", "run", "-d", "--name", name, "--privileged", image, "--allow-insecure-entitlement", "network.host", "--addr", fmt.Sprintf("unix://%s", buildkitSocketPath), "--debug"}
-		msg := fmt.Sprintf("creating builder container '%s' in context '%s'", name, dockerContext)
-		fmt.Println(msg)
-		if err := dr.command(nil, nil, nil, args...); err != nil {
-			return nil, err
+		if recreate {
+			// create the builder
+			args := []string{"--context", dockerContext, "container", "run", "-d", "--name", name, "--privileged", image, "--allow-insecure-entitlement", "network.host", "--addr", fmt.Sprintf("unix://%s", buildkitSocketPath), "--debug"}
+			msg := fmt.Sprintf("creating builder container '%s' in context '%s'", name, dockerContext)
+			fmt.Println(msg)
+			if err := dr.command(nil, nil, io.Discard, args...); err != nil {
+				// if we failed, do a retry
+				time.Sleep(buildkitCheckInterval)
+				continue
+			}
 		}
+		found = true
+		break
 	}
+	if !found {
+		return nil, fmt.Errorf("unable to create or find builder container %s in context %s after %d retries", name, dockerContext, buildKitCheckRetryCount)
+	}
+
 	// wait for buildkit socket to be ready up to the timeout
 	fmt.Printf("waiting for buildkit builder to be ready, up to %d seconds\n", buildkitWaitServer)
 	timeout := time.After(buildkitWaitServer * time.Second)
-	ticker := time.NewTicker(buildkitCheckInterval * time.Second)
+	ticker := time.NewTicker(buildkitCheckInterval)
 	// Keep trying until we're timed out or get a success
 	for {
 		select {