runtime/fc: track real firecracker PID instead of jailer PID

When the jailer is in use (the default for kata-fc), cmd.Process.Pid in
fcInit() is the jailer's PID, not firecracker's. The jailer forks +
execs firecracker as a separate child and exits. fc.info.PID was
therefore stored as the (soon-to-be-dead) jailer PID.

At sandbox shutdown, fcEnd() calls WaitLocalProcess(fc.info.PID, SIGTERM, ...).
syscall.Kill on the dead jailer PID returns ESRCH, WaitLocalProcess
returns nil immediately, and the real firecracker microVM never
receives a signal. It gets reparented to init and stays alive
indefinitely, holding open resources from the host. Over many
container lifecycles this becomes a serious resource leak.

Read the real PID from <jailerRoot>/firecracker.pid, which firecracker
itself writes after the exec. Update fc.info.PID with that value so all
downstream code (fcEnd, Save/Load, kill-0 alive checks, NewProc) operates
on the actual firecracker process.

Also fix a small adjacent bug in Sandbox.Stop where the per-container
teardown loop ignored the force flag, causing any container.stop error
to short-circuit Stop before stopVM ran.

Signed-off-by: Sebastian Wolf <swolf@nvidia.com>
This commit is contained in:
Sebastian Wolf
2026-05-07 10:24:43 +00:00
committed by Fabiano Fidêncio
parent 7c971f0c4c
commit 26746c9ce8
3 changed files with 137 additions and 1 deletions

View File

@@ -1527,7 +1527,10 @@ func (c *Container) stop(ctx context.Context, force bool) error {
}
if err := c.state.ValidTransition(c.state.State, types.StateStopped); err != nil {
return err
if !force {
return err
}
c.Logger().WithError(err).Warn("invalid state transition to Stopped; continuing because force is set")
}
// Force the container to be killed. For most of the cases, this

View File

@@ -421,9 +421,55 @@ func (fc *firecracker) fcInit(ctx context.Context, timeout int) error {
fc.Logger().WithField("fcInit failed:", err).Debug()
return err
}
// When jailer is in use, cmd.Process.Pid is the jailer's PID, but jailer
// fork+execs firecracker as a separate child. The real fc PID lives in
// firecracker.pid inside the jail root; if we don't pick it up, fcEnd
// signals the (dead) jailer PID, gets ESRCH, returns nil, and the actual
// firecracker microVM is orphaned to init.
//
// Retry reading the PID file for up to 5 seconds; the jailer writes it
// after fork+exec so there can be a small delay. Falling back to the
// jailer PID would reintroduce the shutdown leak, so treat failure as
// fatal.
if fc.config.JailerPath != "" {
pid, err := fc.readJailedFirecrackerPID()
if err != nil {
deadline := time.Now().Add(5 * time.Second)
for time.Now().Before(deadline) {
time.Sleep(50 * time.Millisecond)
pid, err = fc.readJailedFirecrackerPID()
if err == nil {
break
}
}
}
if err != nil {
return fmt.Errorf("unable to determine firecracker PID from jailer root: %w", err)
}
fc.info.PID = pid
}
return nil
}
// readJailedFirecrackerPID returns the PID firecracker writes to
// <jailerRoot>/firecracker.pid after the jailer's fork+exec.
func (fc *firecracker) readJailedFirecrackerPID() (int, error) {
pidFile := filepath.Join(fc.jailerRoot, "firecracker.pid")
data, err := os.ReadFile(pidFile)
if err != nil {
return 0, fmt.Errorf("read %s: %w", pidFile, err)
}
pid, err := strconv.Atoi(strings.TrimSpace(string(data)))
if err != nil {
return 0, fmt.Errorf("parse %s: %w", pidFile, err)
}
if pid <= 0 {
return 0, fmt.Errorf("invalid pid %d in %s", pid, pidFile)
}
return pid, nil
}
func (fc *firecracker) fcEnd(ctx context.Context, waitOnly bool) (err error) {
span, _ := katatrace.Trace(ctx, fc.Logger(), "fcEnd", fcTracingTags, map[string]string{"sandbox_id": fc.id})
defer span.End()

View File

@@ -9,6 +9,8 @@ package virtcontainers
import (
"context"
"os"
"path/filepath"
"strings"
"testing"
@@ -255,3 +257,88 @@ func TestFcSetConfig(t *testing.T) {
assert.Equal(fc.config, config)
}
func TestReadJailedFirecrackerPID(t *testing.T) {
assert := assert.New(t)
tests := []struct {
name string
fileContent string
createFile bool
expectPID int
expectError bool
}{
{
name: "valid pid",
fileContent: "12345",
createFile: true,
expectPID: 12345,
expectError: false,
},
{
name: "valid pid with trailing newline",
fileContent: "12345\n",
createFile: true,
expectPID: 12345,
expectError: false,
},
{
name: "valid pid with surrounding whitespace",
fileContent: " 12345 \n",
createFile: true,
expectPID: 12345,
expectError: false,
},
{
name: "missing file",
createFile: false,
expectError: true,
},
{
name: "non-numeric content",
fileContent: "not-a-pid",
createFile: true,
expectError: true,
},
{
name: "zero pid",
fileContent: "0",
createFile: true,
expectError: true,
},
{
name: "negative pid",
fileContent: "-1",
createFile: true,
expectError: true,
},
{
name: "empty file",
fileContent: "",
createFile: true,
expectError: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
tmpDir := t.TempDir()
fc := firecracker{jailerRoot: tmpDir}
if tt.createFile {
pidFile := filepath.Join(tmpDir, "firecracker.pid")
err := os.WriteFile(pidFile, []byte(tt.fileContent), 0644)
assert.NoError(err)
}
pid, err := fc.readJailedFirecrackerPID()
if tt.expectError {
assert.Error(err)
assert.Equal(0, pid)
} else {
assert.NoError(err)
assert.Equal(tt.expectPID, pid)
}
})
}
}