From 26746c9ce898218320c006a53ff233859ea9b3aa Mon Sep 17 00:00:00 2001 From: Sebastian Wolf Date: Thu, 7 May 2026 10:24:43 +0000 Subject: [PATCH] runtime/fc: track real firecracker PID instead of jailer PID When the jailer is in use (the default for kata-fc), cmd.Process.Pid in fcInit() is the jailer's PID, not firecracker's. The jailer forks + execs firecracker as a separate child and exits. fc.info.PID was therefore stored as the (soon-to-be-dead) jailer PID. At sandbox shutdown, fcEnd() calls WaitLocalProcess(fc.info.PID, SIGTERM, ...). syscall.Kill on the dead jailer PID returns ESRCH, WaitLocalProcess returns nil immediately, and the real firecracker microVM never receives a signal. It gets reparented to init and stays alive indefinitely, holding open resources from the host. Over many container lifecycles this becomes a serious resource leak. Read the real PID from /firecracker.pid, which firecracker itself writes after the exec. Update fc.info.PID with that value so all downstream code (fcEnd, Save/Load, kill-0 alive checks, NewProc) operates on the actual firecracker process. Also fix a small adjacent bug in Sandbox.Stop where the per-container teardown loop ignored the force flag, causing any container.stop error to short-circuit Stop before stopVM ran. Signed-off-by: Sebastian Wolf --- src/runtime/virtcontainers/container.go | 5 +- src/runtime/virtcontainers/fc.go | 46 +++++++++++++ src/runtime/virtcontainers/fc_test.go | 87 +++++++++++++++++++++++++ 3 files changed, 137 insertions(+), 1 deletion(-) diff --git a/src/runtime/virtcontainers/container.go b/src/runtime/virtcontainers/container.go index a9655f8001..dc96e3cf39 100644 --- a/src/runtime/virtcontainers/container.go +++ b/src/runtime/virtcontainers/container.go @@ -1527,7 +1527,10 @@ func (c *Container) stop(ctx context.Context, force bool) error { } if err := c.state.ValidTransition(c.state.State, types.StateStopped); err != nil { - return err + if !force { + return err + } + c.Logger().WithError(err).Warn("invalid state transition to Stopped; continuing because force is set") } // Force the container to be killed. For most of the cases, this diff --git a/src/runtime/virtcontainers/fc.go b/src/runtime/virtcontainers/fc.go index d90c358347..29a6e20ce4 100644 --- a/src/runtime/virtcontainers/fc.go +++ b/src/runtime/virtcontainers/fc.go @@ -421,9 +421,55 @@ func (fc *firecracker) fcInit(ctx context.Context, timeout int) error { fc.Logger().WithField("fcInit failed:", err).Debug() return err } + + // When jailer is in use, cmd.Process.Pid is the jailer's PID, but jailer + // fork+execs firecracker as a separate child. The real fc PID lives in + // firecracker.pid inside the jail root; if we don't pick it up, fcEnd + // signals the (dead) jailer PID, gets ESRCH, returns nil, and the actual + // firecracker microVM is orphaned to init. + // + // Retry reading the PID file for up to 5 seconds; the jailer writes it + // after fork+exec so there can be a small delay. Falling back to the + // jailer PID would reintroduce the shutdown leak, so treat failure as + // fatal. + if fc.config.JailerPath != "" { + pid, err := fc.readJailedFirecrackerPID() + if err != nil { + deadline := time.Now().Add(5 * time.Second) + for time.Now().Before(deadline) { + time.Sleep(50 * time.Millisecond) + pid, err = fc.readJailedFirecrackerPID() + if err == nil { + break + } + } + } + if err != nil { + return fmt.Errorf("unable to determine firecracker PID from jailer root: %w", err) + } + fc.info.PID = pid + } return nil } +// readJailedFirecrackerPID returns the PID firecracker writes to +// /firecracker.pid after the jailer's fork+exec. +func (fc *firecracker) readJailedFirecrackerPID() (int, error) { + pidFile := filepath.Join(fc.jailerRoot, "firecracker.pid") + data, err := os.ReadFile(pidFile) + if err != nil { + return 0, fmt.Errorf("read %s: %w", pidFile, err) + } + pid, err := strconv.Atoi(strings.TrimSpace(string(data))) + if err != nil { + return 0, fmt.Errorf("parse %s: %w", pidFile, err) + } + if pid <= 0 { + return 0, fmt.Errorf("invalid pid %d in %s", pid, pidFile) + } + return pid, nil +} + func (fc *firecracker) fcEnd(ctx context.Context, waitOnly bool) (err error) { span, _ := katatrace.Trace(ctx, fc.Logger(), "fcEnd", fcTracingTags, map[string]string{"sandbox_id": fc.id}) defer span.End() diff --git a/src/runtime/virtcontainers/fc_test.go b/src/runtime/virtcontainers/fc_test.go index 5550b68958..f041fb940e 100644 --- a/src/runtime/virtcontainers/fc_test.go +++ b/src/runtime/virtcontainers/fc_test.go @@ -9,6 +9,8 @@ package virtcontainers import ( "context" + "os" + "path/filepath" "strings" "testing" @@ -255,3 +257,88 @@ func TestFcSetConfig(t *testing.T) { assert.Equal(fc.config, config) } + +func TestReadJailedFirecrackerPID(t *testing.T) { + assert := assert.New(t) + + tests := []struct { + name string + fileContent string + createFile bool + expectPID int + expectError bool + }{ + { + name: "valid pid", + fileContent: "12345", + createFile: true, + expectPID: 12345, + expectError: false, + }, + { + name: "valid pid with trailing newline", + fileContent: "12345\n", + createFile: true, + expectPID: 12345, + expectError: false, + }, + { + name: "valid pid with surrounding whitespace", + fileContent: " 12345 \n", + createFile: true, + expectPID: 12345, + expectError: false, + }, + { + name: "missing file", + createFile: false, + expectError: true, + }, + { + name: "non-numeric content", + fileContent: "not-a-pid", + createFile: true, + expectError: true, + }, + { + name: "zero pid", + fileContent: "0", + createFile: true, + expectError: true, + }, + { + name: "negative pid", + fileContent: "-1", + createFile: true, + expectError: true, + }, + { + name: "empty file", + fileContent: "", + createFile: true, + expectError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tmpDir := t.TempDir() + fc := firecracker{jailerRoot: tmpDir} + + if tt.createFile { + pidFile := filepath.Join(tmpDir, "firecracker.pid") + err := os.WriteFile(pidFile, []byte(tt.fileContent), 0644) + assert.NoError(err) + } + + pid, err := fc.readJailedFirecrackerPID() + if tt.expectError { + assert.Error(err) + assert.Equal(0, pid) + } else { + assert.NoError(err) + assert.Equal(tt.expectPID, pid) + } + }) + } +}