shimv2: return the hypervisor's pid as the container pid

Kata's hypervisor process is in the network namespace, which makes it
the closest host-side equivalent of the container's process. Host
metrics collectors such as cAdvisor can use this pid to access the
network namespace and gather network metrics. This commit therefore
replaces the shim's pid with the hypervisor's pid.

Fixes: #1451

Signed-off-by: fupan.lfp <fupan.lfp@antfin.com>

(backport https://github.com/kata-containers/kata-containers/pull/1452)
Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
This commit is contained in:
fupan.lfp 2021-02-24 13:26:05 +08:00 committed by Peng Tao
parent 4c9af982e6
commit 2dd859bfce
4 changed files with 34 additions and 14 deletions

View File

@ -87,6 +87,12 @@ func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest) (*con
return nil, err return nil, err
} }
s.sandbox = sandbox s.sandbox = sandbox
pid, err := s.sandbox.GetHypervisorPid()
if err != nil {
return nil, err
}
s.hpid = uint32(pid)
go s.startManagementServer(ctx, ociSpec) go s.startManagementServer(ctx, ociSpec)
case vc.PodContainer: case vc.PodContainer:

View File

@ -113,9 +113,12 @@ type service struct {
mu sync.Mutex mu sync.Mutex
eventSendMu sync.Mutex eventSendMu sync.Mutex
// pid Since this shimv2 cannot get the container processes pid from VM, // hypervisor pid, Since this shimv2 cannot get the container processes pid from VM,
// thus for the returned values needed pid, just return this shim's // thus for the returned values needed pid, just return the hypervisor's
// pid directly. // pid directly.
hpid uint32
// shim's pid
pid uint32 pid uint32
ctx context.Context ctx context.Context
@ -370,11 +373,11 @@ func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ *
Terminal: r.Terminal, Terminal: r.Terminal,
}, },
Checkpoint: r.Checkpoint, Checkpoint: r.Checkpoint,
Pid: s.pid, Pid: s.hpid,
}) })
return &taskAPI.CreateTaskResponse{ return &taskAPI.CreateTaskResponse{
Pid: s.pid, Pid: s.hpid,
}, nil }, nil
} }
@ -406,7 +409,7 @@ func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (_ *taskAP
} }
s.send(&eventstypes.TaskStart{ s.send(&eventstypes.TaskStart{
ContainerID: c.id, ContainerID: c.id,
Pid: s.pid, Pid: s.hpid,
}) })
} else { } else {
//start an exec //start an exec
@ -417,12 +420,12 @@ func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (_ *taskAP
s.send(&eventstypes.TaskExecStarted{ s.send(&eventstypes.TaskExecStarted{
ContainerID: c.id, ContainerID: c.id,
ExecID: r.ExecID, ExecID: r.ExecID,
Pid: s.pid, Pid: s.hpid,
}) })
} }
return &taskAPI.StartResponse{ return &taskAPI.StartResponse{
Pid: s.pid, Pid: s.hpid,
}, nil }, nil
} }
@ -449,7 +452,7 @@ func (s *service) Delete(ctx context.Context, r *taskAPI.DeleteRequest) (_ *task
s.send(&eventstypes.TaskDelete{ s.send(&eventstypes.TaskDelete{
ContainerID: c.id, ContainerID: c.id,
Pid: s.pid, Pid: s.hpid,
ExitStatus: c.exit, ExitStatus: c.exit,
ExitedAt: c.exitTime, ExitedAt: c.exitTime,
}) })
@ -457,7 +460,7 @@ func (s *service) Delete(ctx context.Context, r *taskAPI.DeleteRequest) (_ *task
return &taskAPI.DeleteResponse{ return &taskAPI.DeleteResponse{
ExitStatus: c.exit, ExitStatus: c.exit,
ExitedAt: c.exitTime, ExitedAt: c.exitTime,
Pid: s.pid, Pid: s.hpid,
}, nil }, nil
} }
//deal with the exec case //deal with the exec case
@ -471,7 +474,7 @@ func (s *service) Delete(ctx context.Context, r *taskAPI.DeleteRequest) (_ *task
return &taskAPI.DeleteResponse{ return &taskAPI.DeleteResponse{
ExitStatus: uint32(execs.exitCode), ExitStatus: uint32(execs.exitCode),
ExitedAt: execs.exitTime, ExitedAt: execs.exitTime,
Pid: s.pid, Pid: s.hpid,
}, nil }, nil
} }
@ -566,7 +569,7 @@ func (s *service) State(ctx context.Context, r *taskAPI.StateRequest) (_ *taskAP
return &taskAPI.StateResponse{ return &taskAPI.StateResponse{
ID: c.id, ID: c.id,
Bundle: c.bundle, Bundle: c.bundle,
Pid: s.pid, Pid: s.hpid,
Status: c.status, Status: c.status,
Stdin: c.stdin, Stdin: c.stdin,
Stdout: c.stdout, Stdout: c.stdout,
@ -585,7 +588,7 @@ func (s *service) State(ctx context.Context, r *taskAPI.StateRequest) (_ *taskAP
return &taskAPI.StateResponse{ return &taskAPI.StateResponse{
ID: execs.id, ID: execs.id,
Bundle: c.bundle, Bundle: c.bundle,
Pid: s.pid, Pid: s.hpid,
Status: execs.status, Status: execs.status,
Stdin: execs.tty.stdin, Stdin: execs.tty.stdin,
Stdout: execs.tty.stdout, Stdout: execs.tty.stdout,
@ -735,7 +738,7 @@ func (s *service) Pids(ctx context.Context, r *taskAPI.PidsRequest) (_ *taskAPI.
}() }()
pInfo := task.ProcessInfo{ pInfo := task.ProcessInfo{
Pid: s.pid, Pid: s.hpid,
} }
processes = append(processes, &pInfo) processes = append(processes, &pInfo)
@ -807,7 +810,7 @@ func (s *service) Connect(ctx context.Context, r *taskAPI.ConnectRequest) (_ *ta
return &taskAPI.ConnectResponse{ return &taskAPI.ConnectResponse{
ShimPid: s.pid, ShimPid: s.pid,
//Since kata cannot get the container's pid in VM, thus only return the shim's pid. //Since kata cannot get the container's pid in VM, thus only return the shim's pid.
TaskPid: s.pid, TaskPid: s.hpid,
}, nil }, nil
} }

View File

@ -72,6 +72,7 @@ type VCSandbox interface {
ListRoutes() ([]*pbTypes.Route, error) ListRoutes() ([]*pbTypes.Route, error)
GetOOMEvent() (string, error) GetOOMEvent() (string, error)
GetHypervisorPid() (int, error)
UpdateRuntimeMetrics() error UpdateRuntimeMetrics() error
GetAgentMetrics() (string, error) GetAgentMetrics() (string, error)

View File

@ -246,6 +246,16 @@ func (s *Sandbox) GetNetNs() string {
return s.networkNS.NetNsPath return s.networkNS.NetNsPath
} }
// GetHypervisorPid returns the hypervisor's pid.
//
// The pid is the first entry reported by the hypervisor's getPids(). When no
// pid is reported, or when that first entry is not a positive number, -1 and
// an error are returned. Rejecting negative values as well as zero keeps an
// invalid pid from wrapping around into a large bogus value when a caller
// converts the result to an unsigned type.
func (s *Sandbox) GetHypervisorPid() (int, error) {
	pids := s.hypervisor.getPids()
	if len(pids) == 0 || pids[0] <= 0 {
		return -1, fmt.Errorf("Invalid hypervisor PID: %+v", pids)
	}

	return pids[0], nil
}
// GetAllContainers returns all containers. // GetAllContainers returns all containers.
func (s *Sandbox) GetAllContainers() []VCContainer { func (s *Sandbox) GetAllContainers() []VCContainer {
ifa := make([]VCContainer, len(s.containers)) ifa := make([]VCContainer, len(s.containers))