mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-04-29 04:04:45 +00:00
clh: Increase API and SandboxStop timeouts for TDX
While doing tests using `ctr`, I've noticed that I've been hitting those timeouts more frequently than expected. Until we find the root cause of the issue (which is *not* in Kata Containers), let's increase the timeouts when dealing with a Confidential Guest. Fixes: #4978 Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
This commit is contained in:
parent
c142fa2541
commit
9f0a57c0eb
@ -66,17 +66,19 @@ const (
|
|||||||
const (
|
const (
|
||||||
// Values are mandatory by http API
|
// Values are mandatory by http API
|
||||||
// Values based on:
|
// Values based on:
|
||||||
clhTimeout = 10
|
clhTimeout = 10
|
||||||
clhAPITimeout = 1
|
clhAPITimeout = 1
|
||||||
|
clhAPITimeoutConfidentialGuest = 10
|
||||||
// Timeout for hot-plug - hotplug devices can take more time, than usual API calls
|
// Timeout for hot-plug - hotplug devices can take more time, than usual API calls
|
||||||
// Use longer time timeout for it.
|
// Use longer time timeout for it.
|
||||||
clhHotPlugAPITimeout = 5
|
clhHotPlugAPITimeout = 5
|
||||||
clhStopSandboxTimeout = 3
|
clhStopSandboxTimeout = 3
|
||||||
clhSocket = "clh.sock"
|
clhStopSandboxTimeoutConfidentialGuest = 5
|
||||||
clhAPISocket = "clh-api.sock"
|
clhSocket = "clh.sock"
|
||||||
virtioFsSocket = "virtiofsd.sock"
|
clhAPISocket = "clh-api.sock"
|
||||||
defaultClhPath = "/usr/local/bin/cloud-hypervisor"
|
virtioFsSocket = "virtiofsd.sock"
|
||||||
virtioFsCacheAlways = "always"
|
defaultClhPath = "/usr/local/bin/cloud-hypervisor"
|
||||||
|
virtioFsCacheAlways = "always"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Interface that hides the implementation of openAPI client
|
// Interface that hides the implementation of openAPI client
|
||||||
@ -272,6 +274,28 @@ var clhDebugKernelParams = []Param{
|
|||||||
//
|
//
|
||||||
//###########################################################
|
//###########################################################
|
||||||
|
|
||||||
|
// getClhAPITimeout returns the timeout to apply to Cloud Hypervisor API
// calls: clhAPITimeoutConfidentialGuest when the hypervisor is configured
// with ConfidentialGuest, clhAPITimeout otherwise.
//
// NOTE: although the return type is time.Duration, the value is a bare
// count of seconds, not a ready-to-use duration. Callers are expected to
// multiply the result by time.Second before handing it to
// context.WithTimeout.
func (clh *cloudHypervisor) getClhAPITimeout() time.Duration {
|
||||||
|
// Increase the APITimeout when dealing with a Confidential Guest.
|
||||||
|
// The value has been chosen based on tests using `ctr`, and hopefully
|
||||||
|
// this change can be dropped at a later stage of development.
|
||||||
|
if clh.config.ConfidentialGuest {
|
||||||
|
return clhAPITimeoutConfidentialGuest
|
||||||
|
}
|
||||||
|
|
||||||
|
// Non-confidential guests keep the default API timeout.
return clhAPITimeout
|
||||||
|
}
|
||||||
|
|
||||||
|
// getClhStopSandboxTimeout returns the timeout to apply while stopping the
// sandbox: clhStopSandboxTimeoutConfidentialGuest when the hypervisor is
// configured with ConfidentialGuest, clhStopSandboxTimeout otherwise.
//
// NOTE: although the return type is time.Duration, the value is a bare
// count of seconds, not a ready-to-use duration. Callers either multiply
// the result by time.Second (for context.WithTimeout) or convert it with
// uint() when a plain number of seconds is required.
func (clh *cloudHypervisor) getClhStopSandboxTimeout() time.Duration {
|
||||||
|
// Increase the StopSandboxTimeout when dealing with a Confidential Guest.
|
||||||
|
// The value has been chosen based on tests using `ctr`, and hopefully
|
||||||
|
// this change can be dropped at a later stage of development.
|
||||||
|
if clh.config.ConfidentialGuest {
|
||||||
|
return clhStopSandboxTimeoutConfidentialGuest
|
||||||
|
}
|
||||||
|
|
||||||
|
// Non-confidential guests keep the default stop-sandbox timeout.
return clhStopSandboxTimeout
|
||||||
|
}
|
||||||
|
|
||||||
func (clh *cloudHypervisor) setConfig(config *HypervisorConfig) error {
|
func (clh *cloudHypervisor) setConfig(config *HypervisorConfig) error {
|
||||||
clh.config = *config
|
clh.config = *config
|
||||||
|
|
||||||
@ -594,7 +618,7 @@ func (clh *cloudHypervisor) StartVM(ctx context.Context, timeout int) error {
|
|||||||
span, _ := katatrace.Trace(ctx, clh.Logger(), "StartVM", clhTracingTags, map[string]string{"sandbox_id": clh.id})
|
span, _ := katatrace.Trace(ctx, clh.Logger(), "StartVM", clhTracingTags, map[string]string{"sandbox_id": clh.id})
|
||||||
defer span.End()
|
defer span.End()
|
||||||
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), clhAPITimeout*time.Second)
|
ctx, cancel := context.WithTimeout(context.Background(), clh.getClhAPITimeout()*time.Second)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
clh.Logger().WithField("function", "StartVM").Info("starting Sandbox")
|
clh.Logger().WithField("function", "StartVM").Info("starting Sandbox")
|
||||||
@ -890,7 +914,7 @@ func (clh *cloudHypervisor) ResizeMemory(ctx context.Context, reqMemMB uint32, m
|
|||||||
}
|
}
|
||||||
|
|
||||||
cl := clh.client()
|
cl := clh.client()
|
||||||
ctx, cancelResize := context.WithTimeout(ctx, clhAPITimeout*time.Second)
|
ctx, cancelResize := context.WithTimeout(ctx, clh.getClhAPITimeout()*time.Second)
|
||||||
defer cancelResize()
|
defer cancelResize()
|
||||||
|
|
||||||
resize := *chclient.NewVmResize()
|
resize := *chclient.NewVmResize()
|
||||||
@ -935,7 +959,7 @@ func (clh *cloudHypervisor) ResizeVCPUs(ctx context.Context, reqVCPUs uint32) (c
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Resize (hot-plug) vCPUs via HTTP API
|
// Resize (hot-plug) vCPUs via HTTP API
|
||||||
ctx, cancel := context.WithTimeout(ctx, clhAPITimeout*time.Second)
|
ctx, cancel := context.WithTimeout(ctx, clh.getClhAPITimeout()*time.Second)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
resize := *chclient.NewVmResize()
|
resize := *chclient.NewVmResize()
|
||||||
resize.DesiredVcpus = func(i int32) *int32 { return &i }(int32(reqVCPUs))
|
resize.DesiredVcpus = func(i int32) *int32 { return &i }(int32(reqVCPUs))
|
||||||
@ -1086,9 +1110,9 @@ func (clh *cloudHypervisor) terminate(ctx context.Context, waitOnly bool) (err e
|
|||||||
clh.Logger().Debug("Stopping Cloud Hypervisor")
|
clh.Logger().Debug("Stopping Cloud Hypervisor")
|
||||||
|
|
||||||
if pidRunning && !waitOnly {
|
if pidRunning && !waitOnly {
|
||||||
clhRunning, _ := clh.isClhRunning(clhStopSandboxTimeout)
|
clhRunning, _ := clh.isClhRunning(uint(clh.getClhStopSandboxTimeout()))
|
||||||
if clhRunning {
|
if clhRunning {
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), clhStopSandboxTimeout*time.Second)
|
ctx, cancel := context.WithTimeout(context.Background(), clh.getClhStopSandboxTimeout()*time.Second)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
if _, err = clh.client().ShutdownVMM(ctx); err != nil {
|
if _, err = clh.client().ShutdownVMM(ctx); err != nil {
|
||||||
return err
|
return err
|
||||||
@ -1096,7 +1120,7 @@ func (clh *cloudHypervisor) terminate(ctx context.Context, waitOnly bool) (err e
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err = utils.WaitLocalProcess(pid, clhStopSandboxTimeout, syscall.Signal(0), clh.Logger()); err != nil {
|
if err = utils.WaitLocalProcess(pid, uint(clh.getClhStopSandboxTimeout()), syscall.Signal(0), clh.Logger()); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1281,7 +1305,7 @@ func (clh *cloudHypervisor) isClhRunning(timeout uint) (bool, error) {
|
|||||||
timeStart := time.Now()
|
timeStart := time.Now()
|
||||||
cl := clh.client()
|
cl := clh.client()
|
||||||
for {
|
for {
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), clhAPITimeout*time.Second)
|
ctx, cancel := context.WithTimeout(context.Background(), clh.getClhAPITimeout()*time.Second)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
_, _, err := cl.VmmPingGet(ctx)
|
_, _, err := cl.VmmPingGet(ctx)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
@ -1547,7 +1571,7 @@ func (clh *cloudHypervisor) cleanupVM(force bool) error {
|
|||||||
// vmInfo ask to hypervisor for current VM status
|
// vmInfo ask to hypervisor for current VM status
|
||||||
func (clh *cloudHypervisor) vmInfo() (chclient.VmInfo, error) {
|
func (clh *cloudHypervisor) vmInfo() (chclient.VmInfo, error) {
|
||||||
cl := clh.client()
|
cl := clh.client()
|
||||||
ctx, cancelInfo := context.WithTimeout(context.Background(), clhAPITimeout*time.Second)
|
ctx, cancelInfo := context.WithTimeout(context.Background(), clh.getClhAPITimeout()*time.Second)
|
||||||
defer cancelInfo()
|
defer cancelInfo()
|
||||||
|
|
||||||
info, _, err := cl.VmInfoGet(ctx)
|
info, _, err := cl.VmInfoGet(ctx)
|
||||||
|
Loading…
Reference in New Issue
Block a user