	clh: Increase API and SandboxStop timeouts for TDX
While running tests using `ctr`, I noticed that I was hitting those timeouts more frequently than expected. Until we find the root cause of the issue (which is *not* in Kata Containers), let's increase the timeouts when dealing with a Confidential Guest.

Fixes: #4978

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
@@ -68,10 +68,12 @@ const (
 	// Values based on:
 	clhTimeout                     = 10
 	clhAPITimeout                  = 1
+	clhAPITimeoutConfidentialGuest = 10
 	// Timeout for hot-plug - hotplug devices can take more time, than usual API calls
 	// Use longer time timeout for it.
 	clhHotPlugAPITimeout                   = 5
 	clhStopSandboxTimeout                  = 3
+	clhStopSandboxTimeoutConfidentialGuest = 5
 	clhSocket                              = "clh.sock"
 	clhAPISocket                           = "clh-api.sock"
 	virtioFsSocket                         = "virtiofsd.sock"
@@ -272,6 +274,28 @@ var clhDebugKernelParams = []Param{
 //
 //###########################################################
 
+func (clh *cloudHypervisor) getClhAPITimeout() time.Duration {
+	// Increase the APITimeout when dealing with a Confidential Guest.
+	// The value has been chosen based on tests using `ctr`, and hopefully
+	// this change can be dropped in further steps of the development.
+	if clh.config.ConfidentialGuest {
+		return clhAPITimeoutConfidentialGuest
+	}
+
+	return clhAPITimeout
+}
+
+func (clh *cloudHypervisor) getClhStopSandboxTimeout() time.Duration {
+	// Increase the StopSandboxTimeout when dealing with a Confidential Guest.
+	// The value has been chosen based on tests using `ctr`, and hopefully
+	// this change can be dropped in further steps of the development.
+	if clh.config.ConfidentialGuest {
+		return clhStopSandboxTimeoutConfidentialGuest
+	}
+
+	return clhStopSandboxTimeout
+}
+
 func (clh *cloudHypervisor) setConfig(config *HypervisorConfig) error {
 	clh.config = *config
 
@@ -594,7 +618,7 @@ func (clh *cloudHypervisor) StartVM(ctx context.Context, timeout int) error {
 	span, _ := katatrace.Trace(ctx, clh.Logger(), "StartVM", clhTracingTags, map[string]string{"sandbox_id": clh.id})
 	defer span.End()
 
-	ctx, cancel := context.WithTimeout(context.Background(), clhAPITimeout*time.Second)
+	ctx, cancel := context.WithTimeout(context.Background(), clh.getClhAPITimeout()*time.Second)
 	defer cancel()
 
 	clh.Logger().WithField("function", "StartVM").Info("starting Sandbox")
@@ -890,7 +914,7 @@ func (clh *cloudHypervisor) ResizeMemory(ctx context.Context, reqMemMB uint32, m
 	}
 
 	cl := clh.client()
-	ctx, cancelResize := context.WithTimeout(ctx, clhAPITimeout*time.Second)
+	ctx, cancelResize := context.WithTimeout(ctx, clh.getClhAPITimeout()*time.Second)
 	defer cancelResize()
 
 	resize := *chclient.NewVmResize()
@@ -935,7 +959,7 @@ func (clh *cloudHypervisor) ResizeVCPUs(ctx context.Context, reqVCPUs uint32) (c
 	}
 
 	// Resize (hot-plug) vCPUs via HTTP API
-	ctx, cancel := context.WithTimeout(ctx, clhAPITimeout*time.Second)
+	ctx, cancel := context.WithTimeout(ctx, clh.getClhAPITimeout()*time.Second)
 	defer cancel()
 	resize := *chclient.NewVmResize()
 	resize.DesiredVcpus = func(i int32) *int32 { return &i }(int32(reqVCPUs))
@@ -1086,9 +1110,9 @@ func (clh *cloudHypervisor) terminate(ctx context.Context, waitOnly bool) (err e
 	clh.Logger().Debug("Stopping Cloud Hypervisor")
 
 	if pidRunning && !waitOnly {
-		clhRunning, _ := clh.isClhRunning(clhStopSandboxTimeout)
+		clhRunning, _ := clh.isClhRunning(uint(clh.getClhStopSandboxTimeout()))
 		if clhRunning {
-			ctx, cancel := context.WithTimeout(context.Background(), clhStopSandboxTimeout*time.Second)
+			ctx, cancel := context.WithTimeout(context.Background(), clh.getClhStopSandboxTimeout()*time.Second)
 			defer cancel()
 			if _, err = clh.client().ShutdownVMM(ctx); err != nil {
 				return err
@@ -1096,7 +1120,7 @@ func (clh *cloudHypervisor) terminate(ctx context.Context, waitOnly bool) (err e
 		}
 	}
 
-	if err = utils.WaitLocalProcess(pid, clhStopSandboxTimeout, syscall.Signal(0), clh.Logger()); err != nil {
+	if err = utils.WaitLocalProcess(pid, uint(clh.getClhStopSandboxTimeout()), syscall.Signal(0), clh.Logger()); err != nil {
 		return err
 	}
 
@@ -1281,7 +1305,7 @@ func (clh *cloudHypervisor) isClhRunning(timeout uint) (bool, error) {
 	timeStart := time.Now()
 	cl := clh.client()
 	for {
-		ctx, cancel := context.WithTimeout(context.Background(), clhAPITimeout*time.Second)
+		ctx, cancel := context.WithTimeout(context.Background(), clh.getClhAPITimeout()*time.Second)
 		defer cancel()
 		_, _, err := cl.VmmPingGet(ctx)
 		if err == nil {
@@ -1547,7 +1571,7 @@ func (clh *cloudHypervisor) cleanupVM(force bool) error {
 // vmInfo ask to hypervisor for current VM status
 func (clh *cloudHypervisor) vmInfo() (chclient.VmInfo, error) {
 	cl := clh.client()
-	ctx, cancelInfo := context.WithTimeout(context.Background(), clhAPITimeout*time.Second)
+	ctx, cancelInfo := context.WithTimeout(context.Background(), clh.getClhAPITimeout()*time.Second)
 	defer cancelInfo()
 
 	info, _, err := cl.VmInfoGet(ctx)
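One subtlety in the change is worth spelling out: the timeout constants are untyped integers counting seconds, while the new helpers return them as time.Duration. Call sites therefore multiply the result by time.Second to build a context deadline, or convert it back with uint(...) for helpers such as isClhRunning and utils.WaitLocalProcess that take a plain number of seconds. The following is a minimal, self-contained sketch of that pattern; the hypervisorConfig and cloudHypervisor types here are simplified stand-ins, not the real Kata Containers definitions.

package main

import (
	"context"
	"fmt"
	"time"
)

// Simplified stand-ins for the real Kata Containers types, used only to
// illustrate the timeout-selection pattern introduced by this commit.
type hypervisorConfig struct {
	ConfidentialGuest bool
}

type cloudHypervisor struct {
	config hypervisorConfig
}

// The constants are untyped integers, exactly as in the diff; they count
// seconds, even though the helper below returns them as a time.Duration.
const (
	clhAPITimeout                  = 1
	clhAPITimeoutConfidentialGuest = 10
)

// getClhAPITimeout mirrors the helper added by the commit: pick the longer
// API timeout when the guest is confidential (e.g. a TDX guest).
func (clh *cloudHypervisor) getClhAPITimeout() time.Duration {
	if clh.config.ConfidentialGuest {
		return clhAPITimeoutConfidentialGuest
	}
	return clhAPITimeout
}

func main() {
	clh := &cloudHypervisor{config: hypervisorConfig{ConfidentialGuest: true}}

	// As at the call sites in the diff, the returned value must be
	// multiplied by time.Second before being used as a deadline: the
	// Duration returned above is really a bare count of seconds.
	ctx, cancel := context.WithTimeout(context.Background(), clh.getClhAPITimeout()*time.Second)
	defer cancel()

	deadline, _ := ctx.Deadline()
	fmt.Println("API deadline in:", time.Until(deadline).Round(time.Second)) // 10s

	// The same property is what makes uint(clh.getClhStopSandboxTimeout())
	// in the diff valid for helpers that expect a count of seconds.
	fmt.Println("raw seconds:", uint(clh.getClhAPITimeout())) // 10
}

An alternative design would make the constants typed durations (e.g. 10 * time.Second) and drop the multiplication at each call site; the commit instead keeps the existing integer-seconds convention, presumably to keep the diff small.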