From c142fa2541d66a814c691e33ecdd0357399d8872 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 23 Aug 2022 14:36:21 +0200 Subject: [PATCH 1/5] clh: Lift the sharedFS restriction used with TDX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When booting the TDX kernel with `tdx_disable_filter`, as it's been done for QEMU, VirtioFS can work without any issues. Whether this will be part of the upstream kernel or not is a different story, but it easily could make it there as Cloud Hypervisor relies on the VIRTIO_F_IOMMU_PLATFORM feature, which forces the guest to use the DMA API, making these devices compatible with TDX. See Sebastien Boeuf's explanation of this in the 3c973fa7ce208e7113f69424b7574b83f584885d commit: """ By using DMA API, the guest triggers the TDX codepath to share some of the guest memory, in particular the virtqueues and associated buffers so that the VMM and vhost-user backends/processes can access this memory. """ Fixes: #4977 Signed-off-by: Fabiano Fidêncio --- src/runtime/config/configuration-clh.toml.in | 4 --- src/runtime/virtcontainers/clh.go | 34 +------------------- 2 files changed, 1 insertion(+), 37 deletions(-) diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index f09c095f0e..59ddf43e12 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -28,10 +28,6 @@ image = "@IMAGEPATH@" # - CPU Hotplug # - Memory Hotplug # - NVDIMM devices -# - SharedFS, such as virtio-fs and virtio-fs-nydus -# -# Requirements: -# * virtio-block used as rootfs, thus the usage of devmapper snapshotter. 
# # Supported TEEs: # * Intel TDX diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index b14391b932..2514b59465 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -279,11 +279,6 @@ func (clh *cloudHypervisor) setConfig(config *HypervisorConfig) error { } func (clh *cloudHypervisor) createVirtiofsDaemon(sharedPath string) (VirtiofsDaemon, error) { - if !clh.supportsSharedFS() { - clh.Logger().Info("SharedFS is not supported") - return nil, nil - } - virtiofsdSocketPath, err := clh.virtioFsSocketPath(clh.id) if err != nil { return nil, err @@ -319,11 +314,6 @@ func (clh *cloudHypervisor) createVirtiofsDaemon(sharedPath string) (VirtiofsDae } func (clh *cloudHypervisor) setupVirtiofsDaemon(ctx context.Context) error { - if !clh.supportsSharedFS() { - clh.Logger().Info("SharedFS is not supported") - return nil - } - if clh.config.SharedFS == config.Virtio9P { return errors.New("cloud-hypervisor only supports virtio based file sharing") } @@ -347,11 +337,6 @@ func (clh *cloudHypervisor) setupVirtiofsDaemon(ctx context.Context) error { } func (clh *cloudHypervisor) stopVirtiofsDaemon(ctx context.Context) (err error) { - if !clh.supportsSharedFS() { - clh.Logger().Info("SharedFS is not supported") - return nil - } - if clh.state.VirtiofsDaemonPid == 0 { clh.Logger().Warn("The virtiofsd had stopped") return nil @@ -368,11 +353,6 @@ func (clh *cloudHypervisor) stopVirtiofsDaemon(ctx context.Context) (err error) } func (clh *cloudHypervisor) loadVirtiofsDaemon(sharedPath string) (VirtiofsDaemon, error) { - if !clh.supportsSharedFS() { - clh.Logger().Info("SharedFS is not supported") - return nil, nil - } - virtiofsdSocketPath, err := clh.virtioFsSocketPath(clh.id) if err != nil { return nil, err @@ -389,12 +369,6 @@ func (clh *cloudHypervisor) nydusdAPISocketPath(id string) (string, error) { return utils.BuildSocketPath(clh.config.VMStorePath, id, nydusdAPISock) } -func (clh *cloudHypervisor) 
supportsSharedFS() bool { - caps := clh.Capabilities(clh.ctx) - - return caps.IsFsSharingSupported() -} - func (clh *cloudHypervisor) enableProtection() error { protection, err := availableGuestProtection() if err != nil { @@ -1061,10 +1035,6 @@ func (clh *cloudHypervisor) AddDevice(ctx context.Context, devInfo interface{}, case types.HybridVSock: clh.addVSock(defaultGuestVSockCID, v.UdsPath) case types.Volume: - if !clh.supportsSharedFS() { - return fmt.Errorf("SharedFS is not supported") - } - err = clh.addVolume(v) default: clh.Logger().WithField("function", "AddDevice").Warnf("Add device of type %v is not supported.", v) @@ -1091,9 +1061,7 @@ func (clh *cloudHypervisor) Capabilities(ctx context.Context) types.Capabilities clh.Logger().WithField("function", "Capabilities").Info("get Capabilities") var caps types.Capabilities - if !clh.config.ConfidentialGuest { - caps.SetFsSharingSupport() - } + caps.SetFsSharingSupport() caps.SetBlockDeviceHotplugSupport() return caps } From 9f0a57c0ebd25d4dc1b348c49733dc36eac2a3f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 23 Aug 2022 15:42:28 +0200 Subject: [PATCH 2/5] clh: Increase API and SandboxStop timeouts for TDX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While doing tests using `ctr`, I've noticed that I've been hitting those timeouts more frequently than expected. Till we find the root cause of the issue (which is *not* in the Kata Containers), let's increase the timeouts when dealing with a Confidential Guest. 
Fixes: #4978 Signed-off-by: Fabiano Fidêncio --- src/runtime/virtcontainers/clh.go | 58 ++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 2514b59465..fb7f09a30b 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -66,17 +66,19 @@ const ( const ( // Values are mandatory by http API // Values based on: - clhTimeout = 10 - clhAPITimeout = 1 + clhTimeout = 10 + clhAPITimeout = 1 + clhAPITimeoutConfidentialGuest = 10 // Timeout for hot-plug - hotplug devices can take more time, than usual API calls // Use longer time timeout for it. - clhHotPlugAPITimeout = 5 - clhStopSandboxTimeout = 3 - clhSocket = "clh.sock" - clhAPISocket = "clh-api.sock" - virtioFsSocket = "virtiofsd.sock" - defaultClhPath = "/usr/local/bin/cloud-hypervisor" - virtioFsCacheAlways = "always" + clhHotPlugAPITimeout = 5 + clhStopSandboxTimeout = 3 + clhStopSandboxTimeoutConfidentialGuest = 5 + clhSocket = "clh.sock" + clhAPISocket = "clh-api.sock" + virtioFsSocket = "virtiofsd.sock" + defaultClhPath = "/usr/local/bin/cloud-hypervisor" + virtioFsCacheAlways = "always" ) // Interface that hides the implementation of openAPI client @@ -272,6 +274,28 @@ var clhDebugKernelParams = []Param{ // //########################################################### +func (clh *cloudHypervisor) getClhAPITimeout() time.Duration { + // Increase the APITimeout when dealing with a Confidential Guest. + // The value has been chosen based on tests using `ctr`, and hopefully + // this change can be dropped in further steps of the development. + if clh.config.ConfidentialGuest { + return clhAPITimeoutConfidentialGuest + } + + return clhAPITimeout +} + +func (clh *cloudHypervisor) getClhStopSandboxTimeout() time.Duration { + // Increase the StopSandboxTimeout when dealing with a Confidential Guest. 
+ // The value has been chosen based on tests using `ctr`, and hopefully + // this change can be dropped in further steps of the development. + if clh.config.ConfidentialGuest { + return clhStopSandboxTimeoutConfidentialGuest + } + + return clhStopSandboxTimeout +} + func (clh *cloudHypervisor) setConfig(config *HypervisorConfig) error { clh.config = *config @@ -594,7 +618,7 @@ func (clh *cloudHypervisor) StartVM(ctx context.Context, timeout int) error { span, _ := katatrace.Trace(ctx, clh.Logger(), "StartVM", clhTracingTags, map[string]string{"sandbox_id": clh.id}) defer span.End() - ctx, cancel := context.WithTimeout(context.Background(), clhAPITimeout*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), clh.getClhAPITimeout()*time.Second) defer cancel() clh.Logger().WithField("function", "StartVM").Info("starting Sandbox") @@ -890,7 +914,7 @@ func (clh *cloudHypervisor) ResizeMemory(ctx context.Context, reqMemMB uint32, m } cl := clh.client() - ctx, cancelResize := context.WithTimeout(ctx, clhAPITimeout*time.Second) + ctx, cancelResize := context.WithTimeout(ctx, clh.getClhAPITimeout()*time.Second) defer cancelResize() resize := *chclient.NewVmResize() @@ -935,7 +959,7 @@ func (clh *cloudHypervisor) ResizeVCPUs(ctx context.Context, reqVCPUs uint32) (c } // Resize (hot-plug) vCPUs via HTTP API - ctx, cancel := context.WithTimeout(ctx, clhAPITimeout*time.Second) + ctx, cancel := context.WithTimeout(ctx, clh.getClhAPITimeout()*time.Second) defer cancel() resize := *chclient.NewVmResize() resize.DesiredVcpus = func(i int32) *int32 { return &i }(int32(reqVCPUs)) @@ -1086,9 +1110,9 @@ func (clh *cloudHypervisor) terminate(ctx context.Context, waitOnly bool) (err e clh.Logger().Debug("Stopping Cloud Hypervisor") if pidRunning && !waitOnly { - clhRunning, _ := clh.isClhRunning(clhStopSandboxTimeout) + clhRunning, _ := clh.isClhRunning(uint(clh.getClhStopSandboxTimeout())) if clhRunning { - ctx, cancel := context.WithTimeout(context.Background(), 
clhStopSandboxTimeout*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), clh.getClhStopSandboxTimeout()*time.Second) defer cancel() if _, err = clh.client().ShutdownVMM(ctx); err != nil { return err @@ -1096,7 +1120,7 @@ func (clh *cloudHypervisor) terminate(ctx context.Context, waitOnly bool) (err e } } - if err = utils.WaitLocalProcess(pid, clhStopSandboxTimeout, syscall.Signal(0), clh.Logger()); err != nil { + if err = utils.WaitLocalProcess(pid, uint(clh.getClhStopSandboxTimeout()), syscall.Signal(0), clh.Logger()); err != nil { return err } @@ -1281,7 +1305,7 @@ func (clh *cloudHypervisor) isClhRunning(timeout uint) (bool, error) { timeStart := time.Now() cl := clh.client() for { - ctx, cancel := context.WithTimeout(context.Background(), clhAPITimeout*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), clh.getClhAPITimeout()*time.Second) defer cancel() _, _, err := cl.VmmPingGet(ctx) if err == nil { @@ -1547,7 +1571,7 @@ func (clh *cloudHypervisor) cleanupVM(force bool) error { // vmInfo ask to hypervisor for current VM status func (clh *cloudHypervisor) vmInfo() (chclient.VmInfo, error) { cl := clh.client() - ctx, cancelInfo := context.WithTimeout(context.Background(), clhAPITimeout*time.Second) + ctx, cancelInfo := context.WithTimeout(context.Background(), clh.getClhAPITimeout()*time.Second) defer cancelInfo() info, _, err := cl.VmInfoGet(ctx) From c0cb3cd4d86369b71b2c7716f42c3cb6ef1759df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 24 Aug 2022 10:46:18 +0200 Subject: [PATCH 3/5] clh: Avoid crashing when memory hotplug is not allowed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The runtime will crash when trying to resize memory when memory hotplug is not allowed. This happens because we cannot simply set the hotplug amount to zero, leading us to not set memory hotplug at all, and then later trying to access the value of a nil pointer.
Fixes: #4979 Signed-off-by: Fabiano Fidêncio --- src/runtime/virtcontainers/clh.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index fb7f09a30b..76aaf08d2a 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -877,7 +877,13 @@ func (clh *cloudHypervisor) ResizeMemory(ctx context.Context, reqMemMB uint32, m return 0, MemoryDevice{}, err } - maxHotplugSize := utils.MemUnit(*info.Config.Memory.HotplugSize) * utils.Byte + // HotplugSize can be nil in cases where Hotplug is not supported, as Cloud Hypervisor API + // does *not* allow us to set 0 as the HotplugSize. + maxHotplugSize := 0 * utils.Byte + if info.Config.Memory.HotplugSize != nil { + maxHotplugSize = utils.MemUnit(*info.Config.Memory.HotplugSize) * utils.Byte + } + if reqMemMB > uint32(maxHotplugSize.ToMiB()) { reqMemMB = uint32(maxHotplugSize.ToMiB()) } From d4b67613f0c2b0d00f1754748d2966a621f1b85e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 24 Aug 2022 17:04:59 +0200 Subject: [PATCH 4/5] clh: Use HVC console with TDX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As right now the TDX guest kernel doesn't support "serial" console, let's switch to using HVC in this case. 
Fixes: #4980 Signed-off-by: Fabiano Fidêncio --- src/runtime/virtcontainers/clh.go | 44 ++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 76aaf08d2a..3a02a645a9 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -264,7 +264,14 @@ var clhKernelParams = []Param{ } var clhDebugKernelParams = []Param{ - {"console", "ttyS0,115200n8"}, // enable serial console + {"console", "ttyS0,115200n8"}, // enable serial console +} + +var clhDebugConfidentialGuestKernelParams = []Param{ + {"console", "hvc0"}, // enable HVC console +} + +var clhDebugKernelParamsCommon = []Param{ {"systemd.log_target", "console"}, // send loggng to the console } @@ -496,7 +503,12 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net // Followed by extra debug parameters if debug enabled in configuration file if clh.config.Debug { - params = append(params, clhDebugKernelParams...) + if clh.config.ConfidentialGuest { + params = append(params, clhDebugConfidentialGuestKernelParams...) + } else { + params = append(params, clhDebugKernelParams...) + } + params = append(params, clhDebugKernelParamsCommon...) 
} else { // start the guest kernel with 'quiet' in non-debug mode params = append(params, Param{"quiet", ""}) @@ -550,15 +562,27 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net clh.vmconfig.Payload.SetInitramfs(initrdPath) } - // Use serial port as the guest console only in debug mode, - // so that we can gather early OS booting log - if clh.config.Debug { - clh.vmconfig.Serial = chclient.NewConsoleConfig(cctTTY) - } else { - clh.vmconfig.Serial = chclient.NewConsoleConfig(cctOFF) - } + if clh.config.ConfidentialGuest { + // Use HVC as the guest console only in debug mode, only + // for Confidential Guests + if clh.config.Debug { + clh.vmconfig.Console = chclient.NewConsoleConfig(cctTTY) + } else { + clh.vmconfig.Console = chclient.NewConsoleConfig(cctOFF) + } - clh.vmconfig.Console = chclient.NewConsoleConfig(cctOFF) + clh.vmconfig.Serial = chclient.NewConsoleConfig(cctOFF) + } else { + // Use serial port as the guest console only in debug mode, + // so that we can gather early OS booting log + if clh.config.Debug { + clh.vmconfig.Serial = chclient.NewConsoleConfig(cctTTY) + } else { + clh.vmconfig.Serial = chclient.NewConsoleConfig(cctOFF) + } + + clh.vmconfig.Console = chclient.NewConsoleConfig(cctOFF) + } cpu_topology := chclient.NewCpuTopology() cpu_topology.ThreadsPerCore = func(i int32) *int32 { return &i }(1) From dc90eae17b861d514694ebafaa5332bba98d5194 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 23 Aug 2022 16:04:41 +0200 Subject: [PATCH 5/5] qemu: Drop unnecessary `tdx_guest` kernel parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the current TDX kernel used with Kata Containers, `tdx_guest` is not needed, as TDX_GUEST is now a kernel configuration. With this in mind, let's just drop the kernel parameter. 
Fixes: #4981 Signed-off-by: Fabiano Fidêncio --- src/runtime/virtcontainers/qemu_amd64.go | 1 - 1 file changed, 1 deletion(-) diff --git a/src/runtime/virtcontainers/qemu_amd64.go b/src/runtime/virtcontainers/qemu_amd64.go index 9e9960046a..b7680a3180 100644 --- a/src/runtime/virtcontainers/qemu_amd64.go +++ b/src/runtime/virtcontainers/qemu_amd64.go @@ -210,7 +210,6 @@ func (q *qemuAmd64) enableProtection() error { q.qemuMachine.Options += "," } q.qemuMachine.Options += "kvm-type=tdx,confidential-guest-support=tdx" - q.kernelParams = append(q.kernelParams, Param{"tdx_guest", ""}) logger.Info("Enabling TDX guest protection") return nil case sevProtection: