diff --git a/Gopkg.lock b/Gopkg.lock index 4262b87e0..e7e754059 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -378,11 +378,11 @@ revision = "2f1d1f20f75d5404f53b9edf6b53ed5505508675" [[projects]] - digest = "1:f49a8b8840fe74235515e5bdb18b641b9d1887f3af7a38bec8f6998994e5ffca" + digest = "1:2690f7d938dd074d30aa60849f26bcb9f5dd3ad88220a1f24c895c0df63fd1ae" name = "github.com/intel/govmm" packages = ["qemu"] pruneopts = "NUT" - revision = "35a8fd3ca9a36461b7dcf24e3b292f6e1ea4e71a" + revision = "b3e7a9e78463a10f2a19e1a966c76a3afb215781" [[projects]] digest = "1:36dfd4701e98a9d8371dd3053e32d4f29e82b07bcc9e655db82138f9273bcb0f" diff --git a/Gopkg.toml b/Gopkg.toml index ddcc66a9f..4444b82f0 100644 --- a/Gopkg.toml +++ b/Gopkg.toml @@ -48,7 +48,7 @@ [[constraint]] name = "github.com/intel/govmm" - revision = "35a8fd3ca9a36461b7dcf24e3b292f6e1ea4e71a" + revision = "b3e7a9e78463a10f2a19e1a966c76a3afb215781" [[constraint]] name = "github.com/kata-containers/agent" diff --git a/Makefile b/Makefile index 32b4f1d3c..0d6b1b9c7 100644 --- a/Makefile +++ b/Makefile @@ -156,6 +156,11 @@ DEFAULTEXPFEATURES := [] DEFENTROPYSOURCE := /dev/urandom DEFDISABLEBLOCK := false +DEFSHAREDFS := virtio-9p +DEFVIRTIOFSDAEMON := +# Default DAX mapping cache size in MiB +DEFVIRTIOFSCACHESIZE := 8192 +DEFVIRTIOFSCACHE := always DEFENABLEIOTHREADS := false DEFENABLEMEMPREALLOC := false DEFENABLEHUGEPAGES := false @@ -320,6 +325,10 @@ USER_VARS += DEFAULTEXPFEATURES USER_VARS += DEFDISABLEBLOCK USER_VARS += DEFBLOCKSTORAGEDRIVER_FC USER_VARS += DEFBLOCKSTORAGEDRIVER_QEMU +USER_VARS += DEFSHAREDFS +USER_VARS += DEFVIRTIOFSDAEMON +USER_VARS += DEFVIRTIOFSCACHESIZE +USER_VARS += DEFVIRTIOFSCACHE USER_VARS += DEFENABLEIOTHREADS USER_VARS += DEFENABLEMEMPREALLOC USER_VARS += DEFENABLEHUGEPAGES @@ -456,6 +465,10 @@ $(GENERATED_FILES): %: %.in $(MAKEFILE_LIST) VERSION .git-commit -e "s|@DEFDISABLEBLOCK@|$(DEFDISABLEBLOCK)|g" \ -e "s|@DEFBLOCKSTORAGEDRIVER_FC@|$(DEFBLOCKSTORAGEDRIVER_FC)|g" \ -e 
"s|@DEFBLOCKSTORAGEDRIVER_QEMU@|$(DEFBLOCKSTORAGEDRIVER_QEMU)|g" \ + -e "s|@DEFSHAREDFS@|$(DEFSHAREDFS)|g" \ + -e "s|@DEFVIRTIOFSDAEMON@|$(DEFVIRTIOFSDAEMON)|g" \ + -e "s|@DEFVIRTIOFSCACHESIZE@|$(DEFVIRTIOFSCACHESIZE)|g" \ + -e "s|@DEFVIRTIOFSCACHE@|$(DEFVIRTIOFSCACHE)|g" \ -e "s|@DEFENABLEIOTHREADS@|$(DEFENABLEIOTHREADS)|g" \ -e "s|@DEFENABLEMEMPREALLOC@|$(DEFENABLEMEMPREALLOC)|g" \ -e "s|@DEFENABLEHUGEPAGES@|$(DEFENABLEHUGEPAGES)|g" \ diff --git a/cli/config/configuration-qemu.toml.in b/cli/config/configuration-qemu.toml.in index c7fd600c4..502dfb43a 100644 --- a/cli/config/configuration-qemu.toml.in +++ b/cli/config/configuration-qemu.toml.in @@ -97,6 +97,32 @@ default_memory = @DEFMEMSZ@ # 9pfs is used instead to pass the rootfs. disable_block_device_use = @DEFDISABLEBLOCK@ +# Shared file system type: +# - virtio-9p (default) +# - virtio-fs +shared_fs = "@DEFSHAREDFS@" + +# Path to vhost-user-fs daemon. +virtio_fs_daemon = "@DEFVIRTIOFSDAEMON@" + +# Default size of DAX cache in MiB +virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@ + +# Cache mode: +# +# - none +# Metadata, data, and pathname lookup are not cached in guest. They are +# always fetched from host and any changes are immediately pushed to host. +# +# - auto +# Metadata and pathname lookup cache expires after a configured amount of +# time (default is 1 second). Data is cached while the file is open (close +# to open consistency). +# +# - always +# Metadata, data, and pathname lookup are cached in guest and never expire. +virtio_fs_cache = "@DEFVIRTIOFSCACHE@" + # Block storage driver to be used for the hypervisor in case the container # rootfs is backed by a block device. This is virtio-scsi, virtio-blk # or nvdimm. 
diff --git a/cli/kata-env.go b/cli/kata-env.go index af8c9a751..c8610daa4 100644 --- a/cli/kata-env.go +++ b/cli/kata-env.go @@ -92,6 +92,7 @@ type HypervisorInfo struct { MemorySlots uint32 Debug bool UseVSock bool + SharedFS string } // ProxyInfo stores proxy details @@ -352,6 +353,7 @@ func getHypervisorInfo(config oci.RuntimeConfig) HypervisorInfo { UseVSock: config.HypervisorConfig.UseVSock, MemorySlots: config.HypervisorConfig.MemSlots, EntropySource: config.HypervisorConfig.EntropySource, + SharedFS: config.HypervisorConfig.SharedFS, } } diff --git a/cli/kata-env_test.go b/cli/kata-env_test.go index 21e070f63..1e1b3ee09 100644 --- a/cli/kata-env_test.go +++ b/cli/kata-env_test.go @@ -37,6 +37,7 @@ const testHypervisorVersion = "QEMU emulator version 2.7.0+git.741f430a96-6.1, C var ( hypervisorDebug = false + enableVirtioFS = false proxyDebug = false runtimeDebug = false runtimeTrace = false @@ -91,6 +92,7 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC enableIOThreads := true hotplugVFIOOnRootBus := true disableNewNetNs := false + sharedFS := "virtio-9p" filesToCreate := []string{ hypervisorPath, @@ -126,6 +128,10 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC return "", oci.RuntimeConfig{}, err } + if enableVirtioFS { + sharedFS = "virtio-fs" + } + hypConfig := katautils.GetDefaultHypervisorConfig() configFileOptions := katatestutils.RuntimeConfigOptions{ @@ -157,6 +163,7 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC NetmonDebug: netmonDebug, AgentDebug: agentDebug, AgentTrace: agentTrace, + SharedFS: sharedFS, } runtimeConfig := katatestutils.MakeRuntimeConfigFileData(configFileOptions) @@ -321,6 +328,7 @@ func getExpectedHypervisor(config oci.RuntimeConfig) HypervisorInfo { MemorySlots: config.HypervisorConfig.MemSlots, Debug: config.HypervisorConfig.Debug, EntropySource: config.HypervisorConfig.EntropySource, + SharedFS: 
config.HypervisorConfig.SharedFS, } } @@ -498,6 +506,7 @@ func TestEnvGetEnvInfo(t *testing.T) { // options are tested. for _, toggle := range []bool{false, true} { hypervisorDebug = toggle + enableVirtioFS = toggle proxyDebug = toggle runtimeDebug = toggle runtimeTrace = toggle diff --git a/pkg/katatestutils/utils.go b/pkg/katatestutils/utils.go index ef2dcc654..62f6ffc97 100644 --- a/pkg/katatestutils/utils.go +++ b/pkg/katatestutils/utils.go @@ -27,6 +27,7 @@ type RuntimeConfigOptions struct { BlockDeviceDriver string AgentTraceMode string AgentTraceType string + SharedFS string DisableBlock bool EnableIOThreads bool HotplugVFIOOnRootBus bool @@ -61,6 +62,8 @@ func MakeRuntimeConfigFileData(config RuntimeConfigOptions) string { msize_9p = ` + strconv.FormatUint(uint64(config.DefaultMsize9p), 10) + ` enable_debug = ` + strconv.FormatBool(config.HypervisorDebug) + ` guest_hook_path = "` + config.DefaultGuestHookPath + `" + shared_fs = "` + config.SharedFS + `" + virtio_fs_daemon = "/path/to/virtiofsd" [proxy.kata] enable_debug = ` + strconv.FormatBool(config.ProxyDebug) + ` diff --git a/pkg/katautils/config.go b/pkg/katautils/config.go index 99cf9a3cd..8567f78bf 100644 --- a/pkg/katautils/config.go +++ b/pkg/katautils/config.go @@ -92,6 +92,10 @@ type hypervisor struct { MachineType string `toml:"machine_type"` BlockDeviceDriver string `toml:"block_device_driver"` EntropySource string `toml:"entropy_source"` + SharedFS string `toml:"shared_fs"` + VirtioFSDaemon string `toml:"virtio_fs_daemon"` + VirtioFSCache string `toml:"virtio_fs_cache"` + VirtioFSCacheSize uint32 `toml:"virtio_fs_cache_size"` BlockDeviceCacheSet bool `toml:"block_device_cache_set"` BlockDeviceCacheDirect bool `toml:"block_device_cache_direct"` BlockDeviceCacheNoflush bool `toml:"block_device_cache_noflush"` @@ -326,6 +330,22 @@ func (h hypervisor) blockDeviceDriver() (string, error) { return "", fmt.Errorf("Invalid hypervisor block storage driver %v specified (supported drivers: %v)", 
h.BlockDeviceDriver, supportedBlockDrivers) } +func (h hypervisor) sharedFS() (string, error) { + supportedSharedFS := []string{config.Virtio9P, config.VirtioFS} + + if h.SharedFS == "" { + return config.Virtio9P, nil + } + + for _, fs := range supportedSharedFS { + if fs == h.SharedFS { + return h.SharedFS, nil + } + } + + return "", fmt.Errorf("Invalid hypervisor shared file system %v specified (supported file systems: %v)", h.SharedFS, supportedSharedFS) +} + func (h hypervisor) msize9p() uint32 { if h.Msize9p == 0 { return defaultMsize9p @@ -521,6 +541,16 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { return vc.HypervisorConfig{}, err } + sharedFS, err := h.sharedFS() + if err != nil { + return vc.HypervisorConfig{}, err + } + + if sharedFS == config.VirtioFS && h.VirtioFSDaemon == "" { + return vc.HypervisorConfig{}, + errors.New("cannot enable virtio-fs without daemon path in configuration file") + } + useVSock := false if h.useVSock() { if utils.SupportsVsocks() { @@ -548,6 +578,10 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { EntropySource: h.GetEntropySource(), DefaultBridges: h.defaultBridges(), DisableBlockDeviceUse: h.DisableBlockDeviceUse, + SharedFS: sharedFS, + VirtioFSDaemon: h.VirtioFSDaemon, + VirtioFSCacheSize: h.VirtioFSCacheSize, + VirtioFSCache: h.VirtioFSCache, MemPrealloc: h.MemPrealloc, HugePages: h.HugePages, Mlock: !h.Swap, diff --git a/pkg/katautils/config_test.go b/pkg/katautils/config_test.go index 24b7511ab..ef296c488 100644 --- a/pkg/katautils/config_test.go +++ b/pkg/katautils/config_test.go @@ -83,6 +83,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf enableIOThreads := true hotplugVFIOOnRootBus := true disableNewNetNs := false + sharedFS := "virtio-9p" configFileOptions := ktu.RuntimeConfigOptions{ Hypervisor: "qemu", @@ -113,6 +114,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf NetmonDebug: 
netmonDebug, AgentDebug: agentDebug, AgentTrace: agentTrace, + SharedFS: sharedFS, } runtimeConfigFileData := ktu.MakeRuntimeConfigFileData(configFileOptions) @@ -160,6 +162,8 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf MemSlots: defaultMemSlots, EntropySource: defaultEntropySource, GuestHookPath: defaultGuestHookPath, + SharedFS: sharedFS, + VirtioFSDaemon: "/path/to/virtiofsd", } agentConfig := vc.KataAgentConfig{} diff --git a/vendor/github.com/intel/govmm/qemu/qemu.go b/vendor/github.com/intel/govmm/qemu/qemu.go index c2f734bc5..16e268d68 100644 --- a/vendor/github.com/intel/govmm/qemu/qemu.go +++ b/vendor/github.com/intel/govmm/qemu/qemu.go @@ -748,7 +748,7 @@ type VhostUserDevice struct { TypeDevID string //variable QEMU parameter based on value of VhostUserType Address string //used for MAC address in net case Tag string //virtio-fs volume id for mounting inside guest - CacheSize uint32 //virtio-fs DAX cache size in GiB + CacheSize uint32 //virtio-fs DAX cache size in MiB SharedVersions bool //enable virtio-fs shared version metadata VhostUserType DeviceDriver @@ -824,7 +824,7 @@ func (vhostuserDev VhostUserDevice) QemuParams(config *Config) []string { devParams = append(devParams, string(driver)) devParams = append(devParams, fmt.Sprintf("chardev=%s", vhostuserDev.CharDevID)) devParams = append(devParams, fmt.Sprintf("tag=%s", vhostuserDev.Tag)) - devParams = append(devParams, fmt.Sprintf("cache-size=%dG", vhostuserDev.CacheSize)) + devParams = append(devParams, fmt.Sprintf("cache-size=%dM", vhostuserDev.CacheSize)) if vhostuserDev.SharedVersions { devParams = append(devParams, "versiontable=/dev/shm/fuse_shared_versions") } diff --git a/virtcontainers/device/config/config.go b/virtcontainers/device/config/config.go index 13eaeff35..2b5d5518e 100644 --- a/virtcontainers/device/config/config.go +++ b/virtcontainers/device/config/config.go @@ -36,6 +36,9 @@ const ( //VhostUserBlk represents a block vhostuser device type 
VhostUserBlk = "vhost-user-blk-pci" + + //VhostUserFS represents a virtio-fs vhostuser device type + VhostUserFS = "vhost-user-fs-pci" ) const ( @@ -52,6 +55,14 @@ const ( Nvdimm = "nvdimm" ) +const ( + // Virtio9P means use virtio-9p for the shared file system + Virtio9P = "virtio-9p" + + // VirtioFS means use virtio-fs for the shared file system + VirtioFS = "virtio-fs" +) + // Defining these as a variable instead of a const, to allow // overriding this in the tests. @@ -174,6 +185,11 @@ type VhostUserDeviceAttrs struct { // MacAddress is only meaningful for vhost user net device MacAddress string + + // These are only meaningful for vhost user fs devices + Tag string + CacheSize uint32 + Cache string } // GetHostPathFunc is function pointer used to mock GetHostPath in tests. diff --git a/virtcontainers/device/drivers/vhost_user_fs.go b/virtcontainers/device/drivers/vhost_user_fs.go new file mode 100644 index 000000000..dba493248 --- /dev/null +++ b/virtcontainers/device/drivers/vhost_user_fs.go @@ -0,0 +1,65 @@ +// Copyright (C) 2019 Red Hat, Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 +// + +package drivers + +import ( + "encoding/hex" + + "github.com/kata-containers/runtime/virtcontainers/device/api" + "github.com/kata-containers/runtime/virtcontainers/device/config" + "github.com/kata-containers/runtime/virtcontainers/utils" +) + +// VhostUserFSDevice is a virtio-fs vhost-user device +type VhostUserFSDevice struct { + *GenericDevice + config.VhostUserDeviceAttrs +} + +// Device interface + +func (device *VhostUserFSDevice) Attach(devReceiver api.DeviceReceiver) (err error) { + skip, err := device.bumpAttachCount(true) + if err != nil { + return err + } + if skip { + return nil + } + + defer func() { + if err != nil { + device.bumpAttachCount(false) + } + }() + + // generate a unique ID to be used for hypervisor commandline fields + randBytes, err := utils.GenerateRandomBytes(8) + if err != nil { + return err + } + id := hex.EncodeToString(randBytes) + + device.DevID = id + device.Type = device.DeviceType() + + return devReceiver.AppendDevice(device) +} + +func (device *VhostUserFSDevice) Detach(devReceiver api.DeviceReceiver) error { + _, err := device.bumpAttachCount(false) + return err +} + +func (device *VhostUserFSDevice) DeviceType() config.DeviceType { + return config.VhostUserFS +} + +// GetDeviceInfo returns device information that the device is created based on +func (device *VhostUserFSDevice) GetDeviceInfo() interface{} { + device.Type = device.DeviceType() + return &device.VhostUserDeviceAttrs +} diff --git a/virtcontainers/documentation/api/1.0/api.md b/virtcontainers/documentation/api/1.0/api.md index 0ec8fdc25..1d6077dd5 100644 --- a/virtcontainers/documentation/api/1.0/api.md +++ b/virtcontainers/documentation/api/1.0/api.md @@ -138,6 +138,20 @@ type HypervisorConfig struct { // DisableBlockDeviceUse disallows a block device from being used. 
DisableBlockDeviceUse bool + // Shared file system type: + // - virtio-9p (default) + // - virtio-fs + SharedFS string + + // VirtioFSDaemon is the virtio-fs vhost-user daemon path + VirtioFSDaemon string + + // VirtioFSCacheSize is the virtio-fs DAX cache size in MiB + VirtioFSCacheSize uint32 + + // VirtioFSCache is the virtio-fs cache mode: "none", "auto" or "always" + VirtioFSCache string + // KernelParams are additional guest kernel parameters. KernelParams []Param diff --git a/virtcontainers/hypervisor.go b/virtcontainers/hypervisor.go index 554679445..d94215de7 100644 --- a/virtcontainers/hypervisor.go +++ b/virtcontainers/hypervisor.go @@ -170,6 +170,9 @@ type HypervisorConfig struct { // MemOffset specifies memory space for nvdimm device MemOffset uint32 + // VirtioFSCacheSize is the DAX cache size in MiB + VirtioFSCacheSize uint32 + // KernelParams are additional guest kernel parameters. KernelParams []Param @@ -215,6 +218,17 @@ type HypervisorConfig struct { // entropy (/dev/random, /dev/urandom or real hardware RNG device) EntropySource string + // Shared file system type: + // - virtio-9p (default) + // - virtio-fs + SharedFS string + + // VirtioFSDaemon is the virtio-fs vhost-user daemon path + VirtioFSDaemon string + + // VirtioFSCache is the virtio-fs cache mode: "none", "auto" or "always" + VirtioFSCache string + // customAssets is a map of assets. // Each value in that map takes precedence over the configured assets. // For example, if there is a value for the "kernel" key in this map, diff --git a/virtcontainers/kata_agent.go b/virtcontainers/kata_agent.go index 1477025ea..956635dcc 100644 --- a/virtcontainers/kata_agent.go +++ b/virtcontainers/kata_agent.go @@ -65,22 +65,25 @@ var ( mountGuest9pTag = "kataShared" kataGuestSandboxDir = "/run/kata-containers/sandbox/" type9pFs = "9p" + typeVirtioFS = "virtio_fs" vsockSocketScheme = "vsock" // port numbers below 1024 are called privileged ports. 
Only a process with // CAP_NET_BIND_SERVICE capability may bind to these port numbers. - vSockPort = 1024 - kata9pDevType = "9p" - kataMmioBlkDevType = "mmioblk" - kataBlkDevType = "blk" - kataSCSIDevType = "scsi" - kataNvdimmDevType = "nvdimm" - sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"} - shmDir = "shm" - kataEphemeralDevType = "ephemeral" - ephemeralPath = filepath.Join(kataGuestSandboxDir, kataEphemeralDevType) - grpcMaxDataSize = int64(1024 * 1024) - localDirOptions = []string{"mode=0777"} - maxHostnameLen = 64 + vSockPort = 1024 + kata9pDevType = "9p" + kataMmioBlkDevType = "mmioblk" + kataBlkDevType = "blk" + kataSCSIDevType = "scsi" + kataNvdimmDevType = "nvdimm" + kataVirtioFSDevType = "virtio-fs" + sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"} + sharedDirVirtioFSOptions = []string{"default_permissions,allow_other,rootmode=040000,user_id=0,group_id=0,dax,tag=" + mountGuest9pTag, "nodev"} + shmDir = "shm" + kataEphemeralDevType = "ephemeral" + ephemeralPath = filepath.Join(kataGuestSandboxDir, kataEphemeralDevType) + grpcMaxDataSize = int64(1024 * 1024) + localDirOptions = []string{"mode=0777"} + maxHostnameLen = 64 ) const ( @@ -738,22 +741,34 @@ func (k *kataAgent) startSandbox(sandbox *Sandbox) error { // append 9p shared volume to storages only if filesystem sharing is supported if caps.IsFsSharingSupported() { - sharedDir9pOptions = append(sharedDir9pOptions, fmt.Sprintf("msize=%d", sandbox.config.HypervisorConfig.Msize9p)) - // We mount the shared directory in a predefined location // in the guest. // This is where at least some of the host config files // (resolv.conf, etc...) and potentially all container // rootfs will reside. 
- sharedVolume := &grpc.Storage{ - Driver: kata9pDevType, - Source: mountGuest9pTag, - MountPoint: kataGuestSharedDir, - Fstype: type9pFs, - Options: sharedDir9pOptions, - } + if sandbox.config.HypervisorConfig.SharedFS == config.VirtioFS { + sharedVolume := &grpc.Storage{ + Driver: kataVirtioFSDevType, + Source: "none", + MountPoint: kataGuestSharedDir, + Fstype: typeVirtioFS, + Options: sharedDirVirtioFSOptions, + } - storages = append(storages, sharedVolume) + storages = append(storages, sharedVolume) + } else { + sharedDir9pOptions = append(sharedDir9pOptions, fmt.Sprintf("msize=%d", sandbox.config.HypervisorConfig.Msize9p)) + + sharedVolume := &grpc.Storage{ + Driver: kata9pDevType, + Source: mountGuest9pTag, + MountPoint: kataGuestSharedDir, + Fstype: type9pFs, + Options: sharedDir9pOptions, + } + + storages = append(storages, sharedVolume) + } } if sandbox.shmSize > 0 { diff --git a/virtcontainers/qemu.go b/virtcontainers/qemu.go index cea8d7623..1217dfc32 100644 --- a/virtcontainers/qemu.go +++ b/virtcontainers/qemu.go @@ -6,13 +6,16 @@ package virtcontainers import ( + "bufio" "context" + "encoding/hex" "encoding/json" "errors" "fmt" "io/ioutil" "math" "os" + "os/exec" "path/filepath" "strconv" "strings" @@ -88,6 +91,7 @@ type qemu struct { const ( consoleSocket = "console.sock" qmpSocket = "qmp.sock" + vhostFSSocket = "vhost-fs.sock" qmpCapErrMsg = "Failed to negoatiate QMP capabilities" qmpExecCatCmd = "exec:cat" @@ -541,6 +545,10 @@ func (q *qemu) createSandbox(ctx context.Context, id string, hypervisorConfig *H return nil } +func (q *qemu) vhostFSSocketPath(id string) (string, error) { + return utils.BuildSocketPath(store.RunVMStoragePath, id, vhostFSSocket) +} + // startSandbox will start the Sandbox's VM. 
func (q *qemu) startSandbox(timeout int) error { span, _ := q.trace("startSandbox") @@ -580,13 +588,77 @@ func (q *qemu) startSandbox(timeout int) error { } }() + if q.config.SharedFS == config.VirtioFS { + sockPath, err := q.vhostFSSocketPath(q.id) + if err != nil { + return err + } + + // The daemon will terminate when the vhost-user socket + // connection with QEMU closes. Therefore we do not keep track + // of this child process after returning from this function. + sourcePath := filepath.Join(kataHostSharedDir, q.id) + cmd := exec.Command(q.config.VirtioFSDaemon, + "-o", "vhost_user_socket="+sockPath, + "-o", "source="+sourcePath, + "-o", "cache="+q.config.VirtioFSCache) + stderr, err := cmd.StderrPipe() + if err != nil { + return err + } + + if err = cmd.Start(); err != nil { + return err + } + defer func() { + if err != nil { + cmd.Process.Kill() + } + }() + + // Wait for socket to become available + sockReady := make(chan error, 1) + timeStart := time.Now() + go func() { + scanner := bufio.NewScanner(stderr) + for scanner.Scan() { + if strings.Contains(scanner.Text(), "Waiting for vhost-user socket connection...") { + sockReady <- nil + return + } + } + if err := scanner.Err(); err != nil { + sockReady <- err + } + sockReady <- fmt.Errorf("virtiofsd did not announce socket connection") + }() + timeoutDuration := time.Duration(timeout) * time.Second + select { + case err = <-sockReady: + case <-time.After(timeoutDuration): + err = fmt.Errorf("timed out waiting for virtiofsd (pid=%d) socket %s", cmd.Process.Pid, sockPath) + } + if err != nil { + return err + } + + // Now reduce timeout by the elapsed time + elapsed := time.Since(timeStart) + if elapsed < timeoutDuration { + timeout = timeout - int(elapsed.Seconds()) + } else { + timeout = 0 + } + } + var strErr string strErr, err = govmmQemu.LaunchQemu(q.qemuConfig, newQMPLogger()) if err != nil { return fmt.Errorf("%s", strErr) } - return q.waitSandbox(timeout) + err = q.waitSandbox(timeout) // the virtiofsd 
deferred checks err's value + return err } // waitSandbox will wait for the Sandbox's VM to be up and running. @@ -1288,7 +1360,36 @@ func (q *qemu) addDevice(devInfo interface{}, devType deviceType) error { switch v := devInfo.(type) { case types.Volume: - q.qemuConfig.Devices = q.arch.append9PVolume(q.qemuConfig.Devices, v) + if q.config.SharedFS == config.VirtioFS { + q.Logger().WithField("volume-type", "virtio-fs").Info("adding volume") + + var randBytes []byte + randBytes, err = utils.GenerateRandomBytes(8) + if err != nil { + return err + } + id := hex.EncodeToString(randBytes) + + var sockPath string + sockPath, err = q.vhostFSSocketPath(q.id) + if err != nil { + return err + } + + vhostDev := config.VhostUserDeviceAttrs{ + Tag: v.MountTag, + Type: config.VhostUserFS, + CacheSize: q.config.VirtioFSCacheSize, + Cache: q.config.VirtioFSCache, + } + vhostDev.SocketPath = sockPath + vhostDev.DevID = id + + q.qemuConfig.Devices, err = q.arch.appendVhostUserDevice(q.qemuConfig.Devices, vhostDev) + } else { + q.Logger().WithField("volume-type", "virtio-9p").Info("adding volume") + q.qemuConfig.Devices = q.arch.append9PVolume(q.qemuConfig.Devices, v) + } case types.Socket: q.qemuConfig.Devices = q.arch.appendSocket(q.qemuConfig.Devices, v) case kataVSOCK: diff --git a/virtcontainers/qemu_arch_base.go b/virtcontainers/qemu_arch_base.go index f4a9e69d0..47268c0eb 100644 --- a/virtcontainers/qemu_arch_base.go +++ b/virtcontainers/qemu_arch_base.go @@ -527,6 +527,10 @@ func (q *qemuArchBase) appendVhostUserDevice(devices []govmmQemu.Device, attr co case config.VhostUserSCSI: qemuVhostUserDevice.TypeDevID = utils.MakeNameID("scsi", attr.DevID, maxDevIDSize) case config.VhostUserBlk: + case config.VhostUserFS: + qemuVhostUserDevice.TypeDevID = utils.MakeNameID("fs", attr.DevID, maxDevIDSize) + qemuVhostUserDevice.Tag = attr.Tag + qemuVhostUserDevice.CacheSize = attr.CacheSize } qemuVhostUserDevice.VhostUserType = govmmQemu.DeviceDriver(attr.Type) diff --git 
a/virtcontainers/sandbox.go b/virtcontainers/sandbox.go index 3ade96204..4c4bf2cd3 100644 --- a/virtcontainers/sandbox.go +++ b/virtcontainers/sandbox.go @@ -1737,7 +1737,7 @@ func (s *Sandbox) DecrementSandboxBlockIndex() error { // Sandbox implement DeviceReceiver interface from device/api/interface.go func (s *Sandbox) AppendDevice(device api.Device) error { switch device.DeviceType() { - case config.VhostUserSCSI, config.VhostUserNet, config.VhostUserBlk: + case config.VhostUserSCSI, config.VhostUserNet, config.VhostUserBlk, config.VhostUserFS: return s.hypervisor.addDevice(device.GetDeviceInfo().(*config.VhostUserDeviceAttrs), vhostuserDev) } return fmt.Errorf("unsupported device type")