mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-06-30 01:02:33 +00:00
qemu: use x-ignore-shared to implement vm template
qemu upstream has x-ignore-shared that works similar to our private bypass-shared-memory. We can use it to implement the vm template feature. Fixes: #1798 Depends-on: github.com/kata-containers/packaging#641 Signed-off-by: Peng Tao <bergwolf@hyper.sh>
This commit is contained in:
parent
bc15e44245
commit
d14968b66a
@ -421,8 +421,7 @@ func (q *qemu) setupTemplate(knobs *govmmQemu.Knobs, memory *govmmQemu.Memory) g
|
|||||||
}
|
}
|
||||||
|
|
||||||
if q.config.BootFromTemplate {
|
if q.config.BootFromTemplate {
|
||||||
incoming.MigrationType = govmmQemu.MigrationExec
|
incoming.MigrationType = govmmQemu.MigrationDefer
|
||||||
incoming.Exec = "cat " + q.config.DevicesStatePath
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -585,6 +584,98 @@ func (q *qemu) vhostFSSocketPath(id string) (string, error) {
|
|||||||
return utils.BuildSocketPath(store.RunVMStoragePath, id, vhostFSSocket)
|
return utils.BuildSocketPath(store.RunVMStoragePath, id, vhostFSSocket)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (q *qemu) virtiofsdArgs(sockPath string) []string {
|
||||||
|
// The daemon will terminate when the vhost-user socket
|
||||||
|
// connection with QEMU closes. Therefore we do not keep track
|
||||||
|
// of this child process after returning from this function.
|
||||||
|
sourcePath := filepath.Join(kataHostSharedDir, q.id)
|
||||||
|
args := []string{
|
||||||
|
"-o", "vhost_user_socket=" + sockPath,
|
||||||
|
"-o", "source=" + sourcePath,
|
||||||
|
"-o", "cache=" + q.config.VirtioFSCache}
|
||||||
|
if q.config.Debug {
|
||||||
|
args = append(args, "-d")
|
||||||
|
} else {
|
||||||
|
args = append(args, "-f")
|
||||||
|
}
|
||||||
|
|
||||||
|
return args
|
||||||
|
}
|
||||||
|
|
||||||
|
func (q *qemu) setupVirtiofsd(timeout int) (remain int, err error) {
|
||||||
|
sockPath, err := q.vhostFSSocketPath(q.id)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
cmd := exec.Command(q.config.VirtioFSDaemon, q.virtiofsdArgs(sockPath)...)
|
||||||
|
stderr, err := cmd.StderrPipe()
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = cmd.Start(); err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
defer func() {
|
||||||
|
if err != nil {
|
||||||
|
cmd.Process.Kill()
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Wait for socket to become available
|
||||||
|
sockReady := make(chan error, 1)
|
||||||
|
timeStart := time.Now()
|
||||||
|
go func() {
|
||||||
|
scanner := bufio.NewScanner(stderr)
|
||||||
|
var sent bool
|
||||||
|
for scanner.Scan() {
|
||||||
|
if q.config.Debug {
|
||||||
|
q.Logger().WithField("source", "virtiofsd").Debug(scanner.Text())
|
||||||
|
}
|
||||||
|
if !sent && strings.Contains(scanner.Text(), "Waiting for vhost-user socket connection...") {
|
||||||
|
sockReady <- nil
|
||||||
|
sent = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !sent {
|
||||||
|
if err := scanner.Err(); err != nil {
|
||||||
|
sockReady <- err
|
||||||
|
} else {
|
||||||
|
sockReady <- fmt.Errorf("virtiofsd did not announce socket connection")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
q.Logger().Info("virtiofsd quits")
|
||||||
|
q.stopSandbox()
|
||||||
|
}()
|
||||||
|
|
||||||
|
return q.waitVirtiofsd(timeStart, timeout, sockReady,
|
||||||
|
fmt.Sprintf("virtiofsd (pid=%d) socket %s", cmd.Process.Pid, sockPath))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (q *qemu) waitVirtiofsd(start time.Time, timeout int, ready chan error, errMsg string) (int, error) {
|
||||||
|
var err error
|
||||||
|
|
||||||
|
timeoutDuration := time.Duration(timeout) * time.Second
|
||||||
|
select {
|
||||||
|
case err = <-ready:
|
||||||
|
case <-time.After(timeoutDuration):
|
||||||
|
err = fmt.Errorf("timed out waiting for %s", errMsg)
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now reduce timeout by the elapsed time
|
||||||
|
elapsed := time.Since(start)
|
||||||
|
if elapsed < timeoutDuration {
|
||||||
|
timeout = timeout - int(elapsed.Seconds())
|
||||||
|
} else {
|
||||||
|
timeout = 0
|
||||||
|
}
|
||||||
|
return timeout, nil
|
||||||
|
}
|
||||||
|
|
||||||
// startSandbox will start the Sandbox's VM.
|
// startSandbox will start the Sandbox's VM.
|
||||||
func (q *qemu) startSandbox(timeout int) error {
|
func (q *qemu) startSandbox(timeout int) error {
|
||||||
span, _ := q.trace("startSandbox")
|
span, _ := q.trace("startSandbox")
|
||||||
@ -625,81 +716,10 @@ func (q *qemu) startSandbox(timeout int) error {
|
|||||||
}()
|
}()
|
||||||
|
|
||||||
if q.config.SharedFS == config.VirtioFS {
|
if q.config.SharedFS == config.VirtioFS {
|
||||||
sockPath, err := q.vhostFSSocketPath(q.id)
|
timeout, err = q.setupVirtiofsd(timeout)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// The daemon will terminate when the vhost-user socket
|
|
||||||
// connection with QEMU closes. Therefore we do not keep track
|
|
||||||
// of this child process after returning from this function.
|
|
||||||
sourcePath := filepath.Join(kataHostSharedDir, q.id)
|
|
||||||
args := []string{
|
|
||||||
"-o", "vhost_user_socket=" + sockPath,
|
|
||||||
"-o", "source=" + sourcePath,
|
|
||||||
"-o", "cache=" + q.config.VirtioFSCache}
|
|
||||||
if q.config.Debug {
|
|
||||||
args = append(args, "-d")
|
|
||||||
} else {
|
|
||||||
args = append(args, "-f")
|
|
||||||
}
|
|
||||||
cmd := exec.Command(q.config.VirtioFSDaemon, args...)
|
|
||||||
stderr, err := cmd.StderrPipe()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if err = cmd.Start(); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
defer func() {
|
|
||||||
if err != nil {
|
|
||||||
cmd.Process.Kill()
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
// Wait for socket to become available
|
|
||||||
sockReady := make(chan error, 1)
|
|
||||||
timeStart := time.Now()
|
|
||||||
go func() {
|
|
||||||
scanner := bufio.NewScanner(stderr)
|
|
||||||
var sent bool
|
|
||||||
for scanner.Scan() {
|
|
||||||
if q.config.Debug {
|
|
||||||
q.Logger().WithField("source", "virtiofsd").Debug(scanner.Text())
|
|
||||||
}
|
|
||||||
if !sent && strings.Contains(scanner.Text(), "Waiting for vhost-user socket connection...") {
|
|
||||||
sockReady <- nil
|
|
||||||
sent = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if !sent {
|
|
||||||
if err := scanner.Err(); err != nil {
|
|
||||||
sockReady <- err
|
|
||||||
} else {
|
|
||||||
sockReady <- fmt.Errorf("virtiofsd did not announce socket connection")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
q.Logger().Info("virtiofsd quits")
|
|
||||||
q.stopSandbox()
|
|
||||||
}()
|
|
||||||
timeoutDuration := time.Duration(timeout) * time.Second
|
|
||||||
select {
|
|
||||||
case err = <-sockReady:
|
|
||||||
case <-time.After(timeoutDuration):
|
|
||||||
err = fmt.Errorf("timed out waiting for virtiofsd (pid=%d) socket %s", cmd.Process.Pid, sockPath)
|
|
||||||
}
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Now reduce timeout by the elapsed time
|
|
||||||
elapsed := time.Since(timeStart)
|
|
||||||
if elapsed < timeoutDuration {
|
|
||||||
timeout = timeout - int(elapsed.Seconds())
|
|
||||||
} else {
|
|
||||||
timeout = 0
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var strErr string
|
var strErr string
|
||||||
@ -709,9 +729,39 @@ func (q *qemu) startSandbox(timeout int) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
err = q.waitSandbox(timeout) // the virtiofsd deferred checks err's value
|
err = q.waitSandbox(timeout) // the virtiofsd deferred checks err's value
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if q.config.BootFromTemplate {
|
||||||
|
if err = q.bootFromTemplate(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (q *qemu) bootFromTemplate() error {
|
||||||
|
err := q.qmpSetup()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer q.qmpShutdown()
|
||||||
|
|
||||||
|
err = q.arch.setIgnoreSharedMemoryMigrationCaps(q.qmpMonitorCh.ctx, q.qmpMonitorCh.qmp)
|
||||||
|
if err != nil {
|
||||||
|
q.Logger().WithError(err).Error("set migration ignore shared memory")
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
uri := fmt.Sprintf("exec:cat %s", q.config.DevicesStatePath)
|
||||||
|
err = q.qmpMonitorCh.qmp.ExecuteMigrationIncoming(q.qmpMonitorCh.ctx, uri)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return q.waitMigration()
|
||||||
|
}
|
||||||
|
|
||||||
// waitSandbox will wait for the Sandbox's VM to be up and running.
|
// waitSandbox will wait for the Sandbox's VM to be up and running.
|
||||||
func (q *qemu) waitSandbox(timeout int) error {
|
func (q *qemu) waitSandbox(timeout int) error {
|
||||||
span, _ := q.trace("waitSandbox")
|
span, _ := q.trace("waitSandbox")
|
||||||
@ -1482,9 +1532,9 @@ func (q *qemu) saveSandbox() error {
|
|||||||
// BootToBeTemplate sets the VM to be a template that other VMs can clone from. We would want to
|
// BootToBeTemplate sets the VM to be a template that other VMs can clone from. We would want to
|
||||||
// bypass shared memory when saving the VM to a local file through migration exec.
|
// bypass shared memory when saving the VM to a local file through migration exec.
|
||||||
if q.config.BootToBeTemplate {
|
if q.config.BootToBeTemplate {
|
||||||
err := q.arch.setBypassSharedMemoryMigrationCaps(q.qmpMonitorCh.ctx, q.qmpMonitorCh.qmp)
|
err := q.arch.setIgnoreSharedMemoryMigrationCaps(q.qmpMonitorCh.ctx, q.qmpMonitorCh.qmp)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
q.Logger().WithError(err).Error("set migration bypass shared memory")
|
q.Logger().WithError(err).Error("set migration ignore shared memory")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1495,6 +1545,10 @@ func (q *qemu) saveSandbox() error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return q.waitMigration()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (q *qemu) waitMigration() error {
|
||||||
t := time.NewTimer(qmpMigrationWaitTimeout)
|
t := time.NewTimer(qmpMigrationWaitTimeout)
|
||||||
defer t.Stop()
|
defer t.Stop()
|
||||||
for {
|
for {
|
||||||
|
@ -27,8 +27,6 @@ const defaultQemuMachineType = QemuPC
|
|||||||
|
|
||||||
const defaultQemuMachineOptions = "accel=kvm,kernel_irqchip,nvdimm"
|
const defaultQemuMachineOptions = "accel=kvm,kernel_irqchip,nvdimm"
|
||||||
|
|
||||||
const qmpCapMigrationBypassSharedMemory = "bypass-shared-memory"
|
|
||||||
|
|
||||||
const qmpMigrationWaitTimeout = 5 * time.Second
|
const qmpMigrationWaitTimeout = 5 * time.Second
|
||||||
|
|
||||||
var qemuPaths = map[string]string{
|
var qemuPaths = map[string]string{
|
||||||
|
@ -102,8 +102,8 @@ type qemuArch interface {
|
|||||||
// supportGuestMemoryHotplug returns if the guest supports memory hotplug
|
// supportGuestMemoryHotplug returns if the guest supports memory hotplug
|
||||||
supportGuestMemoryHotplug() bool
|
supportGuestMemoryHotplug() bool
|
||||||
|
|
||||||
// setBypassSharedMemoryMigrationCaps set bypass-shared-memory capability for migration
|
// setIgnoreSharedMemoryMigrationCaps set bypass-shared-memory capability for migration
|
||||||
setBypassSharedMemoryMigrationCaps(context.Context, *govmmQemu.QMP) error
|
setIgnoreSharedMemoryMigrationCaps(context.Context, *govmmQemu.QMP) error
|
||||||
}
|
}
|
||||||
|
|
||||||
type qemuArchBase struct {
|
type qemuArchBase struct {
|
||||||
@ -153,6 +153,8 @@ const (
|
|||||||
|
|
||||||
// QemuCCWVirtio is a QEMU virt machine type for for s390x
|
// QemuCCWVirtio is a QEMU virt machine type for for s390x
|
||||||
QemuCCWVirtio = "s390-ccw-virtio"
|
QemuCCWVirtio = "s390-ccw-virtio"
|
||||||
|
|
||||||
|
qmpCapMigrationIgnoreShared = "x-ignore-shared"
|
||||||
)
|
)
|
||||||
|
|
||||||
// kernelParamsNonDebug is a list of the default kernel
|
// kernelParamsNonDebug is a list of the default kernel
|
||||||
@ -579,10 +581,10 @@ func (q *qemuArchBase) supportGuestMemoryHotplug() bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
func (q *qemuArchBase) setBypassSharedMemoryMigrationCaps(ctx context.Context, qmp *govmmQemu.QMP) error {
|
func (q *qemuArchBase) setIgnoreSharedMemoryMigrationCaps(ctx context.Context, qmp *govmmQemu.QMP) error {
|
||||||
err := qmp.ExecSetMigrationCaps(ctx, []map[string]interface{}{
|
err := qmp.ExecSetMigrationCaps(ctx, []map[string]interface{}{
|
||||||
{
|
{
|
||||||
"capability": qmpCapMigrationBypassSharedMemory,
|
"capability": qmpCapMigrationIgnoreShared,
|
||||||
"state": true,
|
"state": true,
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
@ -29,8 +29,6 @@ const defaultQemuMachineType = QemuVirt
|
|||||||
|
|
||||||
const qmpMigrationWaitTimeout = 10 * time.Second
|
const qmpMigrationWaitTimeout = 10 * time.Second
|
||||||
|
|
||||||
const qmpCapMigrationBypassSharedMemory = "bypass-shared-memory"
|
|
||||||
|
|
||||||
var defaultQemuMachineOptions = "usb=off,accel=kvm,nvdimm,gic-version=" + getGuestGICVersion()
|
var defaultQemuMachineOptions = "usb=off,accel=kvm,nvdimm,gic-version=" + getGuestGICVersion()
|
||||||
|
|
||||||
var qemuPaths = map[string]string{
|
var qemuPaths = map[string]string{
|
||||||
@ -199,7 +197,7 @@ func (q *qemuArm64) appendImage(devices []govmmQemu.Device, path string) ([]govm
|
|||||||
return devices, nil
|
return devices, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (q *qemuArm64) setBypassSharedMemoryMigrationCaps(_ context.Context, _ *govmmQemu.QMP) error {
|
func (q *qemuArm64) setIgnoreSharedMemoryMigrationCaps(_ context.Context, _ *govmmQemu.QMP) error {
|
||||||
// bypass-shared-memory not support in arm64 for now
|
// x-ignore-shared not support in arm64 for now
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -30,8 +30,6 @@ const defaultQemuMachineOptions = "accel=kvm,usb=off,cap-cfpc=broken,cap-sbbc=br
|
|||||||
|
|
||||||
const defaultMemMaxPPC64le = 32256 // Restrict MemMax to 32Gb on PPC64le
|
const defaultMemMaxPPC64le = 32256 // Restrict MemMax to 32Gb on PPC64le
|
||||||
|
|
||||||
const qmpCapMigrationBypassSharedMemory = "bypass-shared-memory"
|
|
||||||
|
|
||||||
const qmpMigrationWaitTimeout = 5 * time.Second
|
const qmpMigrationWaitTimeout = 5 * time.Second
|
||||||
|
|
||||||
var qemuPaths = map[string]string{
|
var qemuPaths = map[string]string{
|
||||||
|
@ -7,10 +7,11 @@ package virtcontainers
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
govmmQemu "github.com/intel/govmm/qemu"
|
govmmQemu "github.com/intel/govmm/qemu"
|
||||||
"github.com/kata-containers/runtime/virtcontainers/device/config"
|
"github.com/kata-containers/runtime/virtcontainers/device/config"
|
||||||
"github.com/kata-containers/runtime/virtcontainers/types"
|
"github.com/kata-containers/runtime/virtcontainers/types"
|
||||||
"time"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type qemuS390x struct {
|
type qemuS390x struct {
|
||||||
@ -26,8 +27,6 @@ const defaultQemuMachineOptions = "accel=kvm"
|
|||||||
|
|
||||||
const virtioSerialCCW = "virtio-serial-ccw"
|
const virtioSerialCCW = "virtio-serial-ccw"
|
||||||
|
|
||||||
const qmpCapMigrationBypassSharedMemory = "bypass-shared-memory"
|
|
||||||
|
|
||||||
const qmpMigrationWaitTimeout = 5 * time.Second
|
const qmpMigrationWaitTimeout = 5 * time.Second
|
||||||
|
|
||||||
var qemuPaths = map[string]string{
|
var qemuPaths = map[string]string{
|
||||||
|
@ -13,7 +13,9 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"reflect"
|
"reflect"
|
||||||
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
govmmQemu "github.com/intel/govmm/qemu"
|
govmmQemu "github.com/intel/govmm/qemu"
|
||||||
"github.com/kata-containers/runtime/virtcontainers/device/config"
|
"github.com/kata-containers/runtime/virtcontainers/device/config"
|
||||||
@ -561,3 +563,50 @@ func createQemuSandboxConfig() (*Sandbox, error) {
|
|||||||
|
|
||||||
return &sandbox, nil
|
return &sandbox, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestQemuVirtiofsdArgs(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
|
||||||
|
q := &qemu{
|
||||||
|
id: "foo",
|
||||||
|
config: HypervisorConfig{
|
||||||
|
VirtioFSCache: "none",
|
||||||
|
Debug: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
savedKataHostSharedDir := kataHostSharedDir
|
||||||
|
kataHostSharedDir = "test-share-dir"
|
||||||
|
defer func() {
|
||||||
|
kataHostSharedDir = savedKataHostSharedDir
|
||||||
|
}()
|
||||||
|
|
||||||
|
result := "-o vhost_user_socket=bar1 -o source=test-share-dir/foo -o cache=none -d"
|
||||||
|
args := q.virtiofsdArgs("bar1")
|
||||||
|
assert.Equal(strings.Join(args, " "), result)
|
||||||
|
|
||||||
|
q.config.Debug = false
|
||||||
|
result = "-o vhost_user_socket=bar2 -o source=test-share-dir/foo -o cache=none -f"
|
||||||
|
args = q.virtiofsdArgs("bar2")
|
||||||
|
assert.Equal(strings.Join(args, " "), result)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestQemuWaitVirtiofsd(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
|
||||||
|
q := &qemu{}
|
||||||
|
|
||||||
|
ready := make(chan error, 1)
|
||||||
|
timeout := 5
|
||||||
|
|
||||||
|
ready <- nil
|
||||||
|
remain, err := q.waitVirtiofsd(time.Now(), timeout, ready, "")
|
||||||
|
assert.Nil(err)
|
||||||
|
assert.True(remain <= timeout)
|
||||||
|
assert.True(remain >= 0)
|
||||||
|
|
||||||
|
timeout = 0
|
||||||
|
remain, err = q.waitVirtiofsd(time.Now(), timeout, ready, "")
|
||||||
|
assert.NotNil(err)
|
||||||
|
assert.True(remain == 0)
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user