persist: baseline persist data format

Fixes #803

The on-disk persist data should be "versioned" and baselined; any modification to
the persist data should be considered a potential break of backward compatibility.

Signed-off-by: Wei Zhang <zhangwei555@huawei.com>
This commit is contained in:
Wei Zhang 2018-10-31 11:19:07 +08:00
parent 925193fb3e
commit e14ffb40cf
6 changed files with 584 additions and 0 deletions

View File

@ -0,0 +1,227 @@
// Copyright (c) 2016 Intel Corporation
// Copyright (c) 2018 Huawei Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
package persistapi
// Param is one key/value pair describing a hypervisor or kernel parameter.
type Param struct {
	Key, Value string
}
// Asset saves a hypervisor asset.
type Asset struct {
	// Path is persisted under the "path" key.
	Path string `json:"path"`

	// Custom is persisted under the "custom" key, so that the key names
	// the field rather than its Go type.
	// NOTE(review): presumably marks an asset that overrides a configured
	// one — confirm against the caller.
	Custom bool `json:"custom"`
}
// HypervisorConfig saves the configuration of the sandbox hypervisor.
type HypervisorConfig struct {
	// NumVCPUs is the default number of vCPUs for the VM.
	NumVCPUs uint32

	// DefaultMaxVCPUs is the maximum number of vCPUs for the VM.
	DefaultMaxVCPUs uint32

	// MemorySize is the default memory size of the VM, in MiB.
	MemorySize uint32

	// DefaultBridges is the default number of bridges for the VM.
	// Bridges can be used to hot plug devices.
	DefaultBridges uint32

	// Msize9p is the msize used for 9p shares.
	Msize9p uint32

	// MemSlots is the default number of memory slots of the VM.
	MemSlots uint32

	// MemOffset is the memory space for the nvdimm device.
	MemOffset uint32

	// KernelParams holds additional guest kernel parameters.
	KernelParams []Param

	// HypervisorParams holds additional hypervisor parameters.
	HypervisorParams []Param

	// KernelPath is the host path of the guest kernel.
	KernelPath string

	// ImagePath is the host path of the guest image.
	ImagePath string

	// InitrdPath is the host path of the guest initrd image.
	// ImagePath and InitrdPath cannot be set at the same time.
	InitrdPath string

	// FirmwarePath is the host path of the bios.
	FirmwarePath string

	// MachineAccelerators lists machine specific accelerators.
	MachineAccelerators string

	// HypervisorPath is the host path of the hypervisor executable.
	HypervisorPath string

	// BlockDeviceDriver is the driver to be used for block devices:
	// either VirtioSCSI or VirtioBlock, with the default driver being
	// defaultBlockDriver.
	BlockDeviceDriver string

	// HypervisorMachineType is the type of machine being emulated.
	HypervisorMachineType string

	// MemoryPath is the memory file path of the VM memory. Used when
	// either BootToBeTemplate or BootFromTemplate is true.
	MemoryPath string

	// DevicesStatePath is the VM device state file path. Used when
	// either BootToBeTemplate or BootFromTemplate is true.
	DevicesStatePath string

	// EntropySource is the path to a host source of entropy
	// (/dev/random, /dev/urandom or a real hardware RNG device).
	EntropySource string

	// CustomAssets is a map of assets.
	// Each value in the map takes precedence over the configured assets.
	// For example, a value stored under the "kernel" key is used for the
	// sandbox's kernel path instead of KernelPath.
	CustomAssets map[string]*Asset

	// BlockDeviceCacheSet tells whether cache-related options are set
	// on block devices.
	BlockDeviceCacheSet bool

	// BlockDeviceCacheDirect is a cache-related option for block devices.
	// It denotes whether O_DIRECT (bypassing the host page cache) is
	// enabled.
	BlockDeviceCacheDirect bool

	// BlockDeviceCacheNoflush is a cache-related option for block devices.
	// It denotes whether flush requests for the device are ignored.
	BlockDeviceCacheNoflush bool

	// DisableBlockDeviceUse disallows a block device from being used.
	DisableBlockDeviceUse bool

	// EnableIOThreads lets IO be processed in a separate thread.
	// Currently supported for the virtio-scsi driver.
	EnableIOThreads bool

	// Debug switches the default hypervisor and kernel parameters to
	// enable debug output where available.
	Debug bool

	// MemPrealloc tells whether the memory should be pre-allocated.
	MemPrealloc bool

	// HugePages tells whether the memory should be pre-allocated from
	// huge pages.
	HugePages bool

	// Realtime enables or disables realtime.
	Realtime bool

	// Mlock controls memory locking when Realtime is enabled.
	// Realtime=true with Mlock=false allows VM memory to be swapped out,
	// enabling higher density.
	Mlock bool

	// DisableNestingChecks overrides customizations performed when
	// running on top of another VMM.
	DisableNestingChecks bool

	// UseVSock selects a vsock for agent communication.
	UseVSock bool

	// HotplugVFIOOnRootBus indicates whether devices need to be
	// hotplugged on the root bus instead of a bridge.
	HotplugVFIOOnRootBus bool

	// BootToBeTemplate indicates whether the VM is created to be a
	// template VM.
	BootToBeTemplate bool

	// BootFromTemplate indicates whether the VM should be created from a
	// template VM.
	BootFromTemplate bool

	// DisableVhostNet is used to indicate if the host supports vhost_net.
	DisableVhostNet bool

	// GuestHookPath is the path within the VM that is used for 'drop-in'
	// hooks.
	GuestHookPath string
}
// KataAgentConfig stores the information needed to reach the
// Kata Containers agent.
type KataAgentConfig struct {
	LongLiveConn, UseVSock bool
}
// HyperstartConfig stores the information needed to initialize the
// hyperstart agent.
type HyperstartConfig struct {
	SockCtlName, SockTtyName string
}
// ProxyConfig stores the information any proxy needs in order to be
// properly initialized.
type ProxyConfig struct {
	Path  string
	Debug bool
}
// ShimConfig carries the configuration specific to a shim
// implementation.
type ShimConfig struct {
	Path  string
	Debug bool
}
// NetworkConfig is the configuration related to a network.
// It currently has no persisted fields.
type NetworkConfig struct{}
// SandboxConfig is the persisted configuration of a sandbox.
// Refs: virtcontainers/sandbox.go:SandboxConfig
//
// Fields deliberately not saved:
//   - Annotation: this is casual data that does not belong in the persist
//     file. If a piece of it needs to persist, give it a specific field.
type SandboxConfig struct {
	HypervisorType   string
	HypervisorConfig HypervisorConfig

	// Only one agent config can be non-nil, according to the agent type.
	AgentType        string
	KataAgentConfig  *KataAgentConfig  `json:",omitempty"`
	HyperstartConfig *HyperstartConfig `json:",omitempty"`

	ProxyType   string
	ProxyConfig ProxyConfig

	ShimType       string
	KataShimConfig ShimConfig

	NetworkModel  string
	NetworkConfig NetworkConfig

	ShmSize uint64

	// SharePidNs makes all containers share the same sandbox level pid
	// namespace.
	SharePidNs bool

	// Stateful keeps sandbox resources in memory across APIs. Users are
	// responsible for calling Release() to release the memory resources.
	Stateful bool

	// SystemdCgroup enables systemd cgroup support.
	SystemdCgroup bool

	// Experimental enables experimental features.
	Experimental bool
}

View File

@ -0,0 +1,112 @@
// Copyright (c) 2016 Intel Corporation
// Copyright (c) 2018 Huawei Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
package persistapi
import (
"os"
"time"
)
// ============= container level resources =============
// DeviceMap saves how a host device maps to a container device.
// NOTE(review): presumably one VM device may be referenced by several
// such mappings — TODO confirm.
// Refs: virtcontainers/container.go:ContainerDevice
type DeviceMap struct {
	// ID is the reference to the VM device.
	ID string

	// ContainerPath is the device path as displayed in the container.
	ContainerPath string

	// FileMode holds the permission bits for the device.
	FileMode os.FileMode

	// UID and GID are the user ID and the group ID in the container
	// namespace.
	UID, GID uint32
}
// Mount describes one mount of a container.
type Mount struct {
	Source, Destination string

	// Type is the type of filesystem to mount.
	Type string

	// Options lists all the mount options of the filesystem.
	Options []string

	// HostPath stores the host side bind mount path.
	HostPath string

	// ReadOnly tells whether the mount should be read only.
	ReadOnly bool

	// BlockDeviceID identifies the block device attached to the VM in
	// case this mount is a block device file or a directory backed by a
	// block device.
	BlockDeviceID string
}
// RootfsState saves the state of a container rootfs.
type RootfsState struct {
	// BlockDeviceID is the block device ID of the container rootfs when
	// it is backed by devicemapper.
	BlockDeviceID string

	// FsType is the file system of the rootfs in case it is a block
	// device.
	FsType string
}
// Process gathers the data related to a container process.
// Refs: virtcontainers/container.go:Process
type Process struct {
	// Token is the process execution context ID and must be unique per
	// sandbox. It is used to manipulate processes for containers that
	// have not started yet, and later to identify them uniquely within a
	// sandbox.
	Token string

	// Pid is the process ID as seen by the host software stack, e.g.
	// CRI-O, containerd. This is typically the shim PID.
	Pid int

	StartTime time.Time
}
// ContainerState represents the persisted state of a container.
type ContainerState struct {
	// State is the running status of the container.
	State string

	// Rootfs holds the information about the container rootfs.
	Rootfs RootfsState

	// CgroupPath is the cgroup hierarchy where sandbox's processes,
	// including the hypervisor, are placed.
	CgroupPath string `json:"cgroupPath,omitempty"`

	// DeviceMaps maps sandbox devices to their destination in the
	// container.
	DeviceMaps []DeviceMap

	// Mounts is the mount info from the OCI spec.
	Mounts []Mount

	// Process is the process on the host representing the container
	// process.
	// FIXME: []Process or Process ?
	Process []Process

	// BundlePath saves the container OCI config.json, which can be
	// unmarshaled and translated to "CompatOCISpec".
	BundlePath string
}

View File

@ -0,0 +1,100 @@
// Copyright (c) 2016 Intel Corporation
// Copyright (c) 2018 Huawei Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
package persistapi
// ============= sandbox level resources =============
// BlockDrive represents a block storage drive, which may be used when the
// storage driver has an underlying block storage device.
type BlockDrive struct {
	// File is the path to the disk-image/device used with this drive.
	File string

	// Format is the format of the drive.
	Format string

	// ID identifies this drive in the hypervisor options.
	ID string

	// Index is the index assigned to the drive. With virtio-scsi it is
	// used as the SCSI LUN index.
	Index int

	// PCIAddr is the PCI address identifying the slot at which the drive
	// is attached.
	PCIAddr string

	// SCSIAddr is the SCSI address of the block device, in case the
	// device is attached using the SCSI driver. Its format is
	// SCSI-Id:LUN.
	SCSIAddr string

	// VirtPath is where the device appears inside the VM, outside of the
	// container mount namespace.
	VirtPath string
}
// VFIODev represents a VFIO drive used for hotplugging.
type VFIODev struct {
	// ID identifies this drive in the hypervisor options.
	ID string

	// Type is the type of the VFIO device.
	Type string

	// BDF is the Bus:Device.Function of the PCI address.
	BDF string

	// SysfsDev is the sysfsdev of a VFIO mediated device.
	SysfsDev string
}
// VhostUserDeviceAttrs holds the data shared by most vhost-user devices.
type VhostUserDeviceAttrs struct {
	DevID, SocketPath, Type string

	// MacAddress is only meaningful for a vhost user net device.
	MacAddress string
}
// DeviceState is a sandbox level resource representing a host device
// plugged to the hypervisor. One device can be shared among the containers
// of a POD.
// Refs: virtcontainers/device/drivers/generic.go:GenericDevice
type DeviceState struct {
	ID string

	// Type specifies the driver type.
	// Refs: virtcontainers/device/config/config.go:DeviceType
	Type string

	RefCount, AttachCount uint

	// DevType is the type of device: c, b, u or p.
	//   c, u - character (unbuffered)
	//   p    - FIFO
	//   b    - block (buffered) special file
	// More info in mknod(1).
	DevType string

	// Major and Minor are the major and minor numbers of the device.
	Major, Minor int64

	// DriverOptions holds the options specific to each device driver.
	// For example, for BlockDevice one can set
	// DriverOptions["blockDriver"]="virtio-blk".
	DriverOptions map[string]string

	// ============ device driver specific data ===========

	// BlockDrive is specific to the block device driver.
	BlockDrive *BlockDrive `json:",omitempty"`

	// VFIODevs is specific to the VFIO device driver.
	VFIODevs []*VFIODev `json:",omitempty"`

	// VhostUserDev is specific to the vhost-user device driver.
	VhostUserDev *VhostUserDeviceAttrs `json:",omitempty"`

	// ============ end device driver specific data ===========
}

View File

@ -0,0 +1,32 @@
// Copyright (c) 2016 Intel Corporation
// Copyright (c) 2018 Huawei Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
package persistapi
// ============= sandbox level resources =============
// NetworkEndpoint holds the information about one network interface.
type NetworkEndpoint struct {
	Type string

	// ID is used to pass the netdev option to qemu.
	ID string

	// Name is the name of the interface.
	Name string

	// Index is the index of the interface.
	Index int
}
// NetworkInfo holds the network information of a sandbox.
type NetworkInfo struct {
	NetNsPath         string
	NetmonPID         int
	NetNsCreated      bool
	InterworkingModel string
	Endpoints         []NetworkEndpoint
}

View File

@ -0,0 +1,94 @@
// Copyright (c) 2016 Intel Corporation
// Copyright (c) 2018 Huawei Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
package persistapi
// ============= sandbox level resources =============
// SetFunc is the function hook used for setting sandbox/container state.
// It can be registered to dynamically set state files on dump.
type SetFunc func(*SandboxState, map[string]ContainerState) error
// Bridge is a bridge on which devices can be hot plugged.
type Bridge struct {
	// DeviceAddr records the devices plugged into the bridge together
	// with their address in the bridge.
	DeviceAddr map[uint32]string

	// Type is the type of the bridge (pci, pcie, etc).
	Type string

	// ID identifies the bridge in the hypervisor.
	ID string

	// Addr is the PCI/e slot of the bridge.
	Addr int
}
// CPUDevice represents a CPU device that was hot-added to a running VM.
type CPUDevice struct {
	// ID identifies this CPU in the hypervisor options.
	ID string
}
// HypervisorState saves the state of the hypervisor.
// Refs: virtcontainers/qemu.go:QemuState
type HypervisorState struct {
	Pid     int
	Bridges []Bridge

	// HotpluggedVCPUs is the list of CPUs that were hot-added.
	HotpluggedVCPUs []CPUDevice

	HotpluggedMemory     int
	UUID                 string
	HotplugVFIOOnRootBus bool

	// TODO: should this be map[index]bool to indicate available block id??
	BlockIndex int
}
// ProxyState saves the state data of the proxy.
type ProxyState struct {
	// Pid is the pid of the proxy process.
	Pid int

	// URL is the URL used to connect to the proxy.
	URL string
}
// SandboxState holds the persisted state information of a sandbox.
type SandboxState struct {
	// PersistVersion is the version of the persist data format. It can be
	// used for keeping compatibility later.
	PersistVersion uint

	// State is the running status of the sandbox.
	State string

	// SandboxContainer names the container used to start the sandbox/vm.
	SandboxContainer string

	// GuestMemoryHotplugProbe tells whether the guest kernel supports the
	// memory hotplug probe interface.
	GuestMemoryHotplugProbe bool `json:"guestMemoryHotplugProbe"`

	// CgroupPath is the cgroup hierarchy where sandbox's processes,
	// including the hypervisor, are placed.
	CgroupPath string `json:"cgroupPath,omitempty"`

	// GuestMemoryBlockSizeMB is the size of a memory block of the guest
	// OS.
	GuestMemoryBlockSizeMB uint32

	// Devices lists the devices plugged to the sandbox (hypervisor).
	Devices []DeviceState

	// HypervisorState saves hypervisor specific data.
	HypervisorState HypervisorState

	// ProxyState saves the state data of the proxy process.
	ProxyState ProxyState

	// Network saves the network configuration of the sandbox.
	Network NetworkInfo

	// Config saves the config information of the sandbox.
	Config SandboxConfig
}

View File

@ -0,0 +1,19 @@
// Copyright (c) 2018 Huawei Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
package persistapi
// CurPersistVersion is the current persist data version.
//
// It helps keep backward compatibility: when a change in the persistapi
// package needs different handling between runtime versions, modify
// CurPersistVersion and handle the persist data according to it.
// If you can't be sure whether a change in the persistapi package requires
// a bump of CurPersistVersion or not, do it for peace!
// --@WeiZhang555
const CurPersistVersion uint = 1