Compare commits

..

307 Commits

Author SHA1 Message Date
snir911
3704f2aadf Merge pull request #3398 from snir911/2.4.0-alpha1-branch-bump
# Kata Containers 2.4.0-alpha1
2022-01-06 11:24:29 +02:00
Snir Sheriber
117fc9c9e9 release: Kata Containers 2.4.0-alpha1
- kata-deploy: fix tar command in dockerfile
- vendor: update to containerd v1.6.0-beta.4
- versions: Upgrade to Cloud Hypervisor v20.2
- vc: remove swagger binary
- agent: Refactor command line parsing to use a framework
- move the oci and protocols crates from agent to upper libs
- docs: Remove word duplication
- osbuilder: Restore Debian as a rootfs
- runtime: fix a typo in kata-collect-data.sh
- agent: return detail error message for RPC calls from shim
- use-cases: clarify SPDK vhost-user-nvme target status in using-spdk-v…
- Delint dockerfiles
- Makefile: update `make go-test` call
- docs: add how-to on DinD in Kata
- agent: Ignore unknown seccomp system calls
- agent: mount: Remove unneeded mount_point local variable
- docs: Fix outdated links
- docs: Fix kernel configs README spelling errors
- security: Update rust crate versions
- kata-manager: Retrieve static tarball
- osbuilder: avoid to copy versions.txt which already deprecated
- qemu: Disable libudev for QEMU 5.2 and newer
- osbuilder: Add protoc to the alpine container
- docs: Clarify where to run agent API generation commands
- packaging/qemu: partial git clone
- docs: Fix arch doc formatting
- CI: Switch to a mirror as gnu.org is down
- Split architecture doc into separate files
- docs: Update the stable branch strategy
- tracing: Add span name to logging error
- docs: Update code PR advice document
- agent: Add config file option to cli
- update container type handling
- docs: Update architecture document
- runtime: update golang to 1.16 and remove ioutil package
- kata-deploy: Deal with empty containerd conf file
- src: reorg source code directory
- osbuilder: show usage if no options/arguments specified
- Upgrade to Cloud Hypervisor v20.1
- image_build: add help info for '-f' option and 'BLOCK_SIZE' env.
- osbuilder: be runtime consistent with podman build
- osbuilder: Revert to using apk.static for Alpine
- runtime/template: Handling new attributes for hypervisor config
- docs: fix check-markdown test
- runtime: correct span name for stopSandbox function
- runtime: only call stopVirtiofsd when shared_fs is virtio-fs
- snap: read initrd and image distros from version.yaml
- versions: Use Ubuntu initrd for non-musl archs
- packaging: Fix missing commit message in building kata-runtime
- virtcontainers: clh: Upgrade to openapi-generator v5.3.0
- agent: user container ID as watchable storage key for hashmap
- runtime: enable vhost-net for rootless hypervisor
- packaging: add help information for '-f' option in install_go.sh
- Cleanup some unused variables, definitions
- Upgrade to Cloud Hypervisor v20.0
- docs: Update limitation document regarding docker swarm
- runtime: Enable FUSE_DAX kernel config for DAX
- agent: copy empty directories for watchable-bind mounts
- runtime: Update comments for virtcontainers to use kata 2.0
- Update rust crate versions
- osbuilder: Remove debian as a rootfs

e2c1e65e kata-deploy: fix tar command in dockerfile
615224e9 agent: move the protocols to upper libs
330e3dcc agent: move the oci crate to upper libs
7b03d78f vendor: update to containerd v1.6.0-beta.4
1f581a04 versions: Upgrade to Cloud Hypervisor v20.2
623d8f08 docs: Remove word duplication
1c4edb96 agent: Refactor arg parsing to use clap
3093f93a osbuilder: Restore Debian as a rootfs
073a3459 use-cases: clarify vhost-user-nvme status in using-spdk-vhost-user
2254fa86 runtime: fix a typo in kata-collect-data.sh
2d0f9d2d vc: remove swagger binary
cf91307c agent: return detail error message for rpc calls from shim
137e217b docs: Fix outdated k8s link
55bac67a docs: Fix kernel configs README spelling errors
205420d2 docs: Replicate branch rename on runtime-spec
91abebf9 agent: mount: Remove unneeded mount_point local variable
b1f4e945 security: Update rust crate versions
d79268ac tools/packaging: add copyright to kata-monitor's Dockerfile
428cf0a6 packaging: delint tests dockerfiles
1ea9b703 packaging: delint kata-deploy dockerfiles
3669e1b6 ci/openshift-ci: delint dockerfiles
aeb2b673 osbuilder: delint dockerfiles
bc120289 packaging: delint kata-monitor dockerfiles
bc71dd58 packaging: delint static-build dockerfiles
99ef52a3 osbuilder: Add protoc to the alpine container
c2578cd9 docs: Clarify where to run agent API generation commands
321995b7 CI: Switch to a mirror as gnu.org is down
fb1989b2 docs: Fix arch doc formatting
2938bb7f packaging/qemu: Use QEMU script to update submodules
5d49ccd6 packaging/qemu: Use partial git clone
87a219a1 docs: Update the stable branch strategy
d1bc409d osbuilder: avoid to copy versions.txt which already deprecated
1653dd4a tracing: Add span name to logging error
12c8e41c qemu: Disable libudev for QEMU 5.2 and newer
233015a6 docs: Split guest assets details out of arch doc
db411c23 docs: Split k8s info out of arch doc
7ac619b2 docs: Split networking out of arch doc
5df0cb64 docs: Split storage out of arch doc
7229b7a6 docs: Split background and example out of arch doc
283d7d52 docs: Split history out of arch doc
6f9efb40 docs: Move arch doc to separate directory
02608e13 docs: Update code PR advice document
cb5c948a kata-manager: Retrieve static tarball
51bf9807 docs: Update architecture document
f3a97e94 docs: add how-to on Docker in Kata
7a989a83 runtime: api-test: fixup
52f79aef utils: update container type handling
5b002f3c docs: change io/ioutil to io/os packages
03546f75 runtime: change io/ioutil to io/os packages
24a530ce versions: bump minimum golang version to 1.16.10
7c4263b3 src: reorg source directories
1a34fbcd agent: Add config file option to cli
bbfb10e1 versions: Upgrade to Cloud Hypervisor v20.1
84571506 kata-deploy: Deal with empty containerd conf file
3f7cf7ae osbuilder: show usage if no options/arguments specified
2ebaaac7 osbuilder: be runtime consistent also with podman build
f3103696 docs: fix check-markdown test
2204ecac versions: Upgrade Alpine, using minor version
dfd0732f osbuilder: Revert to using apk.static for Alpine
6b3e4c21 image_build: add help info for '-f' option and 'BLOCK_SIZE' env.
b92babf9 runtime/template: Handling new attributes for hypervisor config
40bd34ca runtime: only call stopVirtiofsd when shared_fs is virtio-fs
33f343ee runtime: correct span name for stopSandbox function
d7cc952c versions: Use Ubuntu initrd for non-musl archs
ff929fc0 snap: read initrd and image distros from version.yaml
8fae2631 packaging: Fix missing commit message in building kata-runtime
99530026 virtcontainers: clh: Upgrade to openapi-generator v5.3.0
b3bcb7b2 runtime: enable vhost-net for rootless hypervisor
7cb7b9d5 agent: remove unused field in mount handling
f6ae1582 agent: drop unused fields from network
4756a04b virtcontainers: clh: Re-generate the client code
0bf4d257 versions: Upgrade to Cloud Hypervisor v20.0
647082b2 docs: Update limitation document regarding docker swarm
39b35d00 agent: user container ID as watchable storage key for hashmap
1e6f58e5 packaging: add help information for '-f' option in install_go.sh
2af95bc5 agent: create directories for watchable-bind mounts
6105e3ee runtime: enable FUSE_DAX kernel config for DAX
591d4af1 runtime: Update comments for virtcontainers to use kata 2.0
923e098d osbuilder: Remove debian as a rootfs
afb96c00 agent: Wrap remaining nix errors with anyhow
aba572e0 rustjail: Wrap remaining nix errors with anyhow
30d60078 uevent: Fix clippy issue in test code
4a2be13c agent: Upgrade nix version for security fix
256d5008 agent: Update crate versions
13257986 agent-ctl: Update rust lockfile
4ebdd424 forwarder: Update rust lockfile
6007322d agent: Fixed invalid error message
7b356151 agent: Log unknown seccomp system calls
7304e52a Makefile: update `make go-test` call
c66b5668 agent: Ignore unknown seccomp system calls

Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2022-01-06 08:37:28 +02:00
Fabiano Fidêncio
f9b4d0b60e Merge pull request #3395 from snir911/fix_kata_deploy
kata-deploy: fix tar command in dockerfile
2022-01-05 23:42:26 +01:00
Eric Ernst
e073c0936b Merge pull request #3279 from egernst/containerd-vendor-bump
vendor: update to containerd v1.6.0-beta.4
2022-01-05 11:13:05 -08:00
Bo Chen
dca220ad4d Merge pull request #3384 from likebreath/0104/clh_v20.2
versions: Upgrade to Cloud Hypervisor v20.2
2022-01-05 10:51:55 -08:00
Snir Sheriber
e2c1e65e27 kata-deploy: fix tar command in dockerfile
tar params are passed wrongly

Fixes: #3394
Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2022-01-05 20:07:52 +02:00
Bin Liu
94f14cf6f7 Merge pull request #3363 from zhsj/remove-binary
vc: remove swagger binary
2022-01-05 20:40:33 +08:00
Bin Liu
f622d9491f Merge pull request #3253 from stevenhorsman/agent-config-cmdline
agent: Refactor command line parsing to use a framework
2022-01-05 20:25:57 +08:00
Bin Liu
59ec112337 Merge pull request #3355 from lifupan/main
move the oci and protocols crates from agent to upper libs
2022-01-05 20:19:59 +08:00
Fupan Li
615224e993 agent: move the protocols to upper libs
move the protocols to upper libs thus it can
be shared between agent and other rust runtime.

Depends-on: github.com/kata-containers/tests#4306

Fixes: #3348

Signed-off-by: Fupan Li <fupan.lfp@antgroup.com>
2022-01-05 16:58:06 +08:00
Fupan Li
330e3dcc93 agent: move the oci crate to upper libs
Move the oci crate to upper libs thus it can be
shared between agent and other rust runtimes.

Fixes: #3348

Signed-off-by: Fupan Li <fupan.lfp@antgroup.com>
2022-01-05 16:58:06 +08:00
Bin Liu
3339ba90cf Merge pull request #3382 from GabyCT/topic/updateupgradingdoc
docs: Remove word duplication
2022-01-05 14:50:26 +08:00
Bin Liu
b2166560fa Merge pull request #3375 from zhaojizhuang/debianrootfs
osbuilder: Restore Debian as a rootfs
2022-01-05 10:27:47 +08:00
Eric Ernst
7b03d78f15 vendor: update to containerd v1.6.0-beta.4
Update our containerd vendoring. In particular, we're interested in
grabbing the updated annotation definitions for defining sandbox sizing.

- go get github.com/containerd/containerd@v1.6.0-beta.4
- edit go.mod to remove containerd v1.5.8 replacement directive
- go mod vendor
- go mod tidy

Fixes: #3276

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2022-01-04 17:15:17 -08:00
GabyCT
caa4e89dfc Merge pull request #3366 from Kvasscn/kata_dev_fix_kata-collect-data_typo
runtime: fix a typo in kata-collect-data.sh
2022-01-04 17:03:34 -06:00
Bo Chen
1f581a0405 versions: Upgrade to Cloud Hypervisor v20.2
This is a bug-fix release from Cloud Hypervisor addressing the following
issues: 1) Don't error out when setting up the SIGWINCH handler (for
console resize) when this fails due to older kernel; 2) Seccomp rules
were refined to remove syscalls that are now unused; 3) Fix reboot on
older host kernels when SIGWINCH handler was not initialised; 4) Fix
virtio-vsock blocking issue.

Details can be found: https://github.com/cloud-hypervisor/cloud-hypervisor/releases/tag/v20.2

Fixes: #3383

Signed-off-by: Bo Chen <chen.bo@intel.com>
2022-01-04 14:37:35 -08:00
Gabriela Cervantes
623d8f086a docs: Remove word duplication
This PR removes a word duplication in the Upgrading documentation.

Fixes #3381

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2022-01-04 15:58:50 +00:00
James O. D. Hunt
a838a598ef Merge pull request #3354 from liubin/fix/3353-return-error-details
agent: return detail error message for RPC calls from shim
2022-01-04 14:06:25 +00:00
stevenhorsman
1c4edb9619 agent: Refactor arg parsing to use clap
Fixes: #3284

Co-authored-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
Co-authored-by: stevenhorsman <steven@uk.ibm.com>

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2022-01-04 09:14:08 +00:00
zhaojizhuang
3093f93a6f osbuilder: Restore Debian as a rootfs
Restore Debian as a rootfs.
1. revert of #3154, with some changes
2. update debian version to 10.11
3. update `libstdc++-6-dev` to `libstdc++-8-dev`
4. changes discarded in QAT are not restored

Fixes: #3372
Signed-off-by: zhaojizhuang <571130360@qq.com>
2022-01-04 11:54:34 +08:00
Bin Liu
883b0d1dc3 Merge pull request #2840 from optimistyzy/1014_fix_vhost_nvme
use-cases: clarify SPDK vhost-user-nvme target status in using-spdk-v…
2022-01-04 11:42:15 +08:00
Ziye Yang
073a345908 use-cases: clarify vhost-user-nvme status in using-spdk-vhost-user
The SPDK vhost-user-nvme target was removed in the SPDK 21.07 release since
the upstream QEMU version does not support it. This fixes the documented usage.

Fixes #3371

Signed-off-by: Ziye Yang <ziye.yang@intel.com>
2021-12-31 02:24:59 +00:00
Wainer Moschetta
820dc930db Merge pull request #3109 from wainersm/delint_dockerfiles
Delint dockerfiles
2021-12-28 10:11:51 -03:00
zhanghj
2254fa8657 runtime: fix a typo in kata-collect-data.sh
Fix a typo in the check for whether the mountpoint exists.

Fixes: #3365

Signed-off-by: zhanghj <zhanghj.lc@inspur.com>
2021-12-28 10:03:18 +08:00
Shengjing Zhu
2d0f9d2d06 vc: remove swagger binary
Fixes: #3362

Signed-off-by: Shengjing Zhu <zhsj@debian.org>
2021-12-25 22:41:29 +08:00
bin
cf91307c66 agent: return detail error message for rpc calls from shim
For calls from shim to agent, the return error will be processed like this:

match self.do_start_container(req).await {
    Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
    Ok(_) => Ok(Empty::new()),
}

The e.to_string() call returns only a part of the error (for example, the part set by context()),
which may lead to a lack of information.

The `format!("{:?}", err)` will return more info.

Fixes: #3353

Signed-off-by: bin <bin@hyper.sh>
2021-12-24 17:17:29 +08:00
Fupan Li
0fe20854e7 Merge pull request #2481 from Bevisy/main-1494
Makefile: update `make go-test` call
2021-12-24 09:57:06 +08:00
James O. D. Hunt
302c7c34f3 Merge pull request #3137 from t3hmrman/docs/2474-add-dind-how-to
docs: add how-to on DinD in Kata
2021-12-23 12:24:36 +00:00
James O. D. Hunt
ba22a04265 Merge pull request #2958 from ManaSugi/ignore-unknown-systemcall
agent: Ignore unknown seccomp system calls
2021-12-23 12:12:47 +00:00
Peng Tao
8b6fbf9108 Merge pull request #3331 from dubek/mount-remove-var
agent: mount: Remove unneeded mount_point local variable
2021-12-23 11:53:14 +08:00
Peng Tao
65343b3fdc Merge pull request #3337 from Jakob-Naucke/cgroups-main
docs: Fix outdated links
2021-12-23 11:40:32 +08:00
Peng Tao
08367643dc Merge pull request #3339 from Jakob-Naucke/spell-kernel-readme
docs: Fix kernel configs README spelling errors
2021-12-23 11:40:09 +08:00
Jakob Naucke
137e217b85 docs: Fix outdated k8s link
in virtcontainers readme

Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-12-22 19:40:25 +01:00
Jakob Naucke
55bac67ac6 docs: Fix kernel configs README spelling errors
- `fragments` in backticks
- s/perfoms/performs/

Fixes: #3338
Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-12-22 18:57:47 +01:00
Jakob Naucke
205420d21b docs: Replicate branch rename on runtime-spec
renamed branch `master` to `main`

Fixes: #3336
Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-12-22 18:15:01 +01:00
Fabiano Fidêncio
562fc73769 Merge pull request #3297 from jodh-intel/cargo-audit-fixes
security: Update rust crate versions
2021-12-22 16:10:10 +01:00
Dov Murik
91abebf92e agent: mount: Remove unneeded mount_point local variable
We already have a `mount_path` local Path variable which holds the mount
point.

Use it instead of creating a new `mount_point` variable with identical
type and content.

Fixes: #3332

Signed-off-by: Dov Murik <dovmurik@linux.ibm.com>
2021-12-22 14:11:50 +02:00
James O. D. Hunt
b1f4e945b3 security: Update rust crate versions
Update the rust dependencies that have upstream security fixes. Issues
fixed by this change:

- [`RUSTSEC-2020-0002`](https://rustsec.org/advisories/RUSTSEC-2020-0002) (`prost` crate)
- [`RUSTSEC-2020-0036`](https://rustsec.org/advisories/RUSTSEC-2020-0036) (`failure` crate)
- [`RUSTSEC-2021-0073`](https://rustsec.org/advisories/RUSTSEC-2021-0073) (`prost-types` crate)
- [`RUSTSEC-2021-0119`](https://rustsec.org/advisories/RUSTSEC-2021-0119) (`nix` crate)

This change also includes:

- Minor code changes for the new version of `prometheus` for the agent.

- A *downgrade* of the version of the `futures` crate to the (new)
  latest version (`0.3.17`) since version `0.3.18` was removed [1].

Fixes: #3296.

[1] - See https://crates.io/crates/futures/versions

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-12-22 07:41:16 +00:00
Fabiano Fidêncio
ee66155a72 Merge pull request #3271 from Jakob-Naucke/kata-manager-static
kata-manager: Retrieve static tarball
2021-12-21 16:09:50 +01:00
Fabiano Fidêncio
67f0ab4092 Merge pull request #3294 from Kvasscn/kata_dev_osbuilder_makefile
osbuilder: avoid to copy versions.txt which already deprecated
2021-12-21 16:07:01 +01:00
Wainer dos Santos Moschetta
d79268ac65 tools/packaging: add copyright to kata-monitor's Dockerfile
The kata-monitor's Dockerfile was added by Eric Ernst in commit 2f1cb7995f,
but for some reason the static checker did not catch that the file was missing the copyright statement
at the time it was added. It is now complaining about it, so this assigns the copyright to
him to make the static checker happy.

Fixes #3329
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-12-21 10:01:11 -05:00
Fabiano Fidêncio
79153c3845 Merge pull request #3288 from gkurz/qemu-disable-libudev
qemu: Disable libudev for QEMU 5.2 and newer
2021-12-21 15:56:16 +01:00
Wainer dos Santos Moschetta
428cf0a685 packaging: delint tests dockerfiles
Removed all errors/warnings pointed out by hadolint version 2.7.0, except for the following
ignored rules:
  - "DL3008 warning: Pin versions in apt get install"
  - "DL3041 warning: Specify version with `dnf install -y <package>-<version>`"
  - "DL3033 warning: Specify version with `yum install -y <package>-<version>`"
  - "DL3048 style: Invalid label key"
  - "DL3003 warning: Use WORKDIR to switch to a directory"
  - "DL3018 warning: Pin versions in apk add. Instead of apk add <package> use apk add <package>=<version>"
  - "DL3037 warning: Specify version with zypper install -y <package>[=]<version>"

Fixes #3107
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-12-21 09:54:44 -05:00
Wainer dos Santos Moschetta
1ea9b70383 packaging: delint kata-deploy dockerfiles
Removed all errors/warnings pointed out by hadolint version 2.7.0, except for the following
ignored rules:
  - "DL3008 warning: Pin versions in apt get install"
  - "DL3041 warning: Specify version with `dnf install -y <package>-<version>`"
  - "DL3033 warning: Specify version with `yum install -y <package>-<version>`"
  - "DL3048 style: Invalid label key"
  - "DL3003 warning: Use WORKDIR to switch to a directory"
  - "DL3018 warning: Pin versions in apk add. Instead of apk add <package> use apk add <package>=<version>"
  - "DL3037 warning: Specify version with zypper install -y <package>[=]<version>"

Fixes #3107
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-12-21 09:54:44 -05:00
Wainer dos Santos Moschetta
3669e1b6d9 ci/openshift-ci: delint dockerfiles
Removed all errors/warnings pointed out by hadolint version 2.7.0, except for the following
ignored rules:
  - "DL3008 warning: Pin versions in apt get install"
  - "DL3041 warning: Specify version with `dnf install -y <package>-<version>`"
  - "DL3033 warning: Specify version with `yum install -y <package>-<version>`"
  - "DL3048 style: Invalid label key"
  - "DL3003 warning: Use WORKDIR to switch to a directory"
  - "DL3018 warning: Pin versions in apk add. Instead of apk add <package> use apk add <package>=<version>"
  - "DL3037 warning: Specify version with zypper install -y <package>[=]<version>"

Fixes #3107
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-12-21 09:54:44 -05:00
Wainer dos Santos Moschetta
aeb2b673b3 osbuilder: delint dockerfiles
Removed all errors/warnings pointed out by hadolint version 2.7.0, except for the following
ignored rules:
  - "DL3008 warning: Pin versions in apt get install"
  - "DL3041 warning: Specify version with `dnf install -y <package>-<version>`"
  - "DL3033 warning: Specify version with `yum install -y <package>-<version>`"
  - "DL3048 style: Invalid label key"
  - "DL3003 warning: Use WORKDIR to switch to a directory"
  - "DL3018 warning: Pin versions in apk add. Instead of apk add <package> use apk add <package>=<version>"
  - "DL3037 warning: Specify version with zypper install -y <package>[=]<version>"

Fixes #3107
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-12-21 09:54:44 -05:00
Wainer dos Santos Moschetta
bc120289ec packaging: delint kata-monitor dockerfiles
Removed all errors/warnings pointed out by hadolint version 2.7.0, except for the following
ignored rules:
  - "DL3008 warning: Pin versions in apt get install"
  - "DL3041 warning: Specify version with `dnf install -y <package>-<version>`"
  - "DL3033 warning: Specify version with `yum install -y <package>-<version>`"
  - "DL3048 style: Invalid label key"
  - "DL3003 warning: Use WORKDIR to switch to a directory"
  - "DL3018 warning: Pin versions in apk add. Instead of apk add <package> use apk add <package>=<version>"
  - "DL3037 warning: Specify version with zypper install -y <package>[=]<version>"

Fixes #3107
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-12-21 09:54:44 -05:00
Wainer dos Santos Moschetta
bc71dd5812 packaging: delint static-build dockerfiles
Removed all errors/warnings pointed out by hadolint version 2.7.0, except for the following
ignored rules:
  - "DL3008 warning: Pin versions in apt get install"
  - "DL3041 warning: Specify version with `dnf install -y <package>-<version>`"
  - "DL3033 warning: Specify version with `yum install -y <package>-<version>`"
  - "DL3048 style: Invalid label key"
  - "DL3003 warning: Use WORKDIR to switch to a directory"
  - "DL3018 warning: Pin versions in apk add. Instead of apk add <package> use apk add <package>=<version>"
  - "DL3037 warning: Specify version with zypper install -y <package>[=]<version>"

Fixes #3107
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-12-21 09:54:41 -05:00
Fabiano Fidêncio
aa7ba1741b Merge pull request #3324 from fidencio/wip/add-protoc-to-alpine-image
osbuilder: Add protoc to the alpine container
2021-12-21 15:52:25 +01:00
Fabiano Fidêncio
99ef52a35d osbuilder: Add protoc to the alpine container
It seems the lack of protoc in the alpine containers is causing issues
with some of our CIs, such as the VFIO one.

Fixes: #3323

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-12-21 13:57:18 +01:00
Archana Shinde
ae271a7e7b Merge pull request #3318 from jodh-intel/docs-agent-protoc
docs: Clarify where to run agent API generation commands
2021-12-21 00:28:01 -08:00
Peng Tao
b990868b11 Merge pull request #3302 from wainersm/static_qemu-partial_clone
packaging/qemu: partial git clone
2021-12-21 10:52:49 +08:00
James O. D. Hunt
c2578cd9a1 docs: Clarify where to run agent API generation commands
Make it clear when reading the table in the agent's "Change the agent
API" documentation that the commands in the "Generation method" column
should be run in the agent repo.

Fixes: #3317.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-12-20 15:45:36 +00:00
James O. D. Hunt
464d1a653e Merge pull request #3312 from jodh-intel/docs-arch-fix-formatting
docs: Fix arch doc formatting
2021-12-20 14:04:36 +00:00
James O. D. Hunt
cd20bf95e9 Merge pull request #3315 from jodh-intel/ci-use-mirror-for-gnu.org
CI: Switch to a mirror as gnu.org is down
2021-12-20 11:53:14 +00:00
James O. D. Hunt
321995b7df CI: Switch to a mirror as gnu.org is down
All CI jobs are failing as www.gnu.org is down, so switch to a mirror
for the time being.

Fixes: #3314.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-12-20 11:22:56 +00:00
James O. D. Hunt
fb1989b27a docs: Fix arch doc formatting
PR #3298 failed to move the named link for the debug console to the
`guest-assets.md` meaning the debug console cells in the "User
accessible" column in the table in the "Root filesystem image" section
do not work as a link.

Fixes: #3311.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-12-20 10:33:48 +00:00
James O. D. Hunt
2ebae2d279 Merge pull request #3287 from jodh-intel/docs-split-arch-doc
Split architecture doc into separate files
2021-12-20 10:11:30 +00:00
Julio Montes
e329dcf2ff Merge pull request #3299 from fidencio/wip/update-stable-branch-strategy
docs: Update the stable branch strategy
2021-12-17 13:29:10 -06:00
Chelsea Mafrica
e4c0b71e40 Merge pull request #3290 from cmaf/tracing-span-logging-error
tracing: Add span name to logging error
2021-12-17 11:13:41 -08:00
Jakob Naucke
7fdb425918 Merge pull request #3286 from zmlcc/pr-advice-expect-211216
docs: Update code PR advice document
2021-12-17 15:35:05 +01:00
Wainer dos Santos Moschetta
2938bb7f89 packaging/qemu: Use QEMU script to update submodules
Currently QEMU's submodules are git cloned but there is the scripts/git-submodule.sh
which is meant for that. Let's use that script.

Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-12-17 10:20:59 -03:00
Wainer dos Santos Moschetta
5d49ccd613 packaging/qemu: Use partial git clone
The static build of QEMU takes a good amount of time on cloning the
source tree because we do a full git clone. In order to speed up that
operation, this changes the Dockerfile so that a partial clone is carried
out by using the --depth=1 argument.

Fixes #3291
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-12-17 10:20:29 -03:00
Fabiano Fidêncio
87a219a1c9 docs: Update the stable branch strategy
On the last architecture committee meeting, the one held on December
14th 2021, we reached the agreement that minor releases will be cut once
every 16 weeks (instead of 12), and that patch releases will be cut
every 4 weeks (instead of 3)

Fixes: #3298

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-12-17 13:48:26 +01:00
zhanghj
d1bc409d57 osbuilder: avoid to copy versions.txt which already deprecated
The versions.txt file in the rootfs-builder dir has already been removed,
so avoid copying it in the list of helper files.

Fixes: #3267

Signed-off-by: zhanghj <zhanghj.lc@inspur.com>
2021-12-17 17:23:05 +08:00
Chelsea Mafrica
1653dd4a30 tracing: Add span name to logging error
Add span name to logging error to help with debugging when the context
is not set before the span is created.

Fixes #3289

Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2021-12-16 12:44:42 -08:00
Greg Kurz
12c8e41c75 qemu: Disable libudev for QEMU 5.2 and newer
Commit 112ea25859 disabled libudev for static builds because it was
breaking snap. It turns out that the only users of libudev in QEMU are
qemu-pr-helper and USB. Kata already disables USB and doesn't use
qemu-pr-helper. Disable libudev for all builds if QEMU supports it, i.e.
version 5.2 or newer.

Fixes #3078

Signed-off-by: Greg Kurz <groug@kaod.org>
2021-12-16 16:12:02 +01:00
James O. D. Hunt
233015a6d9 docs: Split guest assets details out of arch doc
Move the guest assets details out of the architecture doc and into a
separate file.

Fixes: #3246.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-12-16 14:18:49 +00:00
James O. D. Hunt
db411c23e8 docs: Split k8s info out of arch doc
Move the Kubernetes information out of the architecture doc and into a
separate file.

Partially fixes: #3246.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-12-16 14:18:47 +00:00
James O. D. Hunt
7ac619b24e docs: Split networking out of arch doc
Move the networking details out of the architecture doc and into a
separate file.

Partially fixes: #3246.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-12-16 14:18:45 +00:00
James O. D. Hunt
5df0cb6420 docs: Split storage out of arch doc
Move the storage details in the architecture doc to a separate file.

Partially fixes: #3246.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-12-16 14:18:41 +00:00
James O. D. Hunt
7229b7a69d docs: Split background and example out of arch doc
Move the background and example command details out of the architecture
doc and into separate files.

Partially fixes: #3246.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-12-16 14:18:38 +00:00
James O. D. Hunt
283d7d52c8 docs: Split history out of arch doc
Move the historical details out of the architecture doc
and into a separate file.

Partially fixes: #3246.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-12-16 14:17:59 +00:00
James O. D. Hunt
6f9efb4043 docs: Move arch doc to separate directory
Move the architecture document into a new `docs/design/architecture/` directory
in preparation for splitting it into more manageable pieces.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-12-16 12:26:17 +00:00
Zack
02608e13ab docs: Update code PR advice document
Allow using `expect()` for `Mutex.lock()` because a failure in the
lock acquisition is almost unrecoverable

Fixes: #3285

Signed-off-by: Zack <zmlcc@linux.alibaba.com>
2021-12-16 19:23:17 +08:00
Steve Horsman
39cf2b27c1 Merge pull request #3261 from stevenhorsman/native-agent-config-opt
agent: Add config file option to cli
2021-12-16 10:00:56 +00:00
Eric Ernst
3865a1bcf6 Merge pull request #2918 from egernst/update-container-type-handling
update container type handling
2021-12-15 10:41:23 -08:00
Eric Ernst
32d62c85c2 Merge pull request #3195 from jodh-intel/docs-update-architecture
docs: Update architecture document
2021-12-15 09:25:20 -08:00
Jakob Naucke
cb5c948a0a kata-manager: Retrieve static tarball
In `utils/kata-manager.sh`, we download the first asset listed for the
release, which used to be the static x86_64 tarball. If that happened to
not match the system architecture, we would abort. Besides that logic
being invalid for !x86_64 (despite not distributing other tarballs at
the moment), the first asset listed is also not the static tarball any
more, it is the vendored source tarball. Retrieve all _static_ tarballs
and select the appropriate one depending on architecture.

Fixes: #3254
Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-12-15 14:34:14 +01:00
James O. D. Hunt
51bf98073d docs: Update architecture document
Refresh the content and formatting of the architecture document.

Out of scope of these changes:

- Diagram updates.
- Updates to the Networking section.

Fixes: #3190.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-12-15 10:46:46 +00:00
Jakob Naucke
a40e4877e9 Merge pull request #3266 from liubin/fix/3265-update-golang-to-1.16-and-remove-ioutil
runtime: update golang to 1.16 and remove ioutil package
2021-12-15 10:09:23 +01:00
vados
f3a97e94b2 docs: add how-to on Docker in Kata
Add documentation on how to use Docker in Docker

Fixes: #2474

Signed-off-by: vados <vados@vadosware.io>
2021-12-15 12:43:58 +09:00
Eric Ernst
7a989a8333 runtime: api-test: fixup
not clear why this was commented out before -- ensure that we set
appropriate annotation on the sandbox container's annotations to indicate
this is a sandbox.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-12-14 18:55:18 -08:00
Eric Ernst
52f79aef91 utils: update container type handling
Today we assume that if the CRI/upper layer doesn't provide a container
type annotation, it should be treated as a sandbox. Up to this point, a
sandbox with a pause container in CRI context and a single container
(ala ctr run) are treated the same.

For VM sizing and container constraining, it'll be useful to know if
this is a sandbox or if this is a single container.

In updating this, we cleanup the type handling tests and we update the
containerd annotations vendoring.

Fixes: #2926

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-12-14 17:59:19 -08:00
bin
5b002f3c88 docs: change io/ioutil to io/os packages
Change io/ioutil to io/os packages because io/ioutil package
is deprecated from 1.16:

TempDir => os.MkdirTemp

Details: https://go.dev/doc/go1.16#ioutil

Fixes: #3265

Signed-off-by: bin <bin@hyper.sh>
2021-12-15 07:31:57 +08:00
bin
03546f75a6 runtime: change io/ioutil to io/os packages
Change io/ioutil to io/os packages because io/ioutil package
is deprecated from 1.16:

Discard => io.Discard
NopCloser => io.NopCloser
ReadAll => io.ReadAll
ReadDir => os.ReadDir
ReadFile => os.ReadFile
TempDir => os.MkdirTemp
TempFile => os.CreateTemp
WriteFile => os.WriteFile

Details: https://go.dev/doc/go1.16#ioutil

Fixes: #3265

Signed-off-by: bin <bin@hyper.sh>
2021-12-15 07:31:48 +08:00
Jakob Naucke
70274b9d39 Merge pull request #3258 from fidencio/wip/kata-deploy-count-with-a-non-existend-containerd-config-file
kata-deploy: Deal with empty containerd conf file
2021-12-14 20:14:41 +01:00
Bin Liu
6c34446f49 Merge pull request #3244 from bergwolf/reorg-code
src: reorg source code directory
2021-12-14 21:57:07 +08:00
bin
24a530ced1 versions: bump minimum golang version to 1.16.10
According to https://endoflife.date/go golang 1.11.10 is not supported
anymore, 1.16.10 is the minimum supported version.

Fixes: #3265

Signed-off-by: bin <bin@hyper.sh>
2021-12-14 17:03:53 +08:00
Tim Zhang
4f96ea4e2b Merge pull request #3257 from liubin/fix/3256-show-usage-if-no-arguments-specified
osbuilder: show usage if no options/arguments specified
2021-12-14 11:41:06 +08:00
Peng Tao
7c4263b3e1 src: reorg source directories
To make the code directory structure more clear:

└── src
    ├── agent
    ├── libs
    │   └── logging
    ├── runtime
    ├── runtime-rs (to be added)
    └── tools
        ├── agent-ctl
        └── trace-forwarder

Fixes: #3204
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-12-14 10:30:08 +08:00
stevenhorsman
1a34fbcdbd agent: Add config file option to cli
- Add option to pass in config with -c/--config

Fixes: #3252

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2021-12-13 21:57:23 +00:00
Bo Chen
9d13d1b208 Merge pull request #3263 from likebreath/1213/clh_v20.1
Upgrade to Cloud Hypervisor v20.1
2021-12-13 12:51:27 -08:00
Bo Chen
bbfb10e169 versions: Upgrade to Cloud Hypervisor v20.1
This is a bug release from Cloud Hypervisor addressing the following
issues: 1) Networking performance regression with virtio-net; 2) Limit
file descriptors sent in vfio-user support; 3) Fully advertise PCI MMIO
config regions in ACPI tables; 4) Set the TSS and KVM identity maps so
they don't overlap with firmware RAM; 5) Correctly update the DeviceTree
on restore.

Details can be found: https://github.com/cloud-hypervisor/cloud-hypervisor/releases/tag/v20.1

Fixes: #3262

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-12-13 10:09:44 -08:00
Fabiano Fidêncio
8457150684 kata-deploy: Deal with empty containerd conf file
As containerd can properly run without having an existent
`/etc/containerd/config.toml` file (it'd run using the default
configuration), let's explicitly create the file in those cases.

This will avoid issues on amending runtime classes to a non-existent
file.

Fixes: #3229

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
Tested-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-12-13 11:20:22 +01:00
bin
3f7cf7ae67 osbuilder: show usage if no options/arguments specified
Now if no options/arguments are specified, the shell scripts will return an error:

ERROR: Invalid rootfs directory: ''

This commit will show usage if no options/arguments specified.

Fixes: #3256

Signed-off-by: bin <bin@hyper.sh>
2021-12-13 16:10:55 +08:00
Bin Liu
978b13c9e8 Merge pull request #3235 from Kvasscn/kata_dev_image_builer_help
image_build: add help info for '-f' option and 'BLOCK_SIZE' env.
2021-12-09 22:55:24 +08:00
Julio Montes
70062e1563 Merge pull request #3238 from snir911/wip/build_with_runtime
osbuilder: be runtime consistent with podman build
2021-12-09 08:06:00 -06:00
Fabiano Fidêncio
c868172510 Merge pull request #3222 from Jakob-Naucke/apk-static
osbuilder: Revert to using apk.static for Alpine
2021-12-09 13:33:35 +01:00
Fabiano Fidêncio
602d87295b Merge pull request #3226 from liubin/fix/3193-fill-hypervisorconfig
runtime/template: Handling new attributes for hypervisor config
2021-12-09 13:29:23 +01:00
Snir Sheriber
2ebaaac73d osbuilder: be runtime consistent also with podman build
Use the same runtime used for podman run also for the podman build cmd
Additionally remove "docker" from the docker_run_args variable

Fixes: #3239
Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2021-12-09 11:28:16 +02:00
Fabiano Fidêncio
251be90dc0 Merge pull request #3241 from devimc/2021-12-06/fixCheckMarkdown
docs: fix check-markdown test
2021-12-09 08:16:57 +01:00
Julio Montes
f310369698 docs: fix check-markdown test
Unit-Test-Advice.md was moved to kata-containers repo but URLs pointing
to that document were not updated. This patch updates these URLs.

Depends-on: github.com/kata-containers/tests#4273

fixes #3240

Signed-off-by: Julio Montes <julio.montes@intel.com>
2021-12-08 14:38:12 -06:00
Jakob Naucke
2204ecac39 versions: Upgrade Alpine, using minor version
- Upgrade Alpine guest rootfs to 3.15
- Specify a minor version rather than patch level as the Alpine
  repositories use that.

Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-12-08 15:18:44 +01:00
Jakob Naucke
dfd0732ff9 osbuilder: Revert to using apk.static for Alpine
#2399 partially reverted #418, missing on returning to bootstrapping a
rootfs with `apk.static` instead of copying the entire root, which can
result in drastically larger (more than 10x) images. Revert this as well
(requires some updates to URL building).

Fixes: #3216
Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-12-08 15:18:43 +01:00
zhanghj
6b3e4c212c image_build: add help info for '-f' option and 'BLOCK_SIZE' env.
The help information of '-f' option is missing, and same issue
with 'BLOCK_SIZE' env variables, fix it in usage() function.

Fixes: #3231

Signed-off-by: zhanghj <zhanghj.lc@inspur.com>
2021-12-08 17:33:07 +08:00
Chelsea Mafrica
7522109abc Merge pull request #3218 from liubin/fix/3217-fix-span-name
runtime: correct span name for stopSandbox function
2021-12-07 16:36:14 -08:00
Julio Montes
712c5ac6ba Merge pull request #3220 from liubin/fix/3219-stop-virtiofsd-when-needed
runtime: only call stopVirtiofsd when shared_fs is virtio-fs
2021-12-07 07:51:08 -06:00
bin
b92babf91b runtime/template: Handling new attributes for hypervisor config
Some new attributes are added to hypervisor config:
- VMStorePath
- RunStorePath
- SharedPath

These attributes should be handled in two places:

- reset when checking whether the new hypervisor's config is suitable
  for the base config.
- copy from the new hypervisor's config when creating a new VM

Fixes: #3193

Signed-off-by: bin <bin@hyper.sh>
2021-12-07 19:31:03 +08:00
Fabiano Fidêncio
1a7fcd0583 Merge pull request #3211 from devimc/2021-11-06/snap/readVerFromYaml
snap: read initrd and image distros from version.yaml
2021-12-07 09:07:10 +01:00
bin
40bd34caaf runtime: only call stopVirtiofsd when shared_fs is virtio-fs
If shared_fs is set to virtio-9p, the virtiofsd is not started,
so there is no need to stop it.

Fixes: #3219

Signed-off-by: bin <bin@hyper.sh>
2021-12-07 16:06:26 +08:00
bin
33f343ee08 runtime: correct span name for stopSandbox function
Normally the span name should be the same as function
name, so change `StopVM` to `stopSandbox`.

Fixes: #3217

Signed-off-by: bin <bin@hyper.sh>
2021-12-07 15:59:18 +08:00
Fabiano Fidêncio
e091409404 Merge pull request #3213 from Jakob-Naucke/ppc64le-s390x-ubuntu-initrd
versions: Use Ubuntu initrd for non-musl archs
2021-12-06 22:52:53 +01:00
Jakob Naucke
d7cc952cb1 versions: Use Ubuntu initrd for non-musl archs
ppc64le & s390x have no (well supported) musl target for Rust,
therefore, the agent must use glibc and cannot use Alpine. Specify
Ubuntu as the distribution to be used for initrd.

Fixes: #3212
Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-12-06 17:13:38 +01:00
Julio Montes
ff929fc081 snap: read initrd and image distros from version.yaml
Build initrd or image rootfs using the distro name specified
in the versions.yaml

fixes #3208

Signed-off-by: Julio Montes <julio.montes@intel.com>
2021-12-06 08:42:07 -06:00
Bin Liu
ce75785d87 Merge pull request #3197 from Bevisy/main-3196
packaging: Fix missing commit message in building kata-runtime
2021-12-06 11:37:29 +08:00
Binbin Zhang
8fae263170 packaging: Fix missing commit message in building kata-runtime
add `git` package to the shim-v2 build image

Fixes: #3196

Signed-off-by: Binbin Zhang <binbin36520@gmail.com>
2021-12-04 11:59:59 +08:00
Eric Ernst
c14080fd08 Merge pull request #3200 from likebreath/1203/upgrade_openapi_generator
virtcontainers: clh: Upgrade to openapi-generator v5.3.0
2021-12-03 14:15:51 -08:00
Bo Chen
995300260e virtcontainers: clh: Upgrade to openapi-generator v5.3.0
The latest release of openapi-generator v5.3.0 contains the fix for
`dropping err` bug [1]. This patch also re-generated the client code of
Cloud Hypervisor to have the bug fixed.

[1] https://github.com/OpenAPITools/openapi-generator/pull/10275

Fixes: #3201

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-12-03 08:55:38 -08:00
Carlos Venegas
d02a0932d6 Merge pull request #3173 from liubin/fix/3172
agent: user container ID as watchable storage key for hashmap
2021-12-03 09:35:32 -06:00
Fabiano Fidêncio
3fdc97e110 Merge pull request #3183 from fengwang666/nonroot-vhost-bug-fix
runtime: enable vhost-net for rootless hypervisor
2021-12-03 10:42:50 +01:00
Bin Liu
86d9d2eed5 Merge pull request #3169 from Kvasscn/kata_dev_add_install_go_help
packaging: add help information for '-f' option in install_go.sh
2021-12-03 14:39:05 +08:00
Feng Wang
b3bcb7b251 runtime: enable vhost-net for rootless hypervisor
vhost-net is disabled in the rootless kata runtime feature, which has been abandoned since kata 2.0.
I reused the rootless flag for nonroot hypervisor and would like to enable vhost-net.

Fixes #3182

Signed-off-by: Feng Wang <feng.wang@databricks.com>
2021-12-02 21:55:31 -08:00
Bin Liu
4b57548838 Merge pull request #3181 from egernst/topic/clean-lint
Cleanup some unused variables, definitions
2021-12-03 11:06:42 +08:00
Eric Ernst
7cb7b9d5ba agent: remove unused field in mount handling
In our parsing of mountinfo, majority of the fields are unused.
Let's stop saving these.

Fixes: #3180

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-12-02 17:03:46 -08:00
Eric Ernst
f6ae15826e agent: drop unused fields from network
We don't utilize routes or interface vectors. Let's drop them.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-12-02 17:03:41 -08:00
Chelsea Mafrica
cb4bf486ef Merge pull request #3179 from likebreath/1202/clh_v20.0
Upgrade to Cloud Hypervisor v20.0
2021-12-02 15:31:14 -08:00
Bo Chen
4756a04b2d virtcontainers: clh: Re-generate the client code
This patch re-generates the client code for Cloud Hypervisor v19.0.
Note: The client code of cloud-hypervisor's (CLH) OpenAPI is
automatically generated by openapi-generator [1-2].

[1] https://github.com/OpenAPITools/openapi-generator
[2] https://github.com/kata-containers/kata-containers/blob/main/src/runtime/virtcontainers/pkg/cloud-hypervisor/README.md

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-12-02 12:09:12 -08:00
Bo Chen
0bf4d2578a versions: Upgrade to Cloud Hypervisor v20.0
Highlights from the Cloud Hypervisor release v20.0: 1) Multiple PCI
segments support (now support up to 496 PCI devices); 2) CPU pinning; 3)
Improved VFIO support; 4) Safer code; 5) Extended documentation; 6) Bug
fixes.

Details can be found: https://github.com/cloud-hypervisor/cloud-hypervisor/releases/tag/v20.0

Fixes: #3178

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-12-02 12:09:05 -08:00
GabyCT
6edddcced9 Merge pull request #3175 from GabyCT/topic/limitations
docs: Update limitation document regarding docker swarm
2021-12-02 12:03:36 -06:00
Gabriela Cervantes
647082b2c8 docs: Update limitation document regarding docker swarm
This PR removes the information about docker swarm and docker compose
as currently for kata 2.0 we have no support for docker swarm and docker
compose and the links and references that the document is referring to are
currently not part of kata 1.0

Fixes #3174

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2021-12-02 16:38:13 +00:00
bin
39b35d0073 agent: user container ID as watchable storage key for hashmap
Using the sandbox ID as the key will cause the failed containers' storage
to leak.

Fixes: #3172

Signed-off-by: bin <bin@hyper.sh>
2021-12-02 23:28:25 +08:00
Bin Liu
4895015eac Merge pull request #3166 from fengwang666/dax-bug-fix
runtime: Enable FUSE_DAX kernel config for DAX
2021-12-02 16:08:06 +08:00
zhanghj
1e6f58e562 packaging: add help information for '-f' option in install_go.sh
add help info for force install, and remove unused '-p' option.

Fixes: #3168

Signed-off-by: zhanghj <zhanghj.lc@inspur.com>
2021-12-02 02:58:12 -05:00
Bin Liu
3992d28f00 Merge pull request #3152 from liubin/fix/3140-create-empty-dir
agent: copy empty directories for watchable-bind mounts
2021-12-02 14:46:25 +08:00
bin
2af95bc536 agent: create directories for watchable-bind mounts
In function `update_target`, if the updated source is a directory,
we should create the corresponding directory.

Fixes: #3140

Signed-off-by: bin <bin@hyper.sh>
2021-12-02 06:31:03 +08:00
Feng Wang
6105e3ee85 runtime: enable FUSE_DAX kernel config for DAX
Otherwise DAX device cannot be set up.

Fixes #3165

Signed-off-by: Feng Wang <feng.wang@databricks.com>
2021-12-01 13:38:57 -08:00
GabyCT
45854147d0 Merge pull request #3164 from GabyCT/topic/fixconfigtoml
runtime: Update comments for virtcontainers to use kata 2.0
2021-12-01 12:19:26 -06:00
Gabriela Cervantes
591d4af1ea runtime: Update comments for virtcontainers to use kata 2.0
This PR updates the comments in the configuration.toml to point to
the current kata containers repository instead of the kata 1.x.

Fixes #3163

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2021-12-01 16:16:46 +00:00
Fupan Li
87f350db53 Merge pull request #3125 from jodh-intel/update-rust-crate-versions
Update rust crate versions
2021-12-01 18:00:33 +08:00
James O. D. Hunt
bc7fde2096 Merge pull request #3154 from GabyCT/topic/removedebian
osbuilder: Remove debian as a rootfs
2021-12-01 09:29:02 +00:00
Gabriela Cervantes
923e098db6 osbuilder: Remove debian as a rootfs
Currently we do not have debian as part of the kata CI as we
do not have a maintainer, this PR removes debian as a supported
rootfs in order to have only the distros that we are supporting
and maintaining.

Fixes #3153

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2021-11-30 19:31:33 +00:00
James O. D. Hunt
afb96c0044 agent: Wrap remaining nix errors with anyhow
Wrap `nix` `Error`'s in an `anyhow` error for consistency with the way
`rustjail` handles errors.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-30 13:26:15 +00:00
James O. D. Hunt
aba572e01d rustjail: Wrap remaining nix errors with anyhow
Replace `Result` values that use a "bare" `nix` `Error` like this:

```rust
return Err(nix::Error::EINVAL.into());
```

... to the following which wraps the `nix` error in an `anyhow` call for
consistency with the other errors returned by `rustjail`:

```rust
return Err(anyhow!(nix::Error::EINVAL));
```

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-30 13:24:04 +00:00
James O. D. Hunt
30d6007893 uevent: Fix clippy issue in test code
Remove a bare `return` from a test function. This looks wrong but isn't
because the callers are all tests that just wait for a state change
caused by this test function.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-30 12:58:15 +00:00
James O. D. Hunt
4a2be13c60 agent: Upgrade nix version for security fix
Running `cargo audit` showed that the `nix` package for the agent and
the `rustjail` and `vsock-exporter` local crates need to be updated to
resolve rust security issue
[RUSTSEC-2021-0119](https://rustsec.org/advisories/RUSTSEC-2021-0119).
Hence, bumped `nix` to the latest version (which required changes to
work with the new, simpler `errno` handling).

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-30 12:58:15 +00:00
James O. D. Hunt
256d5008dc agent: Update crate versions
Run `cargo update` to update to the latest crate dependency versions.

The agent is an application so this includes expanding the partially
specified semvers to full semver values for the following crates,
which makes those crates consistent with the other agent dependencies:

- `futures`
- `regex`
- `scan_fmt`
- `tokio`

Fixes: #3124.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-30 12:58:15 +00:00
James O. D. Hunt
13257986ae agent-ctl: Update rust lockfile
Ran `cargo update` to bump crate versions.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-30 12:58:15 +00:00
James O. D. Hunt
4ebdd424de forwarder: Update rust lockfile
Ran `cargo update` to bump crate versions.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-30 12:58:15 +00:00
James O. D. Hunt
6007322daa agent: Fixed invalid error message
Remove the format specifier in the `"failed to get VFIO group"` error
returned by `vfio_device_handler()`.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-30 12:58:15 +00:00
Fabiano Fidêncio
3e3e3a0253 Merge pull request #3149 from fidencio/2.4.0-alpha0-branch-bump
# Kata Containers 2.4.0-alpha0
2021-11-29 20:24:19 +01:00
Fabiano Fidêncio
72b8144b56 release: Kata Containers 2.4.0-alpha0
- osbuilder: fix missing cpio package when building rootfs-initrd image
- osbuilder: add coreutils to guest rootfs
- workflows: only allow org members to run `/test_kata_deploy`
- agent: use temp directory for test containers
- tools/osbuilder: build QAT kernel in fedora 34
- agent: refactor find_process function and add test cases
- Hypervisor cleanup, refactoring
- agent: clear cargo test warnings
- docs: Add a code PR advice document
- tools: Automatically revert kata-deploy changes
- runtime: delete netmon
- agent: Remove some unwrap and expect calls
- agent: fixed the `make optimize` bug
- docs: make kata-deploy more visible
- workflows: Add back the checks for running test-kata-deploy
- kata-deploy: Ensure we test HEAD with `/test_kata_deploy`
- docs: update using-SPDK-vhostuser-and-kata.md
- Update k8s SR-IOV plugin environment variables to work properly with Kata
- watchers: don't dereference symlinks when copying files
- kata-deploy: Add back stable & latest tags
- agent: fix the issue of missing create a new session for container
- runtime: Update containerd to 1.5.8
- qemu: fix snap build on ppc64le
- virtcontainers: fix failing template test on ppc64le
- agent: Update README
- Remove cruft, do some simple non-functional cleanup in the runtime
- macvlan: drop bridged part of name
- clh: Fix race condition that prevent start pods
- Update CRI-O documentation
- cgroups: Fix systemd cgroup support
- runtime: merge virtcontainers/pkg/types into virtcontainers/types
- workflows: Remove non-used main.yaml
- agent/src: improve unit test coverage for src/namespace.rs
- doc: update kata metrics documentation
- runtime: delete not used codes
- versions: bump golang to 1.17.x
- release: Use ${GOPATH}/bin/yq for upload-libseccomp-tarball action
- agent-ctl: Allow API specification in JSON format
- virtcontainers: Lint protection types
- agent: check environment variables if empty or invalid
- runtime: Revert "runtime: use containerd package instead of cri-containerd"
- rustjail: Fix created time of container
- agent: Remove dynamic tracing APIs
- kernel: add VFIO kernel dependencies for ppc64le
- logging: Always run crate tests

8ee67aae osbuilder: fix missing cpio package when building rootfs-initrd image
f59d3ff6 osbuilder: add coreutils to guest rootfs
5e7c1a29 workflows: only allow org members to run `/test_kata_deploy`
857501d8 tools/osbuilder: build QAT kernel in fedora 34
a32e02a1 agent: use temp directory as root of test containers
f0734f52 docs: Remove extraneous whitespace
aff32756 docs: Add a code PR advice document
d41c375c docs: Add more advice to the UT advice doc
baf4f76d docs: More detail on running tests as different users
fcf45b0c docs: Use more idiomatic rust string check
9fed7d0b docs: Mention anyhow for error handling in UT doc
318b3f18 docs: No present continuous in UT advice doc
e8bb6b26 docs: Correct repo name usage
c1111a1d docs: Use leading caps for lang names in UT advice doc
597b239e docs: Remove TOC in UT advice doc
cf360fad docs: Move unit test advice doc from tests repo
bc955814 docs: Move doc requirements section higher
6a0b7165 agent: refactor find_process function and add test cases
5ba2f52c tools: Quote functions arguments in the update repos script
5dbd752f tools: Remove the check for the VERSION file
85eb743f tools: Make hub usage slightly less fragile
76540dbd tools: Automatically revert kata-deploy changes
36d73c96 tools: Do the kata-deploy changes on its own commit
c8e22daf tools: Use vars for the registry in the update repo script
ac958a30 tools: Use vars for the yaml files used in the update repo script
edca8292 tools: Rewrite the logic around kata-deploy changes
31f6c2c2 tools: Update comments about the kata-deploy yaml changes
75bb3401 shimv2/service: fix defer functions never run with os.Exit()
bd3217da agent: Remove redundant returns
adab6434 agent: Remove some unwrap and expect calls
351cef7b agent: Remove unwrap from verify_cid()
a7d1c70c agent: Improve baremount
09abcd4d agent-ctl: Remove some unwrap and expect calls
35db75ba agent-ctl: Remove redundant returns
46e45958 agent-ctl: Simplify main
c7349d0b agent-ctl: Simplify error handling
ddc68131 runtime: delete netmon
705687dc docs: Add kata-deploy as part of the install docs
acece849 docs: Use the default notation for "Note" on install README
143fb278 kata-deploy: Use the default notation for "Note"
45d76407 kata-deploy: Don't mention arch specific binaries in the README
0c6c0735 agent: fixed the `make optimize` bug
a7c08aa4 workflows: Add back the checks for running test-kata-deploy
ce0693d6 agent: clear cargo test warnings
ce92cadc vc: hypervisor: remove setSandbox
2227c46c vc: hypervisor: use our own logger
4c2883f7 vc: hypervisor: remove dependency on persist API
34f23de5 vc: hypervisor: Remove need to get shared address from sandbox
c28e5a78 acrn: remove dependency on sandbox, persistapi datatypes
a0e0e186 hypervisors: introduce pkg to unbreak vc/persist dependency
b5dfcf26 watcher: tests: ensure there is 20ms delay between fs writes
78dff468 agent/device: Adjust PCIDEVICE_* container environment variables for VM
4530e7df agent/device: Use simpler structure in update_spec_devices()
b6062278 agent/device: Correct misleading comment on test case
89ff7000 agent/device: Remove unnecessary check for empty container_path
c855a312 agent/device: Make DevIndex local to update_spec_devices()
084538d3 agent/device: Change update_spec_device to handle multiple devices at once
d6a3ebc4 agent/device: Obtain guest major/minor numbers when creating DevNumUpdate
f4982130 agent/device: Check for conflicting device updates
f10e8c81 agent/device: Batch changes to the OCI specification
46a4020e agent/device: Types to represent update for a device in the OCI spec
e7beed54 agent/device: Remove unneeded clone() from several device handlers
2029eeeb agent/device: Improve update_spec_device() final_path handling
57541315 agent/device: Correct misleading parameter name in update_spec_device()
0c51da3d agent/device: Correct misleading error message in update_spec_device()
94b7936f agent/device: Use nix::sys::stat::{major,minor} instead of libc::*
296e76f8 watchers: handle symlinked directories, dir removal
2b6dfe41 watchers: don't dereference symlinks when copying files
3c9ae7fb kata-deploy: Ensure we test HEAD with `/test_kata_deploy`
0380b9bd runtime: Update containerd to 1.5.8
112ea258 qemu: fix snap build by disabling libudev
d5a18173 virtcontainers: fix failing template test on ppc64le
6955d144 kata-deploy: Add back stable & latest tags
bbaf57ad agent: fix the issue of missing create a new session for container
46fd5069 docs: update using-SPDK-vhostuser-and-kata.md
7e6f2b8d vc-utils: don't export unused function
860f3088 virtcontainers: move oci, uuid packages top level
8acb3a32 virtcontainers: remove unused package nsenter
4788cb82 vc-network: remove unused functions
b6ebddd7 oci: remove unused function GetContainerType
599bc0c2 agent: Update README
1e7cb4bc macvlan: drop bridged part of name
55412044 monitor: Fix monitor race condition doing hypervisor.check()
eb11d053 cri-o: Update deployment documentation
92e3a140 cri-o: Update links for the CRI-O github page
0a19340a cri-o: Remove outdated documentation
a3b3c85e workflows: Remove non-used main.yaml
09f7962f runtime: merge virtcontainers/pkg/types into virtcontainers/types
6acedc25 runtime: delete not used codes
395638c4 versions: bump golang to 1.17.x
570915a8 docs: update kata 2.0 metrics documentation
bcf181b7 cgroups: Fix systemd cgroup support
34307235 release: Use ${GOPATH}/bin/yq for upload-libseccomp-tarball action
6339fdd1 docs: update kata metrics architecture image
57bb7ffa agent: check environment variables if empty or invalid
8ab90e10 agent-ctl: Allow API specification in JSON format
eacfcdec runtime: Revert "runtime: use containerd package instead of cri-containerd"
e7856ff1 rustjail: Fix created time of container
b7b89905 virtcontainers: Lint protection types
7566b736 kernel: add VFIO kernel dependencies for ppc64le
87f67606 agent: Remove dynamic tracing APIs
b09dd7a8 docs: Fix typo
d47484e7 logging: Always run crate tests
5c9c0b6e build: Fix default target
b34ed403 cgroups: pass vhost-vsock device to cgroup
7362e1e8 runtime: remove prefix when cgroups are managed by systemd
1b1790fd agent/src: improve unit test coverage for src/namespace.rs

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-11-29 18:34:45 +01:00
Fabiano Fidêncio
f8aaefc919 Merge pull request #3147 from Bevisy/main-3144
osbuilder: fix missing cpio package when building rootfs-initrd image
2021-11-29 18:27:49 +01:00
Binbin Zhang
8ee67aae4f osbuilder: fix missing cpio package when building rootfs-initrd image
1. install cpio package before building rootfs-initrd image
2. add `pipefail;errexit` check to the scripts

Fixes: #3144

Signed-off-by: Binbin Zhang <binbin36520@gmail.com>
2021-11-29 23:42:44 +08:00
Fabiano Fidêncio
879ec4e0e9 Merge pull request #3139 from bergwolf/coreutils
osbuilder: add coreutils to guest rootfs
2021-11-29 10:19:39 +01:00
Fabiano Fidêncio
a6219cb5e0 Merge pull request #3134 from fidencio/wip/only-allow-users-who-are-part-of-the-org-to-run-test-kata-deploy
workflows: only allow org members to run `/test_kata_deploy`
2021-11-29 07:55:40 +01:00
Peng Tao
f59d3ff600 osbuilder: add coreutils to guest rootfs
So that the debug console is more useful. In the meantime, remove
iptables as it is not used by kata-agent any more.

Fixes: #3138
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-11-29 11:22:07 +08:00
Fabiano Fidêncio
7364cd4983 Merge pull request #3129 from liubin/fix/3122-use-tempdir-for-test-container
agent: use temp directory for test containers
2021-11-26 23:11:27 +01:00
Fabiano Fidêncio
5e7c1a290f workflows: only allow org members to run /test_kata_deploy
Let's take advantage of the "is-organization-member" action and only
allow members who are part of the `kata-containers` organization to
trigger `/test_kata_deploy`.

One caveat with this approach is that for the user to be considered as
part of an organization, they **must** have their "Organization
Visibility" configured as Public (and I think the default is Private).

This was found out and suggested by @jcvenegas!

Fixes: #3130

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-11-26 23:02:51 +01:00
Julio Montes
06d28d50ed Merge pull request #3136 from devimc/2021-11-26/fixQATci
tools/osbuilder: build QAT kernel in fedora 34
2021-11-26 15:38:57 -06:00
Julio Montes
857501d8dd tools/osbuilder: build QAT kernel in fedora 34
kernel compiled in fedora 35 (latest) is not working, following error
is reported:

```
qemu-system-x86_64: Error loading uncompressed kernel without PVH ELF
Note
```

Build QAT kernel in fedora 34 container to fix it

fixes #3135

Signed-off-by: Julio Montes <julio.montes@intel.com>
2021-11-26 13:56:43 -06:00
bin
a32e02a1ee agent: use temp directory as root of test containers
Some tests in sandbox.rs need the root user to run, because they need to
create directories under the /run/agent directory; this is a limitation
that shouldn't be there. By using a temp directory for test containers,
the tests no longer need to be run as the root user.

Fixes: #3122

Signed-off-by: bin <bin@hyper.sh>
2021-11-26 15:18:38 +08:00
Manabu Sugimoto
7b35615191 agent: Log unknown seccomp system calls
Kata agent logs unknown system calls given by seccomp profiles
in advance before the log file descriptor closes.

Fixes: #2957

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2021-11-26 15:10:04 +09:00
Peng Tao
c3de161168 Merge pull request #3118 from liubin/fix/3117-refactor-find_process
agent: refactor find_process function and add test cases
2021-11-26 10:22:48 +08:00
Peng Tao
01b6ffc0a4 Merge pull request #3028 from egernst/hypervisor-hacking
Hypervisor cleanup, refactoring
2021-11-26 10:21:49 +08:00
James O. D. Hunt
9412be39ba Merge pull request #3092 from liubin/fix/3091-fix-test-warnings
agent: clear cargo test warnings
2021-11-25 17:22:27 +00:00
James O. D. Hunt
a813378ac5 Merge pull request #3100 from jodh-intel/docs-code-pr-advice
docs: Add a code PR advice document
2021-11-25 15:46:13 +00:00
James O. D. Hunt
f0734f52c1 docs: Remove extraneous whitespace
Remove trailing whitespace in the unit test advice doc.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-25 14:44:49 +00:00
James O. D. Hunt
aff3275608 docs: Add a code PR advice document
Add a document giving advice to code PR authors.

Fixes: #3099.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-25 14:44:46 +00:00
James O. D. Hunt
d41c375c4f docs: Add more advice to the UT advice doc
Add information to the unit test advice document on test strategies and
the test environment.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-25 14:44:40 +00:00
James O. D. Hunt
baf4f76d97 docs: More detail on running tests as different users
Add some more detail to the unit test advice document about running
tests as different users.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-25 14:44:40 +00:00
James O. D. Hunt
fcf45b0c92 docs: Use more idiomatic rust string check
Rather than comparing a string to a literal in the rust example,
use `.is_empty()` as that approach is more idiomatic and preferred.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-25 14:44:40 +00:00
James O. D. Hunt
9fed7d0bde docs: Mention anyhow for error handling in UT doc
Add a comment stating that `anyhow` and `thiserror` should be used in
real rust code, rather than the unwieldy default `Result` handling
shown in the example.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-25 14:44:40 +00:00
James O. D. Hunt
318b3f187b docs: No present continuous in UT advice doc
Change some headings to avoid using the present continuous tense which
should not be used for headings.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-25 14:44:40 +00:00
James O. D. Hunt
e8bb6b2666 docs: Correct repo name usage
Change reference from "runtime repo" to "main repo" in unit test advice
document.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-25 14:44:40 +00:00
James O. D. Hunt
c1111a1d2d docs: Use leading caps for lang names in UT advice doc
Use a capital letter when referring to Golang and Rust (and remove
unnecessary backticks for Rust).

> **Note:**
>
> We continue to refer to "Go" as "Golang" since it's a common alias,
> but, crucially, familiarity with this name makes searching for
> information using this term possible: "Go" is too generic a word.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-25 14:44:40 +00:00
James O. D. Hunt
597b239ef3 docs: Remove TOC in UT advice doc
Remove the table of contents in the Unit Test Advice document since
GitHub auto-generates these now.

See: https://github.com/kata-containers/kata-containers/pull/2023

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-25 14:44:40 +00:00
James O. D. Hunt
cf360fad92 docs: Move unit test advice doc from tests repo
Unit tests necessarily need to be maintained with the code they test so
it makes sense to move the Unit Test Advice document into the main repo
since that is where the majority of unit tests reside.

Note: The
[`Unit-Test-Advice.md` file](https://github.com/kata-containers/tests/blob/main/Unit-Test-Advice.md)
was copied from the `tests` repo when its `HEAD` was
38855f1f40.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-25 14:44:40 +00:00
James O. D. Hunt
bc9558149c docs: Move doc requirements section higher
Move the documentation requirements document link up so that it appears
immediately below the "How to Contribute" section.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-25 14:44:40 +00:00
Fabiano Fidêncio
abf39ddef0 Merge pull request #3089 from fidencio/wip/kata-deploy-remove-files-and-revert-removal-as-part-of-the-release-scripts
tools: Automatically revert kata-deploy changes
2021-11-25 15:23:52 +01:00
Chelsea Mafrica
ed7eb26bff Merge pull request #3113 from liubin/fix/3112-delete-netmon
runtime: delete netmon
2021-11-24 17:58:13 -08:00
bin
6a0b7165ba agent: refactor find_process function and add test cases
Delete redundant parameter init in find_process function and
add test case for it.

Fixes: #3117

Signed-off-by: bin <bin@hyper.sh>
2021-11-25 09:47:25 +08:00
Fupan Li
2938f60abb Merge pull request #3012 from jodh-intel/agent-rm-unwraps
agent: Remove some unwrap and expect calls
2021-11-25 09:37:39 +08:00
Fabiano Fidêncio
5ba2f52c73 tools: Quote functions arguments in the update repos script
Although this is not strictly needed, better be safe than sorry on those
cases.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-11-24 22:09:58 +01:00
Fabiano Fidêncio
5dbd752f8f tools: Remove the check for the VERSION file
All repos we release (https://github.com/kata-containers/kata-containers
and https://github.com/kata-containers/tests) have a VERSION file.

Keeping a check for it, although useful for a new repo, just complicates
the use-case we currently deal with.

While here, let's also anchor the '#' and potentially exclude blank
lines, following James' suggestion.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-11-24 22:09:49 +01:00
Fabiano Fidêncio
85eb743f46 tools: Make hub usage slightly less fragile
`grep`ing by a specific output, in a specific language, is quite fragile
and could easily break `hub`.  For now, let's work this around following
James' suggestion of setting `LC_ALL=C LANG=C` when calling `hub`.

> **Note**: I don't think we should invest much time on fixing `hub`
> usage, as it'll be soon replaced by `gh`, see:
> https://github.com/kata-containers/kata-containers/issues/3083

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-11-24 22:09:30 +01:00
Fabiano Fidêncio
76540dbdd1 tools: Automatically revert kata-deploy changes
When branching the "stable-x.y" branch, we need to do some quite
specific changes to kata-deploy / kata-cleanup files, such as:
* changing the tags from "latest" to "stable-x.y".
* removing the kata-deploy / kata-cleanup stable files.

However, after the branching is done, we need to get the `main` repo to
its original state, with the kata-deploy / kata-cleanup using the
"latest" tag, and with the stable files present there, and this commit
ensures that, during the release process, a new PR is automatically
created with these changes.

Fixes: #3069

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-11-24 22:07:53 +01:00
Fabiano Fidêncio
36d73c96c8 tools: Do the kata-deploy changes on its own commit
Rather than doing the kata-deploy changes as part of the release bump
commit, let's split those on its own changes, as it will both make the
life of the reviewer less confusing and also allows us to start
preparing the field for a possible automated revert of these changes,
whenever it becomes needed.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-11-24 22:07:52 +01:00
Fabiano Fidêncio
c8e22daf67 tools: Use vars for the registry in the update repo script
Similarly to what was done for the yaml files, let's use a var for
representing the registry where our images will be pushed to and avoid
repetition and too long lines.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-11-24 22:07:03 +01:00
Fabiano Fidêncio
ac958a3073 tools: Use vars for the yaml files used in the update repo script
Instead of always writing the full path of some files, let's just create
some vars and avoid both repetition (which is quite error prone) and too
long lines (which makes the file not so easy to read).

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-11-24 22:06:45 +01:00
Fabiano Fidêncio
edca829242 tools: Rewrite the logic around kata-deploy changes
We can simplify the code a little bit, as at least now we group common
operations together.  Hopefully this will improve the maintainability
and the readability of the code.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-11-24 22:05:35 +01:00
Fabiano Fidêncio
31f6c2c2ea tools: Update comments about the kata-deploy yaml changes
The comments were mentioning kata-deploy-base files while it really
should mention kata-deploy-stable files.

While here, I've also added a missing '"' to one of the tags.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-11-24 21:17:40 +01:00
Binbin Zhang
75bb340137 shimv2/service: fix defer functions never run with os.Exit()
os.Exit() will terminate program immediately, the defer functions
won't be executed, so we add defer functions again before os.Exit().
Refer to https://pkg.go.dev/os#Exit

Fixes: #3059

Signed-off-by: Binbin Zhang <binbin36520@gmail.com>
2021-11-24 15:59:59 +01:00
James O. D. Hunt
bd3217daeb agent: Remove redundant returns
Remove an unnecessary `return` statement identified by clippy.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-24 11:43:49 +00:00
James O. D. Hunt
adab64349c agent: Remove some unwrap and expect calls
Replace some `unwrap()` and `expect()` calls with code to return the
error to the caller.

Fixes: #3011.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-24 11:43:49 +00:00
James O. D. Hunt
351cef7b6a agent: Remove unwrap from verify_cid()
Improved the `verify_cid()` function that validates container ID's by
removing the need for an `unwrap()`.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-24 11:43:49 +00:00
James O. D. Hunt
a7d1c70c4b agent: Improve baremount
Change `baremount()` to accept `Path` values rather than string values
since:

- `Path` is more natural given the function deals with paths.
- This minimises the caller having to convert between string and `Path`
  types, which simplifies the surrounding code.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-24 11:43:49 +00:00
James O. D. Hunt
09abcd4dc6 agent-ctl: Remove some unwrap and expect calls
Replace some `unwrap()` and `expect()` calls with code to return the
error to the caller.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-24 11:43:49 +00:00
James O. D. Hunt
35db75baa1 agent-ctl: Remove redundant returns
Remove a number of redundant `return`'s.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-24 11:43:49 +00:00
James O. D. Hunt
46e459584d agent-ctl: Simplify main
Make the `main()` function simpler.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-24 11:43:49 +00:00
James O. D. Hunt
c7349d0bf1 agent-ctl: Simplify error handling
Replace `ok_or().map_err()` combinations with the simpler `ok_or_else()`
construct.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-24 11:43:49 +00:00
bin
ddc68131df runtime: delete netmon
Netmon is not used anymore.

Fixes: #3112

Signed-off-by: bin <bin@hyper.sh>
2021-11-24 15:08:18 +08:00
Carlos Venegas
ac058b3897 Merge pull request #3105 from YchauWang/wyc-agent-make-02
agent: fixed the `make optimize` bug
2021-11-23 13:17:05 -06:00
Fabiano Fidêncio
181f876fdb Merge pull request #3098 from fidencio/wip/move_kata-deploy-install-instruction_to_docs
docs: make kata-deploy more visible
2021-11-23 18:32:42 +01:00
João Vanzuita
705687dc42 docs: Add kata-deploy as part of the install docs
This PR links the kata-deploy installation instructions to the
docs/install folder.

Fixes: #2450

Signed-off-by: João Vanzuita <joao.vanzuita@de.bosch.com>
Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-11-23 13:57:22 +01:00
Fabiano Fidêncio
acece84906 docs: Use the default notation for "Note" on install README
Let's use the default GitHub notation for notes in documentation, as
described here:
https://github.com/kata-containers/kata-containers/blob/main/docs/Documentation-Requirements.md#notes

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
Suggested-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-23 13:27:35 +01:00
Fabiano Fidêncio
143fb27802 kata-deploy: Use the default notation for "Note"
Let's use the default GitHub notation for notes in documentation, as
described here:
https://github.com/kata-containers/kata-containers/blob/main/docs/Documentation-Requirements.md#notes

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
Suggested-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-23 13:24:42 +01:00
Fabiano Fidêncio
45d76407aa kata-deploy: Don't mention arch specific binaries in the README
Although the binary name of the shipped binary is `qemu-system-x86_64`,
and we only ship kata-deploy for `x86_64`, we had better leave the
architecture specific name out of our README file.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-11-23 13:21:37 +01:00
wangyongchao.bj
0c6c0735ec agent: fixed the make optimize bug
The unrecognized option 'deny-warnings' caused `make optimize` to fail.
Fix the Makefile of the agent project to make sure the `make optimize` command
executes correctly. This PR changes the rustc args from '--deny-warnings' to
'--deny warnings'.

Fixes: #3104

Signed-off-by: wangyongchao.bj <wangyongchao.bj@inspur.com>
2021-11-23 09:44:05 +08:00
Fabiano Fidêncio
0ae77e1232 Merge pull request #3102 from fidencio/wip/add-back-wrongly-removed-check-for-test-kata-deploy
workflows: Add back the checks for running test-kata-deploy
2021-11-22 22:36:03 +01:00
Fabiano Fidêncio
a7c08aa4b6 workflows: Add back the checks for running test-kata-deploy
Commit 3c9ae7f made /test_kata_deploy run
against HEAD, but it also mistakenly removed all the checks that ensure
/test_kata_deploy only runs when explicitly called.

Mea culpa on this, and let's add the tests back.

Fixes: #3101

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-11-22 18:33:10 +01:00
Carlos Venegas
3be15aed1c Merge pull request #3071 from fidencio/wip/test-kata-deploy-should-use-the-latest-builds
kata-deploy: Ensure we test HEAD with `/test_kata_deploy`
2021-11-22 10:48:35 -06:00
bin
ce0693d6dc agent: clear cargo test warnings
Function parameters in the test config are not used. This
commit adds an underscore before the variable names
in the test config.

Fixes: #3091

Signed-off-by: bin <bin@hyper.sh>
2021-11-22 20:45:46 +08:00
Tim Zhang
cad279b37d Merge pull request #3055 from liubin/fix/3054-update-spdk-doc
docs: update using-SPDK-vhostuser-and-kata.md
2021-11-22 15:47:02 +08:00
Binbin Zhang
7304e52a59 Makefile: update make go-test call
1. use ci/go-test.sh to replace the direct call to go test
2. fix data race test
3. install hook whether it is root or not

Fixes #1494

Signed-off-by: Binbin Zhang <binbin36520@gmail.com>
2021-11-22 13:59:22 +08:00
David Gibson
1b28d7180f Merge pull request #2927 from dgibson/vfio-env-mangling
Update k8s SR-IOV plugin environment variables to work properly with Kata
2021-11-22 13:44:19 +11:00
Eric Ernst
a0919b0865 Merge pull request #2998 from egernst/fix-symlinks
watchers: don't dereference symlinks when copying files
2021-11-19 12:43:22 -08:00
Eric Ernst
ce92cadc7d vc: hypervisor: remove setSandbox
The hypervisor interface implementation should not know a thing about
sandboxes.

Fixes: #2882

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-11-19 12:20:41 -08:00
Eric Ernst
2227c46c25 vc: hypervisor: use our own logger
This'll end up moving to hypervisors pkg, but let's stop using virtLog,
instead introduce hvLogger.

Fixes: #2884

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-11-19 12:20:41 -08:00
Eric Ernst
4c2883f7e2 vc: hypervisor: remove dependency on persist API
Today the hypervisor code in vc relies on persist pkg for two things:
1. To get the VM/run store path on the host filesystem,
2. For type definition of the Load/Save functions of the hypervisor
   interface.

For (1), we can simply remove the store interface from the hypervisor
config and replace it with just the path, since this is all we really
need. When we create a NewHypervisor structure, outside of the
hypervisor, we can populate this path.

For (2), rather than have the persist pkg define the structure, let's
let the hypervisor code (soon to be pkg) define the structure. persist
API already needs to call into hypervisor anyway; let's allow us to
define the structure.

We'll probably want to look at following similar pattern for other parts
of vc that we want to make independent of the persist API.

In doing this, we started an initial hypervisors pkg, to hold these
types (avoid a circular dependency between virtcontainers and persist
pkg). Next step will be to remove all other dependencies and move the
hypervisor specific code into this pkg, and out of virtcontainers.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-11-19 12:20:41 -08:00
Eric Ernst
34f23de512 vc: hypervisor: Remove need to get shared address from sandbox
Add shared path as part of the hypervisor config

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-11-19 12:20:41 -08:00
Eric Ernst
c28e5a7807 acrn: remove dependency on sandbox, persistapi datatypes
Today, acrn relies on sandbox level information, as well as a store
provided by common parts of the hypervisor. As we cleanup the
abstractions within our runtime, we need to ensure that there aren't
cross dependencies between the sandbox, the persistence logic and the
hypervisor.

Ensure that ACRN still compiles, but remove the setSandbox usage as
well as persist driver setup.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-11-19 12:20:41 -08:00
Eric Ernst
a0e0e18639 hypervisors: introduce pkg to unbreak vc/persist dependency
Initial hypervisors pkg, with just basic state types defined.

Fixes: #2883

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-11-19 12:20:41 -08:00
Eric Ernst
b5dfcf2653 watcher: tests: ensure there is 20ms delay between fs writes
We noticed s390x test failures on several of the watcher unit tests.

Discovered that on s390 in particular, if we update a file in quick
succession, the timestamp on the file would not be unique between the
writes. Through testing, we observe that a 20 millisecond delay is very
reliable for being able to observe the timestamp update. Let's ensure we
have this delay between writes for our tests so our tests are more
reliable.

In "the real world" we'll be polling for changes every 2 seconds, and
frequency of filesystem updates will be on order of minutes and days,
rather than microseconds.

Fixes: #2946

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-11-19 11:33:36 -08:00
Fabiano Fidêncio
d08bcde7aa Merge pull request #3068 from fidencio/wip/kata-deploy-re-add-latest-and-stable-tags
kata-deploy: Add back stable & latest tags
2021-11-19 15:58:55 +01:00
David Gibson
78dff468bf agent/device: Adjust PCIDEVICE_* container environment variables for VM
The k8s SR-IOV plugin, when it assigns a VFIO device to a container, adds
a variable of the form PCIDEVICE_<identifier> to the container's
environment, so that the payload knows which device is which.  The contents
of the variable gives the PCI address of the device to use.

Kata allows VFIO devices to be passed in to a Kata container, however it
runs within a VM which has a different PCI topology.  In order for the
payload to find the right device, the environment variables therefore need
to be converted to list the guest PCI addresses instead of the host PCI
addresses.

fixes #2897

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-11-19 17:44:05 +11:00
David Gibson
4530e7df29 agent/device: Use simpler structure in update_spec_devices()
update_spec_devices() takes a bunch of updates for the device entries in
the OCI spec and applies them, adjusting things in both the linux.devices
and linux.resources.devices sections of the spec.

It's important that each entry in the spec only be updated once.  Currently
we ensure this by first creating an index of where the entries are, then
consulting that as we apply each update, so that earlier updates don't
cause us to incorrectly detect an entry as being relevant to a later
update.  This method works, but it's quite awkward.

This inverts the loop structure in update_spec_devices() to make this
clearer.  Instead of stepping through each update and finding the relevant
entries in the spec to change, we step through each entry in the spec and
find the relevant update.  This makes it structurally clear that we're only
updating each entry once.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-11-19 17:21:11 +11:00
Tim Zhang
653b461dc2 Merge pull request #3064 from lifupan/main
agent: fix the issue of missing create a new session for container
2021-11-19 11:28:54 +08:00
David Gibson
b60622786d agent/device: Correct misleading comment on test case
We have a test case commented as testing the case where linux.devices is
empty in the OCI spec.  While it's true that linux.devices is empty in this
example, the reason it fails isn't specifically because it's empty but
because it doesn't contain a device for the update we're trying to apply.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-11-19 14:25:04 +11:00
David Gibson
89ff700038 agent/device: Remove unnecessary check for empty container_path
update_spec_devices() explicitly checks for being called with an empty
container path and fails.  We have a unit test to verify this behaviour.

But while an empty container_path probably does mean something has gone
wrong elsewhere, that's also true of any number of other bad paths.  Having
an empty string here doesn't prevent what we're doing in this function
making sense - we can compare it to the strings in the OCI spec perfectly
well (though more likely we simply won't find it there).

So, there's no real reason to check this one particular odd case.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-11-19 14:25:03 +11:00
David Gibson
c855a312f0 agent/device: Make DevIndex local to update_spec_devices()
The DevIndex data structure keeps track of devices in the OCI
specification.  We used to carry it around to quite a lot of
functions, but it's now used only within update_spec_devices().  That
means we can simplify things a bit by just open coding the maps we
need, rather than declaring a special type.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-11-19 14:24:47 +11:00
David Gibson
084538d334 agent/device: Change update_spec_device to handle multiple devices at once
update_spec_device() adjusts the OCI spec for device differences
between the host and guest.  It is called repeatedly for each device
we need to alter.  These calls are now all in a single loop in
add_devices(), so it makes more sense to move the loop into a renamed
update_spec_devices() and process all the fixups in one call.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-11-19 14:23:58 +11:00
David Gibson
d6a3ebc496 agent/device: Obtain guest major/minor numbers when creating DevNumUpdate
Currently the DevNumUpdate structure is created with a path to a
device node in the VM, which is then used by update_spec_device().
However the only piece of information that update_spec_device()
actually needs is the VM side major and minor numbers for the device.
We can determine those when we create the DevNumUpdate structure.
This means we detect errors earlier and as a bonus we don't need to
make a copy of the vm path string.

Since that change requires updating 2 of the log statements, we take the
opportunity to update all the log statements to structured style.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-11-19 14:23:36 +11:00
David Gibson
f4982130e1 agent/device: Check for conflicting device updates
For each device in the OCI spec we need to update it to reflect the guest
rather than the host.  We do this with additional device information
provided by the runtime.  There should only be one update for each device
though, if there are multiple, something has gone horribly wrong.

Detect and report this situation, for safety.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-11-19 14:23:34 +11:00
David Gibson
f10e8c8165 agent/device: Batch changes to the OCI specification
As we process container devices in the agent, we repeatedly call
update_spec_device() to adjust the OCI spec as necessary for differences
between the host and the VM.  This means that for the whole of a pretty
complex call graph, the spec is in a partially-updated state - neither
fully as it was on the host, nor fully as it will be for the container
within the VM.

Worse, it's not discernable from the contents itself which parts of the
spec have already been updated and which have not.  We used to have real
bugs because of this, until the DevIndex structure was introduced, but that
means a whole, fairly complex, parallel data structure needs to be passed
around this call graph just to keep track of the state we're in.

Start simplifying this by having the device handler functions not directly
update the spec, but instead return an update structure describing the
change they need.  Once all the devices are added, add_devices() will
process all the updates as a batch.

Note that collecting the updates in a HashMap, rather than a simple Vec
doesn't make a lot of sense in the current code, but will reduce churn
in future changes which make use of it.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-11-19 14:23:15 +11:00
David Gibson
46a4020e9e agent/device: Types to represent update for a device in the OCI spec
Currently update_spec_device() takes parameters 'vm_path' and 'final_path'
to give it the information it needs to update a single device in the OCI
spec for the guest.  This bundles these parameters into a single structure
type describing the updates to a single device.  This doesn't accomplish
much immediately, but will allow a number of further cleanups.

At the same time we change the representation of vm_path from a Unicode
string to a std::path::Path, which is a bit more natural since we are
performing file operations on it.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-11-19 12:27:52 +11:00
David Gibson
e7beed5430 agent/device: Remove unneeded clone() from several device handlers
virtio_blk_device_handler(), virtio_blk_ccw_device_handler() and
virtio_scsi_device_handler() all take a clone of their 'device' parameter.
They appear to do this in order to get a mutable copy in which they can
update the vm_path field.

However, the copy is dropped at the end of the function, so the only thing
that's used in it is the vm_path field passed to update_spec_device()
afterwards.

We can avoid the clone by just using a local variable for the vm_path.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-11-19 12:27:52 +11:00
David Gibson
2029eeebca agent/device: Improve update_spec_device() final_path handling
update_spec_device() takes a 'final_path' parameter which gives the
name the device should be given in the "inner" OCI spec.  We need this
for VFIO devices where the name the payload sees needs to match the
VM's IOMMU groups.  However, in all other cases (for now, and maybe
forever), this is the same as the original 'container_path' given in
the input OCI spec.  To make this clearer and simplify callers, make
this parameter an Option, and only update the device name if it is
non-None.

Additionally, update_spec_device() needs to call to_string() on
update_path to get an owned version.  Rust convention[0] is to let the
caller decide whether it should copy, or just give an existing owned
version to the function.  Change from &str to String to allow that; it
doesn't buy us anything right now, but will make some things a little
nicer in future.

[0] https://rust-lang.github.io/api-guidelines/flexibility.html?highlight=clone#caller-decides-where-to-copy-and-place-data-c-caller-control

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-11-19 12:27:52 +11:00
David Gibson
57541315db agent/device: Correct misleading parameter name in update_spec_device()
update_spec_device() takes a 'host_path' parameter which it uses to locate
the device to correct in the OCI spec.  Although this will usually be the
path of the device on the host, it doesn't have to be - a traditional
runtime like runc would create a device node of that name in the container
with the given (host) major and minor numbers.  To clarify that, rename it
to 'container_path'.

We also update the block comment to explain the distinctions more
carefully.  Finally we update some variable names in tests to match.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-11-19 12:27:52 +11:00
David Gibson
0c51da3dd0 agent/device: Correct misleading error message in update_spec_device()
This error is returned if we have information for a device from the
runtime, but a matching device does not appear in the OCI spec.  However,
the name for the device we print is the name from the VM, rather than the
name from the container which is what we actually expect in the spec.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-11-19 12:27:52 +11:00
David Gibson
94b7936f51 agent/device: Use nix::sys::stat::{major,minor} instead of libc::*
update_spec_devices() includes an unsafe block, in order to call the libc
functions to get the major and minor numbers from a device ID.  However,
the nix crate already has a safe wrapper for this function, which we use in
other places in the file.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-11-19 12:27:52 +11:00
Eric Ernst
296e76f8ee watchers: handle symlinked directories, dir removal
- Even a directory could be a symlink - check for this. This is very
common when using configmaps/secrets
- Add unit test to better mimic a configmap, configmap update
- We would never remove directories before. Let's ensure that these are
added to the watched_list, and verify in unit tests
- Update unit tests which exercise maximum number of files per entry. There's a change
in behavior now that we consider directories/symlinks watchable as well.
For these tests, it means we support one less file in a watchable mount.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-11-18 16:23:45 -08:00
Eric Ernst
2b6dfe414a watchers: don't dereference symlinks when copying files
The current implementation just copies the file, dereferencing any
symlinks in the process. This results in symlinks not being preserved,
and a change in layout relative to the mount that we are making
watchable.

What we want is something like "cp -d"

This isn't available in a crate, so let's go ahead and introduce a copy
function which will create a symlink with same relative path if the
source file is a symlink. Regular files are handled with the standard
fs::copy.

Introduce a unit test to verify symlinks are now handled appropriately.

Fixes: #2950

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-11-18 16:23:45 -08:00
Fabiano Fidêncio
3c9ae7fb4b kata-deploy: Ensure we test HEAD with /test_kata_deploy
In the past few releases we ended up hitting issues that could be easily
avoided if `/test_kata_deploy` would use HEAD instead of a specific
tarball.

By the end of the day, we want to ensure kata-deploy works, but before
we cut a release we also want to ensure that the binaries used in that
release are in a good shape.  If we don't do that we end up either
having to roll a release back, or to cut a second release in a really
short time (and that's time consuming).

Note: there's code duplication here that could and should be avoided,
but I sincerely would prefer treating it in a different PR.

Fixes: #3001

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-11-18 23:38:55 +01:00
Greg Kurz
c01189d4a6 Merge pull request #3075 from c3d/bugs/3074-containerd-update
runtime: Update containerd to 1.5.8
2021-11-18 22:42:05 +01:00
Christophe de Dinechin
0380b9bda7 runtime: Update containerd to 1.5.8
Release 1.5.8 of containerd contains fixes for two low-severity advisories:

[GHSA-5j5w-g665-5m35](https://github.com/opencontainers/distribution-spec/security/advisories/GHSA-mc8v-mgrf-8f4m)
[GHSA-77vh-xpmg-72qh](https://github.com/opencontainers/image-spec/security/advisories/GHSA-77vh-xpmg-72qh)

Fixes: #3074

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2021-11-18 18:38:27 +01:00
Greg Kurz
bdde8beb52 Merge pull request #3003 from Amulyam24/snap_ppc
qemu: fix snap build on ppc64le
2021-11-18 17:46:23 +01:00
Greg Kurz
f80ca66300 Merge pull request #2921 from Amulyam24/template_test
virtcontainers: fix failing template test on ppc64le
2021-11-18 17:32:18 +01:00
Amulyam24
112ea25859 qemu: fix snap build by disabling libudev
While building snap, static qemu is considered. Disable libudev
as it doesn't have static libraries on most of the distros of all
archs.

Fixes: #3002

Signed-off-by: Amulyam24 <amulmek1@in.ibm.com>
2021-11-18 18:50:19 +05:30
Amulyam24
d5a18173b9 virtcontainers: fix failing template test on ppc64le
If a file/directory doesn't exist, os.Stat() returns an
error. Assert the returned value with os.IsNotExist() to
prevent it from failing.

Fixes: #2920

Signed-off-by: Amulyam24 <amulmek1@in.ibm.com>
2021-11-18 15:37:40 +05:30
Fabiano Fidêncio
6955d1442f kata-deploy: Add back stable & latest tags
stable-2.3 was the first time we branched the repo since
43a72d76e2 was merged.  One bit that I
didn't notice while working on this, regardless of being warned by
@amshinde (sorry!), was that the change would happen on `main` branch,
rather than on the branched `stable-2.3` one.

In my mind, the workflow was:
* we branch.
* we do the changes, including removing the files.
* we tag a release.

However, the workflow actually is:
* we do the changes, including removing the files.
* we branch.
* we tag a release.

A better way to deal with this has to be figured out before 2.4.0 is
out, but for now let's just re-add the files back.

Fixes: #3067

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-11-18 09:41:54 +01:00
James O. D. Hunt
7269352fd4 Merge pull request #3057 from jodh-intel/docs-update-agent-readme
agent: Update README
2021-11-18 08:02:10 +00:00
Fupan Li
bbaf57adb0 agent: fix the issue of missing create a new session for container
When the container didn't have a tty console, it would be in the same
process group with the kata-agent, which wasn't expected. Thus,
create a new session for the container process.

Fixes: #3063

Signed-off-by: Fupan Li <fupan.lfp@antgroup.com>
2021-11-18 14:12:51 +08:00
bin
46fd5069c9 docs: update using-SPDK-vhostuser-and-kata.md
Use `ctr` instead of `Docker`.

Fixes: #3054

Signed-off-by: bin <bin@hyper.sh>
2021-11-18 09:41:12 +08:00
Eric Ernst
076dbe6cea Merge pull request #2973 from egernst/remove-cruft
Remove cruft, do some simple non-functional cleanup in the runtime
2021-11-17 15:26:12 -08:00
Eric Ernst
7e6f2b8d64 vc-utils: don't export unused function
Many of these functions are just used in one place throughout the rest
of the code base. If we create a hypervisor package, network package, etc, we may want to
parse this out.

Fixes: #3049

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-11-17 14:12:57 -08:00
Eric Ernst
860f30882a virtcontainers: move oci, uuid packages top level
This will be useful at runtime level; no need for oci or uuid to be subpkg of
virtcontainers.

While at it, ensure we run gofmt on the changed files.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-11-17 14:12:57 -08:00
Eric Ernst
8acb3a32b6 virtcontainers: remove unused package nsenter
Package is not utilized. Remove.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-11-17 14:12:57 -08:00
Eric Ernst
4788cb8263 vc-network: remove unused functions
Unused functions -- let's clean up!

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-11-17 14:12:57 -08:00
Eric Ernst
b6ebddd7ef oci: remove unused function GetContainerType
This is unused - we utilize ContainerType directly.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-11-17 14:12:57 -08:00
James O. D. Hunt
599bc0c2a9 agent: Update README
Update the agent README by removing the historical details about the
conversion from golang to rust (which occurred at the start of Kata 2.x
development) and replacing it with information that developers and
testers should find more useful.

Fixes: #3056.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-17 17:57:45 +00:00
Fabiano Fidêncio
e34893a0c4 Merge pull request #3051 from egernst/macvlan-rename
macvlan: drop bridged part of name
2021-11-17 10:21:07 +01:00
Eric Ernst
1e7cb4bc3a macvlan: drop bridged part of name
The fact that we need to "bridge" the endpoint is a bit irrelevant. To
be consistent with the rest of the endpoints, let's just call this
"macvlan"

Fixes: #3050

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-11-16 16:44:29 -08:00
Carlos Venegas
15b5d22e81 Merge pull request #2778 from jcvenegas/clh-race-condition-check
clh: Fix race condition that prevent start pods
2021-11-16 14:15:06 -06:00
Carlos Venegas
55412044df monitor: Fix monitor race condition doing hypervisor.check()
The thread monitor will check if the agent and the VMM are alive every
second in a blocking thread. The Cloud hypervisor API server is
single-threaded, if the monitor does a `check()`, while a slow request
is still in progress, the monitor check() method will timeout. The
monitor thread will stop all the shim-v2 execution.

This commit modifies the monitor thread to make it check the status of
the hypervisor after 5 seconds. Additionally, the `check()` method from
cloud-hypervisor will use the method `clh.isClhRunning(timeout)` with a
10 seconds timeout. The monitor function does no timeout, so even if
`hypervisor.check()` takes more than 10 seconds, the isClhRunning method
handles errors doing a VmmPing and retry in case of errors until the
timeout is reached.

Reduce the time to the next check to 5 should not affect any functionality,
but it will reduce the overhead polling the hypervisor.

Fixes: #2777

Signed-off-by: Carlos Venegas <jose.carlos.venegas.munoz@intel.com>
2021-11-16 18:28:29 +00:00
James O. D. Hunt
480343671b Merge pull request #3046 from fidencio/wip/update-crio-documentation
Update CRI-O documentation
2021-11-16 08:33:29 +00:00
Fabiano Fidêncio
eb11d053d5 cri-o: Update deployment documentation
CRI-O deployment documentation was quite outdated, giving info from the
`1.x` era.  Let's update this to reflect what we currently have.

Fixes: #2498

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-11-15 18:30:40 +01:00
Fabiano Fidêncio
92e3a14023 cri-o: Update links for the CRI-O github page
The links are either pointing to the not-used-anymore `master` branch,
or to the kubernetes-incubator page.

Let's always point to the CRI-O github page, using the `main` branch.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-11-15 11:39:09 +01:00
Fabiano Fidêncio
0a19340a93 cri-o: Remove outdated documentation
Although the documentation removed is correct, it's not relevant to the
current supported versions of CRI-O.

Related: #2498

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-11-15 11:39:08 +01:00
snir911
b046c1ef6b Merge pull request #2959 from snir911/wip/cgroups-systemd-fix
cgroups: Fix systemd cgroup support
2021-11-15 10:44:45 +02:00
Eric Ernst
e89c06e68b Merge pull request #3032 from liubin/fix/3031-merge-two-types-packages
runtime: merge virtcontainers/pkg/types into virtcontainers/types
2021-11-12 14:23:21 -08:00
Chelsea Mafrica
b585264555 Merge pull request #3034 from fidencio/wip/remove-non-used-actions
workflows: Remove non-used main.yaml
2021-11-12 11:25:47 -08:00
Chelsea Mafrica
d38135c93b Merge pull request #2570 from YchauWang/wyc-agent-test
agent/src: improve unit test coverage for src/namespace.rs
2021-11-12 11:24:13 -08:00
Fabiano Fidêncio
a3b3c85ec3 workflows: Remove non-used main.yaml
The main.yaml workflow was created and used only on 1.x.  We inherited
it, but we didn't remove it after deprecating the 1.x repos.

While here, let's also update the reference to the `main.yaml` file,
and point to `release.yaml` (the file that's actually used for 2.x).

Fixes: #3033

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-11-12 18:17:11 +01:00
Chelsea Mafrica
6b48d3754a Merge pull request #3013 from fgiudici/kata_metrics_doc
doc: update kata metrics documentation
2021-11-12 09:11:36 -08:00
Chelsea Mafrica
c8f2ef9488 Merge pull request #3030 from liubin/fix/3029-delete-codes
runtime: delete not used codes
2021-11-12 08:53:20 -08:00
bin
09f7962ff1 runtime: merge virtcontainers/pkg/types into virtcontainers/types
There are two types packages under virtcontainers, and the
virtcontainers/pkg/types has a few codes, merging them into
one can make it easier to understand and use the types package.

Fixes: #3031

Signed-off-by: bin <bin@hyper.sh>
2021-11-12 15:06:39 +08:00
bin
6acedc2531 runtime: delete not used codes
Functions EnvVars and GetOCIConfig in runtime/virtcontainers/pkg/oci/utils.go
are not used anymore.

Fixes: #3029

Signed-off-by: bin <bin@hyper.sh>
2021-11-12 11:35:31 +08:00
Fabiano Fidêncio
c0aea3f662 Merge pull request #3017 from fidencio/wip/bump-golang
versions: bump golang to 1.17.x
2021-11-11 16:57:50 +01:00
Fabiano Fidêncio
7c947357ad Merge pull request #3015 from ManaSugi/fix-yq-path
release: Use ${GOPATH}/bin/yq for upload-libseccomp-tarball action
2021-11-11 10:48:42 +01:00
Fabiano Fidêncio
395638c4bc versions: bump golang to 1.17.x
According to https://endoflife.date/go golang 1.15 is not supported
anymore.  Let's remove it from our tests, add 1.17.x, and bump the
newest version known to work when building kata to 1.17.3.

Fixes: #3016

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-11-11 10:43:18 +01:00
Bin Liu
bf24eb6b33 Merge pull request #2979 from jodh-intel/agent-ctl-json-api-spec
agent-ctl: Allow API specification in JSON format
2021-11-11 16:45:30 +08:00
Francesco Giudici
570915a8c3 docs: update kata 2.0 metrics documentation
We now support any container engine CRI compliant in kata-monitor.
Update documentation to reflect it.

Fixes: #980

Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-11-11 09:33:01 +01:00
Snir Sheriber
bcf181b7ee cgroups: Fix systemd cgroup support
As github.com/containerd/cgroups doesn't support scope
units which are essential in some cases lets create
the cgroups manually and load it trough the cgroups
api
This is currently done only when there's single sandbox
cgroup (sandbox_cgroup_only=true), otherwise we set it
as static cgroup path as it used to be (until a proper
solution for overhead cgroup under systemd will be
suggested)

Fixes: #2868
Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2021-11-11 08:51:45 +02:00
Manabu Sugimoto
3430723594 release: Use ${GOPATH}/bin/yq for upload-libseccomp-tarball action
We need to explicitly call `${GOPATH}/bin/yq` that is installed by
`ci/install_yq.sh`.

Fixes: #3014

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2021-11-11 13:42:12 +09:00
Bin Liu
04185bd068 Merge pull request #2997 from Jakob-Naucke/lint-protection
virtcontainers: Lint protection types
2021-11-11 08:34:48 +08:00
Fabiano Fidêncio
05cf7cdddb Merge pull request #3007 from liubin/fix/3006-check-env-key-value
agent: check environment variables if empty or invalid
2021-11-10 19:19:47 +01:00
Francesco Giudici
6339fdd1f6 docs: update kata metrics architecture image
We now support any CRI container engine in kata-monitor, notably CRI-O.
Add both containerd and CRI-O in the kata metrics architecture image.

Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-11-10 18:58:15 +01:00
bin
57bb7ffae3 agent: check environment variables if empty or invalid
Invalid environment variable key/value will cause set_env panic.

Refer: https://doc.rust-lang.org/std/env/fn.set_var.html#panics

Fixes: #3006

Signed-off-by: bin <bin@hyper.sh>
2021-11-10 20:54:21 +08:00
Fabiano Fidêncio
653976c0fd Merge pull request #3000 from bergwolf/crioptions
runtime: Revert "runtime: use containerd package instead of cri-containerd"
2021-11-10 13:41:24 +01:00
Tim Zhang
fbf3bb55c0 Merge pull request #2995 from Tim-Zhang/fix-container-created-time
rustjail: Fix created time of container
2021-11-10 19:44:04 +08:00
James O. D. Hunt
8ab90e1068 agent-ctl: Allow API specification in JSON format
Update the `agent-ctl` tool to allow API fields to be specified in JSON
format, either directly on the command-line, or via a file URI.

This feature is made possible by enabling `serde` support in the agent
`protocols` crate. Careful use of the `serde` macros allows the
`agent-ctl` tool to accept _partially_ specified API objects in JSON
format; fields that are not specified are set to the default value for
their respective types.

`build.rs` changes based on work by Fupan.

Fixes: #2978.

Contributions-by: Fupan Li <lifupan@gmail.com>
Contributions-by: Bin Liu <bin@hyper.sh>
Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-10 10:16:04 +00:00
James O. D. Hunt
18c47fe8f3 Merge pull request #2986 from jodh-intel/rm-dynamic-tracing-api
agent: Remove dynamic tracing APIs
2021-11-10 10:10:14 +00:00
Peng Tao
eacfcdec19 runtime: Revert "runtime: use containerd package instead of cri-containerd"
This reverts commit 76f16fd1a7 to bring
back cri-containerd crioptions parsing so that kata works with older
containerd versions like v1.3.9 and v1.4.6.

Fixes: #2999
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-11-10 16:06:42 +08:00
Tim Zhang
e7856ff10c rustjail: Fix created time of container
Got wrong created time of container after an exec
this commit will fix this problem.

Fixes: #2994

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-11-10 10:43:03 +08:00
Chelsea Mafrica
8b01666109 Merge pull request #2992 from Amulyam24/kernel_vfio
kernel: add VFIO kernel dependencies for ppc64le
2021-11-09 15:22:16 -08:00
Jakob Naucke
b7b89905d4 virtcontainers: Lint protection types
Protection types like tdxProtection or seProtection were marked nolint,
remove this. As a side effect, ARM needs dummy tests for these.

Fixes: #2801
Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-11-09 18:36:32 +01:00
Amulyam24
7566b736ac kernel: add VFIO kernel dependencies for ppc64le
Recently added VFIO kernel configs require additional
dependencies on ppc64le.

Fixes: #2991

Signed-off-by: Amulyam24 <amulmek1@in.ibm.com>
2021-11-09 14:38:03 +05:30
James O. D. Hunt
87f676062c agent: Remove dynamic tracing APIs
Remove the `StartTracing` and `StopTracing` agent APIs that toggle
dynamic tracing. This is not supported in Kata 2.x, as documented in the
[tracing proposals document](https://github.com/kata-containers/kata-containers/pull/2062).

Fixes: #2985.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-09 08:39:06 +00:00
James O. D. Hunt
b09dd7a883 docs: Fix typo
Correct a typo identified by the static checker's spell checker.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-09 08:38:42 +00:00
James O. D. Hunt
b192d388c1 Merge pull request #2970 from jodh-intel/logging-create-tests-and-checks
logging: Always run crate tests
2021-11-08 13:16:48 +00:00
Manabu Sugimoto
c66b56683b agent: Ignore unknown seccomp system calls
If Kata agent cannot resolve the system calls given by seccomp profiles,
the agent ignores the system calls and continues to run without an error.

Fixes: #2957

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2021-11-05 21:00:41 +09:00
James O. D. Hunt
d47484e7c1 logging: Always run crate tests
Ensure the tests in the local `logging` crate are run for all consumers
of it.

Additionally, add a new test which checks that output is generated by a
range of different log level `slog` macros. This is designed to ensure
debug level output is always available for the consumers of the
`logging` crate.

Fixes: #2969.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-04 17:26:52 +00:00
James O. D. Hunt
5c9c0b6e62 build: Fix default target
Fixed the top-level build which was broken: the kata deploy
Makefile was being sourced, but it was defining the first target, which
became the default.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-04 16:30:50 +00:00
Snir Sheriber
b34ed403c5 cgroups: pass vhost-vsock device to cgroup
for the sandbox cgroup

Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2021-11-04 10:59:10 +02:00
Snir Sheriber
7362e1e8a9 runtime: remove prefix when cgroups are managed by systemd
as done previously in 9949daf4dc

Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2021-11-04 10:13:22 +02:00
wangyongchao.bj
1b1790fdbc agent/src: improve unit test coverage for src/namespace.rs
Improve unit test coverage for src/namespace.rs for Kata 2.0 agent

Fixes: #289

Signed-off-by: wangyongchao.bj <wangyongchao.bj@inspur.com>
2021-09-17 14:15:14 +08:00
783 changed files with 36188 additions and 29574 deletions

View File

@@ -8,20 +8,25 @@ COMPONENTS =
COMPONENTS += agent
COMPONENTS += runtime
COMPONENTS += trace-forwarder
# List of available tools
TOOLS =
TOOLS += agent-ctl
TOOLS += trace-forwarder
STANDARD_TARGETS = build check clean install test vendor
default: all
all: logging-crate-tests build
logging-crate-tests:
make -C src/libs/logging
include utils.mk
include ./tools/packaging/kata-deploy/local-build/Makefile
all: build
# Create the rules
$(eval $(call create_all_rules,$(COMPONENTS),$(TOOLS),$(STANDARD_TARGETS)))
@@ -34,4 +39,10 @@ generate-protocols:
static-checks: build
bash ci/static-checks.sh
.PHONY: all default static-checks binary-tarball install-binary-tarball
.PHONY: \
all \
binary-tarball \
default \
install-binary-tarball \
logging-crate-tests \
static-checks

View File

@@ -70,8 +70,8 @@ The table below lists the remaining parts of the project:
| [packaging](tools/packaging) | infrastructure | Scripts and metadata for producing packaged binaries<br/>(components, hypervisors, kernel and rootfs). |
| [kernel](https://www.kernel.org) | kernel | Linux kernel used by the hypervisor to boot the guest image. Patches are stored [here](tools/packaging/kernel). |
| [osbuilder](tools/osbuilder) | infrastructure | Tool to create "mini O/S" rootfs and initrd images and kernel for the hypervisor. |
| [`agent-ctl`](tools/agent-ctl) | utility | Tool that provides low-level access for testing the agent. |
| [`trace-forwarder`](src/trace-forwarder) | utility | Agent tracing helper. |
| [`agent-ctl`](src/tools/agent-ctl) | utility | Tool that provides low-level access for testing the agent. |
| [`trace-forwarder`](src/tools/trace-forwarder) | utility | Agent tracing helper. |
| [`ci`](https://github.com/kata-containers/ci) | CI | Continuous Integration configuration files and scripts. |
| [`katacontainers.io`](https://github.com/kata-containers/www.katacontainers.io) | Source for the [`katacontainers.io`](https://www.katacontainers.io) site. |

View File

@@ -1 +1 @@
2.3.1
2.4.0-alpha1

View File

@@ -1,3 +1,4 @@
#!/bin/bash
#
# Copyright (c) 2020 Intel Corporation
#

View File

@@ -41,7 +41,8 @@ cflags="-O2"
# gperf_version=$(get_version "externals.gperf.version")
# gperf_url=$(get_version "externals.gperf.url")
gperf_version="3.1"
gperf_url="https://ftp.gnu.org/gnu/gperf"
# XXX: gnu.org currently unavailable - see https://github.com/kata-containers/kata-containers/issues/3314
gperf_url="https://www.mirrorservice.org/sites/ftp.gnu.org/gnu/gperf"
gperf_tarball="gperf-${gperf_version}.tar.gz"
gperf_tarball_url="${gperf_url}/${gperf_tarball}"

View File

@@ -86,21 +86,6 @@ All other configurations are supported and are working properly.
## Networking
### Docker swarm and compose support
The newest version of Docker supported is specified by the
`externals.docker.version` variable in the
[versions database](https://github.com/kata-containers/runtime/blob/master/versions.yaml).
Basic Docker swarm support works. However, if you want to use custom networks
with Docker's swarm, an older version of Docker is required. This is specified
by the `externals.docker.meta.swarm-version` variable in the
[versions database](https://github.com/kata-containers/runtime/blob/master/versions.yaml).
See issue https://github.com/kata-containers/runtime/issues/175 for more information.
Docker compose normally uses custom networks, so also has the same limitations.
## Resource management
Due to the way VMs differ in their CPU and memory allocation, and sharing

View File

@@ -41,7 +41,7 @@ Documents that help to understand and contribute to Kata Containers.
### Design and Implementations
* [Kata Containers Architecture](design/architecture.md): Architectural overview of Kata Containers
* [Kata Containers Architecture](design/architecture): Architectural overview of Kata Containers
* [Kata Containers E2E Flow](design/end-to-end-flow.md): The entire end-to-end flow of Kata Containers
* [Kata Containers design](./design/README.md): More Kata Containers design documents
* [Kata Containers threat model](./threat-model/threat-model.md): Kata Containers threat model
@@ -52,6 +52,18 @@ Documents that help to understand and contribute to Kata Containers.
* [How to contribute to Kata Containers](https://github.com/kata-containers/community/blob/master/CONTRIBUTING.md)
* [Code of Conduct](../CODE_OF_CONDUCT.md)
## Help Writing a Code PR
* [Code PR advice](code-pr-advice.md).
## Help Writing Unit Tests
* [Unit Test Advice](Unit-Test-Advice.md)
## Help Improving the Documents
* [Documentation Requirements](Documentation-Requirements.md)
### Code Licensing
* [Licensing](Licensing-strategy.md): About the licensing strategy of Kata Containers.
@@ -61,10 +73,6 @@ Documents that help to understand and contribute to Kata Containers.
* [Release strategy](Stable-Branch-Strategy.md)
* [Release Process](Release-Process.md)
## Help Improving the Documents
* [Documentation Requirements](Documentation-Requirements.md)
## Website Changes
If you have a suggestion for how we can improve the

View File

@@ -120,7 +120,7 @@ stable and main. While this is not in place currently, it should be considered i
### Patch releases
Releases are made every three weeks, which include a GitHub release as
Releases are made every four weeks, which include a GitHub release as
well as binary packages. These patch releases are made for both stable branches, and a "release candidate"
for the next `MAJOR` or `MINOR` is created from main. If there are no changes across all the repositories, no
release is created and an announcement is made on the developer mailing list to highlight this.
@@ -136,8 +136,7 @@ The process followed for making a release can be found at [Release Process](Rele
### Frequency
Minor releases are less frequent in order to provide a more stable baseline for users. They are currently
running on a twelve week cadence. As the Kata Containers code base has reached a certain level of
maturity, we have increased the cadence from six weeks to twelve weeks. The release schedule can be seen on the
running on a sixteen-week cadence. The release schedule can be seen on the
[release rotation wiki page](https://github.com/kata-containers/community/wiki/Release-Team-Rota).
### Compatibility

379
docs/Unit-Test-Advice.md Normal file
View File

@@ -0,0 +1,379 @@
# Unit Test Advice
## Overview
This document offers advice on writing a Unit Test (UT) in
[Golang](https://golang.org) and [Rust](https://www.rust-lang.org).
## General advice
### Unit test strategies
#### Positive and negative tests
Always add positive tests (where success is expected) *and* negative
tests (where failure is expected).
#### Boundary condition tests
Try to add unit tests that exercise boundary conditions such as:
- Missing values (`null` or `None`).
- Empty strings and huge strings.
- Empty (or uninitialised) complex data structures
(such as lists, vectors and hash tables).
- Common numeric values (such as `-1`, `0`, `1` and the minimum and
maximum values).
#### Test unusual values
Also always consider "unusual" input values such as:
- String values containing spaces, Unicode characters, special
characters, escaped characters or null bytes.
> **Note:** Consider these unusual values in prefix, infix and
> suffix position.
- String values that cannot be converted into numeric values or which
contain invalid structured data (such as invalid JSON).
#### Other types of tests
If the code requires other forms of testing (such as stress testing,
fuzz testing and integration testing), raise a GitHub issue and
reference it on the issue you are using for the main work. This
ensures the test team are aware that a new test is required.
### Test environment
#### Create unique files and directories
Ensure your tests do not write to a fixed file or directory. This can
cause problems when running multiple tests simultaneously and also
when running tests after a previous test run failure.
#### Assume parallel testing
Always assume your tests will be run *in parallel*. If this is
problematic for a test, force it to run in isolation using the
`serial_test` crate for Rust code for example.
### Running
Ensure you run the unit tests and they all pass before raising a PR.
Ideally do this on different distributions on different architectures
to maximise coverage (and so minimise surprises when your code runs in
the CI).
## Assertions
### Golang assertions
Use the `testify` assertions package to create a new assertion object as this
keeps the test code free from distracting `if` tests:
```go
func TestSomething(t *testing.T) {
assert := assert.New(t)
err := doSomething()
assert.NoError(err)
}
```
### Rust assertions
Use the standard set of `assert!()` macros.
## Table driven tests
Try to write tests using a table-based approach. This allows you to distill
the logic into a compact table (rather than spreading the tests across
multiple test functions). It also makes it easy to cover all the
interesting boundary conditions:
### Golang table driven tests
Assume the following function:
```go
// The function under test.
//
// Accepts a string and an integer and returns the
// result of sticking them together separated by a dash as a string.
func joinParamsWithDash(str string, num int) (string, error) {
if str == "" {
return "", errors.New("string cannot be blank")
}
if num <= 0 {
return "", errors.New("number must be positive")
}
return fmt.Sprintf("%s-%d", str, num), nil
}
```
A table driven approach to testing it:
```go
import (
"testing"
"github.com/stretchr/testify/assert"
)
func TestJoinParamsWithDash(t *testing.T) {
assert := assert.New(t)
// Type used to hold function parameters and expected results.
type testData struct {
param1 string
param2 int
expectedResult string
expectError bool
}
// List of tests to run including the expected results
data := []testData{
// Failure scenarios
{"", -1, "", true},
{"", 0, "", true},
{"", 1, "", true},
{"foo", 0, "", true},
{"foo", -1, "", true},
// Success scenarios
{"foo", 1, "foo-1", false},
{"bar", 42, "bar-42", false},
}
// Run the tests
for i, d := range data {
// Create a test-specific string that is added to each assert
// call. It will be displayed if any assert test fails.
msg := fmt.Sprintf("test[%d]: %+v", i, d)
// Call the function under test
result, err := joinParamsWithDash(d.param1, d.param2)
// update the message for more information on failure
msg = fmt.Sprintf("%s, result: %q, err: %v", msg, result, err)
if d.expectError {
assert.Error(err, msg)
// If an error is expected, there is no point
// performing additional checks.
continue
}
assert.NoError(err, msg)
assert.Equal(d.expectedResult, result, msg)
}
}
```
### Rust table driven tests
Assume the following function:
```rust
// Convenience type to allow Result return types to only specify the type
// for the true case; failures are specified as static strings.
// XXX: This is an example. In real code use the "anyhow" and
// XXX: "thiserror" crates.
pub type Result<T> = std::result::Result<T, &'static str>;
// The function under test.
//
// Accepts a string and an integer and returns the
// result of sticking them together separated by a dash as a string.
fn join_params_with_dash(str: &str, num: i32) -> Result<String> {
if str.is_empty() {
return Err("string cannot be blank");
}
if num <= 0 {
return Err("number must be positive");
}
let result = format!("{}-{}", str, num);
Ok(result)
}
```
A table driven approach to testing it:
```rust
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_join_params_with_dash() {
// This is a type used to record all details of the inputs
// and outputs of the function under test.
#[derive(Debug)]
struct TestData<'a> {
str: &'a str,
num: i32,
result: Result<String>,
}
// The tests can now be specified as a set of inputs and outputs
let tests = &[
// Failure scenarios
TestData {
str: "",
num: 0,
result: Err("string cannot be blank"),
},
TestData {
str: "foo",
num: -1,
result: Err("number must be positive"),
},
// Success scenarios
TestData {
str: "foo",
num: 42,
result: Ok("foo-42".to_string()),
},
TestData {
str: "-",
num: 1,
result: Ok("--1".to_string()),
},
];
// Run the tests
for (i, d) in tests.iter().enumerate() {
// Create a string containing details of the test
let msg = format!("test[{}]: {:?}", i, d);
// Call the function under test
let result = join_params_with_dash(d.str, d.num);
// Update the test details string with the results of the call
let msg = format!("{}, result: {:?}", msg, result);
// Perform the checks
if d.result.is_ok() {
assert!(result == d.result, msg);
continue;
}
let expected_error = format!("{}", d.result.as_ref().unwrap_err());
let actual_error = format!("{}", result.unwrap_err());
assert!(actual_error == expected_error, msg);
}
}
}
```
## Temporary files
Always delete temporary files on success.
### Golang temporary files
```go
func TestSomething(t *testing.T) {
assert := assert.New(t)
// Create a temporary directory
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
// Delete it at the end of the test
defer os.RemoveAll(tmpdir)
// Add test logic that will use the tmpdir here...
}
```
### Rust temporary files
Use the `tempfile` crate which allows files and directories to be deleted
automatically:
```rust
#[cfg(test)]
mod tests {
use tempfile::tempdir;
#[test]
fn test_something() {
// Create a temporary directory (which will be deleted automatically)
let dir = tempdir().expect("failed to create tmpdir");
let filename = dir.path().join("file.txt");
// create filename ...
}
}
```
## Test user
[Unit tests are run *twice*](https://github.com/kata-containers/tests/blob/main/.ci/go-test.sh):
- as the current user
- as the `root` user (if different to the current user)
When writing a test consider which user should run it; even if the code the
test is exercising runs as `root`, it may be necessary to *only* run the test
as a non-`root` user for the test to be meaningful. Add appropriate skip
guards around code that requires `root` and non-`root` so that the test
will run if the correct type of user is detected and skipped if not.
### Run Golang tests as a different user
The main repository has the most comprehensive set of skip abilities. See:
- https://github.com/kata-containers/kata-containers/tree/main/src/runtime/pkg/katatestutils
### Run Rust tests as a different user
One method is to use the `nix` crate along with some custom macros:
```
#[cfg(test)]
mod tests {
#[allow(unused_macros)]
macro_rules! skip_if_root {
() => {
if nix::unistd::Uid::effective().is_root() {
println!("INFO: skipping {} which needs non-root", module_path!());
return;
}
};
}
#[allow(unused_macros)]
macro_rules! skip_if_not_root {
() => {
if !nix::unistd::Uid::effective().is_root() {
println!("INFO: skipping {} which needs root", module_path!());
return;
}
};
}
#[test]
fn test_that_must_be_run_as_root() {
// Not running as the superuser, so skip.
skip_if_not_root!();
// Run test *iff* the user running the test is root
// ...
}
}
```

View File

@@ -102,7 +102,7 @@ first
[install the latest release](#determine-latest-version).
See the
[manual installation installation documentation](install/README.md#manual-installation)
[manual installation documentation](install/README.md#manual-installation)
for details on how to automatically install and configure a static release
with containerd.
@@ -114,7 +114,7 @@ with containerd.
> kernel or image.
If you are using custom
[guest assets](design/architecture.md#guest-assets),
[guest assets](design/architecture/README.md#guest-assets),
you must upgrade them to work with Kata Containers 2.x since Kata
Containers 1.x assets will **not** work.

247
docs/code-pr-advice.md Normal file
View File

@@ -0,0 +1,247 @@
# Code PR Advice
Before raising a PR containing code changes, we suggest you consider
the following to ensure a smooth and fast process.
> **Note:**
>
> - All the advice in this document is optional. However, if the
> advice provided is not followed, there is no guarantee your PR
> will be merged.
>
> - All the check tools will be run automatically on your PR by the CI.
> However, if you run them locally first, there is a much better
> chance of a successful initial CI run.
## Assumptions
This document assumes you have already read (and in the case of the
code of conduct agreed to):
- The [Kata Containers code of conduct](https://github.com/kata-containers/community/blob/main/CODE_OF_CONDUCT.md).
- The [Kata Containers contributing guide](https://github.com/kata-containers/community/blob/main/CONTRIBUTING.md).
## Code
### Architectures
Do not write architecture-specific code if it is possible to write the
code generically.
### General advice
- Do not write code to impress: instead write code that is easy to read and understand.
- Always consider which user will run the code. Try to minimise
the privileges the code requires.
### Comments
Always add comments if the intent of the code is not obvious. However,
try to avoid comments if the code could be made clearer (for example
by using more meaningful variable names).
### Constants
Don't embed magic numbers and strings in functions, particularly if
they are used repeatedly.
Create constants at the top of the file instead.
### Copyright and license
Ensure all new files contain a copyright statement and an SPDX license
identifier in the comments at the top of the file.
### FIXME and TODO
If the code contains areas that are not fully implemented, make this
clear with a comment which provides a link to a GitHub issue that provides
further information.
Do not just rely on comments in this case though: if possible, return
a "`BUG: feature X not implemented see {bug-url}`" type error.
### Functions
- Keep functions relatively short (less than 100 lines is a good "rule of thumb").
- Document functions if the parameters, return value or general intent
of the function is not obvious.
- Always return errors where possible.
Do not discard error return values from the functions this function
calls.
### Logging
- Don't use multiple log calls when a single log call could be used.
- Use structured logging where possible to allow
[standard tooling](https://github.com/kata-containers/tests/tree/main/cmd/log-parser)
to extract the log fields.
### Names
Give functions, macros and variables clear and meaningful names.
### Structures
#### Golang structures
Unlike Rust, Go does not enforce that all structure members be set.
This has led to numerous bugs in the past where code like the
following is used:
```go
type Foo struct {
Key string
Value string
}
// BUG: Key not set, but nobody noticed! ;(
foo1 := Foo{
Value: "foo",
}
```
A much safer approach is to create a constructor function to enforce
integrity:
```go
type Foo struct {
Key string
Value string
}
func NewFoo(key, value string) (*Foo, error) {
if key == "" {
return nil, errors.New("Foo needs a key")
}
if value == "" {
return nil, errors.New("Foo needs a value")
}
return &Foo{
Key: key,
Value: value,
}, nil
}
func testFoo() error {
// BUG: Key not set, but nobody noticed! ;(
badFoo := Foo{Value: "value"}
// Ok - the constructor performs needed validation
goodFoo, err := NewFoo("name", "value")
if err != nil {
return err
}
return nil
}
```
> **Note:**
>
> The above is just an example. The *safest* approach would be to move
> `NewFoo()` into a separate package and make `Foo` and its elements
> private. The compiler would then enforce the use of the constructor
> to guarantee correctly defined objects.
### Tracing
Consider if the code needs to create a new
[trace span](https://github.com/kata-containers/kata-containers/blob/main/docs/tracing.md).
Ensure any new trace spans added to the code are completed.
## Tests
### Unit tests
Where possible, code changes should be accompanied by unit tests.
Consider using the standard
[table-based approach](Unit-Test-Advice.md)
as it encourages you to make functions small and simple, and also
allows you to think about what types of value to test.
### Other categories of test
Raise a GitHub issue in the
[`tests`](https://github.com/kata-containers/tests) repository that
explains what sort of test is required along with as much detail as
possible. Ensure the original issue is referenced on the `tests` issue.
### Unsafe code
#### Rust language specifics
Minimise the use of `unsafe` blocks in Rust code and since it is
potentially dangerous always write [unit tests](#unit-tests)
for this code where possible.
`expect()` and `unwrap()` will cause the code to panic on error.
Prefer to return a `Result` on error rather than using these calls to
allow the caller to deal with the error condition.
The table below lists the small number of cases where use of
`expect()` and `unwrap()` are permitted:
| Area | Rationale for permitting |
|-|-|
| In test code (the `tests` module) | Panics will cause the test to fail, which is desirable. |
| `lazy_static!()` | This magic macro cannot "return" a value as it runs before `main()`. |
| `defer!()` | Similar to golang's `defer()` but doesn't allow the use of `?`. |
| `tokio::spawn(async move {})` | Cannot currently return a `Result` from an `async move` closure. |
| If an explicit test is performed before the `unwrap()` / `expect()` | *"Just about acceptable"*, but not ideal `[*]` |
| `Mutex.lock()` | Almost unrecoverable if failed in the lock acquisition |
`[*]` - This can lead to bad *future* code: consider what would
happen if the explicit test gets dropped in the future. This is more
likely to happen if the test and the extraction of the value are two separate
operations. In summary, this strategy can introduce an insidious
maintenance issue.
## Documentation
### General requirements
- All new features should be accompanied by documentation explaining:
- What the new feature does
- Why it is useful
- How to use the feature
- Any known issues or limitations
Links should be provided to GitHub issues tracking the issues
- The [documentation requirements document](Documentation-Requirements.md)
explains how the project formats documentation.
### Markdown syntax
Run the
[markdown checker](https://github.com/kata-containers/tests/tree/main/cmd/check-markdown)
on your documentation changes.
### Spell check
Run the
[spell checker](https://github.com/kata-containers/tests/tree/main/cmd/check-spelling)
on your documentation changes.
## Finally
You may wish to read the documentation that the
[Kata Review Team](https://github.com/kata-containers/community/blob/main/Rota-Process.md) use to help review PRs:
- [PR review guide](https://github.com/kata-containers/community/blob/main/PR-Review-Guide.md).
- [documentation review process](https://github.com/kata-containers/community/blob/main/Documentation-Review-Process.md).

View File

@@ -2,7 +2,7 @@
Kata Containers design documents:
- [Kata Containers architecture](architecture.md)
- [Kata Containers architecture](architecture)
- [API Design of Kata Containers](kata-api-design.md)
- [Design requirements for Kata Containers](kata-design-requirements.md)
- [VSocks](VSocks.md)

View File

@@ -1 +1 @@
<mxfile host="Chrome" modified="2020-07-02T06:44:28.736Z" agent="5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36" etag="r7FpfnbGNK7jbg54Gu9x" version="13.3.5" type="device"><diagram id="XNV8G0dePIPkhS_Khqr4" name="Page-1">7VvZcuI4FP0aHqFky+sjkNCTqfR0qtLV6fTLlMDy0hiLscWWrx8Zy3iRQkhjQxbygnVlhK1zz9HRkg4cztZfYjT3vxIHhx0VOOsOvOqoKrRthX2kkU0WMTWYBbw4cLIQKAL3wRPOgkoeXQQOTngsC1FCQhrMq8EJiSI8oZUYimOyqt7mktCpBObIw0LgfoJCMfoQONTPoiqEdlHxFw48n/80hIA/+Qzld/NA4iOHrEoheN2Bw5gQml3N1kMcpr1X7ZjRM7W7J4txRA/5wrd/v5rDewTubvrjyZDYg1l/01W0rJklChf8lfnT0k3eBzFZRA5OW1E6cLDyA4rv52iS1q4Y6izm01nIq10SUQ4jwxAOvBg5AXvCIQlJvG0PmhgZOK1zgzAsxR2ELXfC4gmNyRSXaoyJhccuqxHfmXfDEscUr0sh3gdfMJlhGm/YLby2q+i6nn2J56QOzKy8KhDWctT8Eri7rEQ8q7xd60W/swve9a+BQW8PBlWTw+C6jm0YIgyu66oTKQyOMTZ0oykYNLsGQ/7OJRgYnUQYFENvCYYWUXgvZFDss5PBuHBBVc7OBQUKvY4dNjbyIompTzwSofC6iA4KXNKULu65JWTO0fiNKd1wONCCkipWeB3Qn6Xrx7Spns5LV2ve8raw4YUyy7R9iCRkEU/4u3i3328se/zw+97wp99Wf4fTh2I0pCj2MN3TOZwjaYfsBTjGIaLBsmomGodKhQJjKI3nEymAt2jMPFql01EYeBG7nrAOwyy/B2nqBswE9XnFLHCcDF+cBE9ovG0v7fo5CSK6fR190NGvDgJjb7YJpNlZO/6rFfMkJRPoKbahVUUtKx2MBm/8Ln27Ustqmonldrt2tQ3iugnLmzqeu4c8COK9mVkRRSNkXTpwgmUFZeO/RWopt0h0ky0UfXaDos3XWzzyenblpZ+sfykKIhw73cQPZt0poqi73DXPHnf7C9nNzY2Hmqi2yhgpWJWpLQDGdX/EW6jqM/trSoUts6bCUBwLFdPMk6Csw0YDg6EcePMV3H6D4szwiDc/y4XSt9Ji8bVtSSbq5nGibouivpdjL6o6TxjQA5ZuVjMGHKc0jQqJfKwQzdQHzpwj7YAkc+Sj17nswN7HLklGofHNCbglCrjhWKahyQQc9nUN5i20JeBMnMHLAi6bzLQm35r+megGjqKbeqhQw0OF+jR8U0W+3cVp2z5eJOmL45gl8ccmnm1XiGdIltQUS2uHePJx7py8K7j2WKbaC7wrqPaYt3eSYQ5KZr1yMTsb76QQi1Min9K5FPe3OelVnyHaq+e8oMfYVWXgkVPefIKrKL3aAlN71lRcxXgWz5PxWP2YRIYHEnmXXxBa1fSyj8uvRrMJ/XBvb7q/6A348c9DF/34nvjgzANA61lzgizJMX4jNguKer9dqpqRKKCkXX917pUpW1drS48yh6Xqp1yZ0kS9Tshk2hwOsl0xCwATynDo6wBooG0cLFibYFoiCjIQYGs2VxH6+43OL/9omNu32vLyqozxpuyqKurXe9uleZYyf+BYoa6rx3mInJWGUuFkVwOn86yKgOllW6Zp0TVr2zKaRHRPvS2jiWT+8IOfqcBeDQodqucd/8TdMeSl7/iRzaCm1bYpVREEWwZCW0dFLAGEnXilCtcsGJLTO7bpANOUEEbHliNdFbXUMczO+1SBGo0AGI2aAgqqdaC0XKTK0qWd
kjB79oZYWJw0f1qsDMkgc1Kk8vN15fWwzRzHyyCRzHbZi9IqGNWOjEiEa73OQ4f7Shn61blE/aidT+LgKc2vsPPCssWrzizWBVB2ZlF2ZLE1qEQb6C1wkprUKY6j9Ez8u4CrmeEJVNGBsk1Y46TwiCdKP59L0HOhOpdLkJxkutgE2dCjw/PbBGW/p7v4hB1Y5tl9gmjpLj5B6hNka+Yn9QmqaOkuPmGHjtaeT2DF4h/tsrW/4v8V4fX/</diagram></mxfile>
<mxfile host="app.diagrams.net" modified="2021-11-05T13:07:32.992Z" agent="5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36" etag="j5e7J3AOXxeQrt-Zz2uw" version="15.6.8" type="device"><diagram id="XNV8G0dePIPkhS_Khqr4" name="Page-1">7Vxdd9o4EP01nLP7QI5s+fORUNhNT7rNbnqaZl/2CCywG2OxQhDIr18Z29iyZD6CDZRuHho8toQ9986dGVlNC3Yny98omvqfiIfDlg68ZQt+aOm6AUyb/4otq8TiGnpiGNPAS0wgNzwGbzgxapl1Hnh4ltoSEyMkZMFUNA5JFOEhE2yIUvIqXjYioScYpmiMJcPjEIWy9SnwmJ9YdQjd/MTvOBj76VdDCNI7n6Ds6tQw85FHXgsm2GvBLiWEJZ8myy4OY++JjulXnN3cGcUR22fAn3fPzx+jj7e9HrIXA330feIZ7czPCxTO00dO75atMh+MKZlP08swZXip8jwaZJcD+ca0zeNyomAywYyu+CXpRG3NNM1kUMoSXU+PX3OfG9nEfsHdG56gFOfxZvbcE/xD6gy1Yz7/88nuPiLwcNcZDLvEvZ10Vm1zt1+4WyIPx5NoLXj76gcMP07RMD77yqOB23w2CdPTIxKxlN78nuHtmCIv4A7qkpDQ9XzQxsjC8blREIYFu4ewMxpy+4xR8oILZ6yhgwcjfkZ2+Xa0yzDK2JzP81ZzntcNtedHI8+1LNnzo9FIHyo971kDy7Qa8Xx61gJCSGhyRHCtkXHRLLMhXGwJF659/KFrCwtHBkAbIA3rKgAAsHqdfjqDCBn/aRIYTRORUWiVa8rAwKZwcSRcuCQzFESYcrNWLz6K4HFtD9i2QrZM7HiGCjtHH8Ak3ETs+n0A+v0msdNhCTv7RkZPM1TwNSV37lb4asw6VwifDc4MnqJ88ldTTBfBjHulDB1/SibiI/o2IhEuAZGaUBiMI3445B7lvIC3sc8CXqZ20hOTwPPir1ESIqcMUORDn9DgLaZcmF7QnHKWUhqQ4TNUpUZj6GkSeuM5nvGUBl4wjeJW5odAsDnAzBJihiLgrIYgUz6DrJYSRqdvVwzblol82qJZM3Y75sfrV9wKGC+pXdEa7BTP16/s7/lLbVc0uY+8hn7lYGCkdgVKyJy0XdHkPvJn6VcOxu4C2xVte7t5zf3K0fCdv12Ry6efpl05XDgvrFvR5V7zmruVw/E6Z7OiDjcoQYK9MX5MDwllPhmTCIW93FpyXn7NPSHTFMXvmLFV6lI0Z0TEGC8D9q3w+TmeiueN5OjDMp15fbCSMdJijDg0dPUtuzI+KMwSH+bTrI+yeWRss3xO5nSYsfb+y53jDp6+P1r+y+fXj+HLU97AMETHmG1xalo+xI7cygqKQ8SCBRZuQwXxemiHUrQqXDAlQcRmhZkfYoPQBNqOmJstu0gYxQjD1ksjnBLFkrvICbd5nCNUA0qqwRidDpXMvEcDLiMCm/ZXAopnwVvaV8dcSF3IJzdvW+YHBctktmwNo727+erWHdxormWIMpEcHUYXGV0osmFz09kUZDSaYSZJymEIqyNHLqirEb5A7Xmv1hTZZB+nPa6sPVtFaqf45HyDBrRFvtnHEa5WQqklQy7ir5g6aiE6hjpqEbuQtOUCUf52p63yCFOzS6w7Lm1t9WtB1LqFNrMXjfmnlm6FcYE74CZrHH/6ZdOLeq04F/T5v92/7tqff5UoXeeyj+U5tqXsPWHHNGA2w37LPtlLib0L37auOa4IKpQXpDXHkktfq4bSV4lf1tb+HBpyXOmr75t+4L7pZ28ROQpjXY7RBxrP7eP5LH5wTBdYXla4rsATyz4LqALPaCbw1Mn7nHGXx9pzMdR2xF0eas
/ZfCfJ3VDRcqrFrPa4e2fytk1bSbfq5F0eYTpmrclbyUH5XaTP2FRJzMvsOPUKJTi44QQ3Um6uqd/MXm9l/aZuiFM01x7ICwqV6J5MdqCY8QGwTqI98aQPmAbcpTFTj1wC21+P4J56tGGhCQEUK8QjaVhNs8NVzbHFezNAaSP7TlUrjTha1dDX0f1d+292B77+8dRGX7/MfHCmzPpOplaycPcah9tIslM1lpq4jWZTPGWTJBGTjjuGYVXftKXpLY0wHbh9hA5ca9uIZtpkKKfaF8RQe0KigCle6V3sXofDa2/NNfWbCgIVqm/dVLxgba76lrcvXGjb+64yetvK1u4VMKtuYTkOKjl0frQXI5VR8446FZqmXlNlCsqvQkqyXktpqnxnvMdWvOZ3hzqGmAgMR7EmYG928hR1yW5s05XCMcnaqRcsBAdZ/87j/5C4pmR7tuZkh1+gGdPlmpjZ+WzFNV9wbc/8YNJep5/FZnp+t+tvSC6uLx8ZDeejrfQ6aDtqBdTNrbzKujYjw5f6XK/a8esAwIt4hes7JgAGOKXrs/2o2o1e2qUtb51T1QZ1bL5SPoL8mvYCxEn5pkDNWKcpxir3rv+vToeGiF1BnMtSJ3n16ArUaX/XV6qTKW9Wq0md+GH+RwaSSiv/Ww2w9x8=</diagram></mxfile>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 93 KiB

After

Width:  |  Height:  |  Size: 90 KiB

View File

@@ -1,290 +0,0 @@
# Kata Containers Architecture
## Overview
This is an architectural overview of Kata Containers, based on the 2.0 release.
The primary deliverable of the Kata Containers project is a CRI friendly shim. There is also a CRI friendly library API behind them.
The [Kata Containers runtime](../../src/runtime)
is compatible with the [OCI](https://github.com/opencontainers) [runtime specification](https://github.com/opencontainers/runtime-spec)
and therefore works seamlessly with the [Kubernetes\* Container Runtime Interface (CRI)](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-node/container-runtime-interface.md)
through the [CRI-O\*](https://github.com/kubernetes-incubator/cri-o) and
[Containerd\*](https://github.com/containerd/containerd) implementation.
Kata Containers creates a QEMU\*/KVM virtual machine for pod that `kubelet` (Kubernetes) creates respectively.
The [`containerd-shim-kata-v2` (shown as `shimv2` from this point onwards)](../../src/runtime/cmd/containerd-shim-kata-v2/)
is the Kata Containers entrypoint, which
implements the [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2) for Kata.
Before `shimv2` (as done in [Kata Containers 1.x releases](https://github.com/kata-containers/runtime/releases)), we need to create a `containerd-shim` and a [`kata-shim`](https://github.com/kata-containers/shim) for each container and the Pod sandbox itself, plus an optional [`kata-proxy`](https://github.com/kata-containers/proxy) when VSOCK is not available. With `shimv2`, Kubernetes can launch Pod and OCI compatible containers with one shim (the `shimv2`) per Pod instead of `2N+1` shims, and no standalone `kata-proxy` process even if no VSOCK is available.
![Kubernetes integration with shimv2](arch-images/shimv2.svg)
The container process is then spawned by
[`kata-agent`](../../src/agent), an agent process running
as a daemon inside the virtual machine. `kata-agent` runs a [`ttRPC`](https://github.com/containerd/ttrpc-rust) server in
the guest using a VIRTIO serial or VSOCK interface which QEMU exposes as a socket
file on the host. `shimv2` uses a `ttRPC` protocol to communicate with
the agent. This protocol allows the runtime to send container management
commands to the agent. The protocol is also used to carry the I/O streams (stdout,
stderr, stdin) between the containers and the manage engines (e.g. CRI-O or containerd).
For any given container, both the init process and all potentially executed
commands within that container, together with their related I/O streams, need
to go through the VSOCK interface exported by QEMU.
The container workload, that is, the actual OCI bundle rootfs, is exported from the
host to the virtual machine. In the case where a block-based graph driver is
configured, `virtio-scsi` will be used. In all other cases a `virtio-fs` VIRTIO mount point
will be used. `kata-agent` uses this mount point as the root filesystem for the
container processes.
## Virtualization
How Kata Containers maps container concepts to virtual machine technologies, and how this is realized in the multiple
hypervisors and VMMs that Kata supports is described within the [virtualization documentation](./virtualization.md)
## Guest assets
The hypervisor will launch a virtual machine which includes a minimal guest kernel
and a guest image.
### Guest kernel
The guest kernel is passed to the hypervisor and used to boot the virtual
machine. The default kernel provided in Kata Containers is highly optimized for
kernel boot time and minimal memory footprint, providing only those services
required by a container workload. This is based on a very current upstream Linux
kernel.
### Guest image
Kata Containers supports both an `initrd` and `rootfs` based minimal guest image.
#### Root filesystem image
The default packaged root filesystem image, sometimes referred to as the "mini O/S", is a
highly optimized container bootstrap system based on [Clear Linux](https://clearlinux.org/). It provides an extremely minimal environment and
has a highly optimized boot path.
The only services running in the context of the mini O/S are the init daemon
(`systemd`) and the [Agent](#agent). The real workload the user wishes to run
is created using libcontainer, creating a container in the same manner that is done
by `runc`.
For example, when `ctr run -ti ubuntu date` is run:
- The hypervisor will boot the mini-OS image using the guest kernel.
- `systemd`, running inside the mini-OS context, will launch the `kata-agent` in
the same context.
- The agent will create a new confined context to run the specified command in
(`date` in this example).
- The agent will then execute the command (`date` in this example) inside this
new context, first setting the root filesystem to the expected Ubuntu\* root
filesystem.
#### Initrd image
A compressed `cpio(1)` archive, created from a rootfs which is loaded into memory and used as part of the Linux startup process. During startup, the kernel unpacks it into a special instance of a `tmpfs` that becomes the initial root filesystem.
The only service running in the context of the initrd is the [Agent](#agent) as the init daemon. The real workload the user wishes to run is created using libcontainer, creating a container in the same manner that is done by `runc`.
## Agent
[`kata-agent`](../../src/agent) is a process running in the guest as a supervisor for managing containers and processes running within those containers.
For the 2.0 release, the `kata-agent` is rewritten in the [RUST programming language](https://www.rust-lang.org/) so that we can minimize its memory footprint while keeping the memory safety of the original GO version of [`kata-agent` used in Kata Container 1.x](https://github.com/kata-containers/agent). This memory footprint reduction is pretty impressive, from tens of megabytes down to less than 100 kilobytes, enabling Kata Containers in more use cases like functional computing and edge computing.
The `kata-agent` execution unit is the sandbox. A `kata-agent` sandbox is a container sandbox defined by a set of namespaces (NS, UTS, IPC and PID). `shimv2` can
run several containers per VM to support container engines that require multiple
containers running inside a pod.
`kata-agent` communicates with the other Kata components over `ttRPC`.
## Runtime
`containerd-shim-kata-v2` is a [containerd runtime shimv2](https://github.com/containerd/containerd/blob/v1.4.1/runtime/v2/README.md) implementation and is responsible for handling the `runtime v2 shim APIs`, which is similar to [the OCI runtime specification](https://github.com/opencontainers/runtime-spec) but simplifies the architecture by loading the runtime once and making RPC calls to handle the various container lifecycle commands. This refinement is an improvement on the OCI specification which requires the container manager call the runtime binary multiple times, at least once for each lifecycle command.
`containerd-shim-kata-v2` heavily utilizes the
[virtcontainers package](../../src/runtime/virtcontainers/), which provides a generic, runtime-specification agnostic, hardware-virtualized containers library.
### Configuration
The runtime uses a TOML format configuration file called `configuration.toml`. By default this file is installed in the `/usr/share/defaults/kata-containers` directory and contains various settings such as the paths to the hypervisor, the guest kernel and the mini-OS image.
The actual configuration file paths can be determined by running:
```
$ kata-runtime --show-default-config-paths
```
Most users will not need to modify the configuration file.
The file is well commented and provides a few "knobs" that can be used to modify the behavior of the runtime and your chosen hypervisor.
The configuration file is also used to enable runtime [debug output](../Developer-Guide.md#enable-full-debug).
## Networking
Containers will typically live in their own, possibly shared, networking namespace.
At some point in a container lifecycle, container engines will set up that namespace
to add the container to a network which is isolated from the host network, but
which is shared between containers
In order to do so, container engines will usually add one end of a virtual
ethernet (`veth`) pair into the container networking namespace. The other end of
the `veth` pair is added to the host networking namespace.
This is a very namespace-centric approach as many hypervisors/VMMs cannot handle `veth`
interfaces. Typically, `TAP` interfaces are created for VM connectivity.
To overcome incompatibility between typical container engines expectations
and virtual machines, Kata Containers networking transparently connects `veth`
interfaces with `TAP` ones using Traffic Control:
![Kata Containers networking](arch-images/network.png)
With a TC filter in place, a redirection is created between the container network and the
virtual machine. As an example, the CNI may create a device, `eth0`, in the container's network
namespace, which is a VETH device. Kata Containers will create a tap device for the VM, `tap0_kata`,
and setup a TC redirection filter to mirror traffic from `eth0`'s ingress to `tap0_kata`'s egress,
and a second to mirror traffic from `tap0_kata`'s ingress to `eth0`'s egress.
Kata Containers maintains support for MACVTAP, which was an earlier implementation used in Kata. TC-filter
is the default because it allows for simpler configuration, better CNI plugin compatibility, and performance
on par with MACVTAP.
Kata Containers has deprecated support for bridge due to lacking performance relative to TC-filter and MACVTAP.
Kata Containers supports both
[CNM](https://github.com/docker/libnetwork/blob/master/docs/design.md#the-container-network-model)
and [CNI](https://github.com/containernetworking/cni) for networking management.
### Network Hotplug
Kata Containers has developed a set of network sub-commands and APIs to add, list and
remove a guest network endpoint and to manipulate the guest route table.
The following diagram illustrates the Kata Containers network hotplug workflow.
![Network Hotplug](arch-images/kata-containers-network-hotplug.png)
## Storage
Container workloads are shared with the virtualized environment through [virtio-fs](https://virtio-fs.gitlab.io/).
The [devicemapper `snapshotter`](https://github.com/containerd/containerd/tree/master/snapshots/devmapper) is a special case. The `snapshotter` uses dedicated block devices rather than formatted filesystems, and operates at the block level rather than the file level. This knowledge is used to directly use the underlying block device instead of the overlay file system for the container root file system. The block device maps to the top read-write layer for the overlay. This approach gives much better I/O performance compared to using `virtio-fs` to share the container file system.
Kata Containers has the ability to hotplug and remove block devices, which makes it possible to use block devices for containers started after the VM has been launched.
Users can check to see if the container uses the devicemapper block device as its rootfs by calling `mount(8)` within the container. If the devicemapper block device
is used, `/` will be mounted on `/dev/vda`. Users can disable direct mounting of the underlying block device through the runtime configuration.
## Kubernetes support
[Kubernetes\*](https://github.com/kubernetes/kubernetes/) is a popular open source
container orchestration engine. In Kubernetes, a set of containers sharing resources
such as networking, storage, mount, PID, etc. is called a
[Pod](https://kubernetes.io/docs/user-guide/pods/).
A node can have multiple pods, but at a minimum, a node within a Kubernetes cluster
only needs to run a container runtime and a container agent (called a
[Kubelet](https://kubernetes.io/docs/admin/kubelet/)).
A Kubernetes cluster runs a control plane where a scheduler (typically running on a
dedicated master node) calls into a compute Kubelet. This Kubelet instance is
responsible for managing the lifecycle of pods within the nodes and eventually relies
on a container runtime to handle execution. The Kubelet architecture decouples
lifecycle management from container execution through the dedicated
`gRPC` based [Container Runtime Interface (CRI)](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node/container-runtime-interface-v1.md).
In other words, a Kubelet is a CRI client and expects a CRI implementation to
handle the server side of the interface.
[CRI-O\*](https://github.com/kubernetes-incubator/cri-o) and [Containerd\*](https://github.com/containerd/containerd/) are CRI implementations that rely on [OCI](https://github.com/opencontainers/runtime-spec)
compatible runtimes for managing container instances.
Kata Containers is an officially supported CRI-O and Containerd runtime. Refer to the following guides on how to set up Kata Containers with Kubernetes:
- [How to use Kata Containers and Containerd](../how-to/containerd-kata.md)
- [Run Kata Containers with Kubernetes](../how-to/run-kata-with-k8s.md)
#### OCI annotations
In order for the Kata Containers runtime (or any virtual machine based OCI compatible
runtime) to be able to understand if it needs to create a full virtual machine or if it
has to create a new container inside an existing pod's virtual machine, CRI-O adds
specific annotations to the OCI configuration file (`config.json`) which is passed to
the OCI compatible runtime.
Before calling its runtime, CRI-O will always add a `io.kubernetes.cri-o.ContainerType`
annotation to the `config.json` configuration file it produces from the Kubelet CRI
request. The `io.kubernetes.cri-o.ContainerType` annotation can either be set to `sandbox`
or `container`. Kata Containers will then use this annotation to decide if it needs to
respectively create a virtual machine or a container inside a virtual machine associated
with a Kubernetes pod:
```Go
containerType, err := ociSpec.ContainerType()
if err != nil {
return err
}
handleFactory(ctx, runtimeConfig)
disableOutput := noNeedForOutput(detach, ociSpec.Process.Terminal)
var process vc.Process
switch containerType {
case vc.PodSandbox:
process, err = createSandbox(ctx, ociSpec, runtimeConfig, containerID, bundlePath, console, disableOutput, systemdCgroup)
if err != nil {
return err
}
case vc.PodContainer:
process, err = createContainer(ctx, ociSpec, containerID, bundlePath, console, disableOutput)
if err != nil {
return err
}
}
```
#### Mixing VM based and namespace based runtimes
> **Note:** Since Kubernetes 1.12, the [`Kubernetes RuntimeClass`](https://kubernetes.io/docs/concepts/containers/runtime-class/)
> has been supported and the user can specify runtime without the non-standardized annotations.
With `RuntimeClass`, users can define Kata Containers as a `RuntimeClass` and then explicitly specify that a pod being created as a Kata Containers pod. For details, please refer to [How to use Kata Containers and Containerd](../../docs/how-to/containerd-kata.md).
# Appendices
## DAX
Kata Containers utilizes the Linux kernel DAX [(Direct Access filesystem)](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/filesystems/dax.rst?h=v5.14)
feature to efficiently map some host-side files into the guest VM space.
In particular, Kata Containers uses the QEMU NVDIMM feature to provide a
memory-mapped virtual device that can be used to DAX map the virtual machine's
root filesystem into the guest memory address space.
Mapping files using DAX provides a number of benefits over more traditional VM
file and device mapping mechanisms:
- Mapping as a direct access devices allows the guest to directly access
the host memory pages (such as via Execute In Place (XIP)), bypassing the guest
page cache. This provides both time and space optimizations.
- Mapping as a direct access device inside the VM allows pages from the
host to be demand loaded using page faults, rather than having to make requests
via a virtualized device (causing expensive VM exits/hypercalls), thus providing
a speed optimization.
- Utilizing `MAP_SHARED` shared memory on the host allows the host to efficiently
share pages.
Kata Containers uses the following steps to set up the DAX mappings:
1. QEMU is configured with an NVDIMM memory device, with a memory file
backend to map in the host-side file into the virtual NVDIMM space.
2. The guest kernel command line mounts this NVDIMM device with the DAX
feature enabled, allowing direct page mapping and access, thus bypassing the
guest page cache.
![DAX](arch-images/DAX.png)
Information on the use of NVDIMM via QEMU is available in the [QEMU source code](http://git.qemu-project.org/?p=qemu.git;a=blob;f=docs/nvdimm.txt;hb=HEAD)

View File

@@ -0,0 +1,477 @@
# Kata Containers Architecture
## Overview
Kata Containers is an open source community working to build a secure
container [runtime](#runtime) with lightweight virtual machines (VM's)
that feel and perform like standard Linux containers, but provide
stronger [workload](#workload) isolation using hardware
[virtualization](#virtualization) technology as a second layer of
defence.
Kata Containers runs on [multiple architectures](../../../src/runtime/README.md#platform-support)
and supports [multiple hypervisors](../../hypervisors.md).
This document is a summary of the Kata Containers architecture.
## Background knowledge
This document assumes the reader understands a number of concepts
related to containers and file systems. The
[background](background.md) document explains these concepts.
## Example command
This document makes use of a particular [example
command](example-command.md) throughout the text to illustrate certain
concepts.
## Virtualization
For details on how Kata Containers maps container concepts to VM
technologies, and how this is realized in the multiple hypervisors and
VMMs that Kata supports see the
[virtualization documentation](../virtualization.md).
## Compatibility
The [Kata Containers runtime](../../../src/runtime) is compatible with
the [OCI](https://github.com/opencontainers)
[runtime specification](https://github.com/opencontainers/runtime-spec)
and therefore works seamlessly with the
[Kubernetes Container Runtime Interface (CRI)](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-node/container-runtime-interface.md)
through the [CRI-O](https://github.com/kubernetes-incubator/cri-o)
and [containerd](https://github.com/containerd/containerd)
implementations.
Kata Containers provides a ["shimv2"](#shim-v2-architecture) compatible runtime.
## Shim v2 architecture
The Kata Containers runtime is shim v2 ("shimv2") compatible. This
section explains what this means.
> **Note:**
>
> For a comparison with the Kata 1.x architecture, see
> [the architectural history document](history.md).
The
[containerd runtime shimv2 architecture](https://github.com/containerd/containerd/tree/main/runtime/v2)
or _shim API_ architecture resolves the issues with the old
architecture by defining a set of shimv2 APIs that a compatible
runtime implementation must supply. Rather than calling the runtime
binary multiple times for each new container, the shimv2 architecture
runs a single instance of the runtime binary (for any number of
containers). This improves performance and resolves the state handling
issue.
The shimv2 API is similar to the
[OCI runtime](https://github.com/opencontainers/runtime-spec)
API in terms of the way the container lifecycle is split into
different verbs. Rather than calling the runtime multiple times, the
container manager creates a socket and passes it to the shimv2
runtime. The socket is a bi-directional communication channel that
uses a gRPC based protocol to allow the container manager to send API
calls to the runtime, which returns the result to the container
manager using the same channel.
The shimv2 architecture allows running several containers per VM to
support container engines that require multiple containers running
inside a pod.
With the new architecture [Kubernetes](kubernetes.md) can
launch both Pod and OCI compatible containers with a single
[runtime](#runtime) shim per Pod, rather than `2N+1` shims. No
standalone `kata-proxy` process is required, even if VSOCK is not
available.
## Workload
The workload is the command the user requested to run in the
container and is specified in the [OCI bundle](background.md#oci-bundle)'s
configuration file.
In our [example](example-command.md), the workload is the `sh(1)` command.
### Workload root filesystem
For details of how the [runtime](#runtime) makes the
[container image](background.md#container-image) chosen by the user available to
the workload process, see the
[Container creation](#container-creation) and [storage](#storage) sections.
Note that the workload is isolated from the [guest VM](#environments) environment by its
surrounding [container environment](#environments). The guest VM
environment the container runs in is also isolated from the _outer_
[host environment](#environments) where the container manager runs.
## System overview
### Environments
The following terminology is used to describe the different
environments (or contexts) various processes run in. It is necessary
to study this table closely to make sense of what follows:
| Type | Name | Virtualized | Containerized | rootfs | Rootfs device type | Mount type | Description |
|-|-|-|-|-|-|-|-|
| Host | Host | no `[1]` | no | Host specific | Host specific | Host specific | The environment provided by a standard, physical non virtualized system. |
| VM root | Guest VM | yes | no | rootfs inside the [guest image](guest-assets.md#guest-image) | Hypervisor specific `[2]` | `ext4` | The first (or top) level VM environment created on a host system. |
| VM container root | Container | yes | yes | rootfs type requested by user ([`ubuntu` in the example](example-command.md)) | `kataShared` | [virtio FS](storage.md#virtio-fs) | The first (or top) level container environment created inside the VM. Based on the [OCI bundle](background.md#oci-bundle). |
**Key:**
- `[1]`: For simplicity, this document assumes the host environment
runs on physical hardware.
- `[2]`: See the [DAX](#dax) section.
> **Notes:**
>
> - The word "root" is used to mean _top level_ here in a similar
> manner to the term [rootfs](background.md#root-filesystem).
>
> - The "first level" prefix used above is important since it implies
> that it is possible to create multi level systems. However, they do
> not form part of a standard Kata Containers environment so will not
> be considered in this document.
The reasons for containerizing the [workload](#workload) inside the VM
are:
- Isolates the workload entirely from the VM environment.
- Provides better isolation between containers in a [pod](kubernetes.md).
- Allows the workload to be managed and monitored through its cgroup
confinement.
### Container creation
The steps below show at a high level how a Kata Containers container is
created using the containerd container manager:
1. The user requests the creation of a container by running a command
like the [example command](example-command.md).
1. The container manager daemon runs a single instance of the Kata
[runtime](#runtime).
1. The Kata runtime loads its [configuration file](#configuration).
1. The container manager calls a set of shimv2 API functions on the runtime.
1. The Kata runtime launches the configured [hypervisor](#hypervisor).
1. The hypervisor creates and starts (_boots_) a VM using the
[guest assets](guest-assets.md#guest-assets):
- The hypervisor [DAX](#dax) shares the
[guest image](guest-assets.md#guest-image)
into the VM to become the VM [rootfs](background.md#root-filesystem) (mounted on a `/dev/pmem*` device),
which is known as the [VM root environment](#environments).
- The hypervisor mounts the [OCI bundle](background.md#oci-bundle), using [virtio FS](storage.md#virtio-fs),
into a container specific directory inside the VM's rootfs.
This container specific directory will become the
[container rootfs](#environments), known as the
[container environment](#environments).
1. The [agent](#agent) is started as part of the VM boot.
1. The runtime calls the agent's `CreateSandbox` API to request the
agent create a container:
1. The agent creates a [container environment](#environments)
in the container specific directory that contains the [container rootfs](#environments).
The container environment hosts the [workload](#workload) in the
[container rootfs](#environments) directory.
1. The agent spawns the workload inside the container environment.
> **Notes:**
>
> - The container environment created by the agent is equivalent to
> a container environment created by the
> [`runc`](https://github.com/opencontainers/runc) OCI runtime;
> Linux cgroups and namespaces are created inside the VM by the
> [guest kernel](guest-assets.md#guest-kernel) to isolate the
> workload from the VM environment the container is created in.
> See the [Environments](#environments) section for an
> explanation of why this is done.
>
> - See the [guest image](guest-assets.md#guest-image) section for
> details of exactly how the agent is started.
1. The container manager returns control of the container to the
user running the `ctr` command.
> **Note:**
>
> At this point, the container is running and:
>
> - The [workload](#workload) process ([`sh(1)` in the example](example-command.md))
> is running in the [container environment](#environments).
> - The user is now able to interact with the workload
> (using the [`ctr` command in the example](example-command.md)).
> - The [agent](#agent), running inside the VM is monitoring the
> [workload](#workload) process.
> - The [runtime](#runtime) is waiting for the agent's `WaitProcess` API
> call to complete.
Further details of these steps are provided in the sections below.
### Container shutdown
There are two possible ways for the container environment to be
terminated:
- When the [workload](#workload) exits.
This is the standard, or _graceful_ shutdown method.
- When the container manager forces the container to be deleted.
#### Workload exit
The [agent](#agent) will detect when the [workload](#workload) process
exits, capture its exit status (see `wait(2)`) and return that value
to the [runtime](#runtime) by specifying it as the response to the
`WaitProcess` agent API call made by the [runtime](#runtime).
The runtime then passes the value back to the container manager by the
`Wait` [shimv2 API](#shim-v2-architecture) call.
Once the workload has fully exited, the VM is no longer needed and the
runtime cleans up the environment (which includes terminating the
[hypervisor](#hypervisor) process).
> **Note:**
>
> When [agent tracing is enabled](../../tracing.md#agent-shutdown-behaviour),
> the shutdown behaviour is different.
#### Container manager requested shutdown
If the container manager requests the container be deleted, the
[runtime](#runtime) will signal the agent by sending it a
`DestroySandbox` [ttRPC API](../../../src/agent/protocols/protos/agent.proto) request.
## Guest assets
The guest assets comprise a guest image and a guest kernel that are
used by the [hypervisor](#hypervisor).
See the [guest assets](guest-assets.md) document for further
information.
## Hypervisor
The [hypervisor](../../hypervisors.md) specified in the
[configuration file](#configuration) creates a VM to host the
[agent](#agent) and the [workload](#workload) inside the
[container environment](#environments).
> **Note:**
>
> The hypervisor process runs inside an environment slightly different
> to the host environment:
>
> - It is run in a different cgroup environment to the host.
> - It is given a separate network namespace from the host.
> - If the [OCI configuration specifies a SELinux label](https://github.com/opencontainers/runtime-spec/blob/main/config.md#linux-process),
> the hypervisor process will run with that label (*not* the workload running inside the hypervisor's VM).
## Agent
The Kata Containers agent ([`kata-agent`](../../../src/agent)), written
in the [Rust programming language](https://www.rust-lang.org), is a
long running process that runs inside the VM. It acts as the
supervisor for managing the containers and the [workload](#workload)
running within those containers. Only a single agent process is run
for each VM created.
### Agent communications protocol
The agent communicates with the other Kata components (primarily the
[runtime](#runtime)) using a
[`ttRPC`](https://github.com/containerd/ttrpc-rust) based
[protocol](../../../src/agent/protocols/protos).
> **Note:**
>
> If you wish to learn more about this protocol, a practical way to do
> so is to experiment with the
> [agent control tool](#agent-control-tool) on a test system.
> This tool is for test and development purposes only and can send
> arbitrary ttRPC agent API commands to the [agent](#agent).
## Runtime
The Kata Containers runtime (the [`containerd-shim-kata-v2`](../../../src/runtime/cmd/containerd-shim-kata-v2)
binary) is a [shimv2](#shim-v2-architecture) compatible runtime.
> **Note:**
>
> The Kata Containers runtime is sometimes referred to as the Kata
> _shim_. Both terms are correct since the `containerd-shim-kata-v2`
> is a container runtime, and that runtime implements the containerd
> shim v2 API.
The runtime makes heavy use of the [`virtcontainers`
package](../../../src/runtime/virtcontainers), which provides a generic,
runtime-specification agnostic, hardware-virtualized containers
library.
The runtime is responsible for starting the [hypervisor](#hypervisor)
and its VM, and communicating with the [agent](#agent) using a
[ttRPC based protocol](#agent-communications-protocol) over a VSOCK
socket that provides a communications link between the VM and the
host.
This protocol allows the runtime to send container management commands
to the agent. The protocol is also used to carry the standard I/O
streams (`stdout`, `stderr`, `stdin`) between the containers and
container managers (such as CRI-O or containerd).
## Utility program
The `kata-runtime` binary is a utility program that provides
administrative commands to manipulate and query a Kata Containers
installation.
> **Note:**
>
> In Kata 1.x, this program also acted as the main
> [runtime](#runtime), but this is no longer required due to the
> improved shimv2 architecture.
### exec command
The `exec` command allows an administrator or developer to enter the
[VM root environment](#environments) which is not accessible by the container
[workload](#workload).
See [the developer guide](../../Developer-Guide.md#connect-to-debug-console) for further details.
### Configuration
See the [configuration file details](../../../src/runtime/README.md#configuration).
The configuration file is also used to enable runtime [debug output](../../Developer-Guide.md#enable-full-debug).
## Process overview
The table below shows an example of the main processes running in the
different [environments](#environments) when a Kata Container is
created with containerd using our [example command](example-command.md):
| Description | Host | VM root environment | VM container environment |
|-|-|-|-|
| Container manager | `containerd` | | |
| Kata Containers | [runtime](#runtime), [`virtiofsd`](storage.md#virtio-fs), [hypervisor](#hypervisor) | [agent](#agent) | |
| User [workload](#workload) | | | [`ubuntu sh`](example-command.md) |
## Networking
See the [networking document](networking.md).
## Storage
See the [storage document](storage.md).
## Kubernetes support
See the [Kubernetes document](kubernetes.md).
#### OCI annotations
In order for the Kata Containers [runtime](#runtime) (or any VM based OCI compatible
runtime) to be able to understand if it needs to create a full VM or if it
has to create a new container inside an existing pod's VM, CRI-O adds
specific annotations to the OCI configuration file (`config.json`) which is passed to
the OCI compatible runtime.
Before calling its runtime, CRI-O will always add a `io.kubernetes.cri-o.ContainerType`
annotation to the `config.json` configuration file it produces from the Kubelet CRI
request. The `io.kubernetes.cri-o.ContainerType` annotation can either be set to `sandbox`
or `container`. Kata Containers will then use this annotation to decide if it needs to
respectively create a virtual machine or a container inside a virtual machine associated
with a Kubernetes pod:
| Annotation value | Kata VM created? | Kata container created? |
|-|-|-|
| `sandbox` | yes | yes (inside new VM) |
| `container`| no | yes (in existing VM) |
#### Mixing VM based and namespace based runtimes
> **Note:** Since Kubernetes 1.12, the [`Kubernetes RuntimeClass`](https://kubernetes.io/docs/concepts/containers/runtime-class/)
> has been supported and the user can specify the runtime without the non-standardized annotations.
With `RuntimeClass`, users can define Kata Containers as a
`RuntimeClass` and then explicitly specify that a pod must be created
as a Kata Containers pod. For details, please refer to [How to use
Kata Containers and containerd](../../../docs/how-to/containerd-kata.md).
## Tracing
The [tracing document](../../tracing.md) provides details on the tracing
architecture.
# Appendices
## DAX
Kata Containers utilizes the Linux kernel DAX
[(Direct Access filesystem)](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/filesystems/dax.rst?h=v5.14)
feature to efficiently map the [guest image](guest-assets.md#guest-image) in the
[host environment](#environments) into the
[guest VM environment](#environments) to become the VM's
[rootfs](background.md#root-filesystem).
If the [configured](#configuration) [hypervisor](#hypervisor) is set
to either QEMU or Cloud Hypervisor, DAX is used with the feature shown
in the table below:
| Hypervisor | Feature used | rootfs device type |
|-|-|-|
| Cloud Hypervisor (CH) | `dax` `FsConfig` configuration option | PMEM (emulated Persistent Memory device) |
| QEMU | NVDIMM memory device with a memory file backend | NVDIMM (emulated Non-Volatile Dual In-line Memory Module device) |
The features in the table above are equivalent in that they provide a memory-mapped
virtual device which is used to DAX map the VM's
[rootfs](background.md#root-filesystem) into the [VM guest](#environments) memory
address space.
The VM is then booted, specifying the `root=` kernel parameter to make
the [guest kernel](guest-assets.md#guest-kernel) use the appropriate emulated device
as its rootfs.
### DAX advantages
Mapping files using [DAX](#dax) provides a number of benefits over
more traditional VM file and device mapping mechanisms:
- Mapping as a direct access device allows the guest to directly
access the host memory pages (such as via Execute In Place (XIP)),
bypassing the [guest kernel](guest-assets.md#guest-kernel)'s page cache. This
zero copy provides both time and space optimizations.
- Mapping as a direct access device inside the VM allows pages from the
host to be demand loaded using page faults, rather than having to make requests
via a virtualized device (causing expensive VM exits/hypercalls), thus providing
a speed optimization.
- Utilizing `mmap(2)`'s `MAP_SHARED` shared memory option on the host
allows the host to efficiently share pages.
![DAX](../arch-images/DAX.png)
For further details of the use of NVDIMM with QEMU, see the [QEMU
project documentation](https://www.qemu.org).
## Agent control tool
The [agent control tool](../../../src/tools/agent-ctl) is a test and
development tool that can be used to learn more about a Kata
Containers system.
## Terminology
See the [project glossary](../../../Glossary.md).

View File

@@ -0,0 +1,81 @@
# Kata Containers architecture background knowledge
The following sections explain some of the background concepts
required to understand the [architecture document](README.md).
## Root filesystem
This document uses the term _rootfs_ to refer to a root filesystem
which is mounted as the top-level directory ("`/`") and often referred
to as _slash_.
It is important to understand this term since the overall system uses
multiple different rootfs's (as explained in the
[Environments](README.md#environments) section).
## Container image
In the [example command](example-command.md) the user has specified the
type of container they wish to run via the container image name:
`ubuntu`. This image name corresponds to a _container image_ that can
be used to create a container with an Ubuntu Linux environment. Hence,
in our [example](example-command.md), the `sh(1)` command will be run
inside a container which has an Ubuntu rootfs.
> **Note:**
>
> The term _container image_ is confusing since the image in question
> is **not** a container: it is simply a set of files (_an image_)
> that can be used to _create_ a container. The term _container
> template_ would be more accurate but the term _container image_ is
> commonly used so this document uses the standard term.
For the purposes of this document, the most important part of the
[example command line](example-command.md) is the container image the
user has requested. Normally, the container manager will _pull_
(download) a container image from a remote site and store a copy
locally. This local container image is used by the container manager
to create an [OCI bundle](#oci-bundle) which will form the environment
the container will run in. After creating the OCI bundle, the
container manager launches a [runtime](README.md#runtime) which will create the
container using the provided OCI bundle.
## OCI bundle
To understand what follows, it is important to know at a high level
how an OCI ([Open Containers Initiative](https://opencontainers.org)) compatible container is created.
An OCI compatible container is created by taking a
[container image](#container-image) and converting the embedded rootfs
into an
[OCI rootfs bundle](https://github.com/opencontainers/runtime-spec/blob/main/bundle.md),
or more simply, an _OCI bundle_.
An OCI bundle is a `tar(1)` archive normally created by a container
manager which is passed to an OCI [runtime](README.md#runtime) which converts
it into a full container rootfs. The bundle contains two assets:
- A container image [rootfs](#root-filesystem)
This is simply a directory of files that will be used to represent
the rootfs for the container.
For the [example command](example-command.md), the directory will
contain the files necessary to create a minimal Ubuntu root
filesystem.
- An [OCI configuration file](https://github.com/opencontainers/runtime-spec/blob/main/config.md)
This is a JSON file called `config.json`.
The container manager will create this file so that:
- The `root.path` value is set to the full path of the specified
container rootfs.
In [the example](example-command.md) this value will be `ubuntu`.
- The `process.args` array specifies the list of commands the user
wishes to run. This is known as the [workload](README.md#workload).
In [the example](example-command.md) the workload is `sh(1)`.

View File

@@ -0,0 +1,30 @@
# Example command
The following containerd command creates a container. It is referred
to throughout the architecture document to help explain various points:
```bash
$ sudo ctr run --runtime "io.containerd.kata.v2" --rm -t "quay.io/libpod/ubuntu:latest" foo sh
```
This command requests that containerd:
- Create a container (`ctr run`).
- Use the Kata [shimv2](README.md#shim-v2-architecture) runtime (`--runtime "io.containerd.kata.v2"`).
- Delete the container when it [exits](README.md#workload-exit) (`--rm`).
- Attach the container to the user's terminal (`-t`).
- Use the Ubuntu Linux [container image](background.md#container-image)
to create the container [rootfs](background.md#root-filesystem) that will become
the [container environment](README.md#environments)
(`quay.io/libpod/ubuntu:latest`).
- Create the container with the name "`foo`".
- Run the `sh(1)` command in the Ubuntu rootfs based container
environment.
The command specified here is referred to as the [workload](README.md#workload).
> **Note:**
>
> For the purposes of this document and to keep explanations
> simpler, we assume the user is running this command in the
> [host environment](README.md#environments).

View File

@@ -0,0 +1,152 @@
# Guest assets
Kata Containers creates a VM in which to run one or more containers.
It does this by launching a [hypervisor](README.md#hypervisor) to
create the VM. The hypervisor needs two assets for this task: a Linux
kernel and a small root filesystem image to boot the VM.
## Guest kernel
The [guest kernel](../../../tools/packaging/kernel)
is passed to the hypervisor and used to boot the VM.
The default kernel provided in Kata Containers is highly optimized for
kernel boot time and minimal memory footprint, providing only those
services required by a container workload. It is based on the latest
Linux LTS (Long Term Support) [kernel](https://www.kernel.org).
## Guest image
The hypervisor uses an image file which provides a minimal root
filesystem used by the guest kernel to boot the VM and host the Kata
Container. Kata Containers supports both initrd and rootfs based
minimal guest images. The [default packages](../../install/) provide both
an image and an initrd, both of which are created using the
[`osbuilder`](../../../tools/osbuilder) tool.
> **Notes:**
>
> - Although initrd and rootfs based images are supported, not all
> [hypervisors](README.md#hypervisor) support both types of image.
>
> - The guest image is *unrelated* to the image used in a container
> workload.
>
> For example, if a user creates a container that runs a shell in a
> BusyBox image, they will run that shell in a BusyBox environment.
> However, the guest image running inside the VM that is used to
> *host* that BusyBox image could be running Clear Linux, Ubuntu,
> Fedora or any other distribution potentially.
>
> The `osbuilder` tool provides
> [configurations for various common Linux distributions](../../../tools/osbuilder/rootfs-builder)
> which can be built into either initrd or rootfs guest images.
>
> - If you are using a [packaged version of Kata
> Containers](../../install), you can see image details by running the
> [`kata-collect-data.sh`](../../../src/runtime/data/kata-collect-data.sh.in)
> script as `root` and looking at the "Image details" section of the
> output.
#### Root filesystem image
The default packaged rootfs image, sometimes referred to as the _mini
O/S_, is a highly optimized container bootstrap system.
If this image type is [configured](README.md#configuration), when the
user runs the [example command](example-command.md):
- The [runtime](README.md#runtime) will launch the configured [hypervisor](README.md#hypervisor).
- The hypervisor will boot the mini-OS image using the [guest kernel](#guest-kernel).
- The kernel will start the init daemon as PID 1 (`systemd`) inside the VM root environment.
- `systemd`, running inside the mini-OS context, will launch the [agent](README.md#agent)
in the root context of the VM.
- The agent will create a new container environment, setting its root
filesystem to that requested by the user (Ubuntu in [the example](example-command.md)).
- The agent will then execute the command (`sh(1)` in [the example](example-command.md))
inside the new container.
The table below summarises the default mini O/S showing the
environments that are created, the services running in those
environments (for all platforms) and the root filesystem used by
each service:
| Process | Environment | systemd service? | rootfs | User accessible | Notes |
|-|-|-|-|-|-|
| systemd | VM root | n/a | [VM guest image](#guest-image)| [debug console][debug-console] | The init daemon, running as PID 1 |
| [Agent](README.md#agent) | VM root | yes | [VM guest image](#guest-image)| [debug console][debug-console] | Runs as a systemd service |
| `chronyd` | VM root | yes | [VM guest image](#guest-image)| [debug console][debug-console] | Used to synchronise the time with the host |
| container workload (`sh(1)` in [the example](example-command.md)) | VM container | no | User specified (Ubuntu in [the example](example-command.md)) | [exec command](README.md#exec-command) | Managed by the agent |
See also the [process overview](README.md#process-overview).
> **Notes:**
>
> - The "User accessible" column shows how an administrator can access
> the environment.
>
> - The container workload is running inside a full container
> environment which itself is running within a VM environment.
>
> - See the [configuration files for the `osbuilder` tool](../../../tools/osbuilder/rootfs-builder)
> for details of the default distribution for platforms other than
> Intel x86_64.
#### Initrd image
The initrd image is a compressed `cpio(1)` archive, created from a
rootfs which is loaded into memory and used as part of the Linux
startup process. During startup, the kernel unpacks it into a special
instance of a `tmpfs` mount that becomes the initial root filesystem.
If this image type is [configured](README.md#configuration), when the user runs
the [example command](example-command.md):
- The [runtime](README.md#runtime) will launch the configured [hypervisor](README.md#hypervisor).
- The hypervisor will boot the mini-OS image using the [guest kernel](#guest-kernel).
- The kernel will start the init daemon as PID 1 (the
[agent](README.md#agent))
inside the VM root environment.
- The [agent](README.md#agent) will create a new container environment, setting its root
filesystem to that requested by the user (`ubuntu` in
[the example](example-command.md)).
- The agent will then execute the command (`sh(1)` in [the example](example-command.md))
inside the new container.
The table below summarises the default mini O/S showing the environments that are created,
the processes running in those environments (for all platforms) and
the root filesystem used by each service:
| Process | Environment | rootfs | User accessible | Notes |
|-|-|-|-|-|
| [Agent](README.md#agent) | VM root | [VM guest image](#guest-image) | [debug console][debug-console] | Runs as the init daemon (PID 1) |
| container workload | VM container | User specified (Ubuntu in this example) | [exec command](README.md#exec-command) | Managed by the agent |
> **Notes:**
>
> - The "User accessible" column shows how an administrator can access
> the environment.
>
> - It is possible to use a standard init daemon such as systemd with
> an initrd image if this is desirable.
See also the [process overview](README.md#process-overview).
#### Image summary
| Image type | Default distro | Init daemon | Reason | Notes |
|-|-|-|-|-|
| [image](#root-filesystem-image) | [Clear Linux](https://clearlinux.org) (for x86_64 systems)| systemd | Minimal and highly optimized | systemd offers flexibility |
| [initrd](#initrd-image) | [Alpine Linux](https://alpinelinux.org) | Kata [agent](README.md#agent) (as no systemd support) | Security hardened and tiny C library | |
See also:
- The [osbuilder](../../../tools/osbuilder) tool
This is used to build all default image types.
- The [versions database](../../../versions.yaml)
The `default-image-name` and `default-initrd-name` options specify
the default distributions for each image type.
[debug-console]: ../../Developer-Guide.md#connect-to-debug-console

View File

@@ -0,0 +1,41 @@
# History
## Kata 1.x architecture
In the old [Kata 1.x architecture](https://github.com/kata-containers/documentation/blob/master/design/architecture.md),
the Kata [runtime](README.md#runtime) was an executable called `kata-runtime`.
The container manager called this executable multiple times when
creating each container. Each time the runtime was called a different
OCI command-line verb was provided. This architecture was simple, but
not well suited to creating VM based containers due to the issue of
handling state between calls. Additionally, the architecture suffered
from performance issues related to continually having to spawn new
instances of the runtime binary, and
[Kata shim](https://github.com/kata-containers/shim) and
[Kata proxy](https://github.com/kata-containers/proxy) processes for systems
that did not provide VSOCK.
## Kata 2.x architecture
See the ["shimv2"](README.md#shim-v2-architecture) section of the
architecture document.
## Architectural comparison
| Kata version | Kata Runtime process calls | Kata shim processes | Kata proxy processes (if no VSOCK) |
|-|-|-|-|
| 1.x | multiple per container | 1 per container connection | 1 |
| 2.x | 1 per VM (hosting any number of containers) | 0 | 0 |
> **Notes:**
>
> - A single VM can host one or more containers.
>
> - The "Kata shim processes" column refers to the old
> [Kata shim](https://github.com/kata-containers/shim) (`kata-shim` binary),
> *not* the new shimv2 runtime instance (`containerd-shim-kata-v2` binary).
The diagram below shows how the original architecture was simplified
with the advent of shimv2.
![Kubernetes integration with shimv2](../arch-images/shimv2.svg)

View File

@@ -0,0 +1,35 @@
# Kubernetes support
[Kubernetes](https://github.com/kubernetes/kubernetes/), or K8s, is a popular open source
container orchestration engine. In Kubernetes, a set of containers sharing resources
such as networking, storage, mount, PID, etc. is called a
[pod](https://kubernetes.io/docs/user-guide/pods/).
A node can have multiple pods, but at a minimum, a node within a Kubernetes cluster
only needs to run a container runtime and a container agent (called a
[Kubelet](https://kubernetes.io/docs/admin/kubelet/)).
Kata Containers represents a Kubelet pod as a VM.
A Kubernetes cluster runs a control plane where a scheduler (typically
running on a dedicated master node) calls into a compute Kubelet. This
Kubelet instance is responsible for managing the lifecycle of pods
within the nodes and eventually relies on a container runtime to
handle execution. The Kubelet architecture decouples lifecycle
management from container execution through a dedicated gRPC based
[Container Runtime Interface (CRI)](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node/container-runtime-interface-v1.md).
In other words, a Kubelet is a CRI client and expects a CRI
implementation to handle the server side of the interface.
[CRI-O](https://github.com/kubernetes-incubator/cri-o) and
[containerd](https://github.com/containerd/containerd/) are CRI
implementations that rely on
[OCI](https://github.com/opencontainers/runtime-spec) compatible
runtimes for managing container instances.
Kata Containers is an officially supported CRI-O and containerd
runtime. Refer to the following guides on how to set up Kata
Containers with Kubernetes:
- [How to use Kata Containers and containerd](../../how-to/containerd-kata.md)
- [Run Kata Containers with Kubernetes](../../how-to/run-kata-with-k8s.md)

View File

@@ -0,0 +1,48 @@
# Networking
See the [networking document](networking.md).
Containers will typically live in their own, possibly shared, networking namespace.
At some point in a container lifecycle, container engines will set up that namespace
to add the container to a network which is isolated from the host network, but
which is shared between containers.
In order to do so, container engines will usually add one end of a virtual
ethernet (`veth`) pair into the container networking namespace. The other end of
the `veth` pair is added to the host networking namespace.
This is a very namespace-centric approach as many hypervisors or VM
Managers (VMMs) such as `virt-manager` cannot handle `veth`
interfaces. Typically, `TAP` interfaces are created for VM
connectivity.
To overcome incompatibility between typical container engines expectations
and virtual machines, Kata Containers networking transparently connects `veth`
interfaces with `TAP` ones using Traffic Control:
![Kata Containers networking](../arch-images/network.png)
With a TC filter in place, a redirection is created between the container network and the
virtual machine. As an example, the CNI may create a device, `eth0`, in the container's network
namespace, which is a VETH device. Kata Containers will create a tap device for the VM, `tap0_kata`,
and set up a TC redirection filter to mirror traffic from `eth0`'s ingress to `tap0_kata`'s egress,
and a second to mirror traffic from `tap0_kata`'s ingress to `eth0`'s egress.
Kata Containers maintains support for MACVTAP, which was an earlier implementation used in Kata. TC-filter
is the default because it allows for simpler configuration, better CNI plugin compatibility, and performance
on par with MACVTAP.
Kata Containers has deprecated support for bridge due to lacking performance relative to TC-filter and MACVTAP.
Kata Containers supports both
[CNM](https://github.com/docker/libnetwork/blob/master/docs/design.md#the-container-network-model)
and [CNI](https://github.com/containernetworking/cni) for networking management.
## Network Hotplug
Kata Containers has developed a set of network sub-commands and APIs to add, list and
remove a guest network endpoint and to manipulate the guest route table.
The following diagram illustrates the Kata Containers network hotplug workflow.
![Network Hotplug](../arch-images/kata-containers-network-hotplug.png)

View File

@@ -0,0 +1,44 @@
# Storage
## virtio SCSI
If a block-based graph driver is [configured](README.md#configuration),
`virtio-scsi` is used to _share_ the workload image (such as
`busybox:latest`) into the container's environment inside the VM.
## virtio FS
If a block-based graph driver is _not_ [configured](README.md#configuration), a
[`virtio-fs`](https://virtio-fs.gitlab.io) (`VIRTIO`) overlay
filesystem mount point is used to _share_ the workload image instead. The
[agent](README.md#agent) uses this mount point as the root filesystem for the
container processes.
For virtio-fs, the [runtime](README.md#runtime) starts one `virtiofsd` daemon
(that runs in the host context) for each VM created.
## Devicemapper
The
[devicemapper `snapshotter`](https://github.com/containerd/containerd/tree/master/snapshots/devmapper)
is a special case. The `snapshotter` uses dedicated block devices
rather than formatted filesystems, and operates at the block level
rather than the file level. This knowledge is used to directly use the
underlying block device instead of the overlay file system for the
container root file system. The block device maps to the top
read-write layer for the overlay. This approach gives much better I/O
performance compared to using `virtio-fs` to share the container file
system.
#### Hot plug and unplug
Kata Containers has the ability to hot plug add and hot plug remove
block devices. This makes it possible to use block devices for
containers started after the VM has been launched.
Users can check to see if the container uses the `devicemapper` block
device as its rootfs by calling `mount(8)` within the container. If
the `devicemapper` block device is used, the root filesystem (`/`)
will be mounted from `/dev/vda`. Users can disable direct mounting of
the underlying block device through the runtime
[configuration](README.md#configuration).

View File

@@ -1825,12 +1825,8 @@ components:
desc: ""
- value: grpc.StartContainerRequest
desc: ""
- value: grpc.StartTracingRequest
desc: ""
- value: grpc.StatsContainerRequest
desc: ""
- value: grpc.StopTracingRequest
desc: ""
- value: grpc.TtyWinResizeRequest
desc: ""
- value: grpc.UpdateContainerRequest

View File

@@ -1,21 +1,21 @@
# Kata 2.0 Metrics Design
Kata implement CRI's API and support [`ContainerStats`](https://github.com/kubernetes/kubernetes/blob/release-1.18/staging/src/k8s.io/cri-api/pkg/apis/runtime/v1alpha2/api.proto#L101) and [`ListContainerStats`](https://github.com/kubernetes/kubernetes/blob/release-1.18/staging/src/k8s.io/cri-api/pkg/apis/runtime/v1alpha2/api.proto#L103) interfaces to expose containers metrics. User can use these interface to get basic metrics about container.
Kata implements CRI's API and supports [`ContainerStats`](https://github.com/kubernetes/kubernetes/blob/release-1.18/staging/src/k8s.io/cri-api/pkg/apis/runtime/v1alpha2/api.proto#L101) and [`ListContainerStats`](https://github.com/kubernetes/kubernetes/blob/release-1.18/staging/src/k8s.io/cri-api/pkg/apis/runtime/v1alpha2/api.proto#L103) interfaces to expose containers metrics. User can use these interfaces to get basic metrics about containers.
But unlike `runc`, Kata is a VM-based runtime and has a different architecture.
Unlike `runc`, Kata is a VM-based runtime and has a different architecture.
## Limitations of Kata 1.x and the target of Kata 2.0
## Limitations of Kata 1.x and target of Kata 2.0
Kata 1.x has a number of limitations related to observability that may be obstacles to running Kata Containers at scale.
In Kata 2.0, the following components will be able to provide more details about the system.
In Kata 2.0, the following components will be able to provide more details about the system:
- containerd shim v2 (effectively `kata-runtime`)
- Hypervisor statistics
- Agent process
- Guest OS statistics
> **Note**: In Kata 1.x, the main user-facing component was the runtime (`kata-runtime`). From 1.5, Kata then introduced the Kata containerd shim v2 (`containerd-shim-kata-v2`) which is essentially a modified runtime that is loaded by containerd to simplify and improve the way VM-based containers are created and managed.
> **Note**: In Kata 1.x, the main user-facing component was the runtime (`kata-runtime`). From 1.5, Kata introduced the Kata containerd shim v2 (`containerd-shim-kata-v2`) which is essentially a modified runtime that is loaded by containerd to simplify and improve the way VM-based containers are created and managed.
>
> For Kata 2.0, the main component is the Kata containerd shim v2, although the deprecated `kata-runtime` binary will be maintained for a period of time.
>
@@ -25,14 +25,15 @@ In Kata 2.0, the following components will be able to provide more details about
Kata 2.0 metrics strongly depend on [Prometheus](https://prometheus.io/), a graduated project from CNCF.
Kata Containers 2.0 introduces a new Kata component called `kata-monitor` which is used to monitor the other Kata components on the host. It's the monitor interface with Kata runtime, and we can do something like these:
Kata Containers 2.0 introduces a new Kata component called `kata-monitor` which is used to monitor the Kata components on the host. It's shipped with the Kata runtime to provide an interface to:
- Get metrics
- Get events
In this document we will cover metrics only. And until now it only supports metrics function.
At present, `kata-monitor` supports retrieval of metrics only: this is what will be covered in this document.
This is the architecture overview metrics in Kata Containers 2.0.
This is the architecture overview of metrics in Kata Containers 2.0:
![Kata Containers 2.0 metrics](arch-images/kata-2-metrics.png)
@@ -45,38 +46,38 @@ For a quick evaluation, you can check out [this how to](../how-to/how-to-set-pro
### Kata monitor
`kata-monitor` is a management agent on one node, where many Kata containers are running. `kata-monitor`'s work include:
The `kata-monitor` management agent should be started on each node where the Kata containers runtime is installed. `kata-monitor` will:
> **Note**: node is a single host system or a node in K8s clusters.
> **Note**: a *node* running Kata containers will be either a single host system or a worker node belonging to a K8s cluster capable of running Kata pods.
- Aggregate sandbox metrics running on this node, and add `sandbox_id` label
- As a Prometheus target, all metrics from Kata shim on this node will be collected by Prometheus indirectly. This can easy the targets count in Prometheus, and also need not to expose shim's metrics by `ip:port`
- Aggregate sandbox metrics running on the node, adding the `sandbox_id` label to them.
- Expose a new Prometheus target, allowing all node metrics coming from the Kata shim to be collected by Prometheus indirectly. This simplifies the targets count in Prometheus and avoids exposing shim's metrics by `ip:port`.
Only one `kata-monitor` process are running on one node.
Only one `kata-monitor` process runs on each node.
`kata-monitor` is using a different communication channel other than that `conatinerd` communicating with Kata shim, and Kata shim listen on a new socket address for communicating with `kata-monitor`.
`kata-monitor` uses a different communication channel than the one used by the container engine (`containerd`/`CRI-O`) to communicate with the Kata shim. The Kata shim exposes a dedicated socket address reserved to `kata-monitor`.
The way `kata-monitor` get shim's metrics socket file(`monitor_address`) like that `containerd` get shim address. The socket is an abstract socket and saved as file `abstract` with the same directory of `address` for `containerd`.
The shim's metrics socket file is created under the virtcontainers sandboxes directory, i.e. `vc/sbs/${PODID}/shim-monitor.sock`.
> **Note**: If there is no Prometheus server is configured, i.e., there is no scrape operations, `kata-monitor` will do nothing initiative.
> **Note**: If there is no Prometheus server configured, i.e., there are no scrape operations, `kata-monitor` will not collect any metrics.
### Kata runtime
Runtime is responsible for:
Kata runtime is responsible for:
- Gather metrics about shim process
- Gather metrics about hypervisor process
- Gather metrics about running sandbox
- Get metrics from Kata agent(through `ttrpc`)
- Get metrics from Kata agent (through `ttrpc`)
### Kata agent
Agent is responsible for:
Kata agent is responsible for:
- Gather agent process metrics
- Gather guest OS metrics
And in Kata 2.0, agent will add a new interface:
In Kata 2.0, the agent adds a new interface:
```protobuf
rpc GetMetrics(GetMetricsRequest) returns (Metrics);
@@ -93,33 +94,49 @@ The `metrics` field is Prometheus encoded content. This can avoid defining a fix
### Performance and overhead
Metrics should not become the bottleneck of system, downgrade the performance, and run with minimal overhead.
Metrics should not become a bottleneck for the system or downgrade the performance: they should run with minimal overhead.
Requirements:
* Metrics **MUST** be quick to collect
* Metrics **MUST** be small.
* Metrics **MUST** be small
* Metrics **MUST** be generated only if there are subscribers to the Kata metrics service
* Metrics **MUST** be stateless
In Kata 2.0, metrics are collected mainly from `/proc` filesystem, and consumed by Prometheus, based on a pull mode, that is mean if there is no Prometheus collector is running, so there will be zero overhead if nobody cares the metrics.
In Kata 2.0, metrics are collected only when needed (pull mode), mainly from the `/proc` filesystem, and consumed by Prometheus. This means that if the Prometheus collector is not running (so no one cares about the metrics) the overhead will be zero.
Metrics service also doesn't hold any metrics in memory.
The metrics service also doesn't hold any metrics in memory.
#### Metrics size ####
|\*|No Sandbox | 1 Sandbox | 2 Sandboxes |
|---|---|---|---|
|Metrics count| 39 | 106 | 173 |
|Metrics size(bytes)| 9K | 144K | 283K |
|Metrics size(`gzipped`, bytes)| 2K | 10K | 17K |
|Metrics size (bytes)| 9K | 144K | 283K |
|Metrics size (`gzipped`, bytes)| 2K | 10K | 17K |
*Metrics size*: Response size of one Prometheus scrape request.
*Metrics size*: response size of one Prometheus scrape request.
It's easy to estimated that if there are 10 sandboxes running in the host, the size of one metrics fetch request issued by Prometheus will be about to 9 + (144 - 9) * 10 = 1.35M (not `gzipped`) or 2 + (10 - 2) * 10 = 82K (`gzipped`). Of course Prometheus support `gzip` compression, that can reduce the response size of every request.
It's easy to estimate the size of one metrics fetch request issued by Prometheus.
The formula to calculate the expected size when no gzip compression is in place is:
9 + (144 - 9) * `number of kata sandboxes`
Prometheus supports `gzip compression`. When enabled, the response size of each request will be smaller:
2 + (10 - 2) * `number of kata sandboxes`
**Example**
We have 10 sandboxes running on a node. The expected size of one metrics fetch request issued by Prometheus against the kata-monitor agent running on that node will be:
9 + (144 - 9) * 10 = **1.35M**
If `gzip compression` is enabled:
2 + (10 - 2) * 10 = **82K**
#### Metrics delay ####
And here is some test data:
- End-to-end (from Prometheus server to `kata-monitor` and `kata-monitor` write response back): 20ms(avg)
- Agent(RPC all from shim to agent): 3ms(avg)
- End-to-end (from Prometheus server to `kata-monitor` and `kata-monitor` write response back): **20ms**(avg)
- Agent (RPC all from shim to agent): **3ms**(avg)
Test infrastructure:
@@ -128,13 +145,13 @@ Test infrastructure:
**Scrape interval**
Prometheus default `scrape_interval` is 1 minute, and usually it is set to 15s. Small `scrape_interval` will cause more overhead, so user should set it on monitor demand.
Prometheus default `scrape_interval` is 1 minute, but it is usually set to 15 seconds. A smaller `scrape_interval` causes more overhead, so users should set it depending on their monitoring needs.
## Metrics list
Here listed is all supported metrics by Kata 2.0. Some metrics is dependent on guest kernels in the VM, so there may be some different by your environment.
Here are listed all the metrics supported by Kata 2.0. Some metrics are dependent on the VM guest kernel, so the available ones may differ based on the environment.
Metrics is categorized by component where metrics are collected from and for.
Metrics are categorized by the component from/for which the metrics are collected.
* [Metric types](#metric-types)
* [Kata agent metrics](#kata-agent-metrics)
@@ -145,15 +162,15 @@ Metrics is categorized by component where metrics are collected from and for.
* [Kata containerd shim v2 metrics](#kata-containerd-shim-v2-metrics)
> **Note**:
> * Labels here are not include `instance` and `job` labels that added by Prometheus.
> * Labels here do not include the `instance` and `job` labels added by Prometheus.
> * Notes about metrics unit
> * `Kibibytes`, abbreviated `KiB`. 1 `KiB` equals 1024 B.
> * For some metrics (like network devices statistics from file `/proc/net/dev`), unit is depend on label( for example `recv_bytes` and `recv_packets` are having different units).
> * Most of these metrics is collected from `/proc` filesystem, so the unit of metrics are keeping the same unit as `/proc`. See the `proc(5)` manual page for further details.
> * For some metrics (like network device statistics from file `/proc/net/dev`), the unit depends on the label (for example, `recv_bytes` and `recv_packets` have different units).
> * Most of these metrics are collected from the `/proc` filesystem, so the unit of each metric matches the unit of the relevant `/proc` entry. See the `proc(5)` manual page for further details.
### Metric types
Prometheus offer four core metric types.
Prometheus offers four core metric types.
- Counter: A counter is a cumulative metric that represents a single monotonically increasing counter whose value can only increase.
@@ -288,7 +305,7 @@ Metrics about Kata containerd shim v2 process.
| Metric name | Type | Units | Labels | Introduced in Kata version |
|---|---|---|---|---|
| `kata_shim_agent_rpc_durations_histogram_milliseconds`: <br> RPC latency distributions. | `HISTOGRAM` | `milliseconds` | <ul><li>`action` (RPC actions of Kata agent)<ul><li>`grpc.CheckRequest`</li><li>`grpc.CloseStdinRequest`</li><li>`grpc.CopyFileRequest`</li><li>`grpc.CreateContainerRequest`</li><li>`grpc.CreateSandboxRequest`</li><li>`grpc.DestroySandboxRequest`</li><li>`grpc.ExecProcessRequest`</li><li>`grpc.GetMetricsRequest`</li><li>`grpc.GuestDetailsRequest`</li><li>`grpc.ListInterfacesRequest`</li><li>`grpc.ListProcessesRequest`</li><li>`grpc.ListRoutesRequest`</li><li>`grpc.MemHotplugByProbeRequest`</li><li>`grpc.OnlineCPUMemRequest`</li><li>`grpc.PauseContainerRequest`</li><li>`grpc.RemoveContainerRequest`</li><li>`grpc.ReseedRandomDevRequest`</li><li>`grpc.ResumeContainerRequest`</li><li>`grpc.SetGuestDateTimeRequest`</li><li>`grpc.SignalProcessRequest`</li><li>`grpc.StartContainerRequest`</li><li>`grpc.StartTracingRequest`</li><li>`grpc.StatsContainerRequest`</li><li>`grpc.StopTracingRequest`</li><li>`grpc.TtyWinResizeRequest`</li><li>`grpc.UpdateContainerRequest`</li><li>`grpc.UpdateInterfaceRequest`</li><li>`grpc.UpdateRoutesRequest`</li><li>`grpc.WaitProcessRequest`</li><li>`grpc.WriteStreamRequest`</li></ul></li><li>`sandbox_id`</li></ul> | 2.0.0 |
| `kata_shim_agent_rpc_durations_histogram_milliseconds`: <br> RPC latency distributions. | `HISTOGRAM` | `milliseconds` | <ul><li>`action` (RPC actions of Kata agent)<ul><li>`grpc.CheckRequest`</li><li>`grpc.CloseStdinRequest`</li><li>`grpc.CopyFileRequest`</li><li>`grpc.CreateContainerRequest`</li><li>`grpc.CreateSandboxRequest`</li><li>`grpc.DestroySandboxRequest`</li><li>`grpc.ExecProcessRequest`</li><li>`grpc.GetMetricsRequest`</li><li>`grpc.GuestDetailsRequest`</li><li>`grpc.ListInterfacesRequest`</li><li>`grpc.ListProcessesRequest`</li><li>`grpc.ListRoutesRequest`</li><li>`grpc.MemHotplugByProbeRequest`</li><li>`grpc.OnlineCPUMemRequest`</li><li>`grpc.PauseContainerRequest`</li><li>`grpc.RemoveContainerRequest`</li><li>`grpc.ReseedRandomDevRequest`</li><li>`grpc.ResumeContainerRequest`</li><li>`grpc.SetGuestDateTimeRequest`</li><li>`grpc.SignalProcessRequest`</li><li>`grpc.StartContainerRequest`</li><li>`grpc.StatsContainerRequest`</li><li>`grpc.TtyWinResizeRequest`</li><li>`grpc.UpdateContainerRequest`</li><li>`grpc.UpdateInterfaceRequest`</li><li>`grpc.UpdateRoutesRequest`</li><li>`grpc.WaitProcessRequest`</li><li>`grpc.WriteStreamRequest`</li></ul></li><li>`sandbox_id`</li></ul> | 2.0.0 |
| `kata_shim_fds`: <br> Kata containerd shim v2 open FDs. | `GAUGE` | | <ul><li>`sandbox_id`</li></ul> | 2.0.0 |
| `kata_shim_go_gc_duration_seconds`: <br> A summary of the pause duration of garbage collection cycles. | `SUMMARY` | `seconds` | <ul><li>`sandbox_id`</li></ul> | 2.0.0 |
| `kata_shim_go_goroutines`: <br> Number of goroutines that currently exist. | `GAUGE` | | <ul><li>`sandbox_id`</li></ul> | 2.0.0 |

View File

@@ -209,5 +209,5 @@ network accessible to the collector.
- The trace collection proposals are still being considered.
[kata-1x-tracing]: https://github.com/kata-containers/agent/blob/master/TRACING.md
[trace-forwarder]: /src/trace-forwarder
[trace-forwarder]: /src/tools/trace-forwarder
[tracing-doc-pr]: https://github.com/kata-containers/kata-containers/pull/1937

View File

@@ -41,7 +41,7 @@ Kata Containers with QEMU has complete compatibility with Kubernetes.
Depending on the host architecture, Kata Containers supports various machine types,
for example `pc` and `q35` on x86 systems, `virt` on ARM systems and `pseries` on IBM Power systems. The default Kata Containers
machine type is `pc`. The machine type and its [`Machine accelerators`](#machine-accelerators) can
be changed by editing the runtime [`configuration`](./architecture.md/#configuration) file.
be changed by editing the runtime [`configuration`](architecture/README.md#configuration) file.
Devices and features used:
- virtio VSOCK or virtio serial

View File

@@ -36,3 +36,4 @@
- [How to use hotplug memory on arm64 in Kata Containers](how-to-hotplug-memory-arm64.md)
- [How to setup swap devices in guest kernel](how-to-setup-swap-devices-in-guest-kernel.md)
- [How to run rootless vmm](how-to-run-rootless-vmm.md)
- [How to run Docker with Kata Containers](how-to-run-docker-with-kata.md)

View File

@@ -0,0 +1,141 @@
# How to run Docker in Docker with Kata Containers
This document describes the why and how behind running Docker in a Kata Container.
> **Note:** While in other environments this might be described as "Docker in Docker", the new architecture of Kata 2.x means [Docker can no longer be used to create containers using a Kata Containers runtime](https://github.com/kata-containers/kata-containers/issues/722).
## Requirements
- A working Kata Containers installation
## Install and configure Kata Containers
Follow the [Kata Containers installation guide](../install/README.md) to install Kata Containers on your Kubernetes cluster.
## Background
Docker in Docker ("DinD") is the colloquial name for the ability to run `docker` from inside a container.
You can learn more about Docker-in-Docker at the following links:
- [The original announcement of DinD](https://www.docker.com/blog/docker-can-now-run-within-docker/)
- [`docker` image Docker Hub page](https://hub.docker.com/_/docker/) (this page lists the `-dind` releases)
While normally DinD refers to running `docker` from inside a Docker container,
Kata Containers 2.x allows only supported runtimes (such as [`containerd`](../install/container-manager/containerd/containerd-install.md)).
Running `docker` in a Kata Container implies creating Docker containers from inside a container managed by `containerd` (or another supported container manager), as illustrated below:
```
container manager -> Kata Containers shim -> Docker Daemon -> Docker container
(containerd) (containerd-shim-kata-v2) (dockerd) (busybox sh)
```
[OverlayFS][OverlayFS] is the preferred storage driver for most container runtimes on Linux ([including Docker](https://docs.docker.com/storage/storagedriver/select-storage-driver)).
> **Note:** While in the past Kata Containers did not contain the [`overlay` kernel module (aka OverlayFS)][OverlayFS], the kernel modules have been included since the [Kata Containers v2.0.0 release][v2.0.0].
[OverlayFS]: https://www.kernel.org/doc/html/latest/filesystems/overlayfs.html
[v2.0.0]: https://github.com/kata-containers/kata-containers/releases/tag/2.0.0
[kata-2.x-supported-runtimes]: https://github.com/kata-containers/kata-containers/blob/5737b36a3513f4da11a9dc7301b0c97ea22a51cf/docs/install/container-manager/containerd/containerd-install.md
## Why Docker in Kata Containers 2.x requires special measures
Running Docker containers in Kata Containers requires care because `VOLUME`s specified in `Dockerfile`s run by Kata Containers are given the `kataShared` mount type by default, which applies to the root directory `/`:
```console
/ # mount
kataShared on / type virtiofs (rw,relatime,dax)
```
`kataShared` mount types are powered by [`virtio-fs`][virtio-fs], a marked improvement over `virtio-9p`, thanks to [PR #1016](https://github.com/kata-containers/runtime/pull/1016). While `virtio-fs` is normally an excellent choice, in the case of DinD workloads `virtio-fs` causes an issue -- [it *cannot* be used as an "upper layer" of `overlayfs` without a custom patch](http://lists.katacontainers.io/pipermail/kata-dev/2020-January/001216.html).
As `/var/lib/docker` is a `VOLUME` specified by DinD (i.e. the `docker` images tagged `*-dind`/`*-dind-rootless`), `docker` will fail to start (or even worse, silently pick a worse storage driver like `vfs`) when started in a Kata Container. Special measures must be taken when running DinD-powered workloads in Kata Containers.
## Workarounds/Solutions
Thanks to various community contributions (see [issue references below](#references)) the following options, with various trade-offs, have been uncovered:
### Use a memory backed volume
For small workloads (small container images, without much generated filesystem load), a memory-backed volume is sufficient. Kubernetes supports a variant of [the `EmptyDir` volume][k8s-emptydir], which allows for memdisk-backed storage -- [the `medium: Memory` option][k8s-memory-volume-type]. An example of a `Pod` using such a setup [was contributed](https://github.com/kata-containers/runtime/issues/1429#issuecomment-477385283), and is reproduced below:
```yaml
apiVersion: v1
kind: Pod
metadata:
name: dind
spec:
runtimeClassName: kata
containers:
- name: dind
securityContext:
privileged: true
image: docker:20.10-dind
args: ["--storage-driver=overlay2"]
resources:
limits:
memory: "3G"
volumeMounts:
- mountPath: /var/run/
name: dockersock
- mountPath: /var/lib/docker
name: docker
volumes:
- name: dockersock
emptyDir: {}
- name: docker
emptyDir:
medium: Memory
```
Inside the container you can view the mount:
```console
/ # mount | grep lib\/docker
tmpfs on /var/lib/docker type tmpfs (rw,relatime)
```
As is mentioned in the comment encapsulating this code, using volatile memory for container storage backing is risky and could possibly be wasteful on machines that do not have a lot of RAM.
### Use a loop mounted disk
Using a loop mounted disk that is provisioned shortly before the container workload starts is another approach that yields good performance.
Contributors provided [an example in issue #1888](https://github.com/kata-containers/runtime/issues/1888#issuecomment-739057384), which is reproduced in part below:
```yaml
spec:
containers:
- name: docker
image: docker:20.10-dind
command: ["sh", "-c"]
args:
- if [[ $(df -PT /var/lib/docker | awk 'NR==2 {print $2}') == virtiofs ]]; then
apk add e2fsprogs &&
truncate -s 20G /tmp/disk.img &&
mkfs.ext4 /tmp/disk.img &&
mount /tmp/disk.img /var/lib/docker; fi &&
dockerd-entrypoint.sh;
securityContext:
privileged: true
```
Note that loop mounted disks are often sparse, which means they *do not* take up the full amount of space that has been provisioned. This solution seems to produce the best performance and flexibility, at the expense of increased complexity and additional required setup.
### Build a custom kernel
It's possible to [modify the kernel](https://github.com/kata-containers/runtime/issues/1888#issuecomment-616872558) (in addition to applying the earlier mentioned mailing list patch) to support using `virtio-fs` as an upper. Note that if you modify your kernel and use `virtio-fs` you may require [additional changes](https://github.com/kata-containers/runtime/issues/1888#issuecomment-739057384) for decent performance and to address other issues.
> **NOTE:** A future kernel release may rectify the usability and performance issues of using `virtio-fs` as an OverlayFS upper layer.
## References
The solutions proposed in this document are an amalgamation of thoughtful contributions from the Kata Containers community.
Find links to issues & related discussion and the fruits therein below:
- [How to run Docker in Docker with Kata Containers (#2474)](https://github.com/kata-containers/kata-containers/issues/2474)
- [Does Kata-container support AUFS/OverlayFS? (#2493)](https://github.com/kata-containers/runtime/issues/2493)
- [Unable to start docker in docker with virtio-fs (#1888)](https://github.com/kata-containers/runtime/issues/1888)
- [Not using native diff for overlay2 (#1429)](https://github.com/kata-containers/runtime/issues/1429)

View File

@@ -22,7 +22,7 @@ An equivalent shim implementation for CRI-O is planned.
### CRI-O
For CRI-O installation instructions, refer to the [CRI-O Tutorial](https://github.com/cri-o/cri-o/blob/main/tutorial.md) page.
The following sections show how to set up the CRI-O configuration file (default path: `/etc/crio/crio.conf`) for Kata.
The following sections show how to set up the CRI-O snippet configuration file (default path: `/etc/crio/crio.conf`) for Kata.
Unless otherwise stated, all the following settings are specific to the `crio.runtime` table:
```toml
@@ -40,74 +40,16 @@ A comprehensive documentation of the configuration file can be found [here](http
#### Kubernetes Runtime Class (CRI-O v1.12+)
The [Kubernetes Runtime Class](https://kubernetes.io/docs/concepts/containers/runtime-class/)
is the preferred way of specifying the container runtime configuration to run a Pod's containers.
To use this feature, Kata must added as a runtime handler with:
To use this feature, Kata must be added as a runtime handler. This can be done by
dropping a `50-kata` snippet file into `/etc/crio/crio.conf.d`, with the
content shown below:
```toml
[crio.runtime.runtimes.kata-runtime]
runtime_path = "/usr/bin/kata-runtime"
runtime_type = "oci"
```
You can also add multiple entries to specify alternatives hypervisors, e.g.:
```toml
[crio.runtime.runtimes.kata-qemu]
runtime_path = "/usr/bin/kata-runtime"
runtime_type = "oci"
[crio.runtime.runtimes.kata-fc]
runtime_path = "/usr/bin/kata-runtime"
runtime_type = "oci"
```
#### Untrusted annotation (until CRI-O v1.12)
The untrusted annotation is used to specify a runtime for __untrusted__ workloads, i.e.
a runtime to be used when the workload cannot be trusted and a higher level of security
is required. An additional flag can be used to let CRI-O know if a workload
should be considered _trusted_ or _untrusted_ by default.
For further details, see the documentation
[here](../design/architecture.md#mixing-vm-based-and-namespace-based-runtimes).
```toml
# runtime is the OCI compatible runtime used for trusted container workloads.
# This is a mandatory setting as this runtime will be the default one
# and will also be used for untrusted container workloads if
# runtime_untrusted_workload is not set.
runtime = "/usr/bin/runc"
# runtime_untrusted_workload is the OCI compatible runtime used for untrusted
# container workloads. This is an optional setting, except if
# default_container_trust is set to "untrusted".
runtime_untrusted_workload = "/usr/bin/kata-runtime"
# default_workload_trust is the default level of trust crio puts in container
# workloads. It can either be "trusted" or "untrusted", and the default
# is "trusted".
# Containers can be run through different container runtimes, depending on
# the trust hints we receive from kubelet:
# - If kubelet tags a container workload as untrusted, crio will try first to
# run it through the untrusted container workload runtime. If it is not set,
# crio will use the trusted runtime.
# - If kubelet does not provide any information about the container workload trust
# level, the selected runtime will depend on the default_container_trust setting.
# If it is set to "untrusted", then all containers except for the host privileged
# ones, will be run by the runtime_untrusted_workload runtime. Host privileged
# containers are by definition trusted and will always use the trusted container
# runtime. If default_container_trust is set to "trusted", crio will use the trusted
# container runtime for all containers.
default_workload_trust = "untrusted"
```
#### Network namespace management
To enable networking for the workloads run by Kata, CRI-O needs to be configured to
manage network namespaces, by setting the following key to `true`.
In CRI-O v1.16:
```toml
manage_network_ns_lifecycle = true
```
In CRI-O v1.17+:
```toml
manage_ns_lifecycle = true
[crio.runtime.runtimes.kata]
runtime_path = "/usr/bin/containerd-shim-kata-v2"
runtime_type = "vm"
runtime_root = "/run/vc"
privileged_without_host_devices = true
```

View File

@@ -12,16 +12,26 @@ Containers.
Packaged installation methods use your distribution's native package format (such as RPM or DEB).
*Note:* We encourage installation methods that provides automatic updates, it ensures security updates and bug fixes are
easily applied.
> **Note:** We encourage installation methods that provide automatic updates, as they ensure security updates and bug fixes are
> easily applied.
| Installation method | Description | Automatic updates | Use case |
|------------------------------------------------------|---------------------------------------------------------------------|-------------------|----------------------------------------------------------|
| [Using official distro packages](#official-packages) | Kata packages provided by Linux distributions official repositories | yes | Recommended for most users. |
| [Using snap](#snap-installation) | Easy to install | yes | Good alternative to official distro packages. |
| [Automatic](#automatic-installation) | Run a single command to install a full system | **No!** | For those wanting the latest release quickly. |
| [Manual](#manual-installation) | Follow a guide step-by-step to install a working system | **No!** | For those who want the latest release with more control. |
| [Build from source](#build-from-source-installation) | Build the software components manually | **No!** | Power users and developers only. |
| Installation method | Description | Automatic updates | Use case |
|------------------------------------------------------|----------------------------------------------------------------------------------------------|-------------------|-----------------------------------------------------------------------------------------------|
| [Using kata-deploy](#kata-deploy-installation) | The preferred way to deploy the Kata Containers distributed binaries on a Kubernetes cluster | **No!** | Best way to give it a try on kata-containers on an already up and running Kubernetes cluster. |
| [Using official distro packages](#official-packages) | Kata packages provided by Linux distributions official repositories | yes | Recommended for most users. |
| [Using snap](#snap-installation) | Easy to install | yes | Good alternative to official distro packages. |
| [Automatic](#automatic-installation) | Run a single command to install a full system | **No!** | For those wanting the latest release quickly. |
| [Manual](#manual-installation) | Follow a guide step-by-step to install a working system | **No!** | For those who want the latest release with more control. |
| [Build from source](#build-from-source-installation) | Build the software components manually | **No!** | Power users and developers only. |
### Kata Deploy Installation
Kata Deploy provides a Dockerfile, which contains all of the binaries and
artifacts required to run Kata Containers, as well as reference DaemonSets,
which can be utilized to install Kata Containers on a running Kubernetes
cluster.
[Use Kata Deploy](/tools/packaging/kata-deploy/README.md) to install Kata Containers on a Kubernetes Cluster.
### Official packages
@@ -48,9 +58,9 @@ Follow the [containerd installation guide](container-manager/containerd/containe
## Build from source installation
*Note:* Power users who decide to build from sources should be aware of the
implications of using an unpackaged system which will not be automatically
updated as new [releases](../Stable-Branch-Strategy.md) are made available.
> **Note:** Power users who decide to build from sources should be aware of the
> implications of using an unpackaged system which will not be automatically
> updated as new [releases](../Stable-Branch-Strategy.md) are made available.
[Building from sources](../Developer-Guide.md#initial-setup) allows power users
who are comfortable building software from source to use the latest component

View File

@@ -209,5 +209,5 @@ to allow you to access the VM environment.
[opentelemetry]: https://opentelemetry.io
[osbuilder]: https://github.com/kata-containers/kata-containers/blob/main/tools/osbuilder
[setup-debug-console]: https://github.com/kata-containers/kata-containers/blob/main/docs/Developer-Guide.md#set-up-a-debug-console
[trace-forwarder]: /src/trace-forwarder
[trace-forwarder]: /src/tools/trace-forwarder
[vsock]: https://wiki.qemu.org/Features/VirtioVsock

View File

@@ -235,7 +235,7 @@ then [Kata-deploy](https://github.com/kata-containers/kata-containers/tree/main/
is used to install Kata. This will make sure that the correct `agent` version
is installed into the rootfs in the steps below.
The following instructions use Debian as the root filesystem with systemd as
The following instructions use Ubuntu as the root filesystem with systemd as
the init and will add in the `kmod` binary, which is not a standard binary in
a Kata rootfs image. The `kmod` binary is necessary to load the Intel® QAT
kernel modules when the virtual machine rootfs boots.
@@ -257,7 +257,7 @@ $ cd $GOPATH
$ export AGENT_VERSION=$(kata-runtime version | head -n 1 | grep -o "[0-9.]\+")
$ cd ${OSBUILDER}/rootfs-builder
$ sudo rm -rf ${ROOTFS_DIR}
$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true SECCOMP=no ./rootfs.sh debian'
$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true SECCOMP=no ./rootfs.sh ubuntu'
```
### Compile Intel® QAT drivers for Kata Containers kernel and add to Kata Containers rootfs

View File

@@ -1,4 +1,4 @@
# Setup to run SPDK vhost-user devices with Kata Containers and Docker*
# Setup to run SPDK vhost-user devices with Kata Containers
> **Note:** This guide only applies to QEMU, since the vhost-user storage
> device is only available for QEMU now. The enablement work on other
@@ -104,7 +104,7 @@ devices:
- `vhost-user-blk`
- `vhost-user-scsi`
- `vhost-user-nvme`
- `vhost-user-nvme` (deprecated from SPDK 21.07 release)
For more information, visit [SPDK](https://spdk.io) and [SPDK vhost-user target](https://spdk.io/doc/vhost.html).
@@ -222,26 +222,43 @@ minor `0` should be created for it, in order to be recognized by Kata runtime:
$ sudo mknod /var/run/kata-containers/vhost-user/block/devices/vhostblk0 b 241 0
```
> **Note:** The enablement of vhost-user block device in Kata containers
> is supported by Kata Containers `1.11.0-alpha1` or newer.
> Make sure you have updated your Kata containers before evaluation.
## Launch a Kata container with SPDK vhost-user block device
To use `vhost-user-blk` device, use Docker to pass a host `vhost-user-blk`
device to the container. In docker, `--device=HOST-DIR:CONTAINER-DIR` is used
To use `vhost-user-blk` device, use `ctr` to pass a host `vhost-user-blk`
device to the container. In your `config.json`, you should use `devices`
to pass a host device to the container.
For example:
For example (only `vhost-user-blk` listed):
```json
{
"linux": {
"devices": [
{
"path": "/dev/vda",
"type": "b",
"major": 241,
"minor": 0,
"fileMode": 420,
"uid": 0,
"gid": 0
}
]
}
}
```
With `rootfs` provisioned under `bundle` directory, you can run your SPDK container:
```bash
$ sudo docker run --runtime kata-runtime --device=/var/run/kata-containers/vhost-user/block/devices/vhostblk0:/dev/vda -it busybox sh
$ sudo ctr run -d --runtime io.containerd.run.kata.v2 --config bundle/config.json spdk_container
```
Example of performing I/O operations on the `vhost-user-blk` device inside
container:
```
$ sudo ctr t exec --exec-id 1 -t spdk_container sh
/ # ls -l /dev/vda
brw-r--r-- 1 root root 254, 0 Jan 20 03:54 /dev/vda
/ # dd if=/dev/vda of=/tmp/ddtest bs=4k count=20

View File

@@ -76,7 +76,7 @@ then a new configuration file can be [created](#configure-kata-containers)
and [configured][7].
[1]: https://docs.snapcraft.io/snaps/intro
[2]: ../docs/design/architecture.md#root-filesystem-image
[2]: ../docs/design/architecture/README.md#root-filesystem-image
[3]: https://docs.snapcraft.io/reference/confinement#classic
[4]: https://github.com/kata-containers/runtime#configuration
[5]: https://docs.docker.com/engine/reference/commandline/dockerd

View File

@@ -118,18 +118,19 @@ parts:
export AGENT_INIT=yes
export USE_DOCKER=1
export DEBUG=1
case "$(uname -m)" in
aarch64)
sudo -E PATH=$PATH make initrd DISTRO=alpine
;;
ppc64le|s390x)
# Cannot use alpine on ppc64le/s390x because it would require a musl agent
sudo -E PATH=$PATH make initrd DISTRO=ubuntu
;;
arch="$(uname -m)"
initrd_distro=$(${yq} r -X ${kata_dir}/versions.yaml assets.initrd.architecture.${arch}.name)
image_distro=$(${yq} r -X ${kata_dir}/versions.yaml assets.image.architecture.${arch}.name)
case "$arch" in
x86_64)
# In some build systems it's impossible to build a rootfs image, try with the initrd image
sudo -E PATH=$PATH make image DISTRO=clearlinux || sudo -E PATH=$PATH make initrd DISTRO=alpine
sudo -E PATH=$PATH make image DISTRO=${image_distro} || sudo -E PATH=$PATH make initrd DISTRO=${initrd_distro}
;;
aarch64|ppc64le|s390x)
sudo -E PATH=$PATH make initrd DISTRO=${initrd_distro}
;;
*) echo "unsupported architecture: $(uname -m)"; exit 1;;
esac

227
src/agent/Cargo.lock generated
View File

@@ -28,9 +28,9 @@ dependencies = [
[[package]]
name = "anyhow"
version = "1.0.52"
version = "1.0.51"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "84450d0b4a8bd1ba4144ce8ce718fbc5d071358b1e5384bace6536b3d1f2d5b3"
checksum = "8b26702f315f53b6071259e15dd9d64528213b44d61de1ec926eca7715d62203"
[[package]]
name = "arc-swap"
@@ -60,6 +60,17 @@ dependencies = [
"syn",
]
[[package]]
name = "atty"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [
"hermit-abi",
"libc",
"winapi",
]
[[package]]
name = "autocfg"
version = "1.0.1"
@@ -173,6 +184,36 @@ dependencies = [
"winapi",
]
[[package]]
name = "clap"
version = "3.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1121e32687f7f90b905d4775273305baa4f32cd418923e9b0fa726533221857"
dependencies = [
"atty",
"bitflags",
"clap_derive",
"indexmap",
"lazy_static",
"os_str_bytes",
"strsim",
"termcolor",
"textwrap",
]
[[package]]
name = "clap_derive"
version = "3.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7cbcf660a32ad0eda4b11996d8761432f499034f6e685bc6072337db662c85f8"
dependencies = [
"heck 0.4.0",
"proc-macro-error",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "crc32fast"
version = "1.3.0"
@@ -266,9 +307,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "futures"
version = "0.3.19"
version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28560757fe2bb34e79f907794bb6b22ae8b0e5c669b638a1132f2592b19035b4"
checksum = "a12aa0eb539080d55c3f2d45a67c3b58b6b0773c1a3ca2dfec66d58c97fd66ca"
dependencies = [
"futures-channel",
"futures-core",
@@ -281,9 +322,9 @@ dependencies = [
[[package]]
name = "futures-channel"
version = "0.3.19"
version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba3dda0b6588335f360afc675d0564c17a77a2bda81ca178a4b6081bd86c7f0b"
checksum = "5da6ba8c3bb3c165d3c7319fc1cc8304facf1fb8db99c5de877183c08a273888"
dependencies = [
"futures-core",
"futures-sink",
@@ -291,15 +332,15 @@ dependencies = [
[[package]]
name = "futures-core"
version = "0.3.19"
version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0c8ff0461b82559810cdccfde3215c3f373807f5e5232b71479bff7bb2583d7"
checksum = "88d1c26957f23603395cd326b0ffe64124b818f4449552f960d815cfba83a53d"
[[package]]
name = "futures-executor"
version = "0.3.19"
version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29d6d2ff5bb10fb95c85b8ce46538a2e5f5e7fdc755623a7d4529ab8a4ed9d2a"
checksum = "45025be030969d763025784f7f355043dc6bc74093e4ecc5000ca4dc50d8745c"
dependencies = [
"futures-core",
"futures-task",
@@ -308,16 +349,18 @@ dependencies = [
[[package]]
name = "futures-io"
version = "0.3.19"
version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1f9d34af5a1aac6fb380f735fe510746c38067c5bf16c7fd250280503c971b2"
checksum = "522de2a0fe3e380f1bc577ba0474108faf3f6b18321dbf60b3b9c39a75073377"
[[package]]
name = "futures-macro"
version = "0.3.19"
version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6dbd947adfffb0efc70599b3ddcf7b5597bb5fa9e245eb99f62b3a5f7bb8bd3c"
checksum = "18e4a4b95cea4b4ccbcf1c5675ca7c4ee4e9e75eb79944d07defde18068f79bb"
dependencies = [
"autocfg",
"proc-macro-hack",
"proc-macro2",
"quote",
"syn",
@@ -325,22 +368,23 @@ dependencies = [
[[package]]
name = "futures-sink"
version = "0.3.19"
version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3055baccb68d74ff6480350f8d6eb8fcfa3aa11bdc1a1ae3afdd0514617d508"
checksum = "36ea153c13024fe480590b3e3d4cad89a0cfacecc24577b68f86c6ced9c2bc11"
[[package]]
name = "futures-task"
version = "0.3.19"
version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ee7c6485c30167ce4dfb83ac568a849fe53274c831081476ee13e0dce1aad72"
checksum = "1d3d00f4eddb73e498a54394f228cd55853bdf059259e8e7bc6e69d408892e99"
[[package]]
name = "futures-util"
version = "0.3.19"
version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b5cf40b47a271f77a8b1bec03ca09044d99d2372c0de244e66430761127164"
checksum = "36568465210a3a6ee45e1f165136d68671471a501e632e9a98d96872222b5481"
dependencies = [
"autocfg",
"futures-channel",
"futures-core",
"futures-io",
@@ -350,6 +394,8 @@ dependencies = [
"memchr",
"pin-project-lite",
"pin-utils",
"proc-macro-hack",
"proc-macro-nested",
"slab",
]
@@ -379,6 +425,12 @@ dependencies = [
"unicode-segmentation",
]
[[package]]
name = "heck"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9"
[[package]]
name = "hermit-abi"
version = "0.1.19"
@@ -486,6 +538,7 @@ dependencies = [
"async-trait",
"capctl",
"cgroups-rs",
"clap",
"futures",
"ipnetwork",
"lazy_static",
@@ -494,7 +547,7 @@ dependencies = [
"logging",
"netlink-packet-utils",
"netlink-sys",
"nix 0.21.2",
"nix 0.23.1",
"oci",
"opentelemetry",
"procfs 0.12.0",
@@ -736,19 +789,6 @@ dependencies = [
"memoffset",
]
[[package]]
name = "nix"
version = "0.21.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77d9f3521ea8e0641a153b3cddaf008dcbf26acd4ed739a2517295e0760d12c7"
dependencies = [
"bitflags",
"cc",
"cfg-if 1.0.0",
"libc",
"memoffset",
]
[[package]]
name = "nix"
version = "0.22.2"
@@ -805,9 +845,9 @@ dependencies = [
[[package]]
name = "num_cpus"
version = "1.13.1"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1"
checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3"
dependencies = [
"hermit-abi",
"libc",
@@ -849,6 +889,15 @@ dependencies = [
"tokio-stream",
]
[[package]]
name = "os_str_bytes"
version = "6.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e22443d1643a904602595ba1cd8f7d896afe56d26712531c5ff73a15b2fbf64"
dependencies = [
"memchr",
]
[[package]]
name = "parking_lot"
version = "0.11.2"
@@ -917,18 +966,18 @@ dependencies = [
[[package]]
name = "pin-project"
version = "1.0.9"
version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1622113ce508488160cff04e6abc60960e676d330e1ca0f77c0b8df17c81438f"
checksum = "576bc800220cc65dac09e99e97b08b358cfab6e17078de8dc5fee223bd2d0c08"
dependencies = [
"pin-project-internal",
]
[[package]]
name = "pin-project-internal"
version = "1.0.9"
version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b95af56fee93df76d721d356ac1ca41fccf168bc448eb14049234df764ba3e76"
checksum = "6e8fe8163d14ce7f0cdac2e040116f22eac817edabff0be91e8aff7e9accf389"
dependencies = [
"proc-macro2",
"quote",
@@ -960,10 +1009,46 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed0cfbc8191465bed66e1718596ee0b0b35d5ee1f41c5df2189d0fe8bde535ba"
[[package]]
name = "proc-macro2"
version = "1.0.36"
name = "proc-macro-error"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7342d5883fbccae1cc37a2353b09c87c9b0f3afd73f5fb9bba687a1f733b029"
checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
dependencies = [
"proc-macro-error-attr",
"proc-macro2",
"quote",
"syn",
"version_check",
]
[[package]]
name = "proc-macro-error-attr"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
dependencies = [
"proc-macro2",
"quote",
"version_check",
]
[[package]]
name = "proc-macro-hack"
version = "0.5.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
[[package]]
name = "proc-macro-nested"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc881b2c22681370c6a780e47af9840ef841837bc98118431d4e1868bd0c1086"
[[package]]
name = "proc-macro2"
version = "1.0.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f84e92c0f7c9d58328b85a78557813e4bd845130db68d7184635344399423b1"
dependencies = [
"unicode-xid",
]
@@ -1031,7 +1116,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "355f634b43cdd80724ee7848f95770e7e70eefa6dcf14fea676216573b8fd603"
dependencies = [
"bytes 1.1.0",
"heck",
"heck 0.3.3",
"itertools",
"log",
"multimap",
@@ -1070,6 +1155,10 @@ name = "protobuf"
version = "2.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e86d370532557ae7573551a1ec8235a0f8d6cb276c7c9e6aa490b511c447485"
dependencies = [
"serde",
"serde_derive",
]
[[package]]
name = "protobuf-codegen"
@@ -1102,9 +1191,9 @@ dependencies = [
[[package]]
name = "quote"
version = "1.0.14"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "47aa80447ce4daf1717500037052af176af5d38cc3e571d9ec1c7353fc10c87d"
checksum = "38bc8cc6a5f2e3655e0899c1b848643b2562f853f114bfec7be120678e3ace05"
dependencies = [
"proc-macro2",
]
@@ -1231,7 +1320,7 @@ dependencies = [
"lazy_static",
"libc",
"libseccomp",
"nix 0.21.2",
"nix 0.23.1",
"oci",
"path-absolutize",
"protobuf",
@@ -1413,10 +1502,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ecab6c735a6bb4139c0caafd0cc3635748bbb3acf4550e8138122099251f309"
[[package]]
name = "syn"
version = "1.0.84"
name = "strsim"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ecb2e6da8ee5eb9a61068762a32fa9619cc591ceb055b3687f4cd4051ec2e06b"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "syn"
version = "1.0.82"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8daf5dd0bb60cbd4137b1b587d2fc0ae729bc07cf01cd70b36a1ed5ade3b9d59"
dependencies = [
"proc-macro2",
"quote",
@@ -1443,6 +1538,21 @@ dependencies = [
"winapi",
]
[[package]]
name = "termcolor"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4"
dependencies = [
"winapi-util",
]
[[package]]
name = "textwrap"
version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0066c8d12af8b5acd21e00547c3797fde4e8677254a7ee429176ccebbe93dd80"
[[package]]
name = "thiserror"
version = "1.0.30"
@@ -1705,6 +1815,12 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
[[package]]
name = "version_check"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe"
[[package]]
name = "void"
version = "1.0.2"
@@ -1729,7 +1845,7 @@ dependencies = [
"bincode",
"byteorder",
"libc",
"nix 0.21.2",
"nix 0.23.1",
"opentelemetry",
"serde",
"slog",
@@ -1825,6 +1941,15 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
dependencies = [
"winapi",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"

View File

@@ -5,20 +5,20 @@ authors = ["The Kata Containers community <kata-dev@lists.katacontainers.io>"]
edition = "2018"
[dependencies]
oci = { path = "oci" }
oci = { path = "../libs/oci" }
rustjail = { path = "rustjail" }
protocols = { path = "protocols" }
protocols = { path = "../libs/protocols" }
lazy_static = "1.3.0"
ttrpc = { version = "0.5.0", features = ["async", "protobuf-codec"], default-features = false }
protobuf = "=2.14.0"
libc = "0.2.58"
nix = "0.21.0"
nix = "0.23.0"
capctl = "0.2.0"
serde_json = "1.0.39"
scan_fmt = "0.2.3"
scopeguard = "1.0.0"
thiserror = "1.0.26"
regex = "1"
regex = "1.5.4"
serial_test = "0.5.1"
# Async helpers
@@ -27,7 +27,7 @@ async-recursion = "0.3.2"
futures = "0.3.17"
# Async runtime
tokio = { version = "1", features = ["full"] }
tokio = { version = "1.14.0", features = ["full"] }
tokio-vsock = "0.3.1"
netlink-sys = { version = "0.7.0", features = ["tokio_socket",]}
@@ -37,7 +37,7 @@ ipnetwork = "0.17.0"
# Note: this crate sets the slog 'max_*' features which allows the log level
# to be modified at runtime.
logging = { path = "../../pkg/logging" }
logging = { path = "../libs/logging" }
slog = "2.5.2"
slog-scope = "4.1.2"
@@ -60,14 +60,13 @@ vsock-exporter = { path = "vsock-exporter" }
# Configuration
serde = { version = "1.0.129", features = ["derive"] }
toml = "0.5.8"
clap = { version = "3.0.1", features = ["derive"] }
[dev-dependencies]
tempfile = "3.1.0"
[workspace]
members = [
"oci",
"protocols",
"rustjail",
]

View File

@@ -101,7 +101,10 @@ endef
##TARGET default: build code
default: $(TARGET) show-header
$(TARGET): $(GENERATED_CODE) $(TARGET_PATH)
$(TARGET): $(GENERATED_CODE) logging-crate-tests $(TARGET_PATH)
logging-crate-tests:
make -C $(CWD)/../libs/logging
$(TARGET_PATH): $(SOURCES) | show-summary
@RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE) $(EXTRA_RUSTFEATURES)
@@ -111,7 +114,7 @@ $(GENERATED_FILES): %: %.in
##TARGET optimize: optimized build
optimize: $(SOURCES) | show-summary show-header
@RUSTFLAGS="-C link-arg=-s $(EXTRA_RUSTFLAGS) --deny-warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE) $(EXTRA_RUSTFEATURES)
@RUSTFLAGS="-C link-arg=-s $(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE) $(EXTRA_RUSTFEATURES)
##TARGET clippy: run clippy linter
clippy: $(GENERATED_CODE)
@@ -205,9 +208,10 @@ codecov-html: check_tarpaulin
.PHONY: \
help \
logging-crate-tests \
optimize \
show-header \
show-summary \
optimize \
vendor
##TARGET generate-protocols: generate/update grpc agent protocols

View File

@@ -1,48 +1,38 @@
# Kata Agent in Rust
# Kata Agent
This is a rust version of the [`kata-agent`](https://github.com/kata-containers/agent).
## Overview
In Denver PTG, [we discussed about re-writing agent in rust](https://etherpad.openstack.org/p/katacontainers-2019-ptg-denver-agenda):
The Kata agent is a long running process that runs inside the Virtual Machine
(VM) (also known as the "pod" or "sandbox").
> In general, we all think about re-write agent in rust to reduce the footprint of agent. Moreover, Eric mentioned the possibility to stop using gRPC, which may have some impact on footprint. We may begin to do some POC to show how much we could save by re-writing agent in rust.
The agent is packaged inside the Kata Containers
[guest image](../../docs/design/architecture/README.md#guest-image)
which is used to boot the VM. Once the runtime has launched the configured
[hypervisor](../../docs/hypervisors.md) to create a new VM, the agent is
started. From this point on, the agent is responsible for creating and
managing the life cycle of the containers inside the VM.
After that, we drafted the initial code here, and any contributions are welcome.
For further details, see the
[architecture document](../../docs/design/architecture).
## Features
## Audience
| Feature | Status |
| :--|:--:|
| **OCI Behaviors** |
| create/start containers | :white_check_mark: |
| signal/wait process | :white_check_mark: |
| exec/list process | :white_check_mark: |
| I/O stream | :white_check_mark: |
| Cgroups | :white_check_mark: |
| Capabilities, `rlimit`, readonly path, masked path, users | :white_check_mark: |
| Seccomp | :white_check_mark: |
| container stats (`stats_container`) | :white_check_mark: |
| Hooks | :white_check_mark: |
| **Agent Features & APIs** |
| run agent as `init` (mount fs, udev, setup `lo`) | :white_check_mark: |
| block device as root device | :white_check_mark: |
| Health API | :white_check_mark: |
| network, interface/routes (`update_container`) | :white_check_mark: |
| File transfer API (`copy_file`) | :white_check_mark: |
| Device APIs (`reseed_random_device`, , `online_cpu_memory`, `mem_hotplug_probe`, `set_guet_data_time`) | :white_check_mark: |
| VSOCK support | :white_check_mark: |
| virtio-serial support | :heavy_multiplication_x: |
| OCI Spec validator | :white_check_mark: |
| **Infrastructures**|
| Debug Console | :white_check_mark: |
| Command line | :white_check_mark: |
| Tracing | :heavy_multiplication_x: |
If you simply wish to use Kata Containers, it is not necessary to understand
the details of how the agent operates. Please see the
[installation documentation](../../docs/install) for details of how to deploy
Kata Containers (which will include the Kata agent).
## Getting Started
The remainder of this document is only useful for developers and testers.
### Build from Source
The rust-agent needs to be built statically and linked with `musl`
## Build from Source
> **Note:** skip this step for ppc64le, the build scripts explicitly use gnu for ppc64le.
Since the agent is written in the Rust language, this section assumes the tool
chain has been installed using the standard Rust `rustup` tool.
### Build with musl
If you wish to build the agent with the `musl` C library, you need to run the
following commands:
```bash
$ arch=$(uname -m)
@@ -50,12 +40,15 @@ $ rustup target add "${arch}-unknown-linux-musl"
$ sudo ln -s /usr/bin/g++ /bin/musl-g++
```
ppc64le-only: Manually install `protoc`, e.g.
```bash
$ sudo dnf install protobuf-compiler
```
> **Note:**
>
> It is not currently possible to build using `musl` on ppc64le and s390x
> since both platforms lack the `musl` target.
### Build the agent binary
The following steps download the Kata Containers source files and build the agent:
Download the source files in the Kata containers repository and build the agent:
```bash
$ GOPATH="${GOPATH:-$HOME/go}"
$ dir="$GOPATH/src/github.com/kata-containers"
@@ -63,17 +56,60 @@ $ git -C ${dir} clone --depth 1 https://github.com/kata-containers/kata-containe
$ make -C ${dir}/kata-containers/src/agent
```
## Run Kata CI with rust-agent
* Firstly, install Kata as noted by ["how to install Kata"](../../docs/install/README.md)
* Secondly, build your own Kata initrd/image following the steps in ["how to build your own initrd/image"](../../docs/Developer-Guide.md#create-and-install-rootfs-and-initrd-image).
notes: Please use your rust agent instead of the go agent when building your initrd/image.
* Clone the Kata CI test cases from: https://github.com/kata-containers/tests.git, and then run the CRI test with:
## Change the agent API
The Kata runtime communicates with the Kata agent using a ttRPC based API protocol.
This ttRPC API is defined by a set of [protocol buffers files](protocols/protos).
The protocol files are used to generate the bindings for the following components:
| Component | Language | Generation method `[*]` | Tooling required |
|-|-|-|-|
| runtime | Golang | Run, `make generate-protocols` | `protoc` |
| agent | Rust | Run, `make` | |
> **Key:**
>
> `[*]` - All commands must be run in the agent repository.
If you wish to change the API, these files must be regenerated. Although the
rust code will be automatically generated by the
[build script](protocols/build.rs),
the Golang code generation requires the external `protoc` command to be
available in `$PATH`.
To install the `protoc` command on a Fedora/CentOS/RHEL system:
```bash
$ sudo -E PATH=$PATH -E GOPATH=$GOPATH integration/containerd/shimv2/shimv2-tests.sh
$ sudo dnf install -y protobuf-compiler
```
## Mini Benchmark
The `RssAnon` memory consumed by the go-agent and the rust-agent is as follows:
go-agent: about 11M
rust-agent: about 1.1M
## Custom guest image and kernel assets
If you wish to develop or test changes to the agent, you will need to create a
custom guest image using the [osbuilder tool](../../tools/osbuilder). You
may also wish to create a custom [guest kernel](../../tools/packaging/kernel).
Once created, [configure](../runtime/README.md#configuration) Kata Containers to use
these custom assets to allow you to test your changes.
> **Note:**
>
> To simplify development and testing, you may wish to run the agent
> [stand alone](#run-the-agent-stand-alone) initially.
## Tracing
For details of tracing the operation of the agent, see the
[tracing documentation](/docs/tracing.md).
## Run the agent stand alone
Although the agent is designed to run in a VM environment, for development and
testing purposes it is possible to run it as a normal application.
When run in this way, the agent can be controlled using the low-level Kata
agent control tool, rather than the Kata runtime.
For further details, see the
[agent control tool documentation](../tools/agent-ctl/README.md#run-the-tool-and-the-agent-in-the-same-environment).

View File

@@ -1,44 +0,0 @@
// Copyright (c) 2020 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use std::fs;
use ttrpc_codegen::{Codegen, Customize};
// Build script entry point: runs the ttRPC code generator over the protocol
// definitions under `protos/`, writing the generated Rust sources into `src/`,
// and then patches the generated `src/oci.rs`.
fn main() {
    let proto_files = vec![
        "protos/types.proto",
        "protos/agent.proto",
        "protos/health.proto",
        "protos/google/protobuf/empty.proto",
        "protos/oci.proto",
    ];

    // Generator options: everything is left at its default except `async_server`.
    let options = Customize {
        async_server: true,
        ..Default::default()
    };

    let generated = Codegen::new()
        .out_dir("src")
        .inputs(&proto_files)
        .include("protos")
        .rust_protobuf()
        .customize(options)
        .run();
    generated.expect("Gen codes failed.");

    // oci.proto declares a message named 'Box', so the generated module contains
    // a struct named 'Box' that shadows the standard one. Spell out the full path
    // of the standard Box in `self: Box<Self>` receivers to avoid the conflict.
    let patched = replace_text_in_file(
        "src/oci.rs",
        "self: Box<Self>",
        "self: ::std::boxed::Box<Self>",
    );
    patched.unwrap();
}
// Rewrites `file_name` in place, substituting every occurrence of `from` with `to`.
// Any I/O error from reading or writing the file is returned to the caller.
fn replace_text_in_file(file_name: &str, from: &str, to: &str) -> Result<(), std::io::Error> {
    let original = fs::read_to_string(file_name)?;
    let updated = original.replace(from, to);
    fs::write(file_name, updated.as_bytes())
}

View File

@@ -8,10 +8,10 @@ edition = "2018"
serde = "1.0.91"
serde_json = "1.0.39"
serde_derive = "1.0.91"
oci = { path = "../oci" }
protocols = { path ="../protocols" }
oci = { path = "../../libs/oci" }
protocols = { path ="../../libs/protocols" }
caps = "0.5.0"
nix = "0.21.0"
nix = "0.23.0"
scopeguard = "1.0.0"
capctl = "0.2.0"
lazy_static = "1.3.0"
@@ -19,15 +19,15 @@ libc = "0.2.58"
protobuf = "=2.14.0"
slog = "2.5.2"
slog-scope = "4.1.2"
scan_fmt = "0.2"
regex = "1.1"
scan_fmt = "0.2.6"
regex = "1.5.4"
path-absolutize = "1.2.0"
anyhow = "1.0.32"
cgroups = { package = "cgroups-rs", version = "0.2.8" }
rlimit = "0.5.3"
tokio = { version = "1.2.0", features = ["sync", "io-util", "process", "time", "macros"] }
futures = "0.3"
futures = "0.3.17"
async-trait = "0.1.31"
inotify = "0.9.2"
libseccomp = { version = "0.1.3", optional = true }

View File

@@ -22,7 +22,6 @@ use crate::cgroups::Manager as CgroupManager;
use crate::container::DEFAULT_DEVICES;
use anyhow::{anyhow, Context, Result};
use libc::{self, pid_t};
use nix::errno::Errno;
use oci::{
LinuxBlockIo, LinuxCpu, LinuxDevice, LinuxDeviceCgroup, LinuxHugepageLimit, LinuxMemory,
LinuxNetwork, LinuxPids, LinuxResources,
@@ -175,7 +174,7 @@ impl CgroupManager for Manager {
freezer_controller.freeze()?;
}
_ => {
return Err(nix::Error::Sys(Errno::EINVAL).into());
return Err(anyhow!(nix::Error::EINVAL));
}
}

View File

@@ -419,7 +419,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
ns.r#type.clone(),
ns.path.clone()
);
log_child!(cfd_log, "error is : {:?}", e.as_errno());
log_child!(cfd_log, "error is : {:?}", e);
e
})?;
@@ -496,7 +496,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
log_child!(cfd_log, "join namespace {:?}", s);
sched::setns(fd, s).or_else(|e| {
if s == CloneFlags::CLONE_NEWUSER {
if e.as_errno().unwrap() != Errno::EINVAL {
if e != Errno::EINVAL {
let _ = write_sync(cwfd, SYNC_FAILED, format!("{:?}", e).as_str());
return Err(e);
}
@@ -600,6 +600,14 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
capctl::prctl::set_no_new_privs().map_err(|_| anyhow!("cannot set no new privileges"))?;
}
// Log unknown seccomp system calls in advance before the log file descriptor closes.
#[cfg(feature = "seccomp")]
if let Some(ref scmp) = linux.seccomp {
if let Some(syscalls) = seccomp::get_unknown_syscalls(scmp) {
log_child!(cfd_log, "unknown seccomp system calls: {:?}", syscalls);
}
}
// Without NoNewPrivileges, we need to set seccomp
// before dropping capabilities because the calling thread
// must have the CAP_SYS_ADMIN.
@@ -636,11 +644,10 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
// setup the envs
for e in env.iter() {
let v: Vec<&str> = e.splitn(2, '=').collect();
if v.len() != 2 {
continue;
match valid_env(e) {
Some((key, value)) => env::set_var(key, value),
None => log_child!(cfd_log, "invalid env key-value: {:?}", e),
}
env::set_var(v[0], v[1]);
}
// set the "HOME" env getting from "/etc/passwd", if
@@ -664,8 +671,8 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
let _ = unistd::close(crfd);
let _ = unistd::close(cwfd);
unistd::setsid().context("create a new session")?;
if oci_process.terminal {
unistd::setsid()?;
unsafe {
libc::ioctl(0, libc::TIOCSCTTY);
}
@@ -995,8 +1002,6 @@ impl BaseContainer for LinuxContainer {
info!(logger, "entered namespaces!");
self.created = SystemTime::now();
if p.init {
let spec = self.config.spec.as_mut().unwrap();
update_namespaces(&self.logger, spec, p.pid)?;
@@ -1111,10 +1116,8 @@ fn do_exec(args: &[String]) -> ! {
.collect();
let _ = unistd::execvp(p.as_c_str(), &sa).map_err(|e| match e {
nix::Error::Sys(errno) => {
std::process::exit(errno as i32);
}
_ => std::process::exit(-2),
nix::Error::UnknownErrno => std::process::exit(-2),
_ => std::process::exit(e as i32),
});
unreachable!()
@@ -1160,7 +1163,7 @@ fn get_pid_namespace(logger: &Logger, linux: &Linux) -> Result<Option<RawFd>> {
ns.r#type.clone(),
ns.path.clone()
);
error!(logger, "error is : {:?}", e.as_errno());
error!(logger, "error is : {:?}", e);
e
})?;
@@ -1393,13 +1396,13 @@ impl LinuxContainer {
.context(format!("cannot change onwer of container {} root", id))?;
if config.spec.is_none() {
return Err(nix::Error::Sys(Errno::EINVAL).into());
return Err(anyhow!(nix::Error::EINVAL));
}
let spec = config.spec.as_ref().unwrap();
if spec.linux.is_none() {
return Err(nix::Error::Sys(Errno::EINVAL).into());
return Err(anyhow!(nix::Error::EINVAL));
}
let linux = spec.linux.as_ref().unwrap();
@@ -1476,7 +1479,7 @@ async fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> {
let binary = PathBuf::from(h.path.as_str());
let path = binary.canonicalize()?;
if !path.exists() {
return Err(anyhow!(nix::Error::from_errno(Errno::EINVAL)));
return Err(anyhow!(nix::Error::EINVAL));
}
let args = h.args.clone();
@@ -1545,7 +1548,7 @@ async fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> {
if code != 0 {
error!(logger, "hook {} exit status is {}", &path, code);
return Err(anyhow!(nix::Error::from_errno(Errno::UnknownErrno)));
return Err(anyhow!(nix::Error::UnknownErrno));
}
debug!(logger, "hook {} exit status is 0", &path);
@@ -1561,10 +1564,34 @@ async fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> {
match tokio::time::timeout(Duration::new(timeout, 0), join_handle).await {
Ok(r) => r.unwrap(),
Err(_) => Err(anyhow!(nix::Error::from_errno(Errno::ETIMEDOUT))),
Err(_) => Err(anyhow!(nix::Error::ETIMEDOUT)),
}
}
// Splits a `KEY=VALUE` environment entry into a (key, value) pair that is safe to
// hand to `std::env::set_var`, following the panic conditions listed at
// https://doc.rust-lang.org/std/env/fn.set_var.html#panics
//
// Returns `None` when the entry contains a NUL character, has no `=` separator,
// or has an empty key. Key and value are returned with surrounding whitespace
// trimmed.
fn valid_env(e: &str) -> Option<(&str, &str)> {
    // Neither the key nor the value may contain a NUL character.
    if e.bytes().any(|b| b == b'\0') {
        return None;
    }

    // Only the first `=` separates key from value, so the value may itself
    // contain further `=` signs.
    let mut pieces = e.splitn(2, '=');
    let key = pieces.next()?.trim();
    let value = pieces.next()?.trim();

    // An empty key is rejected.
    if key.is_empty() {
        None
    } else {
        Some((key, value))
    }
}
#[cfg(test)]
mod tests {
use super::*;
@@ -1643,7 +1670,7 @@ mod tests {
)
.await;
let expected_err = nix::Error::from_errno(Errno::ETIMEDOUT);
let expected_err = nix::Error::ETIMEDOUT;
assert_eq!(
res.unwrap_err().downcast::<nix::Error>().unwrap(),
expected_err
@@ -1988,4 +2015,49 @@ mod tests {
let ret = do_init_child(std::io::stdin().as_raw_fd());
assert!(ret.is_err(), "Expecting Err, Got {:?}", ret);
}
#[test]
fn test_valid_env() {
    // Each entry pairs a raw environment string with the result `valid_env`
    // is expected to produce for it.
    let cases: Vec<(&str, Option<(&str, &str)>)> = vec![
        // Only the first `=` splits key from value.
        ("a=b=c", Some(("a", "b=c"))),
        ("a=b", Some(("a", "b"))),
        // Whitespace around key and value is trimmed.
        ("a =b", Some(("a", "b"))),
        (" a =b", Some(("a", "b"))),
        ("a= b", Some(("a", "b"))),
        ("a=b ", Some(("a", "b"))),
        ("a=b c ", Some(("a", "b c"))),
        // Empty key is rejected, empty value is accepted.
        ("=b", None),
        ("a=", Some(("a", ""))),
        ("a==", Some(("a", "="))),
        // No separator at all.
        ("a", None),
        // A NUL character in either the key or the value is rejected.
        ("a\0b=value", None),
        ("key=a\0b", None),
    ];

    for (input, expected) in cases.iter() {
        assert_eq!(*expected, valid_env(input));
    }
}
}

View File

@@ -5,7 +5,6 @@
use anyhow::{anyhow, Context, Result};
use libc::uid_t;
use nix::errno::Errno;
use nix::fcntl::{self, OFlag};
#[cfg(not(test))]
use nix::mount;
@@ -35,17 +34,9 @@ use crate::log_child;
// struct is populated from the content in the /proc/<pid>/mountinfo file.
#[derive(std::fmt::Debug)]
pub struct Info {
id: i32,
parent: i32,
major: i32,
minor: i32,
root: String,
mount_point: String,
opts: String,
optional: String,
fstype: String,
source: String,
vfs_opts: String,
}
const MOUNTINFOFORMAT: &str = "{d} {d} {d}:{d} {} {} {} {}";
@@ -112,6 +103,7 @@ lazy_static! {
}
#[inline(always)]
#[cfg(not(test))]
pub fn mount<
P1: ?Sized + NixPath,
P2: ?Sized + NixPath,
@@ -124,21 +116,42 @@ pub fn mount<
flags: MsFlags,
data: Option<&P4>,
) -> std::result::Result<(), nix::Error> {
#[cfg(not(test))]
return mount::mount(source, target, fstype, flags, data);
#[cfg(test)]
return Ok(());
mount::mount(source, target, fstype, flags, data)
}
// Test-only stand-in for `mount` (compiled only under `cfg(test)`): it ignores
// every argument, performs no mount and always reports success.
#[inline(always)]
#[cfg(test)]
pub fn mount<
P1: ?Sized + NixPath,
P2: ?Sized + NixPath,
P3: ?Sized + NixPath,
P4: ?Sized + NixPath,
>(
_source: Option<&P1>,
_target: &P2,
_fstype: Option<&P3>,
_flags: MsFlags,
_data: Option<&P4>,
) -> std::result::Result<(), nix::Error> {
Ok(())
}
#[inline(always)]
#[cfg(not(test))]
pub fn umount2<P: ?Sized + NixPath>(
target: &P,
flags: MntFlags,
) -> std::result::Result<(), nix::Error> {
#[cfg(not(test))]
return mount::umount2(target, flags);
#[cfg(test)]
return Ok(());
mount::umount2(target, flags)
}
// Test-only stand-in for `umount2` (compiled only under `cfg(test)`): it ignores
// the target and flags, performs no unmount and always reports success.
#[inline(always)]
#[cfg(test)]
pub fn umount2<P: ?Sized + NixPath>(
_target: &P,
_flags: MntFlags,
) -> std::result::Result<(), nix::Error> {
Ok(())
}
pub fn init_rootfs(
@@ -450,14 +463,20 @@ fn mount_cgroups(
Ok(())
}
#[cfg(not(test))]
fn pivot_root<P1: ?Sized + NixPath, P2: ?Sized + NixPath>(
new_root: &P1,
put_old: &P2,
) -> anyhow::Result<(), nix::Error> {
#[cfg(not(test))]
return unistd::pivot_root(new_root, put_old);
#[cfg(test)]
return Ok(());
unistd::pivot_root(new_root, put_old)
}
// Test-only stand-in for `pivot_root` (compiled only under `cfg(test)`): it
// ignores both paths and always reports success.
#[cfg(test)]
fn pivot_root<P1: ?Sized + NixPath, P2: ?Sized + NixPath>(
_new_root: &P1,
_put_old: &P2,
) -> anyhow::Result<(), nix::Error> {
Ok(())
}
pub fn pivot_rootfs<P: ?Sized + NixPath + std::fmt::Debug>(path: &P) -> Result<()> {
@@ -535,7 +554,20 @@ fn parse_mount_table() -> Result<Vec<Info>> {
for (_index, line) in reader.lines().enumerate() {
let line = line?;
let (id, parent, major, minor, root, mount_point, opts, optional) = scan_fmt!(
//Example mountinfo format:
// id
// | / parent
// | | / major:minor
// | | | / root
// | | | | / mount_point
// | | | | | / opts
// | | | | | | / optional
// | | | | | | | / fstype
// | | | | | | | | / source
// | | | | | | | | | / vfs_opts
// 22 96 0:21 / /sys rw,nosuid,nodev,noexec,relatime shared:2 - sysfs sysfs rw,seclabel
let (_id, _parent, _major, _minor, _root, mount_point, _opts, optional) = scan_fmt!(
&line,
MOUNTINFOFORMAT,
i32,
@@ -550,7 +582,7 @@ fn parse_mount_table() -> Result<Vec<Info>> {
let fields: Vec<&str> = line.split(" - ").collect();
if fields.len() == 2 {
let (fstype, source, vfs_opts) =
let (fstype, _source, _vfs_opts) =
scan_fmt!(fields[1], "{} {} {}", String, String, String)?;
let mut optional_new = String::new();
@@ -559,17 +591,9 @@ fn parse_mount_table() -> Result<Vec<Info>> {
}
let info = Info {
id,
parent,
major,
minor,
root,
mount_point,
opts,
optional: optional_new,
fstype,
source,
vfs_opts,
};
infos.push(info);
@@ -582,11 +606,15 @@ fn parse_mount_table() -> Result<Vec<Info>> {
}
#[inline(always)]
#[cfg(not(test))]
fn chroot<P: ?Sized + NixPath>(path: &P) -> Result<(), nix::Error> {
#[cfg(not(test))]
return unistd::chroot(path);
#[cfg(test)]
return Ok(());
unistd::chroot(path)
}
// Test-only stand-in for `chroot` (compiled only under `cfg(test)`): it ignores
// the path and always reports success.
#[inline(always)]
#[cfg(test)]
fn chroot<P: ?Sized + NixPath>(_path: &P) -> Result<(), nix::Error> {
Ok(())
}
pub fn ms_move_root(rootfs: &str) -> Result<bool> {
@@ -623,7 +651,7 @@ pub fn ms_move_root(rootfs: &str) -> Result<bool> {
None::<&str>,
)?;
umount2(abs_mount_point, MntFlags::MNT_DETACH).or_else(|e| {
if e.ne(&nix::Error::from(Errno::EINVAL)) && e.ne(&nix::Error::from(Errno::EPERM)) {
if e.ne(&nix::Error::EINVAL) && e.ne(&nix::Error::EPERM) {
return Err(anyhow!(e));
}
@@ -766,14 +794,8 @@ fn mount_from(
}
};
let _ = stat::stat(dest.as_str()).map_err(|e| {
log_child!(
cfd_log,
"dest stat error. {}: {:?}",
dest.as_str(),
e.as_errno()
)
});
let _ = stat::stat(dest.as_str())
.map_err(|e| log_child!(cfd_log, "dest stat error. {}: {:?}", dest.as_str(), e));
mount(
Some(src.as_str()),
@@ -783,7 +805,7 @@ fn mount_from(
Some(d.as_str()),
)
.map_err(|e| {
log_child!(cfd_log, "mount error: {:?}", e.as_errno());
log_child!(cfd_log, "mount error: {:?}", e);
e
})?;
@@ -805,7 +827,7 @@ fn mount_from(
None::<&str>,
)
.map_err(|e| {
log_child!(cfd_log, "remout {}: {:?}", dest.as_str(), e.as_errno());
log_child!(cfd_log, "remout {}: {:?}", dest.as_str(), e);
e
})?;
}
@@ -974,7 +996,7 @@ pub fn finish_rootfs(cfd_log: RawFd, spec: &Spec, process: &Process) -> Result<(
fn mask_path(path: &str) -> Result<()> {
if !path.starts_with('/') || path.contains("..") {
return Err(nix::Error::Sys(Errno::EINVAL).into());
return Err(anyhow!(nix::Error::EINVAL));
}
match mount(
@@ -984,49 +1006,30 @@ fn mask_path(path: &str) -> Result<()> {
MsFlags::MS_BIND,
None::<&str>,
) {
Err(nix::Error::Sys(e)) => {
if e != Errno::ENOENT && e != Errno::ENOTDIR {
//info!("{}: {}", path, e.desc());
return Err(nix::Error::Sys(e).into());
}
}
Err(e) => {
return Err(e.into());
}
Ok(_) => {}
Err(e) => match e {
nix::Error::ENOENT | nix::Error::ENOTDIR => Ok(()),
_ => Err(e.into()),
},
Ok(_) => Ok(()),
}
Ok(())
}
fn readonly_path(path: &str) -> Result<()> {
if !path.starts_with('/') || path.contains("..") {
return Err(nix::Error::Sys(Errno::EINVAL).into());
return Err(anyhow!(nix::Error::EINVAL));
}
match mount(
if let Err(e) = mount(
Some(&path[1..]),
path,
None::<&str>,
MsFlags::MS_BIND | MsFlags::MS_REC,
None::<&str>,
) {
Err(nix::Error::Sys(e)) => {
if e == Errno::ENOENT {
return Ok(());
} else {
//info!("{}: {}", path, e.desc());
return Err(nix::Error::Sys(e).into());
}
}
Err(e) => {
return Err(e.into());
}
Ok(_) => {}
match e {
nix::Error::ENOENT => return Ok(()),
_ => return Err(e.into()),
};
}
mount(

View File

@@ -30,7 +30,7 @@ impl io::Read for &StreamFd {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
match unistd::read(self.0, buf) {
Ok(l) => Ok(l),
Err(e) => Err(e.as_errno().unwrap().into()),
Err(e) => Err(e.into()),
}
}
}
@@ -39,7 +39,7 @@ impl io::Write for &StreamFd {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
match unistd::write(self.0, buf) {
Ok(l) => Ok(l),
Err(e) => Err(e.as_errno().unwrap().into()),
Err(e) => Err(e.into()),
}
}
@@ -52,7 +52,7 @@ impl StreamFd {
fn close(&mut self) -> io::Result<()> {
match unistd::close(self.0) {
Ok(()) => Ok(()),
Err(e) => Err(e.as_errno().unwrap().into()),
Err(e) => Err(e.into()),
}
}
}

View File

@@ -39,6 +39,24 @@ fn get_rule_conditions(args: &[LinuxSeccompArg]) -> Result<Vec<ScmpArgCompare>>
Ok(conditions)
}
// Collects the system call names listed in `scmp` that cannot be resolved via
// `get_syscall_from_name`, in the order they appear in the profile.
// Returns `None` when every name resolves.
pub fn get_unknown_syscalls(scmp: &LinuxSeccomp) -> Option<Vec<String>> {
    let unresolved: Vec<String> = scmp
        .syscalls
        .iter()
        .flat_map(|rule| rule.names.iter())
        .filter(|name| get_syscall_from_name(name.as_str(), None).is_err())
        .map(|name| name.to_string())
        .collect();

    match unresolved.len() {
        0 => None,
        _ => Some(unresolved),
    }
}
// init_seccomp creates a seccomp filter and loads it for the current process
// including all the child processes.
pub fn init_seccomp(scmp: &LinuxSeccomp) -> Result<()> {
@@ -68,7 +86,14 @@ pub fn init_seccomp(scmp: &LinuxSeccomp) -> Result<()> {
}
for name in &syscall.names {
let syscall_num = get_syscall_from_name(name, None)?;
let syscall_num = match get_syscall_from_name(name, None) {
Ok(num) => num,
Err(_) => {
// If we cannot resolve the given system call, we assume it is not supported
// by the kernel. Hence, we skip it without generating an error.
continue;
}
};
if syscall.args.is_empty() {
filter.add_rule(action, syscall_num, None)?;
@@ -109,6 +134,72 @@ mod tests {
};
}
const TEST_DATA: &str = r#"{
"defaultAction": "SCMP_ACT_ALLOW",
"architectures": [
],
"flags": [
"SECCOMP_FILTER_FLAG_LOG"
],
"syscalls": [
{
"names": [
"dup3",
"invalid_syscall1",
"invalid_syscall2"
],
"action": "SCMP_ACT_ERRNO"
},
{
"names": [
"process_vm_readv"
],
"action": "SCMP_ACT_ERRNO",
"errnoRet": 111,
"args": [
{
"index": 0,
"value": 10,
"op": "SCMP_CMP_EQ"
}
]
},
{
"names": [
"process_vm_readv"
],
"action": "SCMP_ACT_ERRNO",
"errnoRet": 111,
"args": [
{
"index": 0,
"value": 20,
"op": "SCMP_CMP_EQ"
}
]
},
{
"names": [
"process_vm_readv"
],
"action": "SCMP_ACT_ERRNO",
"errnoRet": 222,
"args": [
{
"index": 0,
"value": 30,
"op": "SCMP_CMP_EQ"
},
{
"index": 2,
"value": 40,
"op": "SCMP_CMP_EQ"
}
]
}
]
}"#;
#[test]
fn test_get_filter_attr_from_flag() {
skip_if_not_root!();
@@ -121,75 +212,19 @@ mod tests {
assert_eq!(get_filter_attr_from_flag("ERROR").is_err(), true);
}
// Checks that `get_unknown_syscalls` reports exactly the two made-up names from
// TEST_DATA ("invalid_syscall1", "invalid_syscall2") and none of the other
// names listed there.
#[test]
fn test_get_unknown_syscalls() {
let scmp: oci::LinuxSeccomp = serde_json::from_str(TEST_DATA).unwrap();
let syscalls = get_unknown_syscalls(&scmp).unwrap();
assert_eq!(syscalls, vec!["invalid_syscall1", "invalid_syscall2"]);
}
#[test]
fn test_init_seccomp() {
skip_if_not_root!();
let data = r#"{
"defaultAction": "SCMP_ACT_ALLOW",
"architectures": [
],
"flags": [
"SECCOMP_FILTER_FLAG_LOG"
],
"syscalls": [
{
"names": [
"dup3"
],
"action": "SCMP_ACT_ERRNO"
},
{
"names": [
"process_vm_readv"
],
"action": "SCMP_ACT_ERRNO",
"errnoRet": 111,
"args": [
{
"index": 0,
"value": 10,
"op": "SCMP_CMP_EQ"
}
]
},
{
"names": [
"process_vm_readv"
],
"action": "SCMP_ACT_ERRNO",
"errnoRet": 111,
"args": [
{
"index": 0,
"value": 20,
"op": "SCMP_CMP_EQ"
}
]
},
{
"names": [
"process_vm_readv"
],
"action": "SCMP_ACT_ERRNO",
"errnoRet": 222,
"args": [
{
"index": 0,
"value": 30,
"op": "SCMP_CMP_EQ"
},
{
"index": 2,
"value": 40,
"op": "SCMP_CMP_EQ"
}
]
}
]
}"#;
let mut scmp: oci::LinuxSeccomp = serde_json::from_str(data).unwrap();
let mut scmp: oci::LinuxSeccomp = serde_json::from_str(TEST_DATA).unwrap();
let mut arch: Vec<oci::Arch>;
if cfg!(target_endian = "little") {

View File

@@ -3,7 +3,6 @@
// SPDX-License-Identifier: Apache-2.0
//
use nix::errno::Errno;
use nix::unistd;
use std::mem;
use std::os::unix::io::RawFd;
@@ -41,7 +40,7 @@ pub fn write_count(fd: RawFd, buf: &[u8], count: usize) -> Result<usize> {
}
Err(e) => {
if e != nix::Error::from_errno(Errno::EINTR) {
if e != nix::Error::EINTR {
return Err(e.into());
}
}
@@ -65,7 +64,7 @@ fn read_count(fd: RawFd, count: usize) -> Result<Vec<u8>> {
}
Err(e) => {
if e != nix::Error::from_errno(Errno::EINTR) {
if e != nix::Error::EINTR {
return Err(e.into());
}
}

View File

@@ -5,13 +5,12 @@
use crate::container::Config;
use anyhow::{anyhow, Context, Error, Result};
use nix::errno::Errno;
use oci::{Linux, LinuxIdMapping, LinuxNamespace, Spec};
use std::collections::HashMap;
use std::path::{Component, PathBuf};
fn einval() -> Error {
anyhow!(nix::Error::from_errno(Errno::EINVAL))
anyhow!(nix::Error::EINVAL)
}
fn get_linux(oci: &Spec) -> Result<&Linux> {

View File

@@ -29,9 +29,7 @@ allowed = [
"SetGuestDateTimeRequest",
"SignalProcessRequest",
"StartContainerRequest",
"StartTracingRequest",
"StatsContainerRequest",
"StopTracingRequest",
"TtyWinResizeRequest",
"UpdateContainerRequest",
"UpdateInterfaceRequest",

View File

@@ -194,7 +194,17 @@ impl FromStr for AgentConfig {
impl AgentConfig {
#[instrument]
pub fn from_cmdline(file: &str) -> Result<AgentConfig> {
pub fn from_cmdline(file: &str, args: Vec<String>) -> Result<AgentConfig> {
// If config file specified in the args, generate our config from it
let config_position = args.iter().position(|a| a == "--config" || a == "-c");
if let Some(config_position) = config_position {
if let Some(config_file) = args.get(config_position + 1) {
return AgentConfig::from_config_file(config_file);
} else {
panic!("The config argument wasn't formed properly: {:?}", args);
}
}
let mut config: AgentConfig = Default::default();
let cmdline = fs::read_to_string(file)?;
let params: Vec<&str> = cmdline.split_ascii_whitespace().collect();
@@ -896,7 +906,8 @@ mod tests {
vars_to_unset.push(name);
}
let config = AgentConfig::from_cmdline(filename).expect("Failed to parse command line");
let config =
AgentConfig::from_cmdline(filename, vec![]).expect("Failed to parse command line");
assert_eq!(d.debug_console, config.debug_console, "{}", msg);
assert_eq!(d.dev_mode, config.dev_mode, "{}", msg);
@@ -917,6 +928,40 @@ mod tests {
}
}
// Verifies that passing `--config <file>` in the argument list makes
// `AgentConfig::from_cmdline` build the configuration from that file.
#[test]
fn test_from_cmdline_with_args_overwrites() {
    // Write a small TOML configuration into a temporary directory.
    let tmp = tempdir().expect("failed to create tmpdir");
    let config_path = tmp.path().join("config.toml");
    let config_name = config_path.to_str().expect("failed to create filename");

    let toml_contents = "dev_mode = true\nserver_addr = 'unix://@/tmp/foo.socket'";
    let mut config_file =
        File::create(config_name).unwrap_or_else(|_| panic!("failed to create file"));
    config_file
        .write_all(toml_contents.as_bytes())
        .unwrap_or_else(|_| panic!("failed to write file contents"));

    // The kernel command line path is left empty; only the arguments matter here.
    let cli_args = vec!["--config".to_string(), config_name.to_string()];
    let actual = AgentConfig::from_cmdline("", cli_args).expect("Failed to parse command line");

    // Everything not set in the file is expected to keep its default value.
    let wanted = AgentConfig {
        dev_mode: true,
        server_addr: "unix://@/tmp/foo.socket".to_string(),
        ..Default::default()
    };

    assert_eq!(wanted.debug_console, actual.debug_console);
    assert_eq!(wanted.dev_mode, actual.dev_mode);
    assert_eq!(
        wanted.unified_cgroup_hierarchy,
        actual.unified_cgroup_hierarchy,
    );
    assert_eq!(wanted.log_level, actual.log_level);
    assert_eq!(wanted.hotplug_timeout, actual.hotplug_timeout);
    assert_eq!(wanted.container_pipe_size, actual.container_pipe_size);
    assert_eq!(wanted.server_addr, actual.server_addr);
    assert_eq!(wanted.tracing, actual.tracing);
}
#[test]
fn test_logrus_to_slog_level() {
#[derive(Debug)]

View File

@@ -149,10 +149,8 @@ fn run_in_child(slave_fd: libc::c_int, shell: String) -> Result<()> {
// run shell
let _ = unistd::execvp(cmd.as_c_str(), &args).map_err(|e| match e {
nix::Error::Sys(errno) => {
std::process::exit(errno as i32);
}
_ => std::process::exit(-2),
nix::Error::UnknownErrno => std::process::exit(-2),
_ => std::process::exit(e as i32),
});
Ok(())

View File

@@ -3,7 +3,6 @@
// SPDX-License-Identifier: Apache-2.0
//
use libc::{c_uint, major, minor};
use nix::sys::stat;
use regex::Regex;
use std::collections::HashMap;
@@ -12,7 +11,7 @@ use std::fmt;
use std::fs;
use std::os::unix::ffi::OsStrExt;
use std::os::unix::fs::MetadataExt;
use std::path::Path;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::Arc;
use tokio::sync::Mutex;
@@ -23,7 +22,7 @@ use crate::linux_abi::*;
use crate::pci;
use crate::sandbox::Sandbox;
use crate::uevent::{wait_for_uevent, Uevent, UeventMatcher};
use anyhow::{anyhow, Result};
use anyhow::{anyhow, Context, Result};
use oci::{LinuxDeviceCgroup, LinuxResources, Spec};
use protocols::agent::Device;
use tracing::instrument;
@@ -53,15 +52,6 @@ pub const DRIVER_VFIO_GK_TYPE: &str = "vfio-gk";
// container as a VFIO device node
pub const DRIVER_VFIO_TYPE: &str = "vfio";
// Index information for one device in the OCI spec, as consumed by
// `update_spec_device`.
#[derive(Debug)]
struct DevIndexEntry {
// position of the device in `linux.devices`
idx: usize,
// positions of the matching entries in `linux.resources.devices`
residx: Vec<usize>,
}
// Lookup table from a device path (string key) to its `DevIndexEntry`;
// `update_spec_device` queries it with the device's host path.
#[derive(Debug)]
struct DevIndex(HashMap<String, DevIndexEntry>);
#[instrument]
pub fn online_device(path: &str) -> Result<()> {
fs::write(path, "1")?;
@@ -167,20 +157,22 @@ pub fn pcipath_to_sysfs(root_bus_sysfs: &str, pcipath: &pci::Path) -> Result<Str
let bridgebuspath = format!("{}{}/pci_bus", root_bus_sysfs, relpath);
let mut files: Vec<_> = fs::read_dir(&bridgebuspath)?.collect();
if files.len() != 1 {
return Err(anyhow!(
"Expected exactly one PCI bus in {}, got {} instead",
bridgebuspath,
files.len()
));
}
// unwrap is safe, because of the length test above
let busfile = files.pop().unwrap()?;
bus = busfile
.file_name()
.into_string()
.map_err(|e| anyhow!("Bad filename under {}: {:?}", &bridgebuspath, e))?;
match files.pop() {
Some(busfile) if files.is_empty() => {
bus = busfile?
.file_name()
.into_string()
.map_err(|e| anyhow!("Bad filename under {}: {:?}", &bridgebuspath, e))?;
}
_ => {
return Err(anyhow!(
"Expected exactly one PCI bus in {}, got {} instead",
bridgebuspath,
// Adjust to original value as we've already popped
files.len() + 1
));
}
};
}
Ok(relpath)
@@ -228,8 +220,9 @@ impl VirtioBlkPciMatcher {
fn new(relpath: &str) -> VirtioBlkPciMatcher {
let root_bus = create_pci_root_bus_path();
let re = format!(r"^{}{}/virtio[0-9]+/block/", root_bus, relpath);
VirtioBlkPciMatcher {
rex: Regex::new(&re).unwrap(),
rex: Regex::new(&re).expect("BUG: failed to compile VirtioBlkPciMatcher regex"),
}
}
}
@@ -267,7 +260,7 @@ impl VirtioBlkCCWMatcher {
root_bus_path, device
);
VirtioBlkCCWMatcher {
rex: Regex::new(&re).unwrap(),
rex: Regex::new(&re).expect("BUG: failed to compile VirtioBlkCCWMatcher regex"),
}
}
}
@@ -423,12 +416,15 @@ fn scan_scsi_bus(scsi_addr: &str) -> Result<()> {
for entry in fs::read_dir(SYSFS_SCSI_HOST_PATH)? {
let host = entry?.file_name();
let scan_path = format!(
"{}/{}/{}",
SYSFS_SCSI_HOST_PATH,
host.to_str().unwrap(),
"scan"
);
let host_str = host.to_str().ok_or_else(|| {
anyhow!(
"failed to convert directory entry to unicode for file {:?}",
host
)
})?;
let scan_path = PathBuf::from(&format!("{}/{}/{}", SYSFS_SCSI_HOST_PATH, host_str, "scan"));
fs::write(scan_path, &scan_data)?;
}
@@ -436,91 +432,201 @@ fn scan_scsi_bus(scsi_addr: &str) -> Result<()> {
Ok(())
}
// update_spec_device updates the device list in the OCI spec to make
// it include details appropriate for the VM, instead of the host. It
// is given the host path to the device (to locate the device in the
// original OCI spec) and the VM path which it uses to determine the
// VM major/minor numbers, and the final path with which to present
// the device in the (inner) container
#[instrument]
fn update_spec_device(
spec: &mut Spec,
devidx: &DevIndex,
host_path: &str,
vm_path: &str,
final_path: &str,
) -> Result<()> {
let major_id: c_uint;
let minor_id: c_uint;
// Represents the device number (major/minor) update needed in the OCI spec
// for a particular device.
#[derive(Debug, Clone)]
struct DevNumUpdate {
// the major and minor numbers for the device within the guest
guest_major: i64,
guest_minor: i64,
}
// If no container_path is provided, we won't be able to match and
// update the device in the OCI spec device list. This is an error.
if host_path.is_empty() {
return Err(anyhow!("Host path cannot empty for device"));
impl DevNumUpdate {
    // Builds the update from the device node found at `vm_path`: the node's
    // device id is read from its metadata and split into major and minor
    // numbers. Fails if the path doesn't exist or its metadata can't be read.
    fn from_vm_path<T: AsRef<Path>>(vm_path: T) -> Result<Self> {
        let dev_node = vm_path.as_ref();

        if !dev_node.exists() {
            return Err(anyhow!("VM device path {:?} doesn't exist", dev_node));
        }

        let rdev = fs::metadata(dev_node)?.rdev();

        Ok(DevNumUpdate {
            guest_major: stat::major(rdev) as i64,
            guest_minor: stat::minor(rdev) as i64,
        })
    }
}
// Represents the device-node and resource related updates to the OCI
// spec needed for a particular device
#[derive(Debug, Clone)]
struct DevUpdate {
// the guest major/minor numbers to apply to the device
num: DevNumUpdate,
// an optional new path to update the device to in the "inner" container
// specification
final_path: Option<String>,
}
impl DevUpdate {
    // Builds an update whose device numbers come from the device node at
    // `vm_path` and whose path inside the container is set to `final_path`.
    // Propagates any error from `DevNumUpdate::from_vm_path`.
    fn from_vm_path<T: AsRef<Path>>(vm_path: T, final_path: String) -> Result<Self> {
        let num = DevNumUpdate::from_vm_path(vm_path)?;

        Ok(DevUpdate {
            num,
            final_path: Some(final_path),
        })
    }
}
// Converts a bare device number update into a `DevUpdate` that changes only the
// major/minor numbers and leaves the device path untouched (`final_path: None`).
impl From<DevNumUpdate> for DevUpdate {
fn from(num: DevNumUpdate) -> Self {
DevUpdate {
num,
final_path: None,
}
}
}
// Represents the updates to the OCI spec needed for a particular device
#[derive(Debug, Clone, Default)]
struct SpecUpdate {
// optional device-node update (numbers and, possibly, path)
dev: Option<DevUpdate>,
// optional corrections for PCI addresses
pci: Vec<(pci::Address, pci::Address)>,
}
// Anything convertible into a `DevUpdate` can be turned into a `SpecUpdate`
// carrying that device update and an empty list of PCI address corrections.
impl<T: Into<DevUpdate>> From<T> for SpecUpdate {
fn from(dev: T) -> Self {
SpecUpdate {
dev: Some(dev.into()),
pci: Vec::new(),
}
}
}
// update_spec_devices updates the device list in the OCI spec to make
// it include details appropriate for the VM, instead of the host. It
// is given a map of (container_path => update) where:
// container_path: the path to the device in the original OCI spec
// update: information on changes to make to the device
#[instrument]
fn update_spec_devices(spec: &mut Spec, mut updates: HashMap<&str, DevUpdate>) -> Result<()> {
let linux = spec
.linux
.as_mut()
.ok_or_else(|| anyhow!("Spec didn't container linux field"))?;
.ok_or_else(|| anyhow!("Spec didn't contain linux field"))?;
let mut res_updates = HashMap::<(&str, i64, i64), DevNumUpdate>::with_capacity(updates.len());
if !Path::new(vm_path).exists() {
return Err(anyhow!("vm_path:{} doesn't exist", vm_path));
}
let meta = fs::metadata(vm_path)?;
let dev_id = meta.rdev();
unsafe {
major_id = major(dev_id);
minor_id = minor(dev_id);
}
info!(
sl!(),
"update_spec_device(): vm_path={}, major: {}, minor: {}\n", vm_path, major_id, minor_id
);
if let Some(idxdata) = devidx.0.get(host_path) {
let dev = &mut linux.devices[idxdata.idx];
let host_major = dev.major;
let host_minor = dev.minor;
dev.major = major_id as i64;
dev.minor = minor_id as i64;
dev.path = final_path.to_string();
info!(
sl!(),
"change the device from path: {} major: {} minor: {} to vm device path: {} major: {} minor: {}",
host_path,
host_major,
host_minor,
dev.path,
dev.major,
dev.minor,
);
// Resources must be updated since they are used to identify
// the device in the devices cgroup.
for ridx in &idxdata.residx {
// unwrap is safe, because residx would be empty if there
// were no resources
let res = &mut linux.resources.as_mut().unwrap().devices[*ridx];
res.major = Some(major_id as i64);
res.minor = Some(minor_id as i64);
for specdev in &mut linux.devices {
if let Some(update) = updates.remove(specdev.path.as_str()) {
let host_major = specdev.major;
let host_minor = specdev.minor;
info!(
sl!(),
"set resources for device major: {} minor: {}\n", major_id, minor_id
"update_spec_devices() updating device";
"container_path" => &specdev.path,
"type" => &specdev.r#type,
"host_major" => host_major,
"host_minor" => host_minor,
"guest_major" => update.num.guest_major,
"guest_minor" => update.num.guest_minor,
"final_path" => update.final_path.as_ref(),
);
specdev.major = update.num.guest_major;
specdev.minor = update.num.guest_minor;
if let Some(final_path) = update.final_path {
specdev.path = final_path;
}
if res_updates
.insert(
(specdev.r#type.as_str(), host_major, host_minor),
update.num,
)
.is_some()
{
return Err(anyhow!(
"Conflicting resource updates for host_major={} host_minor={}",
host_major,
host_minor
));
}
}
Ok(())
} else {
Err(anyhow!(
"Should have found a matching device {} in the spec",
vm_path
))
}
// Make sure we applied all of our updates
if !updates.is_empty() {
return Err(anyhow!(
"Missing devices in OCI spec: {:?}",
updates
.keys()
.map(|d| format!("{:?}", d))
.collect::<Vec<_>>()
.join(" ")
));
}
if let Some(resources) = linux.resources.as_mut() {
for r in &mut resources.devices {
if let (Some(host_major), Some(host_minor)) = (r.major, r.minor) {
if let Some(update) = res_updates.get(&(r.r#type.as_str(), host_major, host_minor))
{
info!(
sl!(),
"update_spec_devices() updating resource";
"type" => &r.r#type,
"host_major" => host_major,
"host_minor" => host_minor,
"guest_major" => update.guest_major,
"guest_minor" => update.guest_minor,
);
r.major = Some(update.guest_major);
r.minor = Some(update.guest_minor);
}
}
}
}
Ok(())
}
// update_spec_pci rewrites PCI addresses in the OCI spec so that they
// are guest addresses instead of host addresses. It is given a map of
// (host address => guest address).
//
// Only environment variables whose name starts with "PCIDEVICE_" are
// rewritten; their value is treated as a comma separated list of host
// PCI addresses. An error is returned if any environment entry has no
// '=', if an address can't be parsed, or if an address has no entry
// in the map.
#[instrument]
fn update_spec_pci(spec: &mut Spec, updates: HashMap<pci::Address, pci::Address>) -> Result<()> {
    let process = match spec.process.as_mut() {
        Some(p) => p,
        // No process section means there is no environment to correct
        None => return Ok(()),
    };

    // Correct PCI addresses in the environment
    for envvar in process.env.iter_mut() {
        let eqpos = envvar
            .find('=')
            .ok_or_else(|| anyhow!("Malformed OCI env entry {:?}", envvar))?;

        let name = &envvar[..eqpos];
        if !name.starts_with("PCIDEVICE_") {
            continue;
        }

        // Translate every host address in the list, stopping at the
        // first one which can't be parsed or translated
        let host_list = &envvar[eqpos + 1..];
        let guest_addrs = host_list
            .split(',')
            .map(|host_addr| -> Result<String> {
                let host_addr = pci::Address::from_str(host_addr)
                    .with_context(|| format!("Can't parse {} environment variable", name))?;

                updates
                    .get(&host_addr)
                    .map(|guest_addr| guest_addr.to_string())
                    .ok_or_else(|| anyhow!("Unable to translate host PCI address {}", host_addr))
            })
            .collect::<Result<Vec<String>>>()?;

        // Replace only the value part, keeping "NAME=" intact
        let translated = guest_addrs.join(",");
        envvar.replace_range(eqpos + 1.., translated.as_str());
    }

    Ok(())
}
// device.Id should be the predicted device name (vda, vdb, ...)
@@ -528,43 +634,25 @@ fn update_spec_device(
#[instrument]
async fn virtiommio_blk_device_handler(
device: &Device,
spec: &mut Spec,
_sandbox: &Arc<Mutex<Sandbox>>,
devidx: &DevIndex,
) -> Result<()> {
) -> Result<SpecUpdate> {
if device.vm_path.is_empty() {
return Err(anyhow!("Invalid path for virtio mmio blk device"));
}
update_spec_device(
spec,
devidx,
&device.container_path,
&device.vm_path,
&device.container_path,
)
Ok(DevNumUpdate::from_vm_path(&device.vm_path)?.into())
}
// device.Id should be a PCI path string
#[instrument]
async fn virtio_blk_device_handler(
device: &Device,
spec: &mut Spec,
sandbox: &Arc<Mutex<Sandbox>>,
devidx: &DevIndex,
) -> Result<()> {
let mut dev = device.clone();
) -> Result<SpecUpdate> {
let pcipath = pci::Path::from_str(&device.id)?;
let vm_path = get_virtio_blk_pci_device_name(sandbox, &pcipath).await?;
dev.vm_path = get_virtio_blk_pci_device_name(sandbox, &pcipath).await?;
update_spec_device(
spec,
devidx,
&dev.container_path,
&dev.vm_path,
&dev.container_path,
)
Ok(DevNumUpdate::from_vm_path(vm_path)?.into())
}
// device.id should be a CCW path string
@@ -572,30 +660,17 @@ async fn virtio_blk_device_handler(
#[instrument]
async fn virtio_blk_ccw_device_handler(
device: &Device,
spec: &mut Spec,
sandbox: &Arc<Mutex<Sandbox>>,
devidx: &DevIndex,
) -> Result<()> {
let mut dev = device.clone();
) -> Result<SpecUpdate> {
let ccw_device = ccw::Device::from_str(&device.id)?;
dev.vm_path = get_virtio_blk_ccw_device_name(sandbox, &ccw_device).await?;
update_spec_device(
spec,
devidx,
&dev.container_path,
&dev.vm_path,
&dev.container_path,
)
let vm_path = get_virtio_blk_ccw_device_name(sandbox, &ccw_device).await?;
Ok(DevNumUpdate::from_vm_path(vm_path)?.into())
}
#[cfg(not(target_arch = "s390x"))]
#[instrument]
async fn virtio_blk_ccw_device_handler(
_: &Device,
_: &mut Spec,
_: &Arc<Mutex<Sandbox>>,
_: &DevIndex,
) -> Result<()> {
async fn virtio_blk_ccw_device_handler(_: &Device, _: &Arc<Mutex<Sandbox>>) -> Result<SpecUpdate> {
Err(anyhow!("CCW is only supported on s390x"))
}
@@ -603,39 +678,23 @@ async fn virtio_blk_ccw_device_handler(
#[instrument]
async fn virtio_scsi_device_handler(
device: &Device,
spec: &mut Spec,
sandbox: &Arc<Mutex<Sandbox>>,
devidx: &DevIndex,
) -> Result<()> {
let mut dev = device.clone();
dev.vm_path = get_scsi_device_name(sandbox, &device.id).await?;
update_spec_device(
spec,
devidx,
&dev.container_path,
&dev.vm_path,
&dev.container_path,
)
) -> Result<SpecUpdate> {
let vm_path = get_scsi_device_name(sandbox, &device.id).await?;
Ok(DevNumUpdate::from_vm_path(vm_path)?.into())
}
#[instrument]
async fn virtio_nvdimm_device_handler(
device: &Device,
spec: &mut Spec,
_sandbox: &Arc<Mutex<Sandbox>>,
devidx: &DevIndex,
) -> Result<()> {
) -> Result<SpecUpdate> {
if device.vm_path.is_empty() {
return Err(anyhow!("Invalid path for nvdimm device"));
}
update_spec_device(
spec,
devidx,
&device.container_path,
&device.vm_path,
&device.container_path,
)
Ok(DevNumUpdate::from_vm_path(&device.vm_path)?.into())
}
fn split_vfio_option(opt: &str) -> Option<(&str, &str)> {
@@ -653,80 +712,53 @@ fn split_vfio_option(opt: &str) -> Option<(&str, &str)> {
// Each option should have the form "DDDD:BB:DD.F=<pcipath>"
// DDDD:BB:DD.F is the device's PCI address in the host
// <pcipath> is a PCI path to the device in the guest (see pci.rs)
async fn vfio_device_handler(
device: &Device,
spec: &mut Spec,
sandbox: &Arc<Mutex<Sandbox>>,
devidx: &DevIndex,
) -> Result<()> {
async fn vfio_device_handler(device: &Device, sandbox: &Arc<Mutex<Sandbox>>) -> Result<SpecUpdate> {
let vfio_in_guest = device.field_type != DRIVER_VFIO_GK_TYPE;
let mut pci_fixups = Vec::<(pci::Address, pci::Address)>::new();
let mut group = None;
for opt in device.options.iter() {
let (_, pcipath) =
let (host, pcipath) =
split_vfio_option(opt).ok_or_else(|| anyhow!("Malformed VFIO option {:?}", opt))?;
let host =
pci::Address::from_str(host).context("Bad host PCI address in VFIO option {:?}")?;
let pcipath = pci::Path::from_str(pcipath)?;
let guestdev = wait_for_pci_device(sandbox, &pcipath).await?;
if vfio_in_guest {
pci_driver_override(SYSFS_BUS_PCI_PATH, guestdev, "vfio-pci")?;
let devgroup = pci_iommu_group(SYSFS_BUS_PCI_PATH, guestdev)?;
if devgroup.is_none() {
// Devices must have an IOMMU group to be usable via VFIO
return Err(anyhow!("{} has no IOMMU group", guestdev));
}
// Devices must have an IOMMU group to be usable via VFIO
let devgroup = pci_iommu_group(SYSFS_BUS_PCI_PATH, guestdev)?
.ok_or_else(|| anyhow!("{} has no IOMMU group", guestdev))?;
if group.is_some() && group != devgroup {
// If PCI devices associated with the same VFIO device
// (and therefore group) in the host don't end up in
// the same group in the guest, something has gone
// horribly wrong
return Err(anyhow!(
"{} is not in guest IOMMU group {}",
guestdev,
group.unwrap()
));
}
group = devgroup;
}
}
if vfio_in_guest {
// If there are any devices at all, logic above ensures that group is not None
let group = group.unwrap();
let vmpath = get_vfio_device_name(sandbox, group).await?;
update_spec_device(spec, devidx, &device.container_path, &vmpath, &vmpath)?;
}
Ok(())
}
impl DevIndex {
fn new(spec: &Spec) -> DevIndex {
let mut map = HashMap::new();
if let Some(linux) = spec.linux.as_ref() {
for (i, d) in linux.devices.iter().enumerate() {
let mut residx = Vec::new();
if let Some(linuxres) = linux.resources.as_ref() {
for (j, r) in linuxres.devices.iter().enumerate() {
if r.r#type == d.r#type
&& r.major == Some(d.major)
&& r.minor == Some(d.minor)
{
residx.push(j);
}
}
if let Some(g) = group {
if g != devgroup {
return Err(anyhow!("{} is not in guest IOMMU group {}", guestdev, g));
}
map.insert(d.path.clone(), DevIndexEntry { idx: i, residx });
}
group = Some(devgroup);
pci_fixups.push((host, guestdev));
}
DevIndex(map)
}
let dev_update = if vfio_in_guest {
// If there are any devices at all, logic above ensures that group is not None
let group = group.ok_or_else(|| anyhow!("failed to get VFIO group"))?;
let vm_path = get_vfio_device_name(sandbox, group).await?;
Some(DevUpdate::from_vm_path(&vm_path, vm_path.clone())?)
} else {
None
};
Ok(SpecUpdate {
dev: dev_update,
pci: pci_fixups,
})
}
#[instrument]
@@ -735,22 +767,40 @@ pub async fn add_devices(
spec: &mut Spec,
sandbox: &Arc<Mutex<Sandbox>>,
) -> Result<()> {
let devidx = DevIndex::new(spec);
let mut dev_updates = HashMap::<&str, DevUpdate>::with_capacity(devices.len());
let mut pci_updates = HashMap::<pci::Address, pci::Address>::new();
for device in devices.iter() {
add_device(device, spec, sandbox, &devidx).await?;
let update = add_device(device, sandbox).await?;
if let Some(dev_update) = update.dev {
if dev_updates
.insert(&device.container_path, dev_update)
.is_some()
{
return Err(anyhow!(
"Conflicting device updates for {}",
&device.container_path
));
}
for (host, guest) in update.pci {
if let Some(other_guest) = pci_updates.insert(host, guest) {
return Err(anyhow!(
"Conflicting guest address for host device {} ({} versus {})",
host,
guest,
other_guest
));
}
}
}
}
Ok(())
update_spec_devices(spec, dev_updates)
}
#[instrument]
async fn add_device(
device: &Device,
spec: &mut Spec,
sandbox: &Arc<Mutex<Sandbox>>,
devidx: &DevIndex,
) -> Result<()> {
async fn add_device(device: &Device, sandbox: &Arc<Mutex<Sandbox>>) -> Result<SpecUpdate> {
// log before validation to help with debugging gRPC protocol version differences.
info!(sl!(), "device-id: {}, device-type: {}, device-vm-path: {}, device-container-path: {}, device-options: {:?}",
device.id, device.field_type, device.vm_path, device.container_path, device.options);
@@ -768,14 +818,12 @@ async fn add_device(
}
match device.field_type.as_str() {
DRIVER_BLK_TYPE => virtio_blk_device_handler(device, spec, sandbox, devidx).await,
DRIVER_BLK_CCW_TYPE => virtio_blk_ccw_device_handler(device, spec, sandbox, devidx).await,
DRIVER_MMIO_BLK_TYPE => virtiommio_blk_device_handler(device, spec, sandbox, devidx).await,
DRIVER_NVDIMM_TYPE => virtio_nvdimm_device_handler(device, spec, sandbox, devidx).await,
DRIVER_SCSI_TYPE => virtio_scsi_device_handler(device, spec, sandbox, devidx).await,
DRIVER_VFIO_GK_TYPE | DRIVER_VFIO_TYPE => {
vfio_device_handler(device, spec, sandbox, devidx).await
}
DRIVER_BLK_TYPE => virtio_blk_device_handler(device, sandbox).await,
DRIVER_BLK_CCW_TYPE => virtio_blk_ccw_device_handler(device, sandbox).await,
DRIVER_MMIO_BLK_TYPE => virtiommio_blk_device_handler(device, sandbox).await,
DRIVER_NVDIMM_TYPE => virtio_nvdimm_device_handler(device, sandbox).await,
DRIVER_SCSI_TYPE => virtio_scsi_device_handler(device, sandbox).await,
DRIVER_VFIO_GK_TYPE | DRIVER_VFIO_TYPE => vfio_device_handler(device, sandbox).await,
_ => Err(anyhow!("Unknown device type {}", device.field_type)),
}
}
@@ -795,11 +843,8 @@ pub fn update_device_cgroup(spec: &mut Spec) -> Result<()> {
.as_mut()
.ok_or_else(|| anyhow!("Spec didn't container linux field"))?;
if linux.resources.is_none() {
linux.resources = Some(LinuxResources::default());
}
let resources = linux.resources.get_or_insert(LinuxResources::default());
let resources = linux.resources.as_mut().unwrap();
resources.devices.push(LinuxDeviceCgroup {
allow: false,
major: Some(major),
@@ -815,7 +860,8 @@ pub fn update_device_cgroup(spec: &mut Spec) -> Result<()> {
mod tests {
use super::*;
use crate::uevent::spawn_test_watcher;
use oci::Linux;
use oci::{Linux, Process};
use std::iter::FromIterator;
use tempfile::tempdir;
#[test]
@@ -840,28 +886,36 @@ mod tests {
}
#[test]
fn test_update_spec_device() {
fn test_update_spec_devices() {
let (major, minor) = (7, 2);
let mut spec = Spec::default();
// container_path empty
let container_path = "";
let vm_path = "";
let devidx = DevIndex::new(&spec);
let res = update_spec_device(&mut spec, &devidx, container_path, vm_path, container_path);
assert!(res.is_err());
// vm_path empty
let update = DevNumUpdate::from_vm_path("");
assert!(update.is_err());
// linux is empty
let container_path = "/dev/null";
let devidx = DevIndex::new(&spec);
let res = update_spec_device(&mut spec, &devidx, container_path, vm_path, container_path);
let vm_path = "/dev/null";
let res = update_spec_devices(
&mut spec,
HashMap::from_iter(vec![(
container_path,
DevNumUpdate::from_vm_path(vm_path).unwrap().into(),
)]),
);
assert!(res.is_err());
spec.linux = Some(Linux::default());
// linux.devices is empty
let devidx = DevIndex::new(&spec);
let res = update_spec_device(&mut spec, &devidx, container_path, vm_path, container_path);
// linux.devices doesn't contain the updated device
let res = update_spec_devices(
&mut spec,
HashMap::from_iter(vec![(
container_path,
DevNumUpdate::from_vm_path(vm_path).unwrap().into(),
)]),
);
assert!(res.is_err());
spec.linux.as_mut().unwrap().devices = vec![oci::LinuxDevice {
@@ -871,16 +925,14 @@ mod tests {
..oci::LinuxDevice::default()
}];
// vm_path empty
let devidx = DevIndex::new(&spec);
let res = update_spec_device(&mut spec, &devidx, container_path, vm_path, container_path);
assert!(res.is_err());
let vm_path = "/dev/null";
// guest and host path are not the same
let devidx = DevIndex::new(&spec);
let res = update_spec_device(&mut spec, &devidx, container_path, vm_path, container_path);
let res = update_spec_devices(
&mut spec,
HashMap::from_iter(vec![(
container_path,
DevNumUpdate::from_vm_path(vm_path).unwrap().into(),
)]),
);
assert!(
res.is_err(),
"container_path={:?} vm_path={:?} spec={:?}",
@@ -892,8 +944,13 @@ mod tests {
spec.linux.as_mut().unwrap().devices[0].path = container_path.to_string();
// spec.linux.resources is empty
let devidx = DevIndex::new(&spec);
let res = update_spec_device(&mut spec, &devidx, container_path, vm_path, container_path);
let res = update_spec_devices(
&mut spec,
HashMap::from_iter(vec![(
container_path,
DevNumUpdate::from_vm_path(vm_path).unwrap().into(),
)]),
);
assert!(res.is_ok());
// update both devices and cgroup lists
@@ -913,13 +970,18 @@ mod tests {
..oci::LinuxResources::default()
});
let devidx = DevIndex::new(&spec);
let res = update_spec_device(&mut spec, &devidx, container_path, vm_path, container_path);
let res = update_spec_devices(
&mut spec,
HashMap::from_iter(vec![(
container_path,
DevNumUpdate::from_vm_path(vm_path).unwrap().into(),
)]),
);
assert!(res.is_ok());
}
#[test]
fn test_update_spec_device_guest_host_conflict() {
fn test_update_spec_devices_guest_host_conflict() {
let null_rdev = fs::metadata("/dev/null").unwrap().rdev();
let zero_rdev = fs::metadata("/dev/zero").unwrap().rdev();
let full_rdev = fs::metadata("/dev/full").unwrap().rdev();
@@ -968,7 +1030,6 @@ mod tests {
}),
..Spec::default()
};
let devidx = DevIndex::new(&spec);
let container_path_a = "/dev/a";
let vm_path_a = "/dev/zero";
@@ -994,34 +1055,17 @@ mod tests {
assert_eq!(Some(host_major_b), specresources.devices[1].major);
assert_eq!(Some(host_minor_b), specresources.devices[1].minor);
let res = update_spec_device(
&mut spec,
&devidx,
container_path_a,
vm_path_a,
container_path_a,
);
assert!(res.is_ok());
let specdevices = &spec.linux.as_ref().unwrap().devices;
assert_eq!(guest_major_a, specdevices[0].major);
assert_eq!(guest_minor_a, specdevices[0].minor);
assert_eq!(host_major_b, specdevices[1].major);
assert_eq!(host_minor_b, specdevices[1].minor);
let specresources = spec.linux.as_ref().unwrap().resources.as_ref().unwrap();
assert_eq!(Some(guest_major_a), specresources.devices[0].major);
assert_eq!(Some(guest_minor_a), specresources.devices[0].minor);
assert_eq!(Some(host_major_b), specresources.devices[1].major);
assert_eq!(Some(host_minor_b), specresources.devices[1].minor);
let res = update_spec_device(
&mut spec,
&devidx,
container_path_b,
vm_path_b,
container_path_b,
);
let updates = HashMap::from_iter(vec![
(
container_path_a,
DevNumUpdate::from_vm_path(vm_path_a).unwrap().into(),
),
(
container_path_b,
DevNumUpdate::from_vm_path(vm_path_b).unwrap().into(),
),
]);
let res = update_spec_devices(&mut spec, updates);
assert!(res.is_ok());
let specdevices = &spec.linux.as_ref().unwrap().devices;
@@ -1038,7 +1082,7 @@ mod tests {
}
#[test]
fn test_update_spec_device_char_block_conflict() {
fn test_update_spec_devices_char_block_conflict() {
let null_rdev = fs::metadata("/dev/null").unwrap().rdev();
let guest_major = stat::major(null_rdev) as i64;
@@ -1085,7 +1129,6 @@ mod tests {
}),
..Spec::default()
};
let devidx = DevIndex::new(&spec);
let container_path = "/dev/char";
let vm_path = "/dev/null";
@@ -1096,7 +1139,13 @@ mod tests {
assert_eq!(Some(host_major), specresources.devices[1].major);
assert_eq!(Some(host_minor), specresources.devices[1].minor);
let res = update_spec_device(&mut spec, &devidx, container_path, vm_path, container_path);
let res = update_spec_devices(
&mut spec,
HashMap::from_iter(vec![(
container_path,
DevNumUpdate::from_vm_path(vm_path).unwrap().into(),
)]),
);
assert!(res.is_ok());
// Only the char device, not the block device should be updated
@@ -1108,19 +1157,19 @@ mod tests {
}
#[test]
fn test_update_spec_device_final_path() {
fn test_update_spec_devices_final_path() {
let null_rdev = fs::metadata("/dev/null").unwrap().rdev();
let guest_major = stat::major(null_rdev) as i64;
let guest_minor = stat::minor(null_rdev) as i64;
let host_path = "/dev/host";
let container_path = "/dev/original";
let host_major: i64 = 99;
let host_minor: i64 = 99;
let mut spec = Spec {
linux: Some(Linux {
devices: vec![oci::LinuxDevice {
path: host_path.to_string(),
path: container_path.to_string(),
r#type: "c".to_string(),
major: host_major,
minor: host_minor,
@@ -1130,12 +1179,17 @@ mod tests {
}),
..Spec::default()
};
let devidx = DevIndex::new(&spec);
let vm_path = "/dev/null";
let final_path = "/dev/final";
let final_path = "/dev/new";
let res = update_spec_device(&mut spec, &devidx, host_path, vm_path, final_path);
let res = update_spec_devices(
&mut spec,
HashMap::from_iter(vec![(
container_path,
DevUpdate::from_vm_path(vm_path, final_path.to_string()).unwrap(),
)]),
);
assert!(res.is_ok());
let specdevices = &spec.linux.as_ref().unwrap().devices;
@@ -1144,6 +1198,48 @@ mod tests {
assert_eq!(final_path, specdevices[0].path);
}
#[test]
fn test_update_spec_pci() {
    // Each entry is a (host, guest) pair of PCI addresses
    let example_map = [
        ("0000:1a:01.0", "0000:01:01.0"),
        ("0000:1b:02.0", "0000:01:02.0"),
        // The host address here equals a guest address from the first
        // entry, which checks that nothing gets translated twice
        ("0000:01:01.0", "ffff:02:1f.7"),
    ];

    let mut pci_fixups = HashMap::<pci::Address, pci::Address>::new();
    for (host, guest) in example_map.iter() {
        pci_fixups.insert(
            pci::Address::from_str(host).unwrap(),
            pci::Address::from_str(guest).unwrap(),
        );
    }

    let env = vec![
        "PCIDEVICE_x=0000:1a:01.0,0000:1b:02.0".to_string(),
        "PCIDEVICE_y=0000:01:01.0".to_string(),
        "NOTAPCIDEVICE_blah=abcd:ef:01.0".to_string(),
    ];
    let mut spec = Spec {
        process: Some(Process {
            env,
            ..Process::default()
        }),
        ..Spec::default()
    };

    let res = update_spec_pci(&mut spec, pci_fixups);
    assert!(res.is_ok());

    // The PCIDEVICE_ entries are translated, the unrelated one is untouched
    let env = &spec.process.as_ref().unwrap().env;
    assert_eq!(env[0], "PCIDEVICE_x=0000:01:01.0,0000:01:02.0");
    assert_eq!(env[1], "PCIDEVICE_y=ffff:02:1f.7");
    assert_eq!(env[2], "NOTAPCIDEVICE_blah=abcd:ef:01.0");
}
#[test]
fn test_pcipath_to_sysfs() {
let testdir = tempdir().expect("failed to create tmpdir");

View File

@@ -20,6 +20,7 @@ extern crate scopeguard;
extern crate slog;
use anyhow::{anyhow, Context, Result};
use clap::{AppSettings, Parser};
use nix::fcntl::OFlag;
use nix::sys::socket::{self, AddressFamily, SockAddr, SockFlag, SockType};
use nix::unistd::{self, dup, Pid};
@@ -80,10 +81,32 @@ const NAME: &str = "kata-agent";
lazy_static! {
static ref AGENT_CONFIG: Arc<RwLock<AgentConfig>> = Arc::new(RwLock::new(
AgentConfig::from_cmdline("/proc/cmdline").unwrap()
// Note: We can't do AgentOpts.parse() here to send through the processed arguments to AgentConfig
// clap::Parser::parse() greedily process all command line input including cargo test parameters,
// so should only be used inside main.
AgentConfig::from_cmdline("/proc/cmdline", env::args().collect()).unwrap()
));
}
// Command line options of the agent binary; main() obtains them with
// AgentOpts::parse().
// NOTE: the `///` comments below are picked up by clap as help text, so
// explanatory notes in this struct use plain `//` comments.
#[derive(Parser)]
// The default clap version info doesn't match our form, so we need to override it
#[clap(global_setting(AppSettings::DisableVersionFlag))]
struct AgentOpts {
// When set, main() prints the agent's own version line (clap's built-in
// version flag is disabled above).
/// Print the version information
#[clap(short, long)]
version: bool,
// Optional subcommand; main() checks it for SubCommand::Init.
#[clap(subcommand)]
subcmd: Option<SubCommand>,
/// Specify a custom agent config file
#[clap(short, long)]
config: Option<String>,
}
// Subcommands accepted on the agent command line.
#[derive(Parser)]
enum SubCommand {
// When selected, main() calls reset_sigpipe() and
// rustjail::container::init_child(), then exits with status 0.
Init {},
}
#[instrument]
fn announce(logger: &Logger, config: &AgentConfig) {
info!(logger, "announce";
@@ -113,10 +136,10 @@ async fn create_logger_task(rfd: RawFd, vsock_port: u32, shutdown: Receiver<bool
)?;
let addr = SockAddr::new_vsock(libc::VMADDR_CID_ANY, vsock_port);
socket::bind(listenfd, &addr).unwrap();
socket::listen(listenfd, 1).unwrap();
socket::bind(listenfd, &addr)?;
socket::listen(listenfd, 1)?;
writer = Box::new(util::get_vsock_stream(listenfd).await.unwrap());
writer = Box::new(util::get_vsock_stream(listenfd).await?);
} else {
writer = Box::new(tokio::io::stdout());
}
@@ -255,9 +278,9 @@ async fn real_main() -> std::result::Result<(), Box<dyn std::error::Error>> {
}
fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
let args: Vec<String> = env::args().collect();
let args = AgentOpts::parse();
if args.len() == 2 && args[1] == "--version" {
if args.version {
println!(
"{} version {} (api version: {}, commit version: {}, type: rust)",
NAME,
@@ -265,11 +288,10 @@ fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
version::API_VERSION,
version::VERSION_COMMIT,
);
exit(0);
}
if args.len() == 2 && args[1] == "init" {
if let Some(SubCommand::Init {}) = args.subcmd {
reset_sigpipe();
rustjail::container::init_child();
exit(0);
@@ -326,7 +348,7 @@ async fn start_sandbox(
sandbox.lock().await.sender = Some(tx);
// vsock:///dev/vsock, port
let mut server = rpc::start(sandbox.clone(), config.server_addr.as_str());
let mut server = rpc::start(sandbox.clone(), config.server_addr.as_str())?;
server.start().await?;
rx.await?;

View File

@@ -8,6 +8,7 @@ extern crate procfs;
use prometheus::{Encoder, Gauge, GaugeVec, IntCounter, TextEncoder};
use anyhow::Result;
use slog::warn;
use tracing::instrument;
const NAMESPACE_KATA_AGENT: &str = "kata_agent";
@@ -74,7 +75,7 @@ pub fn get_metrics(_: &protocols::agent::GetMetricsRequest) -> Result<String> {
AGENT_SCRAPE_COUNT.inc();
// update agent process metrics
update_agent_metrics();
update_agent_metrics()?;
// update guest os metrics
update_guest_metrics();
@@ -84,23 +85,26 @@ pub fn get_metrics(_: &protocols::agent::GetMetricsRequest) -> Result<String> {
let mut buffer = Vec::new();
let encoder = TextEncoder::new();
encoder.encode(&metric_families, &mut buffer).unwrap();
encoder.encode(&metric_families, &mut buffer)?;
Ok(String::from_utf8(buffer).unwrap())
Ok(String::from_utf8(buffer)?)
}
#[instrument]
fn update_agent_metrics() {
fn update_agent_metrics() -> Result<()> {
let me = procfs::process::Process::myself();
if let Err(err) = me {
error!(sl!(), "failed to create process instance: {:?}", err);
return;
}
let me = match me {
Ok(p) => p,
Err(e) => {
// FIXME: return Ok for all errors?
warn!(sl!(), "failed to create process instance: {:?}", e);
let me = me.unwrap();
return Ok(());
}
};
let tps = procfs::ticks_per_second().unwrap();
let tps = procfs::ticks_per_second()?;
// process total time
AGENT_TOTAL_TIME.set((me.stat.utime + me.stat.stime) as f64 / (tps as f64));
@@ -109,7 +113,7 @@ fn update_agent_metrics() {
AGENT_TOTAL_VM.set(me.stat.vsize as f64);
// Total resident set
let page_size = procfs::page_size().unwrap() as f64;
let page_size = procfs::page_size()? as f64;
AGENT_TOTAL_RSS.set(me.stat.rss as f64 * page_size);
// io
@@ -132,11 +136,11 @@ fn update_agent_metrics() {
}
match me.status() {
Err(err) => {
info!(sl!(), "failed to get process status: {:?}", err);
}
Err(err) => error!(sl!(), "failed to get process status: {:?}", err),
Ok(status) => set_gauge_vec_proc_status(&AGENT_PROC_STATUS, &status),
}
Ok(())
}
#[instrument]

View File

@@ -139,8 +139,8 @@ pub const STORAGE_HANDLER_LIST: &[&str] = &[
#[instrument]
pub fn baremount(
source: &str,
destination: &str,
source: &Path,
destination: &Path,
fs_type: &str,
flags: MsFlags,
options: &str,
@@ -148,11 +148,11 @@ pub fn baremount(
) -> Result<()> {
let logger = logger.new(o!("subsystem" => "baremount"));
if source.is_empty() {
if source.as_os_str().is_empty() {
return Err(anyhow!("need mount source"));
}
if destination.is_empty() {
if destination.as_os_str().is_empty() {
return Err(anyhow!("need mount destination"));
}
@@ -448,16 +448,18 @@ fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
let options_vec = options_vec.iter().map(String::as_str).collect();
let (flags, options) = parse_mount_flags_and_options(options_vec);
let source = Path::new(&storage.source);
info!(logger, "mounting storage";
"mount-source:" => storage.source.as_str(),
"mount-destination" => storage.mount_point.as_str(),
"mount-source" => source.display(),
"mount-destination" => mount_path.display(),
"mount-fstype" => storage.fstype.as_str(),
"mount-options" => options.as_str(),
);
baremount(
storage.source.as_str(),
storage.mount_point.as_str(),
source,
mount_path,
storage.fstype.as_str(),
flags,
options.as_str(),
@@ -585,7 +587,10 @@ fn mount_to_rootfs(logger: &Logger, m: &InitMount) -> Result<()> {
fs::create_dir_all(Path::new(m.dest)).context("could not create directory")?;
baremount(m.src, m.dest, m.fstype, flags, &options, logger).or_else(|e| {
let source = Path::new(m.src);
let dest = Path::new(m.dest);
baremount(source, dest, m.fstype, flags, &options, logger).or_else(|e| {
if m.src != "dev" {
return Err(e);
}
@@ -628,8 +633,7 @@ pub fn get_mount_fs_type_from_file(mount_file: &str, mount_point: &str) -> Resul
let file = File::open(mount_file)?;
let reader = BufReader::new(file);
let re = Regex::new(format!("device .+ mounted on {} with fstype (.+)", mount_point).as_str())
.unwrap();
let re = Regex::new(format!("device .+ mounted on {} with fstype (.+)", mount_point).as_str())?;
// Read the file line by line using the lines() iterator from std::io::BufRead.
for (_index, line) in reader.lines().enumerate() {
@@ -707,20 +711,21 @@ pub fn get_cgroup_mounts(
}
}
if fields[0].is_empty() {
let subsystem_name = fields[0];
if subsystem_name.is_empty() {
continue;
}
if fields[0] == "devices" {
if subsystem_name == "devices" {
has_device_cgroup = true;
}
if let Some(value) = CGROUPS.get(&fields[0]) {
let key = CGROUPS.keys().find(|&&f| f == fields[0]).unwrap();
if let Some((key, value)) = CGROUPS.get_key_value(subsystem_name) {
cg_mounts.push(InitMount {
fstype: "cgroup",
src: "cgroup",
dest: *value,
dest: value,
options: vec!["nosuid", "nodev", "noexec", "relatime", key],
});
}
@@ -773,10 +778,9 @@ fn ensure_destination_file_exists(path: &Path) -> Result<()> {
return Err(anyhow!("{:?} exists but is not a regular file", path));
}
// The only way parent() can return None is if the path is /,
// which always exists, so the test above will already have caught
// it, thus the unwrap() is safe
let dir = path.parent().unwrap();
let dir = path
.parent()
.ok_or_else(|| anyhow!("failed to find parent path for {:?}", path))?;
fs::create_dir_all(dir).context(format!("create_dir_all {:?}", dir))?;
@@ -943,14 +947,10 @@ mod tests {
std::fs::create_dir_all(d).expect("failed to created directory");
}
let result = baremount(
&src_filename,
&dest_filename,
d.fs_type,
d.flags,
d.options,
&logger,
);
let src = Path::new(&src_filename);
let dest = Path::new(&dest_filename);
let result = baremount(src, dest, d.fs_type, d.flags, d.options, &logger);
let msg = format!("{}: result: {:?}", msg, result);
@@ -1027,15 +1027,11 @@ mod tests {
.unwrap_or_else(|_| panic!("failed to create directory {}", d));
}
let src = Path::new(mnt_src_filename);
let dest = Path::new(mnt_dest_filename);
// Create an actual mount
let result = baremount(
mnt_src_filename,
mnt_dest_filename,
"bind",
MsFlags::MS_BIND,
"",
&logger,
);
let result = baremount(src, dest, "bind", MsFlags::MS_BIND, "", &logger);
assert!(result.is_ok(), "mount for test setup failed");
let tests = &[

View File

@@ -104,7 +104,10 @@ impl Namespace {
if let Err(err) = || -> Result<()> {
let origin_ns_path = get_current_thread_ns_path(ns_type.get());
File::open(Path::new(&origin_ns_path))?;
let source = Path::new(&origin_ns_path);
let destination = new_ns_path.as_path();
File::open(&source)?;
// Create a new netns on the current thread.
let cf = ns_type.get_flags();
@@ -115,8 +118,6 @@ impl Namespace {
nix::unistd::sethostname(hostname.unwrap())?;
}
// Bind mount the new namespace from the current thread onto the mount point to persist it.
let source: &str = origin_ns_path.as_str();
let destination: &str = new_ns_path.as_path().to_str().unwrap_or("none");
let mut flags = MsFlags::empty();
@@ -131,7 +132,7 @@ impl Namespace {
baremount(source, destination, "none", flags, "", &logger).map_err(|e| {
anyhow!(
"Failed to mount {} to {} with err:{:?}",
"Failed to mount {:?} to {:?} with err:{:?}",
source,
destination,
e
@@ -250,4 +251,126 @@ mod tests {
assert_eq!("pid", pid.get());
assert_eq!(CloneFlags::CLONE_NEWPID, pid.get_flags());
}
#[test]
fn test_new() {
    // A logger that discards everything is enough for this test.
    let discard_logger = slog::Logger::root(slog::Discard, o!());

    // A namespace straight out of new() is expected to be of the IPC type.
    let namespace = Namespace::new(&discard_logger);
    assert_eq!(NamespaceType::Ipc, namespace.ns_type);
}
#[test]
fn test_get_ipc() {
    // A logger that discards everything is enough for this test.
    let discard_logger = slog::Logger::root(slog::Discard, o!());

    // get_ipc() must yield a namespace of the IPC type.
    let namespace = Namespace::new(&discard_logger).get_ipc();
    assert_eq!(NamespaceType::Ipc, namespace.ns_type);
}
#[test]
fn test_get_uts_with_hostname() {
    // A logger that discards everything is enough for this test.
    let discard_logger = slog::Logger::root(slog::Discard, o!());

    // A non-empty hostname must be kept by the UTS namespace.
    let namespace = Namespace::new(&discard_logger).get_uts("a.test.com");
    assert_eq!(NamespaceType::Uts, namespace.ns_type);
    assert!(namespace.hostname.is_some());
}
#[test]
fn test_get_uts() {
    // A logger that discards everything is enough for this test.
    let discard_logger = slog::Logger::root(slog::Discard, o!());

    // An empty hostname must result in no hostname being recorded.
    let namespace = Namespace::new(&discard_logger).get_uts("");
    assert_eq!(NamespaceType::Uts, namespace.ns_type);
    assert!(namespace.hostname.is_none());
}
#[test]
fn test_get_pid() {
    // A logger that discards everything is enough for this test.
    let discard_logger = slog::Logger::root(slog::Discard, o!());

    // get_pid() must yield a namespace of the PID type.
    let namespace = Namespace::new(&discard_logger).get_pid();
    assert_eq!(NamespaceType::Pid, namespace.ns_type);
}
#[test]
fn test_set_root_dir() {
    // Temporary directory used as the namespace root for this test.
    let tmpdir = Builder::new().prefix("pid").tempdir().unwrap();
    let root_dir = tmpdir.path().to_str().unwrap();

    // A logger that discards everything is enough for this test.
    let discard_logger = slog::Logger::root(slog::Discard, o!());

    // Setting the root directory records it and leaves the type untouched.
    let namespace = Namespace::new(&discard_logger).set_root_dir(root_dir);
    assert_eq!(NamespaceType::Ipc, namespace.ns_type);
    assert_eq!(namespace.persistent_ns_dir, root_dir);
}
#[test]
fn test_namespace_type_get() {
    // One row per namespace type, paired with the name `get()` must return.
    #[derive(Debug)]
    struct TestData<'a> {
        ns_type: NamespaceType,
        str: &'a str,
    }

    let cases = &[
        TestData {
            ns_type: NamespaceType::Ipc,
            str: "ipc",
        },
        TestData {
            ns_type: NamespaceType::Uts,
            str: "uts",
        },
        TestData {
            ns_type: NamespaceType::Pid,
            str: "pid",
        },
    ];

    for (idx, case) in cases.iter().enumerate() {
        // On failure, report the row index together with the row contents.
        assert_eq!(case.str, case.ns_type.get(), "test[{}]: {:?}", idx, case);
    }
}
#[test]
fn test_namespace_type_get_flags() {
    // One row per namespace type, paired with the clone flag that
    // `get_flags()` must return.
    #[derive(Debug)]
    struct TestData {
        ns_type: NamespaceType,
        ns_flag: CloneFlags,
    }

    let cases = &[
        TestData {
            ns_type: NamespaceType::Ipc,
            ns_flag: CloneFlags::CLONE_NEWIPC,
        },
        TestData {
            ns_type: NamespaceType::Uts,
            ns_flag: CloneFlags::CLONE_NEWUTS,
        },
        TestData {
            ns_type: NamespaceType::Pid,
            ns_flag: CloneFlags::CLONE_NEWPID,
        },
    ];

    for (idx, case) in cases.iter().enumerate() {
        // On failure, report the row index together with the row contents.
        assert_eq!(
            case.ns_flag,
            case.ns_type.get_flags(),
            "test[{}]: {:?}",
            idx,
            case
        );
    }
}
}

View File

@@ -523,7 +523,7 @@ impl Handle {
.as_ref()
.map(|to| to.address.as_str()) // Extract address field
.and_then(|addr| if addr.is_empty() { None } else { Some(addr) }) // Make sure it's not empty
.ok_or(nix::Error::Sys(nix::errno::Errno::EINVAL))?;
.ok_or(anyhow!(nix::Error::EINVAL))?;
let ip = IpAddr::from_str(ip_address)
.map_err(|e| anyhow!("Failed to parse IP {}: {:?}", ip_address, e))?;
@@ -612,12 +612,7 @@ fn parse_mac_address(addr: &str) -> Result<[u8; 6]> {
// Parse single Mac address block
let mut parse_next = || -> Result<u8> {
let v = u8::from_str_radix(
split
.next()
.ok_or(nix::Error::Sys(nix::errno::Errno::EINVAL))?,
16,
)?;
let v = u8::from_str_radix(split.next().ok_or(anyhow!(nix::Error::EINVAL))?, 16)?;
Ok(v)
};

View File

@@ -5,30 +5,22 @@
use anyhow::{anyhow, Result};
use nix::mount::{self, MsFlags};
use protocols::types::{Interface, Route};
use slog::Logger;
use std::collections::HashMap;
use std::fs;
const KATA_GUEST_SANDBOX_DNS_FILE: &str = "/run/kata-containers/sandbox/resolv.conf";
const GUEST_DNS_FILE: &str = "/etc/resolv.conf";
// Network fully describes a sandbox network with its interfaces, routes and dns
// Network describes a sandbox network, including its dns
// related information.
#[derive(Debug, Default)]
pub struct Network {
ifaces: HashMap<String, Interface>,
routes: Vec<Route>,
dns: Vec<String>,
}
impl Network {
pub fn new() -> Network {
Network {
ifaces: HashMap::new(),
routes: Vec::new(),
dns: Vec::new(),
}
Network { dns: Vec::new() }
}
pub fn set_dns(&mut self, dns: String) {

View File

@@ -20,7 +20,7 @@ const FUNCTION_MAX: u8 = (1 << FUNCTION_BITS) - 1;
// Represents a PCI function's slot (a.k.a. device) and function
// numbers, giving its location on a single logical bus
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub struct SlotFn(u8);
impl SlotFn {
@@ -94,7 +94,7 @@ impl fmt::Display for SlotFn {
}
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub struct Address {
domain: u16,
bus: u8,

View File

@@ -14,7 +14,7 @@ use std::path::Path;
use std::sync::Arc;
use ttrpc::{
self,
error::get_rpc_status as ttrpc_error,
error::get_rpc_status,
r#async::{Server as TtrpcServer, TtrpcContext},
};
@@ -86,6 +86,13 @@ macro_rules! sl {
};
}
// Build a ttrpc error status from a status code and an error value.
//
// The value is rendered with `{:?}` to form the status message, so any
// `Debug` type is accepted. NOTE: string arguments are Debug-formatted too,
// which means they appear quoted in the resulting message.
// A trailing comma after the second argument is allowed.
macro_rules! ttrpc_error {
    ($status:path, $cause:expr $(,)?) => {
        get_rpc_status($status, format!("{:?}", $cause))
    };
}
macro_rules! is_allowed {
($req:ident) => {
if !AGENT_CONFIG
@@ -93,7 +100,7 @@ macro_rules! is_allowed {
.await
.is_allowed_endpoint($req.descriptor().name())
{
return Err(ttrpc_error(
return Err(ttrpc_error!(
ttrpc::Code::UNIMPLEMENTED,
format!("{} is blocked", $req.descriptor().name()),
));
@@ -111,11 +118,18 @@ pub struct AgentService {
// ^[a-zA-Z0-9][a-zA-Z0-9_.-]+$
//
fn verify_cid(id: &str) -> Result<()> {
let valid = id.len() > 1
&& id.chars().next().unwrap().is_alphanumeric()
&& id
.chars()
.all(|c| (c.is_alphanumeric() || ['.', '-', '_'].contains(&c)));
let mut chars = id.chars();
let valid = match chars.next() {
Some(first)
if first.is_alphanumeric()
&& id.len() > 1
&& chars.all(|c| c.is_alphanumeric() || ['.', '-', '_'].contains(&c)) =>
{
true
}
_ => false,
};
match valid {
true => Ok(()),
@@ -143,7 +157,7 @@ impl AgentService {
Some(spec) => rustjail::grpc_to_oci(spec),
None => {
error!(sl!(), "no oci spec in the create container request!");
return Err(anyhow!(nix::Error::from_errno(nix::errno::Errno::EINVAL)));
return Err(anyhow!(nix::Error::EINVAL));
}
};
@@ -186,7 +200,7 @@ impl AgentService {
update_device_cgroup(&mut oci)?;
// Append guest hooks
append_guest_hooks(&s, &mut oci);
append_guest_hooks(&s, &mut oci)?;
// write spec to bundle path, hooks might
// read ocispec
@@ -208,21 +222,14 @@ impl AgentService {
LinuxContainer::new(cid.as_str(), CONTAINER_BASE, opts, &sl!())?;
let pipe_size = AGENT_CONFIG.read().await.container_pipe_size;
let p = if oci.process.is_some() {
Process::new(
&sl!(),
oci.process.as_ref().unwrap(),
cid.as_str(),
true,
pipe_size,
)?
let p = if let Some(p) = oci.process {
Process::new(&sl!(), &p, cid.as_str(), true, pipe_size)?
} else {
info!(sl!(), "no process configurations!");
return Err(anyhow!(nix::Error::from_errno(nix::errno::Errno::EINVAL)));
return Err(anyhow!(nix::Error::EINVAL));
};
ctr.start(p).await?;
s.update_shared_pidns(&ctr)?;
s.add_container(ctr);
info!(sl!(), "created container!");
@@ -244,11 +251,17 @@ impl AgentService {
ctr.exec()?;
if sid == cid {
return Ok(());
}
// start oom event loop
if sid != cid && ctr.cgroup_manager.is_some() {
let cg_path = ctr.cgroup_manager.as_ref().unwrap().get_cg_path("memory");
if cg_path.is_some() {
let rx = notifier::notify_oom(cid.as_str(), cg_path.unwrap()).await?;
if let Some(ref ctr) = ctr.cgroup_manager {
let cg_path = ctr.get_cg_path("memory");
if let Some(cg_path) = cg_path {
let rx = notifier::notify_oom(cid.as_str(), cg_path.to_string()).await?;
s.run_oom_event_monitor(rx, cid.clone()).await;
}
}
@@ -321,13 +334,11 @@ impl AgentService {
.await
.is_err()
{
return Err(anyhow!(nix::Error::from_errno(nix::errno::Errno::ETIME)));
return Err(anyhow!(nix::Error::ETIME));
}
if handle.await.is_err() {
return Err(anyhow!(nix::Error::from_errno(
nix::errno::Errno::UnknownErrno
)));
return Err(anyhow!(nix::Error::UnknownErrno));
}
let s = self.sandbox.clone();
@@ -348,14 +359,13 @@ impl AgentService {
let s = self.sandbox.clone();
let mut sandbox = s.lock().await;
let process = if req.process.is_some() {
req.process.as_ref().unwrap()
} else {
return Err(anyhow!(nix::Error::from_errno(nix::errno::Errno::EINVAL)));
};
let process = req
.process
.into_option()
.ok_or_else(|| anyhow!(nix::Error::EINVAL))?;
let pipe_size = AGENT_CONFIG.read().await.container_pipe_size;
let ocip = rustjail::process_grpc_to_oci(process);
let ocip = rustjail::process_grpc_to_oci(&process);
let p = Process::new(&sl!(), &ocip, exec_id.as_str(), false, pipe_size)?;
let ctr = sandbox
@@ -373,7 +383,6 @@ impl AgentService {
let eid = req.exec_id.clone();
let s = self.sandbox.clone();
let mut sandbox = s.lock().await;
let mut init = false;
info!(
sl!(),
@@ -382,13 +391,14 @@ impl AgentService {
"exec-id" => eid.clone(),
);
if eid.is_empty() {
init = true;
}
let p = sandbox.find_container_process(cid.as_str(), eid.as_str())?;
let p = find_process(&mut sandbox, cid.as_str(), eid.as_str(), init)?;
let mut signal = Signal::try_from(req.signal as i32).unwrap();
let mut signal = Signal::try_from(req.signal as i32).map_err(|e| {
anyhow!(e).context(format!(
"failed to convert {:?} to signal (container-id: {}, exec-id: {})",
req.signal, cid, eid
))
})?;
// For container initProcess, if it hasn't installed handler for "SIGTERM" signal,
// it will ignore the "SIGTERM" signal sent to it, thus send it "SIGKILL" signal
@@ -424,7 +434,7 @@ impl AgentService {
let exit_rx = {
let mut sandbox = s.lock().await;
let p = find_process(&mut sandbox, cid.as_str(), eid.as_str(), false)?;
let p = sandbox.find_container_process(cid.as_str(), eid.as_str())?;
p.exit_watchers.push(exit_send);
pid = p.pid;
@@ -447,7 +457,11 @@ impl AgentService {
Some(p) => p,
None => {
// Lost race, pick up exit code from channel
resp.status = exit_recv.recv().await.unwrap();
resp.status = exit_recv
.recv()
.await
.ok_or_else(|| anyhow!("Failed to receive exit code"))?;
return Ok(resp);
}
};
@@ -478,7 +492,7 @@ impl AgentService {
let writer = {
let s = self.sandbox.clone();
let mut sandbox = s.lock().await;
let p = find_process(&mut sandbox, cid.as_str(), eid.as_str(), false)?;
let p = sandbox.find_container_process(cid.as_str(), eid.as_str())?;
// use ptmx io
if p.term_master.is_some() {
@@ -489,7 +503,7 @@ impl AgentService {
}
};
let writer = writer.unwrap();
let writer = writer.ok_or_else(|| anyhow!("cannot get writer"))?;
writer.lock().await.write_all(req.data.as_slice()).await?;
let mut resp = WriteStreamResponse::new();
@@ -511,7 +525,7 @@ impl AgentService {
let s = self.sandbox.clone();
let mut sandbox = s.lock().await;
let p = find_process(&mut sandbox, cid.as_str(), eid.as_str(), false)?;
let p = sandbox.find_container_process(cid.as_str(), eid.as_str())?;
if p.term_master.is_some() {
term_exit_notifier = p.term_exit_notifier.clone();
@@ -528,10 +542,10 @@ impl AgentService {
};
if reader.is_none() {
return Err(anyhow!(nix::Error::from_errno(nix::errno::Errno::EINVAL)));
return Err(anyhow!(nix::Error::EINVAL));
}
let reader = reader.unwrap();
let reader = reader.ok_or_else(|| anyhow!("cannot get stream reader"))?;
tokio::select! {
_ = term_exit_notifier.notified() => {
@@ -558,7 +572,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
trace_rpc_call!(ctx, "create_container", req);
is_allowed!(req);
match self.do_create_container(req).await {
Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
Err(e) => Err(ttrpc_error!(ttrpc::Code::INTERNAL, e)),
Ok(_) => Ok(Empty::new()),
}
}
@@ -571,7 +585,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
trace_rpc_call!(ctx, "start_container", req);
is_allowed!(req);
match self.do_start_container(req).await {
Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
Err(e) => Err(ttrpc_error!(ttrpc::Code::INTERNAL, e)),
Ok(_) => Ok(Empty::new()),
}
}
@@ -585,7 +599,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
is_allowed!(req);
match self.do_remove_container(req).await {
Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
Err(e) => Err(ttrpc_error!(ttrpc::Code::INTERNAL, e)),
Ok(_) => Ok(Empty::new()),
}
}
@@ -598,7 +612,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
trace_rpc_call!(ctx, "exec_process", req);
is_allowed!(req);
match self.do_exec_process(req).await {
Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
Err(e) => Err(ttrpc_error!(ttrpc::Code::INTERNAL, e)),
Ok(_) => Ok(Empty::new()),
}
}
@@ -611,7 +625,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
trace_rpc_call!(ctx, "signal_process", req);
is_allowed!(req);
match self.do_signal_process(req).await {
Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
Err(e) => Err(ttrpc_error!(ttrpc::Code::INTERNAL, e)),
Ok(_) => Ok(Empty::new()),
}
}
@@ -625,7 +639,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
is_allowed!(req);
self.do_wait_process(req)
.await
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))
.map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))
}
async fn update_container(
@@ -642,7 +656,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
let mut sandbox = s.lock().await;
let ctr = sandbox.get_container(&cid).ok_or_else(|| {
ttrpc_error(
ttrpc_error!(
ttrpc::Code::INVALID_ARGUMENT,
"invalid container id".to_string(),
)
@@ -650,11 +664,11 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
let resp = Empty::new();
if res.is_some() {
let oci_res = rustjail::resources_grpc_to_oci(&res.unwrap());
if let Some(res) = res.as_ref() {
let oci_res = rustjail::resources_grpc_to_oci(res);
match ctr.set(oci_res) {
Err(e) => {
return Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()));
return Err(ttrpc_error!(ttrpc::Code::INTERNAL, e));
}
Ok(_) => return Ok(resp),
@@ -676,14 +690,14 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
let mut sandbox = s.lock().await;
let ctr = sandbox.get_container(&cid).ok_or_else(|| {
ttrpc_error(
ttrpc_error!(
ttrpc::Code::INVALID_ARGUMENT,
"invalid container id".to_string(),
)
})?;
ctr.stats()
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))
.map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))
}
async fn pause_container(
@@ -698,14 +712,14 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
let mut sandbox = s.lock().await;
let ctr = sandbox.get_container(cid).ok_or_else(|| {
ttrpc_error(
ttrpc_error!(
ttrpc::Code::INVALID_ARGUMENT,
"invalid container id".to_string(),
)
})?;
ctr.pause()
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
.map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))?;
Ok(Empty::new())
}
@@ -722,14 +736,14 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
let mut sandbox = s.lock().await;
let ctr = sandbox.get_container(cid).ok_or_else(|| {
ttrpc_error(
ttrpc_error!(
ttrpc::Code::INVALID_ARGUMENT,
"invalid container id".to_string(),
)
})?;
ctr.resume()
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
.map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))?;
Ok(Empty::new())
}
@@ -742,7 +756,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
is_allowed!(req);
self.do_write_stream(req)
.await
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))
.map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))
}
async fn read_stdout(
@@ -753,7 +767,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
is_allowed!(req);
self.do_read_stream(req, true)
.await
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))
.map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))
}
async fn read_stderr(
@@ -764,7 +778,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
is_allowed!(req);
self.do_read_stream(req, false)
.await
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))
.map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))
}
async fn close_stdin(
@@ -780,12 +794,14 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
let s = Arc::clone(&self.sandbox);
let mut sandbox = s.lock().await;
let p = find_process(&mut sandbox, cid.as_str(), eid.as_str(), false).map_err(|e| {
ttrpc_error(
ttrpc::Code::INVALID_ARGUMENT,
format!("invalid argument: {:?}", e),
)
})?;
let p = sandbox
.find_container_process(cid.as_str(), eid.as_str())
.map_err(|e| {
ttrpc_error!(
ttrpc::Code::INVALID_ARGUMENT,
format!("invalid argument: {:?}", e),
)
})?;
p.close_stdin();
@@ -804,30 +820,31 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
let eid = req.exec_id.clone();
let s = Arc::clone(&self.sandbox);
let mut sandbox = s.lock().await;
let p = find_process(&mut sandbox, cid.as_str(), eid.as_str(), false).map_err(|e| {
ttrpc_error(
ttrpc::Code::UNAVAILABLE,
format!("invalid argument: {:?}", e),
)
})?;
let p = sandbox
.find_container_process(cid.as_str(), eid.as_str())
.map_err(|e| {
ttrpc_error!(
ttrpc::Code::UNAVAILABLE,
format!("invalid argument: {:?}", e),
)
})?;
if p.term_master.is_none() {
return Err(ttrpc_error(ttrpc::Code::UNAVAILABLE, "no tty".to_string()));
}
if let Some(fd) = p.term_master {
unsafe {
let win = winsize {
ws_row: req.row as c_ushort,
ws_col: req.column as c_ushort,
ws_xpixel: 0,
ws_ypixel: 0,
};
let fd = p.term_master.unwrap();
unsafe {
let win = winsize {
ws_row: req.row as c_ushort,
ws_col: req.column as c_ushort,
ws_xpixel: 0,
ws_ypixel: 0,
};
let err = libc::ioctl(fd, TIOCSWINSZ, &win);
Errno::result(err)
.map(drop)
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, format!("ioctl error: {:?}", e)))?;
let err = libc::ioctl(fd, TIOCSWINSZ, &win);
Errno::result(err).map(drop).map_err(|e| {
ttrpc_error!(ttrpc::Code::INTERNAL, format!("ioctl error: {:?}", e))
})?;
}
} else {
return Err(ttrpc_error!(ttrpc::Code::UNAVAILABLE, "no tty".to_string()));
}
Ok(Empty::new())
@@ -842,7 +859,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
is_allowed!(req);
let interface = req.interface.into_option().ok_or_else(|| {
ttrpc_error(
ttrpc_error!(
ttrpc::Code::INVALID_ARGUMENT,
"empty update interface request".to_string(),
)
@@ -855,7 +872,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
.update_interface(&interface)
.await
.map_err(|e| {
ttrpc_error(ttrpc::Code::INTERNAL, format!("update interface: {:?}", e))
ttrpc_error!(ttrpc::Code::INTERNAL, format!("update interface: {:?}", e))
})?;
Ok(interface)
@@ -874,7 +891,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
.into_option()
.map(|r| r.Routes.into_vec())
.ok_or_else(|| {
ttrpc_error(
ttrpc_error!(
ttrpc::Code::INVALID_ARGUMENT,
"empty update routes request".to_string(),
)
@@ -883,14 +900,14 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
let mut sandbox = self.sandbox.lock().await;
sandbox.rtnl.update_routes(new_routes).await.map_err(|e| {
ttrpc_error(
ttrpc_error!(
ttrpc::Code::INTERNAL,
format!("Failed to update routes: {:?}", e),
)
})?;
let list = sandbox.rtnl.list_routes().await.map_err(|e| {
ttrpc_error(
ttrpc_error!(
ttrpc::Code::INTERNAL,
format!("Failed to list routes after update: {:?}", e),
)
@@ -918,7 +935,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
.list_interfaces()
.await
.map_err(|e| {
ttrpc_error(
ttrpc_error!(
ttrpc::Code::INTERNAL,
format!("Failed to list interfaces: {:?}", e),
)
@@ -945,7 +962,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
.rtnl
.list_routes()
.await
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, format!("list routes: {:?}", e)))?;
.map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, format!("list routes: {:?}", e)))?;
Ok(protocols::agent::Routes {
Routes: RepeatedField::from_vec(list),
@@ -953,25 +970,6 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
})
}
async fn start_tracing(
&self,
_ctx: &TtrpcContext,
req: protocols::agent::StartTracingRequest,
) -> ttrpc::Result<Empty> {
info!(sl!(), "start_tracing {:?}", req);
is_allowed!(req);
Ok(Empty::new())
}
async fn stop_tracing(
&self,
_ctx: &TtrpcContext,
req: protocols::agent::StopTracingRequest,
) -> ttrpc::Result<Empty> {
is_allowed!(req);
Ok(Empty::new())
}
async fn create_sandbox(
&self,
ctx: &TtrpcContext,
@@ -1004,13 +1002,12 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
}
for m in req.kernel_modules.iter() {
load_kernel_module(m)
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
load_kernel_module(m).map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))?;
}
s.setup_shared_namespaces()
.await
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
.map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))?;
}
match add_storages(sl!(), req.storages.to_vec(), self.sandbox.clone(), None).await {
@@ -1019,7 +1016,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
let mut s = sandbox.lock().await;
s.mounts = m
}
Err(e) => return Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
Err(e) => return Err(ttrpc_error!(ttrpc::Code::INTERNAL, e)),
};
match setup_guest_dns(sl!(), req.dns.to_vec()) {
@@ -1032,7 +1029,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
.iter()
.map(|dns| s.network.set_dns(dns.to_string()));
}
Err(e) => return Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
Err(e) => return Err(ttrpc_error!(ttrpc::Code::INTERNAL, e)),
};
Ok(Empty::new())
@@ -1050,12 +1047,25 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
let mut sandbox = s.lock().await;
// destroy all containers, clean up, notify agent to exit
// etc.
sandbox.destroy().await.unwrap();
sandbox
.destroy()
.await
.map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))?;
// Close get_oom_event connection,
// otherwise it will block the shutdown of ttrpc.
sandbox.event_tx.take();
sandbox.sender.take().unwrap().send(1).unwrap();
sandbox
.sender
.take()
.ok_or_else(|| {
ttrpc_error!(
ttrpc::Code::INTERNAL,
"failed to get sandbox sender channel".to_string(),
)
})?
.send(1)
.map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))?;
Ok(Empty::new())
}
@@ -1073,7 +1083,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
.into_option()
.map(|n| n.ARPNeighbors.into_vec())
.ok_or_else(|| {
ttrpc_error(
ttrpc_error!(
ttrpc::Code::INVALID_ARGUMENT,
"empty add arp neighbours request".to_string(),
)
@@ -1086,7 +1096,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
.add_arp_neighbors(neighs)
.await
.map_err(|e| {
ttrpc_error(
ttrpc_error!(
ttrpc::Code::INTERNAL,
format!("Failed to add ARP neighbours: {:?}", e),
)
@@ -1107,7 +1117,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
sandbox
.online_cpu_memory(&req)
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
.map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))?;
Ok(Empty::new())
}
@@ -1121,7 +1131,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
is_allowed!(req);
random::reseed_rng(req.data.as_slice())
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
.map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))?;
Ok(Empty::new())
}
@@ -1144,7 +1154,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
}
Err(e) => {
info!(sl!(), "fail to get memory info!");
return Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()));
return Err(ttrpc_error!(ttrpc::Code::INTERNAL, e));
}
}
@@ -1164,7 +1174,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
is_allowed!(req);
do_mem_hotplug_by_probe(&req.memHotplugProbeAddr)
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
.map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))?;
Ok(Empty::new())
}
@@ -1178,7 +1188,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
is_allowed!(req);
do_set_guest_date_time(req.Sec, req.Usec)
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
.map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))?;
Ok(Empty::new())
}
@@ -1191,7 +1201,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
trace_rpc_call!(ctx, "copy_file", req);
is_allowed!(req);
do_copy_file(&req).map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
do_copy_file(&req).map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))?;
Ok(Empty::new())
}
@@ -1205,7 +1215,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
is_allowed!(req);
match get_metrics(&req) {
Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
Err(e) => Err(ttrpc_error!(ttrpc::Code::INTERNAL, e)),
Ok(s) => {
let mut metrics = Metrics::new();
metrics.set_metrics(s);
@@ -1236,7 +1246,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
return Ok(resp);
}
Err(ttrpc_error(ttrpc::Code::INTERNAL, ""))
Err(ttrpc_error!(ttrpc::Code::INTERNAL, ""))
}
async fn add_swap(
@@ -1249,7 +1259,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
do_add_swap(&self.sandbox, &req)
.await
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
.map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))?;
Ok(Empty::new())
}
@@ -1314,16 +1324,9 @@ fn get_memory_info(block_size: bool, hotplug: bool) -> Result<(u64, bool)> {
match stat::stat(SYSFS_MEMORY_HOTPLUG_PROBE_PATH) {
Ok(_) => plug = true,
Err(e) => {
info!(
sl!(),
"hotplug memory error: {}",
e.as_errno().unwrap().desc()
);
info!(sl!(), "hotplug memory error: {:?}", e);
match e {
nix::Error::Sys(errno) => match errno {
Errno::ENOENT => plug = false,
_ => return Err(anyhow!(e)),
},
nix::Error::ENOENT => plug = false,
_ => return Err(anyhow!(e)),
}
}
@@ -1374,27 +1377,7 @@ async fn read_stream(reader: Arc<Mutex<ReadHalf<PipeStream>>>, l: usize) -> Resu
Ok(content)
}
fn find_process<'a>(
sandbox: &'a mut Sandbox,
cid: &'a str,
eid: &'a str,
init: bool,
) -> Result<&'a mut Process> {
let ctr = sandbox
.get_container(cid)
.ok_or_else(|| anyhow!("Invalid container id"))?;
if init || eid.is_empty() {
return ctr
.processes
.get_mut(&ctr.init_process_pid)
.ok_or_else(|| anyhow!("cannot find init process!"));
}
ctr.get_process(eid).map_err(|_| anyhow!("Invalid exec id"))
}
pub fn start(s: Arc<Mutex<Sandbox>>, server_address: &str) -> TtrpcServer {
pub fn start(s: Arc<Mutex<Sandbox>>, server_address: &str) -> Result<TtrpcServer> {
let agent_service = Box::new(AgentService { sandbox: s })
as Box<dyn protocols::agent_ttrpc::AgentService + Send + Sync>;
@@ -1409,14 +1392,13 @@ pub fn start(s: Arc<Mutex<Sandbox>>, server_address: &str) -> TtrpcServer {
let hservice = protocols::health_ttrpc::create_health(health_worker);
let server = TtrpcServer::new()
.bind(server_address)
.unwrap()
.bind(server_address)?
.register_service(aservice)
.register_service(hservice);
info!(sl!(), "ttRPC server started"; "address" => server_address);
server
Ok(server)
}
// This function updates the container namespaces configuration based on the
@@ -1461,24 +1443,28 @@ fn update_container_namespaces(
// the create_sandbox request or create_container request.
// Else set this to empty string so that a new pid namespace is
// created for the container.
if sandbox_pidns && sandbox.sandbox_pidns.is_some() {
pid_ns.path = String::from(sandbox.sandbox_pidns.as_ref().unwrap().path.as_str());
if sandbox_pidns {
if let Some(ref pidns) = &sandbox.sandbox_pidns {
pid_ns.path = String::from(pidns.path.as_str());
} else {
return Err(anyhow!("failed to get sandbox pidns"));
}
}
linux.namespaces.push(pid_ns);
Ok(())
}
fn append_guest_hooks(s: &Sandbox, oci: &mut Spec) {
if s.hooks.is_none() {
return;
fn append_guest_hooks(s: &Sandbox, oci: &mut Spec) -> Result<()> {
if let Some(ref guest_hooks) = s.hooks {
let mut hooks = oci.hooks.take().unwrap_or_default();
hooks.prestart.append(&mut guest_hooks.prestart.clone());
hooks.poststart.append(&mut guest_hooks.poststart.clone());
hooks.poststop.append(&mut guest_hooks.poststop.clone());
oci.hooks = Some(hooks);
}
let guest_hooks = s.hooks.as_ref().unwrap();
let mut hooks = oci.hooks.take().unwrap_or_default();
hooks.prestart.append(&mut guest_hooks.prestart.clone());
hooks.poststart.append(&mut guest_hooks.poststart.clone());
hooks.poststop.append(&mut guest_hooks.poststop.clone());
oci.hooks = Some(hooks);
Ok(())
}
// Check is the container process installed the
@@ -1557,7 +1543,7 @@ fn do_copy_file(req: &CopyFileRequest) -> Result<()> {
let path = PathBuf::from(req.path.as_str());
if !path.starts_with(CONTAINER_BASE) {
return Err(nix::Error::Sys(Errno::EINVAL).into());
return Err(anyhow!(nix::Error::EINVAL));
}
let parent = path.parent();
@@ -1568,7 +1554,7 @@ fn do_copy_file(req: &CopyFileRequest) -> Result<()> {
PathBuf::from("/")
};
fs::create_dir_all(dir.to_str().unwrap()).or_else(|e| {
fs::create_dir_all(&dir).or_else(|e| {
if e.kind() != std::io::ErrorKind::AlreadyExists {
return Err(e);
}
@@ -1576,10 +1562,7 @@ fn do_copy_file(req: &CopyFileRequest) -> Result<()> {
Ok(())
})?;
std::fs::set_permissions(
dir.to_str().unwrap(),
std::fs::Permissions::from_mode(req.dir_mode),
)?;
std::fs::set_permissions(&dir, std::fs::Permissions::from_mode(req.dir_mode))?;
let mut tmpfile = path.clone();
tmpfile.set_extension("tmp");
@@ -1588,10 +1571,10 @@ fn do_copy_file(req: &CopyFileRequest) -> Result<()> {
.write(true)
.create(true)
.truncate(false)
.open(tmpfile.to_str().unwrap())?;
.open(&tmpfile)?;
file.write_all_at(req.data.as_slice(), req.offset as u64)?;
let st = stat::stat(tmpfile.to_str().unwrap())?;
let st = stat::stat(&tmpfile)?;
if st.st_size != req.file_size {
return Ok(());
@@ -1600,7 +1583,7 @@ fn do_copy_file(req: &CopyFileRequest) -> Result<()> {
file.set_permissions(std::fs::Permissions::from_mode(req.file_mode))?;
unistd::chown(
tmpfile.to_str().unwrap(),
&tmpfile,
Some(Uid::from_raw(req.uid as u32)),
Some(Gid::from_raw(req.gid as u32)),
)?;
@@ -1637,10 +1620,13 @@ async fn do_add_swap(sandbox: &Arc<Mutex<Sandbox>>, req: &AddSwapRequest) -> Res
// - container rootfs bind mounted at /<CONTAINER_BASE>/<cid>/rootfs
// - modify container spec root to point to /<CONTAINER_BASE>/<cid>/rootfs
fn setup_bundle(cid: &str, spec: &mut Spec) -> Result<PathBuf> {
if spec.root.is_none() {
return Err(nix::Error::Sys(Errno::EINVAL).into());
}
let spec_root = spec.root.as_ref().unwrap();
let spec_root = if let Some(sr) = &spec.root {
sr
} else {
return Err(anyhow!(nix::Error::EINVAL));
};
let spec_root_path = Path::new(&spec_root.path);
let bundle_path = Path::new(CONTAINER_BASE).join(cid);
let config_path = bundle_path.join("config.json");
@@ -1648,22 +1634,36 @@ fn setup_bundle(cid: &str, spec: &mut Spec) -> Result<PathBuf> {
fs::create_dir_all(&rootfs_path)?;
baremount(
&spec_root.path,
rootfs_path.to_str().unwrap(),
spec_root_path,
&rootfs_path,
"bind",
MsFlags::MS_BIND,
"",
&sl!(),
)?;
let rootfs_path_name = rootfs_path
.to_str()
.ok_or_else(|| anyhow!("failed to convert rootfs to unicode"))?
.to_string();
spec.root = Some(Root {
path: rootfs_path.to_str().unwrap().to_owned(),
path: rootfs_path_name,
readonly: spec_root.readonly,
});
let _ = spec.save(config_path.to_str().unwrap());
let _ = spec.save(
config_path
.to_str()
.ok_or_else(|| anyhow!("cannot convert path to unicode"))?,
);
let olddir = unistd::getcwd().context("cannot getcwd")?;
unistd::chdir(bundle_path.to_str().unwrap())?;
unistd::chdir(
bundle_path
.to_str()
.ok_or_else(|| anyhow!("cannot convert bundle path to unicode"))?,
)?;
Ok(olddir)
}
@@ -1696,8 +1696,8 @@ fn load_kernel_module(module: &protocols::agent::KernelModule) -> Result<()> {
match status.code() {
Some(code) => {
let std_out: String = String::from_utf8(output.stdout).unwrap();
let std_err: String = String::from_utf8(output.stderr).unwrap();
let std_out = String::from_utf8_lossy(&output.stdout);
let std_err = String::from_utf8_lossy(&output.stderr);
let msg = format!(
"load_kernel_module return code: {} stdout:{} stderr:{}",
code, std_out, std_err
@@ -1761,7 +1761,7 @@ mod tests {
let mut oci = Spec {
..Default::default()
};
append_guest_hooks(&s, &mut oci);
append_guest_hooks(&s, &mut oci).unwrap();
assert_eq!(s.hooks, oci.hooks);
}

View File

@@ -226,6 +226,21 @@ impl Sandbox {
None
}
pub fn find_container_process(&mut self, cid: &str, eid: &str) -> Result<&mut Process> {
let ctr = self
.get_container(cid)
.ok_or_else(|| anyhow!("Invalid container id"))?;
if eid.is_empty() {
return ctr
.processes
.get_mut(&ctr.init_process_pid)
.ok_or_else(|| anyhow!("cannot find init process!"));
}
ctr.get_process(eid).map_err(|_| anyhow!("Invalid exec id"))
}
#[instrument]
pub async fn destroy(&mut self) -> Result<()> {
for ctr in self.containers.values_mut() {
@@ -450,18 +465,23 @@ fn online_memory(logger: &Logger) -> Result<()> {
mod tests {
use super::Sandbox;
use crate::{mount::baremount, skip_if_not_root};
use anyhow::Error;
use anyhow::{anyhow, Error};
use nix::mount::MsFlags;
use oci::{Linux, Root, Spec};
use rustjail::container::LinuxContainer;
use rustjail::process::Process;
use rustjail::specconv::CreateOpts;
use slog::Logger;
use std::fs::{self, File};
use std::os::unix::fs::PermissionsExt;
use tempfile::Builder;
use std::path::Path;
use tempfile::{tempdir, Builder, TempDir};
fn bind_mount(src: &str, dst: &str, logger: &Logger) -> Result<(), Error> {
baremount(src, dst, "bind", MsFlags::MS_BIND, "", logger)
let src_path = Path::new(src);
let dst_path = Path::new(dst);
baremount(src_path, dst_path, "bind", MsFlags::MS_BIND, "", logger)
}
use serial_test::serial;
@@ -684,23 +704,31 @@ mod tests {
}
}
fn create_linuxcontainer() -> LinuxContainer {
LinuxContainer::new(
"some_id",
"/run/agent",
create_dummy_opts(),
&slog_scope::logger(),
fn create_linuxcontainer() -> (LinuxContainer, TempDir) {
// Create a temporary directory
let dir = tempdir()
.map_err(|e| anyhow!(e).context("tempdir failed"))
.unwrap();
// Create a new container
(
LinuxContainer::new(
"some_id",
dir.path().join("rootfs").to_str().unwrap(),
create_dummy_opts(),
&slog_scope::logger(),
)
.unwrap(),
dir,
)
.unwrap()
}
#[tokio::test]
#[serial]
async fn get_container_entry_exist() {
skip_if_not_root!();
let logger = slog::Logger::root(slog::Discard, o!());
let mut s = Sandbox::new(&logger).unwrap();
let linux_container = create_linuxcontainer();
let (linux_container, _root) = create_linuxcontainer();
s.containers
.insert("testContainerID".to_string(), linux_container);
@@ -721,10 +749,9 @@ mod tests {
#[tokio::test]
#[serial]
async fn add_and_get_container() {
skip_if_not_root!();
let logger = slog::Logger::root(slog::Discard, o!());
let mut s = Sandbox::new(&logger).unwrap();
let linux_container = create_linuxcontainer();
let (linux_container, _root) = create_linuxcontainer();
s.add_container(linux_container);
assert!(s.get_container("some_id").is_some());
@@ -733,12 +760,11 @@ mod tests {
#[tokio::test]
#[serial]
async fn update_shared_pidns() {
skip_if_not_root!();
let logger = slog::Logger::root(slog::Discard, o!());
let mut s = Sandbox::new(&logger).unwrap();
let test_pid = 9999;
let mut linux_container = create_linuxcontainer();
let (mut linux_container, _root) = create_linuxcontainer();
linux_container.init_process_pid = test_pid;
s.update_shared_pidns(&linux_container).unwrap();
@@ -781,4 +807,49 @@ mod tests {
let ret = s.destroy().await;
assert!(ret.is_ok());
}
// Exercises `Sandbox::find_container_process()` with one container holding
// an init process and one exec process, covering both successful lookups
// and both failure modes (unknown exec id, unknown container id).
#[tokio::test]
async fn test_find_container_process() {
let logger = slog::Logger::root(slog::Discard, o!());
let mut s = Sandbox::new(&logger).unwrap();
let cid = "container-123";
let (mut linux_container, _root) = create_linuxcontainer();
// The init process is looked up by pid, so the pid set here must match
// the key used for the init process inserted below.
linux_container.init_process_pid = 1;
linux_container.id = cid.to_string();
// Add the init process (exec id "1", stored under pid 1).
linux_container.processes.insert(
1,
Process::new(&logger, &oci::Process::default(), "1", true, 1).unwrap(),
);
// Add an exec process (exec id "exec-123", stored under pid 123).
linux_container.processes.insert(
123,
Process::new(&logger, &oci::Process::default(), "exec-123", false, 1).unwrap(),
);
s.add_container(linux_container);
// An empty exec id returns the init process.
let p = s.find_container_process(cid, "");
assert!(p.is_ok(), "Expecting Ok, Got {:?}", p);
let p = p.unwrap();
assert_eq!("1", p.exec_id, "exec_id should be 1");
assert!(p.init, "init flag should be true");
// An existing exec id returns the matching exec process.
let p = s.find_container_process(cid, "exec-123");
assert!(p.is_ok(), "Expecting Ok, Got {:?}", p);
let p = p.unwrap();
assert_eq!("exec-123", p.exec_id, "exec_id should be exec-123");
assert!(!p.init, "init flag should be false");
// An unknown exec id returns an error.
let p = s.find_container_process(cid, "exec-456");
assert!(p.is_err(), "Expecting Error, Got {:?}", p);
// An unknown container id returns an error.
let p = s.find_container_process("not-exist-cid", "");
assert!(p.is_err(), "Expecting Error, Got {:?}", p);
}
}

View File

@@ -11,7 +11,6 @@ use slog::Logger;
use anyhow::{anyhow, Result};
use netlink_sys::{protocols, SocketAddr, TokioSocket};
use nix::errno::Errno;
use std::fmt::Debug;
use std::os::unix::io::FromRawFd;
use std::sync::Arc;
@@ -203,7 +202,7 @@ pub async fn watch_uevents(
Ok((buf, addr)) => {
if addr.port_number() != 0 {
// not our netlink message
let err_msg = format!("{:?}", nix::Error::Sys(Errno::EBADMSG));
let err_msg = format!("{:?}", nix::Error::EBADMSG);
error!(logger, "receive uevent message failed"; "error" => err_msg);
continue;
}
@@ -240,7 +239,6 @@ pub(crate) fn spawn_test_watcher(sandbox: Arc<Mutex<Sandbox>>, uev: Uevent) {
if matcher.is_match(&uev) {
let (_, sender) = watch.take().unwrap();
let _ = sender.send(uev.clone());
return;
}
}
});

View File

@@ -3,7 +3,7 @@
// SPDX-License-Identifier: Apache-2.0
//
use anyhow::Result;
use anyhow::{anyhow, Result};
use futures::StreamExt;
use std::io;
use std::io::ErrorKind;
@@ -64,8 +64,12 @@ pub fn get_vsock_incoming(fd: RawFd) -> Incoming {
#[instrument]
pub async fn get_vsock_stream(fd: RawFd) -> Result<VsockStream> {
let stream = get_vsock_incoming(fd).next().await.unwrap()?;
Ok(stream)
let stream = get_vsock_incoming(fd)
.next()
.await
.ok_or_else(|| anyhow!("cannot handle incoming vsock connection"))?;
Ok(stream?)
}
#[cfg(test)]
@@ -124,7 +128,9 @@ mod tests {
let mut vec_locked = vec_ref.lock();
let v = vec_locked.as_deref_mut().unwrap();
let v = vec_locked
.as_deref_mut()
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e.to_string()))?;
std::io::Write::flush(v)
}

View File

@@ -367,8 +367,8 @@ impl SandboxStorages {
}
match baremount(
entry.source_mount_point.to_str().unwrap(),
entry.target_mount_point.to_str().unwrap(),
entry.source_mount_point.as_path(),
entry.target_mount_point.as_path(),
"bind",
MsFlags::MS_BIND,
"bind",
@@ -478,8 +478,8 @@ impl BindWatcher {
fs::create_dir_all(WATCH_MOUNT_POINT_PATH).await?;
baremount(
"tmpfs",
WATCH_MOUNT_POINT_PATH,
Path::new("tmpfs"),
Path::new(WATCH_MOUNT_POINT_PATH),
"tmpfs",
MsFlags::empty(),
"",

View File

@@ -7,7 +7,7 @@ edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
nix = "0.21.0"
nix = "0.23.0"
libc = "0.2.94"
thiserror = "1.0.26"
opentelemetry = { version = "0.14.0", features=["serialize"] }

321
src/libs/logging/Cargo.lock generated Normal file
View File

@@ -0,0 +1,321 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "arc-swap"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c5d78ce20460b82d3fa150275ed9d55e21064fc7951177baacf86a145c4a4b1f"
[[package]]
name = "autocfg"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73"
dependencies = [
"libc",
"num-integer",
"num-traits",
"time",
"winapi",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4"
dependencies = [
"cfg-if",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db"
dependencies = [
"cfg-if",
"lazy_static",
]
[[package]]
name = "getrandom"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "itoa"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35"
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.112"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b03d17f364a3a042d5e5d46b053bbbf82c92c9430c592dd4c064dc6ee997125"
[[package]]
name = "logging"
version = "0.1.0"
dependencies = [
"serde_json",
"slog",
"slog-async",
"slog-json",
"slog-scope",
"tempfile",
]
[[package]]
name = "num-integer"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db"
dependencies = [
"autocfg",
"num-traits",
]
[[package]]
name = "num-traits"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"
dependencies = [
"autocfg",
]
[[package]]
name = "once_cell"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5"
[[package]]
name = "ppv-lite86"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed0cfbc8191465bed66e1718596ee0b0b35d5ee1f41c5df2189d0fe8bde535ba"
[[package]]
name = "rand"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
"rand_hc",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7"
dependencies = [
"getrandom",
]
[[package]]
name = "rand_hc"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d51e9f596de227fda2ea6c84607f5558e196eeaf43c986b724ba4fb8fdf497e7"
dependencies = [
"rand_core",
]
[[package]]
name = "redox_syscall"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff"
dependencies = [
"bitflags",
]
[[package]]
name = "remove_dir_all"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
dependencies = [
"winapi",
]
[[package]]
name = "ryu"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f"
[[package]]
name = "serde"
version = "1.0.131"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4ad69dfbd3e45369132cc64e6748c2d65cdfb001a2b1c232d128b4ad60561c1"
[[package]]
name = "serde_json"
version = "1.0.73"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bcbd0344bc6533bc7ec56df11d42fb70f1b912351c0825ccb7211b59d8af7cf5"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "slog"
version = "2.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8347046d4ebd943127157b94d63abb990fcf729dc4e9978927fdf4ac3c998d06"
[[package]]
name = "slog-async"
version = "2.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "766c59b252e62a34651412870ff55d8c4e6d04df19b43eecb2703e417b097ffe"
dependencies = [
"crossbeam-channel",
"slog",
"take_mut",
"thread_local",
]
[[package]]
name = "slog-json"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52e9b96fb6b5e80e371423b4aca6656eb537661ce8f82c2697e619f8ca85d043"
dependencies = [
"chrono",
"serde",
"serde_json",
"slog",
]
[[package]]
name = "slog-scope"
version = "4.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f95a4b4c3274cd2869549da82b57ccc930859bdbf5bcea0424bc5f140b3c786"
dependencies = [
"arc-swap",
"lazy_static",
"slog",
]
[[package]]
name = "take_mut"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f764005d11ee5f36500a149ace24e00e3da98b0158b3e2d53a7495660d3f4d60"
[[package]]
name = "tempfile"
version = "3.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22"
dependencies = [
"cfg-if",
"libc",
"rand",
"redox_syscall",
"remove_dir_all",
"winapi",
]
[[package]]
name = "thread_local"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8018d24e04c95ac8790716a5987d0fec4f8b27249ffa0f7d33f1369bdfb88cbd"
dependencies = [
"once_cell",
]
[[package]]
name = "time"
version = "0.1.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438"
dependencies = [
"libc",
"winapi",
]
[[package]]
name = "wasi"
version = "0.10.2+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

18
src/libs/logging/Makefile Normal file
View File

@@ -0,0 +1,18 @@
# Copyright (c) 2021 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
# It is not necessary to have a build target as this crate is built
# automatically by the consumers of it.
#
# However, it is essential that the crate be tested.
# Neither target produces a file of the same name. Declare them phony so
# that a stray file or directory called "default" or "test" can never make
# make(1) consider the tests up to date and skip them.
.PHONY: default test

default: test

# It is essential to run these tests using *both* build profiles.
# See the `test_logger_levels()` test for further information.
test:
	@echo "INFO: testing log levels for development build"
	@cargo test
	@echo "INFO: testing log levels for release build"
	@cargo test --release

View File

@@ -20,6 +20,8 @@ const LOG_LEVELS: &[(&str, slog::Level)] = &[
("critical", slog::Level::Critical),
];
const DEFAULT_SUBSYSTEM: &str = "root";
// XXX: 'writer' param used to make testing possible.
pub fn create_logger<W>(
name: &str,
@@ -50,7 +52,7 @@ where
let logger = slog::Logger::root(
async_drain.fuse(),
o!("version" => env!("CARGO_PKG_VERSION"),
"subsystem" => "root",
"subsystem" => DEFAULT_SUBSYSTEM,
"pid" => process::id().to_string(),
"name" => name.to_string(),
"source" => source.to_string()),
@@ -216,8 +218,8 @@ where
#[cfg(test)]
mod tests {
use super::*;
use serde_json::Value;
use slog::info;
use serde_json::{json, Value};
use slog::{crit, debug, error, info, warn, Logger};
use std::io::prelude::*;
use tempfile::NamedTempFile;
@@ -295,15 +297,15 @@ mod tests {
let result_level = result.unwrap();
let expected_level = d.result.unwrap();
assert!(result_level == expected_level, msg);
assert!(result_level == expected_level, "{}", msg);
continue;
} else {
assert!(result.is_err(), msg);
assert!(result.is_err(), "{}", msg);
}
let expected_error = format!("{}", d.result.as_ref().unwrap_err());
let actual_error = format!("{}", result.unwrap_err());
assert!(actual_error == expected_error, msg);
let expected_error = d.result.as_ref().unwrap_err();
let actual_error = result.unwrap_err();
assert!(&actual_error == expected_error, "{}", msg);
}
}
@@ -350,13 +352,13 @@ mod tests {
let msg = format!("{}, result: {:?}", msg, result);
if d.result.is_ok() {
assert!(result == d.result, msg);
assert!(result == d.result, "{}", msg);
continue;
}
let expected_error = format!("{}", d.result.as_ref().unwrap_err());
let actual_error = format!("{}", result.unwrap_err());
assert!(actual_error == expected_error, msg);
let expected_error = d.result.as_ref().unwrap_err();
let actual_error = result.unwrap_err();
assert!(&actual_error == expected_error, "{}", msg);
}
}
@@ -376,14 +378,17 @@ mod tests {
let record_key = "record-key-1";
let record_value = "record-key-2";
let logger = create_logger(name, source, level, writer);
let (logger, guard) = create_logger(name, source, level, writer);
let msg = "foo, bar, baz";
// Call the logger (which calls the drain)
info!(logger, "{}", msg; "subsystem" => record_subsystem, record_key => record_value);
// Note: This "mid level" log level should be available in debug or
// release builds.
info!(&logger, "{}", msg; "subsystem" => record_subsystem, record_key => record_value);
// Force temp file to be flushed
drop(guard);
drop(logger);
let mut contents = String::new();
@@ -430,4 +435,168 @@ mod tests {
.expect("failed to find record key field");
assert_eq!(field_record_value, record_value);
}
// For each log level in the table below: create a logger configured at that
// level which writes to a temporary file, emit a single message at the same
// level, flush the logger, then parse the file as JSON and check the fields
// of the resulting record.
#[test]
fn test_logger_levels() {
let name = "name";
let source = "source";
let debug_msg = "a debug log level message";
let info_msg = "an info log level message";
let warn_msg = "a warn log level message";
let error_msg = "an error log level message";
let critical_msg = "a critical log level message";
// The slog crate will *remove* macro calls for log levels "above" the
// configured log level.
//
// At the time of writing, the default slog log
// level is "info", but this crate overrides that using the magic
// "*max_level*" features in the "Cargo.toml" manifest.
// However, there are two log levels:
//
// - max_level_${level}
//
// This is the log level for normal "cargo build" (development/debug)
// builds.
//
// - release_max_level_${level}
//
// This is the log level for "cargo install" and
// "cargo build --release" (release) builds.
//
// This crate sets them to different values, which is sensible and
// standard practice. However, that causes a problem: there is
// currently no clean way for this test code to detect _which_
// profile the test is being built for (development or release),
// meaning we cannot know which macros are expected to produce output
// and which aren't ;(
//
// The best we can do is test the following log levels which
// are expected to work in all build profiles.
let debug_closure = |logger: &Logger, msg: String| debug!(logger, "{}", msg);
let info_closure = |logger: &Logger, msg: String| info!(logger, "{}", msg);
let warn_closure = |logger: &Logger, msg: String| warn!(logger, "{}", msg);
let error_closure = |logger: &Logger, msg: String| error!(logger, "{}", msg);
let critical_closure = |logger: &Logger, msg: String| crit!(logger, "{}", msg);
// One table entry per log level: the level to configure the logger with,
// the tag expected in the "level" field of the JSON record, the message to
// log, and a closure that invokes the matching slog macro.
struct TestData<'a> {
slog_level: slog::Level,
slog_level_tag: &'a str,
msg: String,
closure: Box<dyn Fn(&Logger, String)>,
}
let tests = &[
TestData {
slog_level: slog::Level::Debug,
// Looks like a typo but tragically it isn't! ;(
slog_level_tag: "DEBG",
msg: debug_msg.into(),
closure: Box::new(debug_closure),
},
TestData {
slog_level: slog::Level::Info,
slog_level_tag: "INFO",
msg: info_msg.into(),
closure: Box::new(info_closure),
},
TestData {
slog_level: slog::Level::Warning,
slog_level_tag: "WARN",
msg: warn_msg.into(),
closure: Box::new(warn_closure),
},
TestData {
slog_level: slog::Level::Error,
// Another language tragedy
slog_level_tag: "ERRO",
msg: error_msg.into(),
closure: Box::new(error_closure),
},
TestData {
slog_level: slog::Level::Critical,
slog_level_tag: "CRIT",
msg: critical_msg.into(),
closure: Box::new(critical_closure),
},
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]", i);
// Create a writer for the logger drain to use
let writer =
NamedTempFile::new().expect(&format!("{:}: failed to create tempfile", msg));
// Used to check file contents before the temp file is unlinked
let mut writer_ref = writer
.reopen()
.expect(&format!("{:?}: failed to clone tempfile", msg));
let (logger, logger_guard) = create_logger(name, source, d.slog_level, writer);
// Call the logger (which calls the drain)
(d.closure)(&logger, d.msg.to_owned());
// Force temp file to be flushed
drop(logger_guard);
drop(logger);
let mut contents = String::new();
writer_ref
.read_to_string(&mut contents)
.expect(&format!("{:?}: failed to read tempfile contents", msg));
// Convert file to JSON
let fields: Value = serde_json::from_str(&contents)
.expect(&format!("{:?}: failed to convert logfile to json", msg));
// Check the expected JSON fields
let field_ts = fields
.get("ts")
.expect(&format!("{:?}: failed to find timestamp field", msg));
assert_ne!(field_ts, "", "{}", msg);
let field_version = fields
.get("version")
.expect(&format!("{:?}: failed to find version field", msg));
assert_eq!(field_version, env!("CARGO_PKG_VERSION"), "{}", msg);
let field_pid = fields
.get("pid")
.expect(&format!("{:?}: failed to find pid field", msg));
assert_ne!(field_pid, "", "{}", msg);
let field_level = fields
.get("level")
.expect(&format!("{:?}: failed to find level field", msg));
assert_eq!(field_level, d.slog_level_tag, "{}", msg);
let field_msg = fields
.get("msg")
.expect(&format!("{:?}: failed to find msg field", msg));
assert_eq!(field_msg, &json!(d.msg), "{}", msg);
let field_name = fields
.get("name")
.expect(&format!("{:?}: failed to find name field", msg));
assert_eq!(field_name, name, "{}", msg);
let field_source = fields
.get("source")
.expect(&format!("{:?}: failed to find source field", msg));
assert_eq!(field_source, source, "{}", msg);
let field_subsystem = fields
.get("subsystem")
.expect(&format!("{:?}: failed to find subsystem field", msg));
// No explicit subsystem, so should be the default
assert_eq!(field_subsystem, &json!(DEFAULT_SUBSYSTEM), "{}", msg);
}
}
}

View File

@@ -4,10 +4,16 @@ version = "0.1.0"
authors = ["The Kata Containers community <kata-dev@lists.katacontainers.io>"]
edition = "2018"
[features]
default = []
with-serde = [ "serde", "serde_json" ]
[dependencies]
ttrpc = { version = "0.5.0", features = ["async"] }
async-trait = "0.1.42"
protobuf = "=2.14.0"
protobuf = { version = "=2.14.0", features = ["with-serde"] }
serde = { version = "1.0.130", features = ["derive"], optional = true }
serde_json = { version = "1.0.68", optional = true }
[build-dependencies]
ttrpc-codegen = "0.2.0"

168
src/libs/protocols/build.rs Normal file
View File

@@ -0,0 +1,168 @@
// Copyright (c) 2020 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use std::fs::File;
use std::io::{BufRead, BufReader, Read, Write};
use std::path::Path;
use std::process::exit;
use ttrpc_codegen::{Codegen, Customize, ProtobufCustomize};
/// Replace every occurrence of `from` with `to` in the file `file_name`,
/// rewriting the file in place.
///
/// # Errors
///
/// Returns the underlying `std::io::Error` if the file cannot be opened or
/// read (including when its contents are not valid UTF-8), or if it cannot
/// be re-created and written. Read failures are propagated to the caller
/// rather than aborting the build script with a panic.
fn replace_text_in_file(file_name: &str, from: &str, to: &str) -> Result<(), std::io::Error> {
    let mut contents = String::new();

    // The temporary read handle is dropped at the end of this statement, so
    // the file is closed before it is re-created (truncated) below.
    File::open(file_name)?.read_to_string(&mut contents)?;

    let new_contents = contents.replace(from, to);

    File::create(file_name)?.write_all(new_contents.as_bytes())
}
/// Rewrite the serde derive line in the generated Rust file of every proto
/// listed in `protos`.
///
/// For each proto path, the file name is taken, its `.proto` suffix is
/// replaced with `.rs`, and the result is joined onto `out_dir`. In that file
/// `derive(Serialize, Deserialize)` is replaced with the fully qualified
/// `derive(serde::Serialize, serde::Deserialize)`.
///
/// # Errors
///
/// Stops at the first failure and returns it: an `ErrorKind::Other` error if
/// a path has no UTF-8 file name or the output path is not valid UTF-8,
/// otherwise whatever `replace_text_in_file()` reports.
fn use_serde(protos: &[&str], out_dir: &Path) -> Result<(), std::io::Error> {
    let other = |msg: String| std::io::Error::new(std::io::ErrorKind::Other, msg);

    for f in protos {
        let rs_name = Path::new(f)
            .file_name()
            .and_then(|s| s.to_str())
            .map(|s| s.replace(".proto", ".rs"))
            .ok_or_else(|| other(format!("failed to get proto file name for {:?}", f)))?;

        let out_path = out_dir.join(rs_name);
        let out_file = out_path
            .to_str()
            .ok_or_else(|| other(format!("cannot convert {:?} path to string", f)))?;

        replace_text_in_file(
            out_file,
            "derive(Serialize, Deserialize)",
            "derive(serde::Serialize, serde::Deserialize)",
        )?;
    }

    Ok(())
}
/// Post-process one generated Rust source file in place.
///
/// Two line-based edits are applied:
///
/// - `autogen_comment` is added as a new line directly after every line
///   starting with `//! Generated file from`.
/// - A `serde(default)` attribute, guarded by the `with-serde` feature, is
///   added as a new line directly before every line starting with
///   `pub struct `.
///
/// The resulting lines are joined with `\n` and written back over
/// `rust_filename`; no newline is appended after the last line.
///
/// # Errors
///
/// Returns the underlying `std::io::Error` if the file cannot be opened,
/// read, re-created or written.
fn handle_file(autogen_comment: &str, rust_filename: &str) -> Result<(), std::io::Error> {
    const GENERATED_PREFIX: &str = "//! Generated file from";
    const STRUCT_PREFIX: &str = "pub struct ";

    // Guard the code since it is only needed for the agent-ctl tool,
    // not the agent itself.
    //
    // Although serde support is requested via `Customize`, this extra
    // attribute is what lets the `kata-agent-ctl` tool partially deserialise
    // structures specified in JSON.
    const SERDE_DEFAULT_ATTR: &str = r#"#[cfg_attr(feature = "with-serde", serde(default))]"#;

    let mut output: Vec<String> = Vec::new();

    for line in BufReader::new(File::open(rust_filename)?).lines() {
        let line = line?;

        // The attribute must precede the struct declaration it applies to.
        if line.starts_with(STRUCT_PREFIX) {
            output.push(SERDE_DEFAULT_ATTR.to_string());
        }

        let is_generated_header = line.starts_with(GENERATED_PREFIX);
        output.push(line);

        if is_generated_header {
            output.push(autogen_comment.to_string());
        }
    }

    File::create(rust_filename)?.write_all(output.join("\n").as_bytes())
}
/// Generate the Rust bindings for the proto files listed below and then
/// post-process the generated sources.
///
/// Steps, in order:
/// 1. Emit a `cargo:rerun-if-changed` line for every proto file.
/// 2. Run the `ttrpc_codegen` generator with `src` as its output directory.
/// 3. Run `handle_file()` on the `src/<name>.rs` file matching each proto.
/// 4. Qualify `Box<Self>` in `src/oci.rs`.
/// 5. Run `use_serde()` over the same set of files.
///
/// Any I/O error from these steps is returned to the caller.
fn real_main() -> Result<(), std::io::Error> {
// Comment added to each generated file by `handle_file()`; it records the
// build script path and module that produced the file.
let autogen_comment = format!("\n//! Generated by {:?} ({:?})", file!(), module_path!());
let protos = vec![
"protos/agent.proto",
"protos/google/protobuf/empty.proto",
"protos/health.proto",
"protos/oci.proto",
"protos/types.proto",
];
// Tell Cargo that if the .proto files changed, to rerun this build script.
protos
.iter()
.for_each(|p| println!("cargo:rerun-if-changed={}", &p));
let ttrpc_options = Customize {
async_server: true,
..Default::default()
};
let protobuf_options = ProtobufCustomize {
serde_derive: Some(true),
..Default::default()
};
let out_dir = Path::new("src");
// NOTE(review): presumably this writes one `<name>.rs` per proto into
// `out_dir`, which is what the post-processing below relies on -- confirm
// against the ttrpc-codegen documentation.
Codegen::new()
.out_dir(out_dir)
.inputs(&protos)
.include("protos")
.customize(ttrpc_options)
.rust_protobuf()
.rust_protobuf_customize(protobuf_options)
.run()?;
// Post-process the `.rs` file corresponding to each proto file.
for file in protos.iter() {
let proto_filename = Path::new(file).file_name().unwrap();
let generated_file = proto_filename
.to_str()
.ok_or("failed")
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?
.replace(".proto", ".rs");
let out_file = out_dir.join(generated_file);
let out_file_str = out_file
.to_str()
.ok_or("failed")
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
handle_file(&autogen_comment, out_file_str)?;
}
// oci.proto contains a message named 'Box', so the generated code has a
// struct named 'Box' that conflicts with the standard one. Replace
// `Box<Self>` with `::std::boxed::Box<Self>` to avoid the conflict.
replace_text_in_file(
"src/oci.rs",
"self: Box<Self>",
"self: ::std::boxed::Box<Self>",
)?;
use_serde(&protos, out_dir)?;
Ok(())
}
/// Build-script entry point.
///
/// Runs `real_main()`; on failure the error is printed to stderr and the
/// process exits with status 1.
fn main() {
    match real_main() {
        Ok(()) => {}
        Err(e) => {
            eprintln!("ERROR: {}", e);
            exit(1);
        }
    }
}

View File

@@ -52,8 +52,6 @@ service AgentService {
rpc AddARPNeighbors(AddARPNeighborsRequest) returns (google.protobuf.Empty);
// observability
rpc StartTracing(StartTracingRequest) returns (google.protobuf.Empty);
rpc StopTracing(StopTracingRequest) returns (google.protobuf.Empty);
rpc GetMetrics(GetMetricsRequest) returns (Metrics);
// misc (TODO: some rpcs can be replaced by hyperstart-exec)
@@ -492,12 +490,6 @@ message CopyFileRequest {
bytes data = 8;
}
message StartTracingRequest {
}
message StopTracingRequest {
}
message GetOOMEventRequest {}
message OOMEvent {

View File

@@ -11,7 +11,6 @@ config-generated.go
/pkg/containerd-shim-v2/monitor_address
/data/kata-collect-data.sh
/kata-monitor
/kata-netmon
/kata-runtime
/pkg/katautils/config-settings.go
/virtcontainers/hack/virtc/virtc

View File

@@ -55,11 +55,6 @@ RUNTIME_OUTPUT = $(CURDIR)/$(TARGET)
RUNTIME_DIR = $(CLI_DIR)/$(TARGET)
BINLIST += $(TARGET)
NETMON_DIR = $(CLI_DIR)/netmon
NETMON_TARGET = $(PROJECT_TYPE)-netmon
NETMON_RUNTIME_OUTPUT = $(CURDIR)/$(NETMON_TARGET)
BINLIBEXECLIST += $(NETMON_TARGET)
DESTDIR ?= /
ifeq ($(PREFIX),)
@@ -142,9 +137,6 @@ ACRNVALIDHYPERVISORPATHS := [\"$(ACRNPATH)\"]
ACRNCTLPATH := $(ACRNBINDIR)/$(ACRNCTLCMD)
ACRNVALIDCTLPATHS := [\"$(ACRNCTLPATH)\"]
NETMONCMD := $(BIN_PREFIX)-netmon
NETMONPATH := $(PKGLIBEXECDIR)/$(NETMONCMD)
# Default number of vCPUs
DEFVCPUS := 1
# Default maximum number of vCPUs
@@ -416,7 +408,6 @@ USER_VARS += PROJECT_PREFIX
USER_VARS += PROJECT_TAG
USER_VARS += PROJECT_TYPE
USER_VARS += PROJECT_URL
USER_VARS += NETMONPATH
USER_VARS += QEMUBINDIR
USER_VARS += QEMUCMD
USER_VARS += QEMUPATH
@@ -509,7 +500,7 @@ define SHOW_ARCH
$(shell printf "\\t%s%s\\\n" "$(1)" $(if $(filter $(ARCH),$(1))," (default)",""))
endef
all: runtime containerd-shim-v2 netmon monitor
all: runtime containerd-shim-v2 monitor
# Targets that depend on .git-commit can use $(shell cat .git-commit) to get a
# git revision string. They will only be rebuilt if the revision string
@@ -525,11 +516,6 @@ containerd-shim-v2: $(SHIMV2_OUTPUT)
monitor: $(MONITOR_OUTPUT)
netmon: $(NETMON_RUNTIME_OUTPUT)
$(NETMON_RUNTIME_OUTPUT): $(SOURCES) VERSION
$(QUIET_BUILD)(cd $(NETMON_DIR) && go build $(BUILDFLAGS) -o $@ -ldflags "-X main.version=$(VERSION)" $(KATA_LDFLAGS))
runtime: $(RUNTIME_OUTPUT) $(CONFIGS)
.DEFAULT: default
@@ -602,14 +588,12 @@ test: install-hook go-test
install-hook:
make -C virtcontainers hook
ifeq ($(shell id -u), 0)
echo "installing mock hook"
make -C virtcontainers install
endif
sudo -E make -C virtcontainers install
go-test: $(GENERATED_FILES)
go clean -testcache
go test -v -mod=vendor ./...
$(QUIET_TEST)../../ci/go-test.sh
fast-test: $(GENERATED_FILES)
go clean -testcache
@@ -638,15 +622,13 @@ coverage:
go test -v -mod=vendor -covermode=atomic -coverprofile=coverage.txt ./...
go tool cover -html=coverage.txt -o coverage.html
install: all install-runtime install-containerd-shim-v2 install-monitor install-netmon
install: all install-runtime install-containerd-shim-v2 install-monitor
install-bin: $(BINLIST)
$(QUIET_INST)$(foreach f,$(BINLIST),$(call INSTALL_EXEC,$f,$(BINDIR)))
install-runtime: runtime install-scripts install-completions install-configs install-bin
install-netmon: install-bin-libexec
install-containerd-shim-v2: $(SHIMV2)
$(QUIET_INST)$(call INSTALL_EXEC,$<,$(BINDIR))
@@ -678,7 +660,6 @@ clean:
$(QUIET_CLEAN)rm -f \
$(CONFIGS) \
$(GENERATED_FILES) \
$(NETMON_TARGET) \
$(MONITOR) \
$(SHIMV2) \
$(TARGET) \
@@ -706,9 +687,7 @@ show-usage: show-header
@printf "\tgenerate-config : create configuration file.\n"
@printf "\tinstall : install everything.\n"
@printf "\tinstall-containerd-shim-v2 : only install containerd shim v2 files.\n"
@printf "\tinstall-netmon : only install netmon files.\n"
@printf "\tinstall-runtime : only install runtime files.\n"
@printf "\tnetmon : only build netmon.\n"
@printf "\truntime : only build runtime.\n"
@printf "\tshow-arches : show supported architectures (ARCH variable values).\n"
@printf "\tshow-summary : show install locations.\n"

View File

@@ -70,7 +70,7 @@ See the
## Architecture overview
See the [architecture overview](../../docs/design/architecture.md)
See the [architecture overview](../../docs/design/architecture)
for details on the Kata Containers design.
## Configuration

View File

@@ -16,10 +16,10 @@ import (
"time"
"github.com/gogo/protobuf/types"
"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
pb "github.com/kata-containers/kata-containers/src/runtime/protocols/cache"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
vf "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/factory"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/oci"
"github.com/pkg/errors"
"github.com/urfave/cli"
"golang.org/x/sys/unix"

View File

@@ -8,7 +8,6 @@ package main
import (
"context"
"flag"
"io/ioutil"
"os"
"testing"
@@ -44,7 +43,7 @@ func TestFactoryCLIFunctionNoRuntimeConfig(t *testing.T) {
func TestFactoryCLIFunctionInit(t *testing.T) {
assert := assert.New(t)
tmpdir, err := ioutil.TempDir("", "")
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
@@ -93,7 +92,7 @@ func TestFactoryCLIFunctionInit(t *testing.T) {
func TestFactoryCLIFunctionDestroy(t *testing.T) {
assert := assert.New(t)
tmpdir, err := ioutil.TempDir("", "")
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
@@ -127,7 +126,7 @@ func TestFactoryCLIFunctionDestroy(t *testing.T) {
func TestFactoryCLIFunctionStatus(t *testing.T) {
assert := assert.New(t)
tmpdir, err := ioutil.TempDir("", "")
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)

View File

@@ -26,8 +26,8 @@ import (
"syscall"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/oci"
"github.com/sirupsen/logrus"
"github.com/urfave/cli"
)

View File

@@ -7,7 +7,7 @@ package main
import (
"fmt"
"io/ioutil"
"os"
"strings"
"syscall"
"unsafe"
@@ -212,7 +212,7 @@ func setCPUtype(hypervisorType vc.HypervisorType) error {
}
func getCPUtype() int {
content, err := ioutil.ReadFile("/proc/cpuinfo")
content, err := os.ReadFile("/proc/cpuinfo")
if err != nil {
kataLog.WithError(err).Error("failed to read file")
return cpuTypeUnknown

View File

@@ -8,7 +8,6 @@ package main
import (
"bytes"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"regexp"
@@ -72,7 +71,7 @@ func TestCCCheckCLIFunction(t *testing.T) {
func TestCheckCheckKernelModulesNoNesting(t *testing.T) {
assert := assert.New(t)
dir, err := ioutil.TempDir("", "")
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
@@ -157,7 +156,7 @@ func TestCheckCheckKernelModulesNoNesting(t *testing.T) {
func TestCheckCheckKernelModulesNoUnrestrictedGuest(t *testing.T) {
assert := assert.New(t)
dir, err := ioutil.TempDir("", "")
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
@@ -256,7 +255,7 @@ func TestCheckHostIsVMContainerCapable(t *testing.T) {
assert := assert.New(t)
dir, err := ioutil.TempDir("", "")
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
@@ -406,7 +405,7 @@ func TestArchKernelParamHandler(t *testing.T) {
func TestKvmIsUsable(t *testing.T) {
assert := assert.New(t)
dir, err := ioutil.TempDir("", "")
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
@@ -458,7 +457,7 @@ foo : bar
func TestSetCPUtype(t *testing.T) {
assert := assert.New(t)
tmpdir, err := ioutil.TempDir("", "")
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)

View File

@@ -7,7 +7,6 @@ package main
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"testing"
@@ -68,7 +67,7 @@ foo : bar
{validContents, validNormalizeVendorName, validNormalizeModelName, false},
}
tmpdir, err := ioutil.TempDir("", "")
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}

View File

@@ -6,8 +6,9 @@
package main
import (
"os"
"github.com/sirupsen/logrus"
"io/ioutil"
)
var testCPUInfoTemplate = setTestCPUInfoTemplate()
@@ -15,7 +16,7 @@ var testCPUInfoTemplate = setTestCPUInfoTemplate()
func setTestCPUInfoTemplate() string {
var kataLog *logrus.Entry
content, err := ioutil.ReadFile("/proc/cpuinfo")
content, err := os.ReadFile("/proc/cpuinfo")
if err != nil {
kataLog.WithError(err).Error("failed to read file /proc/cpuinfo")

View File

@@ -3,12 +3,12 @@
// SPDX-License-Identifier: Apache-2.0
//
//go:build arm64 || ppc64le
// +build arm64 ppc64le
package main
import (
"io/ioutil"
"os"
"testing"
@@ -18,7 +18,7 @@ import (
func testSetCPUTypeGeneric(t *testing.T) {
assert := assert.New(t)
tmpdir, err := ioutil.TempDir("", "")
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)

View File

@@ -7,7 +7,6 @@ package main
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"testing"
@@ -119,7 +118,7 @@ func TestArchKernelParamHandler(t *testing.T) {
func TestKvmIsUsable(t *testing.T) {
assert := assert.New(t)
dir, err := ioutil.TempDir("", "")
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}

View File

@@ -7,7 +7,6 @@ package main
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"testing"
@@ -118,7 +117,7 @@ func TestArchKernelParamHandler(t *testing.T) {
func TestKvmIsUsable(t *testing.T) {
assert := assert.New(t)
dir, err := ioutil.TempDir("", "")
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}

View File

@@ -10,7 +10,6 @@ import (
"flag"
"fmt"
"html/template"
"io/ioutil"
"os"
"path"
"path/filepath"
@@ -50,7 +49,7 @@ type testCPUDetail struct {
var fakeCPUData = testCPUData{"", "", false}
func createFile(file, contents string) error {
return ioutil.WriteFile(file, []byte(contents), testFileMode)
return os.WriteFile(file, []byte(contents), testFileMode)
}
func createModules(assert *assert.Assertions, cpuInfoFile string, moduleData []testModuleData) {
@@ -151,12 +150,12 @@ func makeCPUInfoFile(path, vendorID, flags string) error {
return err
}
return ioutil.WriteFile(path, contents.Bytes(), testFileMode)
return os.WriteFile(path, contents.Bytes(), testFileMode)
}
// nolint: unused, deadcode
func genericTestGetCPUDetails(t *testing.T, validVendor string, validModel string, validContents string, data []testCPUDetail) {
tmpdir, err := ioutil.TempDir("", "")
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
@@ -198,7 +197,7 @@ func genericTestGetCPUDetails(t *testing.T, validVendor string, validModel strin
func genericCheckCLIFunction(t *testing.T, cpuData []testCPUData, moduleData []testModuleData) {
assert := assert.New(t)
dir, err := ioutil.TempDir("", "")
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
@@ -308,7 +307,7 @@ func TestCheckGetCPUInfo(t *testing.T) {
{"foo\n\nbar\nbaz\n\n", "foo", false},
}
dir, err := ioutil.TempDir("", "")
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
@@ -320,7 +319,7 @@ func TestCheckGetCPUInfo(t *testing.T) {
assert.Error(err)
for _, d := range data {
err = ioutil.WriteFile(file, []byte(d.contents), testFileMode)
err = os.WriteFile(file, []byte(d.contents), testFileMode)
if err != nil {
t.Fatal(err)
}
@@ -528,7 +527,7 @@ func TestCheckHaveKernelModule(t *testing.T) {
assert := assert.New(t)
dir, err := ioutil.TempDir("", "")
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
@@ -578,7 +577,7 @@ func TestCheckHaveKernelModule(t *testing.T) {
func TestCheckCheckKernelModules(t *testing.T) {
assert := assert.New(t)
dir, err := ioutil.TempDir("", "")
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
@@ -663,7 +662,7 @@ func TestCheckCheckKernelModulesUnreadableFile(t *testing.T) {
t.Skip(ktu.TestDisabledNeedNonRoot)
}
dir, err := ioutil.TempDir("", "")
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
@@ -711,7 +710,7 @@ func TestCheckCheckKernelModulesUnreadableFile(t *testing.T) {
func TestCheckCheckKernelModulesInvalidFileContents(t *testing.T) {
assert := assert.New(t)
dir, err := ioutil.TempDir("", "")
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
@@ -756,7 +755,7 @@ func TestCheckCheckKernelModulesInvalidFileContents(t *testing.T) {
func TestCheckCLIFunctionFail(t *testing.T) {
assert := assert.New(t)
dir, err := ioutil.TempDir("", "")
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
@@ -789,7 +788,7 @@ func TestCheckCLIFunctionFail(t *testing.T) {
func TestCheckKernelParamHandler(t *testing.T) {
assert := assert.New(t)
dir, err := ioutil.TempDir("", "")
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
@@ -871,7 +870,7 @@ func TestCheckKernelParamHandler(t *testing.T) {
func TestArchRequiredKernelModules(t *testing.T) {
assert := assert.New(t)
tmpdir, err := ioutil.TempDir("", "")
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
@@ -886,7 +885,7 @@ func TestArchRequiredKernelModules(t *testing.T) {
return
}
dir, err := ioutil.TempDir("", "")
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}

View File

@@ -18,10 +18,10 @@ import (
"github.com/urfave/cli"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
"github.com/kata-containers/kata-containers/src/runtime/pkg/utils"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
exp "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/experimental"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/oci"
vcUtils "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
)
@@ -140,14 +140,6 @@ type HostInfo struct {
SupportVSocks bool
}
// NetmonInfo stores netmon details
type NetmonInfo struct {
// Path is the netmon command path taken from the runtime configuration
// (NetmonConfig.Path); it is also the command queried for Version.
Path string
// Version is built from the output of running Path with "--version", or
// is the unknown-version placeholder when that command fails.
Version VersionInfo
// Debug mirrors NetmonConfig.Debug from the runtime configuration.
Debug bool
// Enable mirrors NetmonConfig.Enable from the runtime configuration.
Enable bool
}
// EnvInfo collects all information that will be displayed by the
// env command.
//
@@ -159,7 +151,6 @@ type EnvInfo struct {
Initrd InitrdInfo
Hypervisor HypervisorInfo
Runtime RuntimeInfo
Netmon NetmonInfo
Host HostInfo
Agent AgentInfo
}
@@ -276,26 +267,6 @@ func getMemoryInfo() MemoryInfo {
}
}
// getNetmonInfo assembles the netmon details from the runtime configuration.
// The version is obtained by running the configured netmon command with
// "--version"; if that command fails, the unknown-version placeholder is
// reported rather than an error.
func getNetmonInfo(config oci.RuntimeConfig) NetmonInfo {
	cfg := config.NetmonConfig

	var info VersionInfo
	if out, err := getCommandVersion(cfg.Path); err == nil {
		info = constructVersionInfo(out)
	} else {
		// Best effort: a missing or broken netmon binary must not fail
		// the caller.
		info = unknownVersionInfo
	}

	return NetmonInfo{
		Path:    cfg.Path,
		Version: info,
		Debug:   cfg.Debug,
		Enable:  cfg.Enable,
	}
}
// getCommandVersion runs cmd with the single argument "--version" and
// returns the result of utils.RunCommand unchanged.
func getCommandVersion(cmd string) (string, error) {
	args := []string{cmd, "--version"}
	return utils.RunCommand(args)
}
@@ -364,8 +335,6 @@ func getEnvInfo(configFile string, config oci.RuntimeConfig) (env EnvInfo, err e
return EnvInfo{}, err
}
netmon := getNetmonInfo(config)
agent, err := getAgentInfo(config)
if err != nil {
return EnvInfo{}, err
@@ -398,7 +367,6 @@ func getEnvInfo(configFile string, config oci.RuntimeConfig) (env EnvInfo, err e
Initrd: initrd,
Agent: agent,
Host: host,
Netmon: netmon,
}
return env, nil

Some files were not shown because too many files have changed in this diff Show More