Compare commits

..

500 Commits

Author SHA1 Message Date
Peng Tao
365e358115 Merge pull request #3402 from snir911/2.3.1-branch-bump
# Kata Containers 2.3.1
2022-01-11 16:56:05 +08:00
Snir Sheriber
a2e524f356 release: Kata Containers 2.3.1
- stable-2.3 | kata-deploy: fix tar command in dockerfile
- stable-2.3 | versions: Upgrade to Cloud Hypervisor v20.2
- stable-2.3 Missing backports
- stable-2.3 | docs: Fix kernel configs README spelling errors
- docs: Fix outdated links
- stable-2.3 | versions: Upgrade to Cloud Hypervisor v20.1
- Backport osbuilder: Revert to using apk.static for Alpine
- stable-2.3 | runtime: only call stopVirtiofsd when shared_fs is virtio-fs
- Backport versions: Use Ubuntu initrd for non-musl archs
- stable-2.3 | Upgrade to Cloud Hypervisor v20.0 and Openapi-generator v5.3.0
- stable-2.3 | packaging: Fix missing commit message in building kata-runtime
- stable-2.3 | runtime: enable vhost-net for rootless hypervisor
- [backport] agent: create directories for watchable-bind mounts
- runtime: enable FUSE_DAX kernel config for DAX

dfbe74c4 kata-deploy: fix tar command in dockerfile
9e7eed7c versions: Upgrade to Cloud Hypervisor v20.2
53cf1dd0 tools/packaging: add copyright to kata-monitor's Dockerfile
a4dee6a5 packaging: delint tests dockerfiles
fd87b60c packaging: delint kata-deploy dockerfiles
2cb4f7ba ci/openshift-ci: delint dockerfiles
993dcc94 osbuilder: delint dockerfiles
bbd7cc2f packaging: delint kata-monitor dockerfiles
9837ec72 packaging: delint static-build dockerfiles
8785106f packaging/qemu: Use QEMU script to update submodules
a915f082 packaging/qemu: Use partial git clone
ec3faab8 security: Update rust crate versions
1f61be84 osbuilder: Add protoc to the alpine container
d2d8f9ac osbuilder: avoid to copy versions.txt which already deprecated
ca30eee3 kata-manager: Retrieve static tarball
0217abce kata-deploy: Deal with empty containerd conf file
572b25dd osbuilder: be runtime consistent also with podman build
84e69ecb agent: user container ID as watchable storage key for hashmap
77b6cfbd docs: Fix kernel configs README spelling errors
24085c95 docs: Fix outdated k8s link
514bf74f docs: Replicate branch rename on runtime-spec
77a2502a cri-o: Update links for the CRI-O github page
6413ecf4 docs: Backport source reorganization links
a0bed72d versions: Upgrade to Cloud Hypervisor v20.1
d03e05e8 versions: Use fixed, minor version for Alpine
0f7db91c osbuilder: Revert to using apk.static for Alpine
271d67a8 runtime: only call stopVirtiofsd when shared_fs is virtio-fs
7c15335d versions: Use Ubuntu initrd for non-musl archs
15080f20 virtcontainers: clh: Upgrade to openapi-generator v5.3.0
c2b8eb3c virtcontainers: clh: Re-generate the client code
fe0fbab5 versions: Upgrade to Cloud Hypervisor v20.0
be5468fd packaging: Fix missing commit message in building kata-runtime
18bb9a5d runtime: enable vhost-net for rootless hypervisor
3458073d agent: create directories for watchable-bind mounts
0e91503c runtime: enable FUSE_DAX kernel config for DAX

Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2022-01-06 20:51:21 +02:00
snir911
3d4dedefda Merge pull request #3396 from snir911/stable-2.3-fix-kata-deploy
stable-2.3 | kata-deploy: fix tar command in dockerfile
2022-01-06 20:36:36 +02:00
snir911
919fc56daa Merge pull request #3397 from likebreath/0105/backport_clh_v20.2
stable-2.3 | versions: Upgrade to Cloud Hypervisor v20.2
2022-01-06 11:22:41 +02:00
Snir Sheriber
dfbe74c489 kata-deploy: fix tar command in dockerfile
tar params are passed wrongly

Fixes: #3394
Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2022-01-06 08:26:36 +02:00
Bo Chen
9e7eed7c4b versions: Upgrade to Cloud Hypervisor v20.2
This is a bug release from Cloud Hypervisor addressing the following
issues: 1) Don't error out when setting up the SIGWINCH handler (for
console resize) when this fails due to older kernel; 2) Seccomp rules
were refined to remove syscalls that are now unused; 3) Fix reboot on
older host kernels when SIGWINCH handler was not initialised; 4) Fix
virtio-vsock blocking issue.

Details can be found: https://github.com/cloud-hypervisor/cloud-hypervisor/releases/tag/v20.2

Fixes: #3383

Signed-off-by: Bo Chen <chen.bo@intel.com>
(cherry picked from commit 1f581a0405)
2022-01-05 10:52:53 -08:00
Archana Shinde
a0bb8c5599 Merge pull request #3368 from snir911/backports-2.3
stable-2.3 Missing backports
2022-01-04 06:42:42 -08:00
Wainer dos Santos Moschetta
53cf1dd042 tools/packaging: add copyright to kata-monitor's Dockerfile
(added dependency at backport)

The kata-monitor's Dockerfile was added by Eric Ernst on commit 2f1cb7995f
but for some reason the static checker did not catch that the file was missing the copyright statement
at the time it was added. But it is now complaining about it. So this assigns the copyright to
him to make the static-checker happy.

Fixes #3329
github.com/kata-containers/tests#4310
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2022-01-03 15:31:09 +02:00
Wainer dos Santos Moschetta
a4dee6a591 packaging: delint tests dockerfiles
Removed all errors/warnings pointed out by hadolint version 2.7.0, except for the following
ignored rules:
  - "DL3008 warning: Pin versions in apt get install"
  - "DL3041 warning: Specify version with `dnf install -y <package>-<version>`"
  - "DL3033 warning: Specify version with `yum install -y <package>-<version>`"
  - "DL3048 style: Invalid label key"
  - "DL3003 warning: Use WORKDIR to switch to a directory"
  - "DL3018 warning: Pin versions in apk add. Instead of apk add <package> use apk add <package>=<version>"
  - "DL3037 warning: Specify version with zypper install -y <package>[=]<version>"

Fixes #3107
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2022-01-03 10:53:19 +02:00
Wainer dos Santos Moschetta
fd87b60c7a packaging: delint kata-deploy dockerfiles
Removed all errors/warnings pointed out by hadolint version 2.7.0, except for the following
ignored rules:
  - "DL3008 warning: Pin versions in apt get install"
  - "DL3041 warning: Specify version with `dnf install -y <package>-<version>`"
  - "DL3033 warning: Specify version with `yum install -y <package>-<version>`"
  - "DL3048 style: Invalid label key"
  - "DL3003 warning: Use WORKDIR to switch to a directory"
  - "DL3018 warning: Pin versions in apk add. Instead of apk add <package> use apk add <package>=<version>"
  - "DL3037 warning: Specify version with zypper install -y <package>[=]<version>"

Fixes #3107
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2022-01-03 10:52:53 +02:00
Wainer dos Santos Moschetta
2cb4f7ba70 ci/openshift-ci: delint dockerfiles
Removed all errors/warnings pointed out by hadolint version 2.7.0, except for the following
ignored rules:
  - "DL3008 warning: Pin versions in apt get install"
  - "DL3041 warning: Specify version with `dnf install -y <package>-<version>`"
  - "DL3033 warning: Specify version with `yum install -y <package>-<version>`"
  - "DL3048 style: Invalid label key"
  - "DL3003 warning: Use WORKDIR to switch to a directory"
  - "DL3018 warning: Pin versions in apk add. Instead of apk add <package> use apk add <package>=<version>"
  - "DL3037 warning: Specify version with zypper install -y <package>[=]<version>"

Fixes #3107
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2022-01-03 10:52:47 +02:00
Wainer dos Santos Moschetta
993dcc94ff osbuilder: delint dockerfiles
Removed all errors/warnings pointed out by hadolint version 2.7.0, except for the following
ignored rules:
  - "DL3008 warning: Pin versions in apt get install"
  - "DL3041 warning: Specify version with `dnf install -y <package>-<version>`"
  - "DL3033 warning: Specify version with `yum install -y <package>-<version>`"
  - "DL3048 style: Invalid label key"
  - "DL3003 warning: Use WORKDIR to switch to a directory"
  - "DL3018 warning: Pin versions in apk add. Instead of apk add <package> use apk add <package>=<version>"
  - "DL3037 warning: Specify version with zypper install -y <package>[=]<version>"

Fixes #3107
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2022-01-03 10:52:43 +02:00
Wainer dos Santos Moschetta
bbd7cc2f93 packaging: delint kata-monitor dockerfiles
Removed all errors/warnings pointed out by hadolint version 2.7.0, except for the following
ignored rules:
  - "DL3008 warning: Pin versions in apt get install"
  - "DL3041 warning: Specify version with `dnf install -y <package>-<version>`"
  - "DL3033 warning: Specify version with `yum install -y <package>-<version>`"
  - "DL3048 style: Invalid label key"
  - "DL3003 warning: Use WORKDIR to switch to a directory"
  - "DL3018 warning: Pin versions in apk add. Instead of apk add <package> use apk add <package>=<version>"
  - "DL3037 warning: Specify version with zypper install -y <package>[=]<version>"

Fixes #3107
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2022-01-03 10:52:39 +02:00
Wainer dos Santos Moschetta
9837ec728c packaging: delint static-build dockerfiles
Removed all errors/warnings pointed out by hadolint version 2.7.0, except for the following
ignored rules:
  - "DL3008 warning: Pin versions in apt get install"
  - "DL3041 warning: Specify version with `dnf install -y <package>-<version>`"
  - "DL3033 warning: Specify version with `yum install -y <package>-<version>`"
  - "DL3048 style: Invalid label key"
  - "DL3003 warning: Use WORKDIR to switch to a directory"
  - "DL3018 warning: Pin versions in apk add. Instead of apk add <package> use apk add <package>=<version>"
  - "DL3037 warning: Specify version with zypper install -y <package>[=]<version>"

Fixes #3107
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2022-01-03 10:52:33 +02:00
Wainer dos Santos Moschetta
8785106f6c packaging/qemu: Use QEMU script to update submodules
Currently QEMU's submodules are git cloned but there is the scripts/git-submodule.sh
which is meant for that. Let's use that script.

Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2022-01-03 10:52:25 +02:00
Wainer dos Santos Moschetta
a915f08266 packaging/qemu: Use partial git clone
The static build of QEMU takes a good amount of time on cloning the
source tree because we do a full git clone. In order to speed up that
operation this changes the Dockerfile so that it carries out a
partial clone by using the --depth=1 argument.

Fixes #3291
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2022-01-03 10:52:17 +02:00
Snir Sheriber
ec3faab892 security: Update rust crate versions
backporting b1f4e945b3 original commit msg (modified):

Update the rust dependencies that have upstream security fixes. Issues
fixed by this change:

- [`RUSTSEC-2020-0002`](https://rustsec.org/advisories/RUSTSEC-2020-0002) (`prost` crate)
- [`RUSTSEC-2020-0036`](https://rustsec.org/advisories/RUSTSEC-2020-0036) (`failure` crate)
- [`RUSTSEC-2021-0073`](https://rustsec.org/advisories/RUSTSEC-2021-0073) (`prost-types` crate)
- [`RUSTSEC-2021-0119`](https://rustsec.org/advisories/RUSTSEC-2021-0119) (`nix` crate)

This change also includes:

- Minor code changes for the new version of `prometheus` for the agent.

Fixes: #3296.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2021-12-29 16:58:14 +02:00
Fabiano Fidêncio
1f61be842d osbuilder: Add protoc to the alpine container
It seems the lack of protoc in the alpine containers is causing issues
with some of our CIs, such as the VFIO one.

Fixes: #3323

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-12-29 14:40:24 +02:00
zhanghj
d2d8f9ac65 osbuilder: avoid to copy versions.txt which already deprecated
Currently the versions.txt in rootfs-builder dir is already removed,
so avoid copying it in the list of helper files.

Fixes: #3267

Signed-off-by: zhanghj <zhanghj.lc@inspur.com>
2021-12-29 14:39:34 +02:00
Jakob Naucke
ca30eee3e2 kata-manager: Retrieve static tarball
In `utils/kata-manager.sh`, we download the first asset listed for the
release, which used to be the static x86_64 tarball. If that happened to
not match the system architecture, we would abort. Besides that logic
being invalid for !x86_64 (despite not distributing other tarballs at
the moment), the first asset listed is also not the static tarball any
more, it is the vendored source tarball. Retrieve all _static_ tarballs
and select the appropriate one depending on architecture.

Fixes: #3254
Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-12-29 14:39:25 +02:00
Fabiano Fidêncio
0217abce24 kata-deploy: Deal with empty containerd conf file
As containerd can properly run without having an existing
`/etc/containerd/config.toml` file (it'd run using the default
configuration), let's explicitly create the file in those cases.

This will avoid issues on amending runtime classes to a non-existent
file.

Fixes: #3229

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
Tested-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-12-29 14:39:14 +02:00
Snir Sheriber
572b25dd35 osbuilder: be runtime consistent also with podman build
Use the same runtime used for podman run also for the podman build cmd
Additionally remove "docker" from the docker_run_args variable

Fixes: #3239
Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2021-12-29 14:38:32 +02:00
bin
84e69ecb22 agent: user container ID as watchable storage key for hashmap
Using the sandbox ID as the key causes the storage of failed containers
to leak.

Fixes: #3172

Signed-off-by: bin <bin@hyper.sh>
2021-12-29 14:38:18 +02:00
Archana Shinde
57a6d46376 Merge pull request #3347 from Jakob-Naucke/backport-spell-kernel-readme
stable-2.3 | docs: Fix kernel configs README spelling errors
2021-12-23 08:56:52 -08:00
Jakob Naucke
77b6cfbd15 docs: Fix kernel configs README spelling errors
- `fragments` in backticks
- s/perfoms/performs/

Fixes: #3338
Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-12-23 15:54:10 +01:00
Peng Tao
0e1cb124b7 Merge pull request #3335 from Jakob-Naucke/backport-src-reorg
docs: Fix outdated links
2021-12-23 11:40:55 +08:00
Jakob Naucke
24085c9553 docs: Fix outdated k8s link
in virtcontainers readme

Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-12-22 19:42:47 +01:00
Jakob Naucke
514bf74f8f docs: Replicate branch rename on runtime-spec
renamed branch `master` to `main`

Fixes: #3336
Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-12-22 18:18:46 +01:00
Fabiano Fidêncio
77a2502a0f cri-o: Update links for the CRI-O github page
The links are either pointing to the not-used-anymore `master` branch,
or to the kubernetes-incubator page.

Let's always point to the CRI-O github page, using the `main` branch.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-12-22 18:18:46 +01:00
Jakob Naucke
6413ecf459 docs: Backport source reorganization links
#3244 moved directories that were referred to with links to `main`,
which affects stable.

Fixes: #3334
Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-12-22 17:59:41 +01:00
Fabiano Fidêncio
a31b5b9ee8 Merge pull request #3269 from likebreath/1214/backport_clh_v20.1
stable-2.3 | versions: Upgrade to Cloud Hypervisor v20.1
2021-12-15 00:18:56 +01:00
Bo Chen
a0bed72d49 versions: Upgrade to Cloud Hypervisor v20.1
This is a bug release from Cloud Hypervisor addressing the following
issues: 1) Networking performance regression with virtio-net; 2) Limit
file descriptors sent in vfio-user support; 3) Fully advertise PCI MMIO
config regions in ACPI tables; 4) Set the TSS and KVM identity maps so
they don't overlap with firmware RAM; 5) Correctly update the DeviceTree
on restore.

Details can be found: https://github.com/cloud-hypervisor/cloud-hypervisor/releases/tag/v20.1

Fixes: #3262

Signed-off-by: Bo Chen <chen.bo@intel.com>
(cherry picked from commit bbfb10e169)
2021-12-14 11:06:08 -08:00
Fabiano Fidêncio
d61bcb8a44 Merge pull request #3247 from Jakob-Naucke/backport-apk-static
Backport osbuilder: Revert to using apk.static for Alpine
2021-12-10 12:10:59 +01:00
Jakob Naucke
d03e05e803 versions: Use fixed, minor version for Alpine
- Set Alpine guest rootfs to 3.13 on all instances.
- Specify a minor version rather than patch level as the Alpine
  repositories use that.

Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-12-09 16:47:43 +01:00
Jakob Naucke
0f7db91c0f osbuilder: Revert to using apk.static for Alpine
#2399 partially reverted #418, missing on returning to bootstrapping a
rootfs with `apk.static` instead of copying the entire root, which can
result in drastically larger (more than 10x) images. Revert this as well
(requires some updates to URL building).

Fixes: #3216
Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-12-09 16:47:43 +01:00
Julio Montes
25ee73ceb3 Merge pull request #3230 from liubin/backport/3220
stable-2.3 | runtime: only call stopVirtiofsd when shared_fs is virtio-fs
2021-12-08 08:32:04 -06:00
Fabiano Fidêncio
64ae76e967 Merge pull request #3224 from Jakob-Naucke/backport-ppc64le-s390x-ubuntu-initrd
Backport versions: Use Ubuntu initrd for non-musl archs
2021-12-08 09:05:13 +01:00
bin
271d67a831 runtime: only call stopVirtiofsd when shared_fs is virtio-fs
If shared_fs is set to virtio-9p, the virtiofsd is not started,
so there is no need to stop it.

Fixes: #3219

Signed-off-by: bin <bin@hyper.sh>
2021-12-08 11:30:35 +08:00
Julio Montes
f42c7d5125 Merge pull request #3215 from likebreath/1206/backport_clh
stable-2.3 | Upgrade to Cloud Hypervisor v20.0 and Openapi-generator v5.3.0
2021-12-07 07:51:21 -06:00
Jakob Naucke
7c15335dc9 versions: Use Ubuntu initrd for non-musl archs
ppc64le & s390x have no (well supported) musl target for Rust,
therefore, the agent must use glibc and cannot use Alpine. Specify
Ubuntu as the distribution to be used for initrd.

Fixes: #3212
Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-12-07 12:15:16 +01:00
Bo Chen
15080f20e7 virtcontainers: clh: Upgrade to openapi-generator v5.3.0
The latest release of openapi-generator v5.3.0 contains the fix for
`dropping err` bug [1]. This patch also re-generated the client code of
Cloud Hypervisor to have the bug fixed.

[1] https://github.com/OpenAPITools/openapi-generator/pull/10275

Fixes: #3201

Signed-off-by: Bo Chen <chen.bo@intel.com>
(cherry picked from commit 995300260e)
2021-12-06 18:41:39 -08:00
Bo Chen
c2b8eb3c2c virtcontainers: clh: Re-generate the client code
This patch re-generates the client code for Cloud Hypervisor v19.0.
Note: The client code of cloud-hypervisor's (CLH) OpenAPI is
automatically generated by openapi-generator [1-2].

[1] https://github.com/OpenAPITools/openapi-generator
[2] https://github.com/kata-containers/kata-containers/blob/main/src/runtime/virtcontainers/pkg/cloud-hypervisor/README.md

Signed-off-by: Bo Chen <chen.bo@intel.com>
(cherry picked from commit 4756a04b2d)
2021-12-06 18:38:48 -08:00
Bo Chen
fe0fbab574 versions: Upgrade to Cloud Hypervisor v20.0
Highlights from the Cloud Hypervisor release v20.0: 1) Multiple PCI
segments support (now support up to 496 PCI devices); 2) CPU pinning; 3)
Improved VFIO support; 4) Safer code; 5) Extended documentation; 6) Bug
fixes.

Details can be found: https://github.com/cloud-hypervisor/cloud-hypervisor/releases/tag/v20.0

Fixes: #3178

Signed-off-by: Bo Chen <chen.bo@intel.com>
(cherry picked from commit 0bf4d2578a)
2021-12-06 18:38:48 -08:00
GabyCT
89f9672f56 Merge pull request #3205 from Bevisy/stable-2.3-3196
stable-2.3 | packaging: Fix missing commit message in building kata-runtime
2021-12-06 10:26:17 -06:00
Fabiano Fidêncio
0a32a1793d Merge pull request #3203 from fengwang666/my_2.3_pr_backport
stable-2.3 | runtime: enable vhost-net for rootless hypervisor
2021-12-06 17:08:33 +01:00
Binbin Zhang
be5468fda7 packaging: Fix missing commit message in building kata-runtime
add `git` package to the shim-v2 build image

Fixes: #3196
Backport PR: #3197

Signed-off-by: Binbin Zhang <binbin36520@gmail.com>
2021-12-06 11:04:18 +08:00
Feng Wang
18bb9a5d9b runtime: enable vhost-net for rootless hypervisor
vhost-net is disabled in the rootless kata runtime feature, which has been abandoned since kata 2.0.
I reused the rootless flag for nonroot hypervisor and would like to enable vhost-net.

Fixes #3182

Signed-off-by: Feng Wang <feng.wang@databricks.com>
(cherry picked from commit b3bcb7b251)
2021-12-03 11:28:40 -08:00
Bin Liu
f068057073 Merge pull request #3184 from liubin/backport/3140
[backport] agent: create directories for watchable-bind mounts
2021-12-03 21:24:14 +08:00
bin
3458073d09 agent: create directories for watchable-bind mounts
In function `update_target`, if the updated source is a directory,
we should create the corresponding directory.

Fixes: #3140

Signed-off-by: bin <bin@hyper.sh>
2021-12-03 14:32:08 +08:00
Bin Liu
f9c09ad5bc Merge pull request #3177 from fengwang666/my_2.3_pr_backport
runtime: enable FUSE_DAX kernel config for DAX
2021-12-03 13:32:18 +08:00
Feng Wang
0e91503cd4 runtime: enable FUSE_DAX kernel config for DAX
Otherwise DAX device cannot be set up.

Fixes #3165

Signed-off-by: Feng Wang <feng.wang@databricks.com>
(cherry picked from commit 6105e3ee85)
2021-12-02 09:22:26 -08:00
Fabiano Fidêncio
185f96d170 Merge pull request #3150 from fidencio/2.3.0-branch-bump
# Kata Containers 2.3.0
2021-11-29 22:27:21 +01:00
Fabiano Fidêncio
9bc543f5db release: Kata Containers 2.3.0
- stable-2.3 | osbuilder: fix missing cpio package when building rootfs-initrd image
- stable-2.3 | osbuilder: add coreutils to guest rootfs
- stable-2.3 | backport kata-deploy fixes / improvements
- stable-2.3 | tools/osbuilder: build QAT kernel in fedora 34
- backport: fix symlink handling in agent watcher
- stable-2.3: add VFIO kernel dependencies for ppc64le
- [stable] runtime: Update containerd to 1.5.8
- stable-2.3: disable libudev when building static QEMU
- stable-2.3: virtcontainers: fix failing template test on ppc64le
- stable-2.3: cgroups systemd fix
- stable-2.3:remove non used actions
- stable-2.3 | versions: bump golang to 1.17.x

198e0d16 release: Adapt kata-deploy for 2.3.0
df34e919 osbuilder: fix missing cpio package when building rootfs-initrd image
f61e31cd osbuilder: add coreutils to guest rootfs
cb7891e0 tools/osbuilder: build QAT kernel in fedora 34
2667e028 workflows: only allow org members to run `/test_kata_deploy`
3542cba8 workflows: Add back the checks for running test-kata-deploy
117b9202 kata-deploy: Ensure we test HEAD with `/test_kata_deploy`
db9cd107 watcher: tests: ensure there is 20ms delay between fs writes
a51a1f6d watchers: handle symlinked directories, dir removal
5bc1c209 watchers: don't dereference symlinks when copying files
34a1b539 stable-2.3: add VFIO kernel dependencies for ppc64le
8a705f74 runtime: Update containerd to 1.5.8
ac5ab86e qemu: fix snap build by disabling libudev
d22ec599 virtcontainers: fix failing template test on ppc64le
f9bde321 workflows: Remove non-used main.yaml
b8215119 cgroups: Fix systemd cgroup support
a9d5377b cgroups: pass vhost-vsock device to cgroup
ea83ff1f runtime: remove prefix when cgroups are managed by systemd
91003c27 versions: bump golang to 1.17.x

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-11-29 20:08:39 +01:00
Fabiano Fidêncio
198e0d1666 release: Adapt kata-deploy for 2.3.0
kata-deploy files must be adapted to a new release.  The cases where it
happens are when the release goes from -> to:
* main -> stable:
  * kata-deploy / kata-cleanup: change from "latest" to "rc0"
  * kata-deploy-stable / kata-cleanup-stable: are removed

* stable -> stable:
  * kata-deploy / kata-cleanup: bump the release to the new one.

There are no changes when doing an alpha release, as the files on the
"main" branch always point to the "latest" and "stable" tags.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-11-29 20:08:39 +01:00
Fabiano Fidêncio
bf183c5f7f Merge pull request #3148 from fidencio/wip/stable-2.3-fix-cpio-missing-cpio-package
stable-2.3 | osbuilder: fix missing cpio package when building rootfs-initrd image
2021-11-29 20:07:16 +01:00
Binbin Zhang
df34e91978 osbuilder: fix missing cpio package when building rootfs-initrd image
1. install cpio package before building rootfs-initrd image
2. add `pipefail;errexit` check to the scripts

Fixes: #3144

Signed-off-by: Binbin Zhang <binbin36520@gmail.com>
(cherry picked from commit 8ee67aae4f)
2021-11-29 18:29:02 +01:00
Fabiano Fidêncio
5995efc0a6 Merge pull request #3143 from bergwolf/coreutils-2.3
stable-2.3 | osbuilder: add coreutils to guest rootfs
2021-11-29 12:31:38 +01:00
Fabiano Fidêncio
000f878417 Merge pull request #3141 from fidencio/wip/kata-deploy-backports
stable-2.3 | backport kata-deploy fixes / improvements
2021-11-29 12:11:21 +01:00
Fabiano Fidêncio
a6a76bb092 Merge pull request #3142 from fidencio/wip/stable-2.3-backports-before-a-release
stable-2.3 | tools/osbuilder: build QAT kernel in fedora 34
2021-11-29 12:11:13 +01:00
Peng Tao
f61e31cd84 osbuilder: add coreutils to guest rootfs
So that the debug console is more useful. In the meantime, remove
iptables as it is not used by kata-agent any more.

Fixes: #3138
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-11-29 16:53:04 +08:00
Julio Montes
cb7891e0b4 tools/osbuilder: build QAT kernel in fedora 34
kernel compiled in fedora 35 (latest) is not working, following error
is reported:

```
qemu-system-x86_64: Error loading uncompressed kernel without PVH ELF
Note
```

Build QAT kernel in fedora 34 container to fix it

fixes #3135

Signed-off-by: Julio Montes <julio.montes@intel.com>
(cherry picked from commit 857501d8dd)
Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-11-29 08:24:31 +01:00
Fabiano Fidêncio
2667e0286a workflows: only allow org members to run /test_kata_deploy
Let's take advantage of the "is-organization-member" action and only
allow members who are part of the `kata-containers` organization to
trigger `/test_kata_deploy`.

One caveat with this approach is that for the user to be considered as
part of an organization, they **must** have their "Organization
Visibility" configured as Public (and I think the default is Private).

This was found out and suggested by @jcvenegas!

Fixes: #3130

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
(cherry picked from commit 5e7c1a290f)
2021-11-29 08:04:46 +01:00
Fabiano Fidêncio
3542cba8f3 workflows: Add back the checks for running test-kata-deploy
Commit 3c9ae7f made /test_kata_deploy run
against HEAD, but it also mistakenly removed all the checks that ensure
/test_kata_deploy only runs when explicitly called.

Mea culpa on this, and let's add the tests back.

Fixes: #3101

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
(cherry picked from commit a7c08aa4b6)
2021-11-29 08:04:41 +01:00
Fabiano Fidêncio
117b920230 kata-deploy: Ensure we test HEAD with /test_kata_deploy
In the past few releases we ended up hitting issues that could be easily
avoided if `/test_kata_deploy` would use HEAD instead of a specific
tarball.

By the end of the day, we want to ensure kata-deploy works, but before
we cut a release we also want to ensure that the binaries used in that
release are in a good shape.  If we don't do that we end up either
having to roll a release back, or to cut a second release in a really
short time (and that's time consuming).

Note: there's code duplication here that could and should be avoided,
but I sincerely would prefer treating it in a different PR.

Fixes: #3001

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
(cherry picked from commit 3c9ae7fb4b)
2021-11-29 08:02:56 +01:00
Eric Ernst
5694749ce5 Merge pull request #3087 from egernst/fix-symlinks-backport
backport: fix symlink handling in agent watcher
2021-11-19 15:31:48 -08:00
Eric Ernst
db9cd1078f watcher: tests: ensure there is 20ms delay between fs writes
We noticed s390x test failures on several of the watcher unit tests.

Discovered that on s390 in particular, if we update a file in quick
succession, the timestamp on the file would not be unique between the
writes. Through testing, we observe that a 20 millisecond delay is very
reliable for being able to observe the timestamp update. Let's ensure we
have this delay between writes for our tests so our tests are more
reliable.

In "the real world" we'll be polling for changes every 2 seconds, and
frequency of filesystem updates will be on order of minutes and days,
rather than microseconds.

Fixes: #2946

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-11-19 13:04:26 -08:00
Eric Ernst
a51a1f6d06 watchers: handle symlinked directories, dir removal
- Even a directory could be a symlink - check for this. This is very
common when using configmaps/secrets
- Add unit test to better mimic a configmap, configmap update
- We would never remove directories before. Let's ensure that these are
added to the watched_list, and verify in unit tests
- Update unit tests which exercise maximum number of files per entry. There's a change
in behavior now that we consider directories/symlinks watchable as well.
For these tests, it means we support one less file in a watchable mount.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-11-19 13:04:26 -08:00
Eric Ernst
5bc1c209b2 watchers: don't dereference symlinks when copying files
The current implementation just copies the file, dereferencing any
symlinks in the process. This results in symlinks not being preserved,
and a change in layout relative to the mount that we are making
watchable.

What we want is something like "cp -d"

This isn't available in a crate, so let's go ahead and introduce a copy
function which will create a symlink with same relative path if the
source file is a symlink. Regular files are handled with the standard
fs::copy.

Introduce a unit test to verify symlinks are now handled appropriately.

Fixes: #2950

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-11-19 13:04:24 -08:00
Fabiano Fidêncio
b2851ffc9c Merge pull request #3082 from Amulyam24/kernel_vfio
stable-2.3: add VFIO kernel dependencies for ppc64le
2021-11-19 17:26:23 +01:00
Fabiano Fidêncio
45eafafdf3 Merge pull request #3076 from c3d/backport/3074-containerd-update
[stable] runtime: Update containerd to 1.5.8
2021-11-19 10:39:15 +01:00
Amulyam24
34a1b5396a stable-2.3: add VFIO kernel dependencies for ppc64le
Recently added VFIO kernel configs require additional
dependencies on ppc64le.

Fixes: #2991

Signed-off-by: Amulyam24 <amulmek1@in.ibm.com>
2021-11-19 11:29:10 +05:30
Greg Kurz
f1cd3b6300 Merge pull request #3070 from gkurz/backport-snap-udev
stable-2.3: disable libudev when building static QEMU
2021-11-18 22:18:41 +01:00
Greg Kurz
e0b74bb413 Merge pull request #3072 from gkurz/backport-template-test
stable-2.3: virtcontainers: fix failing template test on ppc64le
2021-11-18 21:29:02 +01:00
Christophe de Dinechin
8a705f74b5 runtime: Update containerd to 1.5.8
Release 1.5.8 of containerd contains fixes for two low-severity advisories:

[GHSA-mc8v-mgrf-8f4m](https://github.com/opencontainers/distribution-spec/security/advisories/GHSA-mc8v-mgrf-8f4m)
[GHSA-77vh-xpmg-72qh](https://github.com/opencontainers/image-spec/security/advisories/GHSA-77vh-xpmg-72qh)

Fixes: #3074

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2021-11-18 19:30:36 +01:00
Amulyam24
ac5ab86ebd qemu: fix snap build by disabling libudev
While building snap, static qemu is considered. Disable libudev
as it doesn't have static libraries on most of the distros of all
archs.

Backport-from: #3003
Fixes: #3002

Signed-off-by: Amulyam24 <amulmek1@in.ibm.com>
(cherry picked from commit 112ea25859)
Signed-off-by: Greg Kurz <groug@kaod.org>
2021-11-18 17:50:58 +01:00
Amulyam24
d22ec59920 virtcontainers: fix failing template test on ppc64le
If a file/directory doesn't exist, os.Stat() returns an
error. Assert the returned value with os.IsNotExist() to
prevent it from failing.

Backport-from: #2921
Fixes: #2920

Signed-off-by: Amulyam24 <amulmek1@in.ibm.com>
(cherry picked from commit d5a18173b9)
Signed-off-by: Greg Kurz <groug@kaod.org>
2021-11-18 16:05:18 +01:00
snir911
440657b36d Merge pull request #3037 from snir911/stable-fix-cgroups
stable-2.3: cgroups systemd fix
2021-11-15 12:19:58 +02:00
snir911
0c00a9d463 Merge pull request #3039 from snir911/stable-2.3-remove-non-used-actions
stable-2.3:remove non used actions
2021-11-15 11:09:33 +02:00
Fabiano Fidêncio
f9bde321e9 workflows: Remove non-used main.yaml
The main.yaml workflow was created and used only on 1.x.  We inherited
it, but we didn't remove it after deprecating the 1.x repos.

While here, let's also update the reference to the `main.yaml` file,
and point to `release.yaml` (the file that's actually used for 2.x).

Fixes: #3033

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-11-14 10:33:19 +02:00
Snir Sheriber
b821511992 cgroups: Fix systemd cgroup support
As github.com/containerd/cgroups doesn't support scope
units which are essential in some cases, let's create
the cgroups manually and load them through the cgroups
api.
This is currently done only when there's a single sandbox
cgroup (sandbox_cgroup_only=true), otherwise we set it
as static cgroup path as it used to be (until a proper
solution for overhead cgroup under systemd will be
suggested)

Backport-from: #2959
Fixes: #2868
Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2021-11-14 09:41:35 +02:00
Snir Sheriber
a9d5377bd9 cgroups: pass vhost-vsock device to cgroup
for the sandbox cgroup

Backport-from: #2959
Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2021-11-14 09:41:22 +02:00
Snir Sheriber
ea83ff1fc3 runtime: remove prefix when cgroups are managed by systemd
as done previously in 9949daf4dc

Backport-from: #2959
Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2021-11-14 09:37:24 +02:00
Fabiano Fidêncio
03f7a5e49b Merge pull request #3026 from fidencio/wip/stable-2.3-backport-golang-bump
stable-2.3 | versions: bump golang to 1.17.x
2021-11-13 00:08:12 +01:00
Fabiano Fidêncio
91003c2751 versions: bump golang to 1.17.x
According to https://endoflife.date/go golang 1.15 is not supported
anymore.  Let's remove it from our tests, add 1.17.x, and bump the
newest version known to work when building kata to 1.17.3.

Fixes: #3016

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
(cherry picked from commit 395638c4bc)
2021-11-11 22:27:59 +01:00
Eric Ernst
57ffe14940 Merge pull request #3021 from ManaSugi/fix-yq-for-2.3
stable-2.3 | release: Use ${GOPATH}/bin/yq for upload-libseccomp-tarball action
2021-11-11 11:39:02 -08:00
Manabu Sugimoto
5e9b807ba0 release: Use ${GOPATH}/bin/yq for upload-libseccomp-tarball action
We need to explicitly call `${GOPATH}/bin/yq` that is installed by
`ci/install_yq.sh`.

Fixes: #3014

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
(cherry picked from commit 3430723594)
2021-11-11 23:46:37 +09:00
Fabiano Fidêncio
de6fe98ec0 Merge pull request #3010 from fidencio/2.3.0-rc1-branch-bump
# Kata Containers 2.3.0-rc1
2021-11-10 21:44:58 +01:00
Fabiano Fidêncio
de0eea5f44 release: Kata Containers 2.3.0-rc1
- stable-2.3 | runtime: Revert "runtime: use containerd package instead of cri-containerd"

96b66d2c docs: Fix typo
62a51d51 runtime: Revert "runtime: use containerd package instead of cri-containerd"

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-11-10 19:01:14 +01:00
Fabiano Fidêncio
73d7929c10 Merge pull request #3008 from fidencio/wip/backport-crioption-fix
stable-2.3 | runtime: Revert "runtime: use containerd package instead of cri-containerd"
2021-11-10 17:10:29 +01:00
James O. D. Hunt
96b66d2cb4 docs: Fix typo
Correct a typo identified by the static checker's spell checker.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
(cherry picked from commit b09dd7a883)
Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-11-10 15:58:34 +01:00
Peng Tao
62a51d51a2 runtime: Revert "runtime: use containerd package instead of cri-containerd"
This reverts commit 76f16fd1a7 to bring
back cri-containerd crioptions parsing so that kata works with older
containerd versions like v1.3.9 and v1.4.6.

Fixes: #2999
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
(cherry picked from commit eacfcdec19)
Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2021-11-10 13:42:38 +01:00
Fabiano Fidêncio
c9e6efb1e1 Merge pull request #2976 from bergwolf/2.3.0-rc0-branch-bump
# Kata Containers 2.3.0-rc0
2021-11-05 14:19:21 +01:00
James O. D. Hunt
4be2c8b190 Merge pull request #2602 from cmaf/upgrade-opentelemetry
vendor: update OpenTelemetry
2021-11-05 11:00:25 +00:00
Peng Tao
99c46be787 release: Kata Containers 2.3.0-rc0
- runtime: make sure the "Shutdown" trace span have a correct end
- tracing: Accept multiple dynamic tags
- logging: Enable agent debug output for release builds
- agent: "Revert agent: Disable seccomp feature on aarch64 temporarily"
- runtime: Enhancement for Makefile
- osbuilder: build image-builder image from Fedora 34
- agent: refactor process IO processing
- agent-ctl: Update for Hybrid VSOCK
- docs: Fix outdated links
- ci/install_libseccomp: Fix libseccomp build and misc improvement
- virtcontainers: simplify read-only mount handling
- runtime: add fast-test to let test exit on error
- test: Fix random failure for TestIoCopy
- cli: Show available guest protection in env output
- Update k8s, critools, and CRI-O to their 1.22 release
- package: assign proper value to redefined_string  in build-kernel.sh
- agent: Make wording of error message match CRI-O test suite
- docs: Moving from EOT to EOF
- virtcontainers: api: update the functions in the api.md docs
- release: Upload libseccomp sources with notice to release page
- virtcontainers: check that both initrd and image are not set
- agent: Fix the configuration sample file
- runtime: set tags for trace span
- agent-ctl: Implement Linux OCI spec handling
- runtime: Remove comments about unsupported features in config for clh
- tools/packaging: Add options for VFIO to guest kernel
- agent/runtime: Add seccomp feature
- ci: test-kata-deploy: Get rid of slash-command-action action
- This is to bump the OOT QAT 1.7 driver version to the latest version.…
- forwarder: Drop privileges when using hybrid VSOCK
- packaging/static-build: s390x fixes
- agent-ctl: improve the oci_to_grpc code
- agent: do not return error but print it if task wait failed
- virtcontainers: delete duplicated notify in watchHypervisor function
- agent: Handle uevent remove actions
- enable unit test on arm
- rustjail: Consistent coding style of LinuxDevice type
- cli: Fix outdated kata-runtime bash completion
- Allow VFIO devices to be used as VFIO devices in the container
- Expose top level hypervisor methods -
- Upgrade to Cloud Hypervisor v19.0
- docs: use-cases: Update Intel SGX use case
- virtcontainers: clh: Enable the `seccomp` feature
- runtime: delete cri containerd plugin from versions.yaml
- docs: Write tracing documentation
- runtime: delete useless src/runtime/cli/exit.go
- snap: add cloud-hypervisor and experimental kernel
- osbuilder: Call detect_rust_version() right before install_rust.sh
- docs: Updating Developer Guide re qemu-img
- versions: Add libseccomp and gperf version
- Enable agent tracing for hybrid VSOCK hypervisors
- runtime: optimize test code
- runtime: use containerd package instead of cri-containerd
- runtime: update sandbox root dir cleanup behavior in rootless hypervisor
- utils: kata-manager: Update kata-manager.sh for new containerd config
- osbuilder: Re-enable building the agent in Docker
- agent: Do not fail when trying to adding existing routes
- tracing: Fix typo in "package" tag name
- kata-deploy: add .dockerignore file
- runtime: change name in config settings back to "kata"
- tracing: Remove trace mode and trace type

09d5d88 runtime: tracing: Change method for adding tags
bcf3e82 logging: Enable agent debug output for release builds
a239a38 osbuilder: build image-builder image from Fedora 34
375ad2b runtime: Enhancement for Makefile
b468dc5 agent: Use dup3 system call in unit tests of seccomp
1aaa059 agent: "Revert agent: Disable seccomp feature on aarch64 temporarily"
1e331f7 agent: refactor process IO processing
9d3ec58 runtime: make sure the "Shutdown" trace span have a correct end
3f21af9 runtime: add fast-test to let test exit on error
9b270d7 ci/install_libseccomp: use a temporary work directory
98b4406 ci/install_libseccomp: Fix fail when DESTDIR is set
338ac87 virtcontainers: api: update the functions in the api.md docs
23496f9 release: Upload libseccomp sources with notice to release page
e610fc8 runtime: Remove comments about unsupported features in config for clh
7e40195 agent-ctl: Add stub for AddSwap API
82de838 agent-ctl: Update for Hybrid VSOCK
d1bcf10 forwarder: Remove quotes from socket path in doc
e66d047 virtcontainers: simplify read-only mount handling
bdf4824 tools/packaging: Add options for VFIO to guest kernel
c509a20 agent-ctl: Implement Linux OCI spec handling
42add7f agent: Disable seccomp feature on aarch64 temporarily
5dfedc2 docs: Add explanation about seccomp
45e7c2c static-checks: Add step for installing libseccomp
a3647e3 osbuilder: Set up libseccomp library
3be50ad agent: Add support for Seccomp
4280415 agent: Fix the configuration sample file
b0bc71f ci: test-kata-deploy: Get rid of slash-command-action action
309dae6 virtcontainers: check that both initrd and image are not set
a10cfff forwarder: Fix changing log level
6abccb9 forwarder: Drop privileges when using hybrid VSOCK
bf00b8d agent-ctl: improve the oci_to_grpc code
b67fa9e forwarder: Make explicit root check
e377578 forwarder: Fix docs socket path
5f30633 virtcontainers: delete duplicated notify in watchHypervisor function
5f5eca6 agent: do not return error but print it if task wait failed
d2a7b6f packaging/static-build: s390x fixes
6cc8000 cli: Show available guest protection in env output
2063b13 virtcontainers: Add func AvailableGuestProtections
a13e2f7 agent: Handle uevent remove actions
34273da runtime/device: Allow VFIO devices to be presented to guest as VFIO devices
68696e0 runtime: Add parameter to constrainGRPCSpec to control VFIO handling
d9e2e9e runtime: Rename constraintGRPCSpec to improve grammar
57ab408 runtime: Introduce "vfio_mode" config variable and annotation
730b9c4 agent/device: Create device nodes for VFIO devices
175f9b0 rustjail: Allow container devices in subdirectories
9891efc rustjail: Correct sanity checks on device path
d6b62c0 rustjail: Change mknod_dev() and bind_dev() to take relative device path
2680c0b rustjail: Provide useful context on device node creation errors
42b92b2 agent/device: Allow container devname to differ from the host
827a41f agent/device: Refactor update_spec_device_list()
8ceadcc agent/device: Sanity check guest IOMMU groups
ff59db7 agent/device: Add function to get IOMMU group for a PCI device
13b06a3 agent/device: Rebind VFIO devices to VFIO driver inside guest
e22bd78 agent/device: Add helper function for binding a guest device to a driver
b40eedc rustjail: Consistent coding style of LinuxDevice type
57c0f93 agent: fix race condition when test watcher
1a96b8b template: disable template unit test on arm
43b13a4 runtime: DefaultMaxVCPUs should not greater than defaultMaxQemuVCPUs
c59c367 runtime: current vcpu number should be limited
fa92251 runtime: kernel version with '+' as suffix panic in parse
52268d0 hypervisor: Expose the hypervisor itself
a72bed5 hypervisor: update tests based on createSandbox->CreateVM change
f434bcb hypervisor: createSandbox is CreateVM
76f1ce9 hypervisor: startSandbox is StartVM
fd24a69 hypervisor: waitSandbox is waitVM
a6385c8 hypervisor: stopSandbox is StopVM
f989078 hypervisor: resumeSandbox is ResumeVM
73b4f27 hypervisor: saveSandbox is SaveVM
7308610 hypervisor: pauseSandbox is nothing but PauseVM
8f78e1c hypervisor: The SandboxConsole is the VM's console
4d47aee hypervisor: Export generic interface methods
6baf258 hypervisor: Minimal exports of generic hypervisor internal fields
37fa453 osbuilder: Update QAT driver in Dockerfile
8030b6c virtcontainers: clh: Re-generate the client code
8296754 versions: Upgrade to Cloud Hypervisor v19.0
2b13944 docs: Fix outdated links
4f75ccb docs: use-cases: Update Intel SGX use case
4f018b5 runtime: delete useless src/runtime/cli/exit.go
7a80aeb docs: Moving from EOT to EOF
09a5e03 docs: Write tracing documentation
b625f62 runtime: delete cri containerd plugin from versions.yaml
24fff57 snap: make curl commands consistent
2b9f79c snap: add cloud-hypervisor and experimental kernel
273a1a9 runtime: optimize test code
76f16fd runtime: use containerd package instead of cri-containerd
6d55b1b docs: use containerd to replace cri-containerd
ed02bc9 packaging: add containerd to versions.yaml
50da26d osbuilder: Call detect_rust_version() right before install_rust.sh
b4fadc9 docs: Updating Developer Guide re qemu-img
b8e69ce versions: Add libseccomp and gperf version
17a8c5c runtime: Fix random failure for TestIoCopy
f34f67d osbuilder: Specify version when installing Rust
135a080 osbuilder: Pass CI env to container agent build
eb5dd76 osbuilder: Re-enable building the agent in Docker
bcffa26 tracing: Fix typo in "package" tag name
e61f5e2 runtime: Show socket path in kata-env output
5b3a349 trace-forwarder: Support Hybrid VSOCK
e42bc05 kata-deploy: add .dockerignore file
321be0f tracing: Remove trace mode and trace type
7d0b616 agent: Do not fail when trying to adding existing routes
3f95469 runtime: logging: Add variable for syslog tag
adc9e0b runtime: fix two bugs in rootless hypervisor
51cbe14 runtime: Add option "disable_seccomp" to config hypervisor.clh
98b7350 virtcontainers: clh: Enable the `seccomp` feature
46720c6 runtime: set tags for trace span
d789b42 package: assign proper value to redefined_string
4d7ddff utils: kata-manager: Update kata-manager.sh for new containerd config
f5172d1 cli: Fix outdated kata-runtime bash completion
d45c86d versions: Update CRI-O to its 1.22 release
c4a6426 versions: Update k8s & critools to v1.22
881b996 agent: Make wording of error message match CRI-O test suite

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-11-05 10:49:54 +00:00
Chelsea Mafrica
d17100aee6 vendor: update OpenTelemetry to v1.0.0
Upgrade from v0.20.0 to v1.0.0, first stable release.

    Git log

    4bfa0034 Release prep v1.0.0-RC3 (2218)
    c7ae470a Refactor SDK span creation and implementation (2213)
    db317fce Verify and update OTLP trace exporter documentation (2053)
    04de34a2 Update the website getting started docs (2203)
    a7b9d021 Rename metric instruments to match feature-freeze API specification (2202)
    1f527a52 Update trace API config creation functions (2212)
    361a2096 Fix RC2 header in changelog (2215)
    e209ee75 chore(exporter/zipkin): improves logging on invalid collector. (2191)
    c0c5ef65 Fix typos in resource.go. (2201)
    abf6afe0 Update otel example guide (2210)
    3b05ba02 Bump actions/setup-go from 2.1.3 to 2.1.4 (2206)
    bcd7ff7b Bump codecov/codecov-action from 2.0.2 to 2.0.3 (2205)
    c912b179 Print JSON objects to stdout without a wrapping array (2196)
    add511c1 Make WithoutTimestamps work (2195)
    85c27e01 Bump github.com/golangci/golangci-lint from 1.41.1 to 1.42.0 in /internal/tools (2199)
    bf6500b3 Bump google.golang.org/grpc from 1.39.1 to 1.40.0 in /exporters/otlp/otlptrace (2184)
    9392af96 Bump google.golang.org/grpc in /exporters/otlp/otlptrace/otlptracegrpc (2185)
    c95694dc Bump google.golang.org/grpc from 1.39.1 to 1.40.0 in /example/otel-collector (2183)
    0528fa66 Bump google.golang.org/grpc from 1.39.1 to 1.40.0 in /exporters/otlp/otlpmetric (2186)
    3a26ed21 Deprecate the oteltest package (2188)
    c885435f Website: support GH page links to canonical src (2189)
    6da20a27 Add cross-module test coverage (2182)
    dfc866bd Support capturing stack trace  (2163)
    41588fea Deprecate the attribute.Any function (2181)
    4e8d667f Support a single Resource per MeterProvider in the SDK (2120)
    a8bb0bf8 Make the tracetest.SpanRecorder concurrent safe (2178)
    87d09df3 Deprecate Array attribute in favor of *Slice types (2162)
    df384a9a Move InstrumentKind into the new metric/sdkapi package (2091)
    1cb5cdca Unify the OTLP attribute transform (2170)
    a882ee37 Clarify the attribute package documentation and order/grouping (2168)
    5d25c4d2 Add support for int32 in attribute.Any (2169)
    2b0e139e Refactor attributes benchmark tests (2167)
    4c7470d9 Bump google.golang.org/grpc from 1.39.0 to 1.39.1 in /exporters/otlp/otlptrace (2176)
    990c534a Bump google.golang.org/grpc in /example/otel-collector (2172)
    b45c9d31 Bump google.golang.org/grpc from 1.39.0 to 1.39.1 in /exporters/otlp/otlpmetric (2174)
    a3d4ff5c Deprecated the bridge/opencensus/utils package (2166)
    b1d1d529 Move OC bridge integration tests to own mod (2165)
    89a9489c Add OC bridge internal unit tests (2164)
    56c743ba Allow global ErrorHandler to be set multiple times (2160)
    d18c135f Add OpenCensus bridge internal package (2146)
    fcf945a4 Just a little typo fix in code documentation. (2159)
    59a82eba Update version.go (2157)
    21d4686f Add ErrorHandlerFunc to simplify creating ErrorHandlers (2149)
    23cb9396 Remove `internal/semconv-gen` (2155)
    39acab32 Fix code sample in otel.GetTraceProvider (2147)
    2b1bb29e Update OpenCensus bridge docs with limitations (2145)
    fd7c327b Fix Jaeger exporter agent port default value and docs (2131)
    b8561785 fix(2138): add guard to constructOTResources to return an empty resource (2139)
    11f62640 Add a SpanRecorder to the sdk/trace/tracetest (2132)
    fd9de7ec rename assertsocketbuffersize.go to *_test (2136)
    a6b4d90c nit doc fix (2135)
    79398418 pre-release v1.0.0-RC2 (2133)
    2501e0fd Use semconv.SchemaURL in STDOUT exporter example (2134)
    ef03dbc9 Bump codecov/codecov-action from 1 to 2.0.2 (2129)
    bbe6ca40 Deprecate oteltest.Harness for removal (2123)
    7a624ac2 Deprecated the oteltest.TraceStateFromKeyValues function (2122)
    ece1879f Removed dropped link's attributes field from API package (2118)
    03902d98 Rename sdk/trace/tracetest test.go -> exporter.go (2128)
    cb607b0a Unify OTLP exporter retry logic (2095)
    abe22437 API: create new linked span from current context (2115)
    db81d4aa Update internal/global/trace testing (2111)
    7f10ef72 Remove propagation testing types from oteltest (2116)
    25d739b0 Remove resource.WithBuiltinDetectors() which has not been maintained (2097)
    d57c5a56  Remove several metrics test helpers (2105)
    49359495 Simplify trace_context tests (2108)
    56d42011 Simplify trace context benchmark test (2109)
    63dfe64a Correct status transform in OTLP exporter (2102)
    9b1a5f70 Performance improvement: avoid creating multiple same read-only objects (2104)
    ab78dbd0 Update release URL (2106)
    647af3a0 Pre release experimental metrics v0.22.0 (2101)
    0a562337 Fixed OS type value for DragonFly BSD (2092)
    62c21ffb Bump golang.org/x/tools from 0.1.4 to 0.1.5 in /internal/tools (2096)
    4a3da55a Ensure sample code in website_docs getting started page works (2094)
    d3063a3d Update otel.Meter to global.Meter in Getting Started Document.(2087) (2093)
    00a1ec5f Add documentation guidelines and improve Jaeger exporter readme (2082)
    12f737c7 oteltest: ensure valid SpanContext created for span started WithNewRoot (2073)
    484258eb OS description attribute detector (1840)
    d8c9a955 Bump google.golang.org/grpc from 1.38.0 to 1.39.0 in /example/otel-collector (2054)
    4ffdf034 Add @pellard as an Approver (2047)
    1a74b399 Bump google.golang.org/protobuf from 1.26.0 to 1.27.0 in /exporters/otlp/otlpmetric (2040)
    57c2e8fb Bump golang.org/x/tools from 0.1.3 to 0.1.4 in /internal/tools (2036)
    7cff31a9 Bump google.golang.org/protobuf from 1.26.0 to 1.27.0 in /exporters/otlp/otlptrace (2035)
    9e8f523d when using WithNewRoot, don't use the parent context for sampling (2032)
    62af6c70 semconv-gen: fix capitalization at word boundaries, add stability/deprecation indicators (2033)
    0bceed7e Fix docs on otel-collector example (2034)
    6428cd69 Update doc.go (2030)
    311a6396 fix documentation for trace.Status (2029)
    16f83ce6 export ToZipkinSpanModels for use outside this library (2027)
    d5d4c87f Add HTTP metrics exporter for OTLP (2022)
    d6e8f60f Bump github.com/golangci/golangci-lint from 1.40.1 to 1.41.1 in /internal/tools (2023)
    51dbe3cb Remove deprecated exporters (2020)
    257ef7fc Update project status in README (2017)
    ced177b7 Pre-release 1.0.0-RC1 (2013)
    694c9a41 Interface stability documentation (2012)
    39fe8092 Add span.TracerProvider() (2009)
    d020e1a2 Add more tests for go.opentelemetry.io/otel/trace package. (2004)
    6d4a38f1 replace WithSyncer with WithBatcher in opencensus example (2007)
    c30cd1d0 Split stdout exporter into stdouttrace and stdoutmetric (2005)
    80ca2b1e otlp: mark unix endpoints to work without transport security (2001)
    65140985 Update codecov ignore (2006)
    3be9813d Deprecate the exporters in the "trace" and "metric" sub-directories (1993)
    377f7ce4 remove WithTrace* options from otlptrace exporters (1997)
    b33edaa5 OTLP metrics gRPC exporter (1991)
    64b640cc Remove old OTLP exporter (1990)
    7728a521 Remove dependency on metrics packages (1988)
    135ac4b6 Moved internal/tools duplicated findRepoRoot function to common package (1978)
    cdf67ddf Update semantic conventions to v1.4.0, move to versioned package (1987)
    4883cb11 Refactor exporter creation functions (1985)
    87cc1e1f Test BatchSpanProcessor export timeout directly (1982)
    7ffe2845 Added inputPath validation to semconv-gen (1986)
    a113856a Add caveat about installing opencensus bridge (1983)
    741cb9a3 Fix generator.go call typo in RELEASING.md (1977)
    7a0cee7b Replaces golint by revive and fix newly reported linter issues (1946)
    46d9687a Add Schema URL support to Resource (1938)
    0827aa62 Use mock server as jaeger agent listener. (1930)
    20886012 Bugfix jaeger exporter test panic (1973)
    4bf6150f Add baggage implementation based on the W3C and OpenTelemetry specification (1967)
    bbe2b8a3 Bump github.com/itchyny/gojq from 0.12.3 to 0.12.4 in /internal/tools (1971)
    4949bf05 Bump github.com/cenkalti/backoff/v4 from 4.1.0 to 4.1.1 in /exporters/otlp/otlptrace (1972)
    015b4c17 Bump github.com/cenkalti/backoff/v4 from 4.1.0 to 4.1.1 in /exporters/otlp (1970)
    13eb12ac Bump github.com/prometheus/client_golang from 1.10.0 to 1.11.0 in /exporters/metric/prometheus (1974)
    2371bb0a add otlp trace http exporter (1963)
    a75ade4e sdk/resource: honor OTEL_SERVICE_NAME in fromEnv resource detector (1969)
    aed45802 Bump go.opentelemetry.io/proto/otlp from 0.8.0 to 0.9.0 in /exporters/otlp/otlptrace (1959)
    c4ebae6a Bump go.opentelemetry.io/proto/otlp (1960)
    b1d2be3b Bump google.golang.org/grpc from 1.37.1 to 1.38.0 in /exporters/otlp/otlptrace (1958)
    f6daea5e Generate semantic conventions according to specification latest tagged version (1933)
    435a63b3 Bump github.com/google/go-cmp from 0.5.5 to 0.5.6 (1954)
    6c46af66 Bump github.com/google/go-cmp from 0.5.5 to 0.5.6 in /exporters/trace/jaeger (1953)
    4d294853 Bump actions/cache from 2.1.5 to 2.1.6 (1952)
    dfe2b6f1 OTLP trace gRPC exporter (1922)
    5a8f7ff7 Bump go.opentelemetry.io/proto/otlp from 0.8.0 to 0.9.0 in /exporters/otlp (1943)
    bd935866 Add schema URL support to Tracer (1889)
    c1f460e0 Update API configs. (1921)
    270cc603 Small fixes on some Span method's documentation headers (1950)
    8603b902 Fix typo in doc (1949)
    acbb1882 Bump google.golang.org/grpc from 1.37.1 to 1.38.0 in /exporters/otlp (1942)
    b1621501 Add codecov badge (1940)
    ea1434c3 Fix some golint issues (1947)
    0eeb8f87 Refactor Tracestate (1931)
    d3b12808 Add Passthrough example (1912)
    f06cace6 Add @MadVikingGod as a project Approver (1923)
    ab5facb3 Bump github.com/golangci/golangci-lint in /internal/tools (1925)
    d23cc61b Refactor configs (1882)
    6324adaa Add tracer option argument to global Tracer function (1902)
    035fc650 Do not include authentication information in the http.url attribute (1919)
    d8ac212c Fix sporadic test failure in otlp exporter http driver (1906)
    a3df00f4 Create .gitattributes (1920)
    fb88e926 Bump google.golang.org/grpc from 1.37.0 to 1.37.1 in /exporters/otlp (1914)
    1982dc46 Bump google.golang.org/grpc in /example/prom-collector (1915)
    1759c630 Bump github.com/golangci/golangci-lint in /internal/tools (1916)
    7342aa47 Bump google.golang.org/grpc in /example/otel-collector (1913)
    21c16418 Add support for scheme in OTEL_EXPORTER_OTLP_ENDPOINT (1886)
    5cb62636 Semantic Convention generation tooling (1891)
    6219221f Move the unit package to the metric module (1903)
    63e0ecfc Implement global default non-recording span (1901)
    b6d5442f Remove the Tracer method from the Span API (1900)
    ae85fab3 Document functional options (1899)
    cabf0c07 Fix default Jaeger collector endpoint (1898)
    1e3fa3a3 Bump go.opentelemetry.io/proto/otlp from 0.7.0 to 0.8.0 in /exporters/otlp (1872)
    696af787 Bump github.com/benbjohnson/clock from 1.0.3 to 1.1.0 in /sdk/metric (1532)
    97eea6c3 Fix some golint issues (1894)
    79d9852e fix container port mismatch issue (1895)
    d20e7228 CI builds validate against last two versions of Go, dropping 1.14 and adding 1.16 (1865)
    cbcd4b1a Redefine ExportSpans of SpanExporter with ReadOnlySpan (1873)
    c99d5e99 Split large jaeger span batch to admire the udp packet size limit  (1853)
    42a84509 Unembed SpanContext (1877)
    b7d02db1 Add Status type to SDK (1874)
    f90d0d93 Update README (1876)
    a1349944 Update resource.go (1871)
    f40cad5e Add markdown link check configuration and action (1869)
    9bc28f6b Fix existing markdown lint issues (1866)
    08f4c270 Add documentation for tracer.Start() (1864)
    2bd4840c remove Set.Encoded(Encoder) enconding cache (1855)
    7674eebf Removed different types of Detectors for Resources. (1810)
    f92a6d83 Implement retry policy for the OTLP/gRPC  exporter (1832)
    ec75390f Fix BSP context done tests (1863)
    8e55f10a Move the Event type from the API to the SDK (1846)
    e399d355 drop failed to exporter batches and return error when forcing flush a span processor (1860)
    f6a9279a Honor context deadline or cancellation in SimpleSpanProcessor.Shutdown (1856)
    aeef8e00 Add markdown lint GitHub action (1849)
    d4c8ffad Replace spaces to tabs in Go code snippets (1854)
    cb097250 fixed typo (1857)
    392a44fa Refine configuration design docs (1841)
    62cd933d Handle Resource env error when non-nil (1851)
    24a91628 Document the SSP is not for production use (1844)
    ec26ac23 Update RELEASING.md (1843)
    8eb0bb99 Fix golint issue caused by typo (1847)
    ca130e54 Markdownlint (1842)
    1144a83d Small typo fixes to existing CHANGELOG entries (1839)
    e6086958 Update website_docs to v0.20.0 (1838)
    0f4e454c Change NewSplitDriver paramater and initialization (1798)
    92551d39 Prerelease v1.0.0 (2250)
    61839133 zipkin: remove no-op WithSDKOptions (2248)
    568e7556 Set Schema URL when exporting traces to OTLP (2242)
    ec26b556 Fix RC tags in docs (2239)
    767ce26c Bump github.com/itchyny/gojq from 0.12.4 to 0.12.5 in /internal/tools (2216)
    fe7058da adding NewNoopMeterProvider to follow trace api (2237)
    c338a5ef Bump github.com/golangci/golangci-lint from 1.42.0 to 1.42.1 in /internal/tools (2236)
    ef126f5c Remove deprecated Array from attribute package (2235)
    360d1302 Add tests for nil *Resource (2227)
    9e7812d1 Remove the deprecated oteltest package (2234)
    486afd34 Remove the deprecated bridge/opencensus/utils pkg (2233)
    eaacfaa8 Fix slice-valued attributes when used as map keys (2223)
    df2bdbba Fix the import comments of otelpconfig (2224)
    7aae2a02 otlptrace: Document supported environment variables (2222)

Fixes #2591

Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2021-11-04 12:39:00 -07:00
Chelsea Mafrica
84ccdd8ef2 vendor: update OpenTelemetry to v0.20.0
Update OpenTelemetry from v0.15.0 to v0.20.0.

    Git log

    02d8bdd5 Release v0.20.0 (1837)
    aa66fe75 OS and Process resource detectors (1788)
    7374d679 Fix Links documents (1835)
    856f5b84 Add feature request issue template (1831)
    0fdc3d78 Remove bundler from Jaeger exporter (1830)
    738ef11e Fix flaky global ErrorHandler delegation test (1829)
    e43d9c00  Update Default Value for Jaeger Exporter Endpoint  (1824)
    0032bd64 Fix default merging of resource attributes from environment variable (1785)
    96c5e4ba Add SpanProcessor example for Span annotation on start (1733)
    543c8144 Remove the WithSDKOptions from the Jaeger exporter (1825)
    66389ad6 Update function docs in sdk.go (1826)
    70bc9eb3 Adds support for timeout on the otlp/gRPC exporter (1821)
    081cc61d Update Jaeger exporter convenience functions (1822)
    1b9f16d3 Remove the WithDisabled option from Jaeger exporter (1806)
    6867faa0 Bump actions/cache from v2.1.4 to v2.1.5 (1818)
    a2bf04dc Build context pipeline in Jaeger upload process (1809)
    2de86f23 Remove locking from Jaeger exporter shutdown/export (1807)
    4f9fec29 Add ExportSpans benchmark to Jaeger exporter (1805)
    d9566abe Fix OTLP testing flake: signal connection from mock collector (1816)
    a2cecb6e add support for env var configuration to otlp/gRPC (1811)
    d616df61 Fix flaky OTLP exporter reconnect test (1814)
    b09df84a Changes stdout to expose the `*sdktrace.TracerProvider` (1800)
    04890608 Remove options field from Jaeger exporter (1808)
    6db20e00 Remove the abandoned Process struct in Jaeger exporter (1804)
    086abf34 docs: use test example to document prometheus.InstallNewPipeline (1796)
    d0cea04b Bump google.golang.org/api from 0.43.0 to 0.44.0 in /exporters/trace/jaeger (1792)
    99c477fe Fixed typo for default service name in Jaeger Exporter (1797)
    95fd8f50 Bump google.golang.org/grpc from 1.36.1 to 1.37.0 in /exporters/otlp (1791)
    9b251644 Zipkin Exporter: Use default resouce's serviceName as default serivce name (1777) (1786)
    4d141e47 Add k8s.node.name and k8s.node.uid to semconv (1789)
    5c99a34c Fix golint issue caused by incorrect comment (1795)
    c5d006c0 Update Jaeger environment variables (1752)
    58432808 add NewExportPipeline and InstallNewPipeline for otlp (1373)
    7d8e6bd7 Zipkin Exporter: Adjust span transformation to comply with the spec (1688)
    2817c091 Merge sdk/export/trace into sdk/trace (1778)
    c61e654c Refactor prometheus exporter tests to match file headers as well (1470)
    23422c56 Remove process config for Jaeger exporter (1776)
    0d49b592 Add test to check bsp ignores `OnEnd` and `ForceFlush` post Shutdown` (1772)
    e9aaa04b Record links/events attribute drops independently (1771)
    5bbfc22c Make ExportSpans for Jaeger Exporter honor deadline (1773)
    0786fe32 Add Bug report issue templates (1775)
    3c7facee Add `ExportTimeout` option to batch span processor (1755)
    c6b92d5b Make TraceFlags spec-compliant (1770)
    ee687ca5 Bump github.com/itchyny/gojq from 0.12.2 to 0.12.3 in /internal/tools (1774)
    52a24774 add support for configuring tls certs via env var to otlp/HTTP (1769)
    35cfbc7e Update precedence of event name in Jaeger exporter (1768)
    33699d24 Adds semantic conventions for exceptions (1492)
    928e3c38 Modify ForceFlush to abort after timeout/cancellation (1757)
    3947cab4 Fix testCollectorEndpoint typo and add tag assertions in jaeger_test (1753)
    ecc635dc add website docs (1747)
    07a8d195 Fix Jaeger span status reporting and unify tag keys (1761)
    4fa35c90 add partial support for env var config to otlp/HTTP (1758)
    bf180d0f improve OTLP/gRPC connection errors (1737)
    d575865b Fix span IsRecording when not sampling (1750)
    20c93b01 Update SamplingParameters (1749)
    97501a3f Update SpanSnapshot to use parent SpanContext (1748)
    604b05cb Store current Span instead of local and remote SpanContext in context.Context (1731)
    c61f4b6d Set @lizthegrey to emeritus status (1745)
    b1342fec Bump github.com/golangci/golangci-lint in /internal/tools (1743)
    54e1bd19 Bump google.golang.org/api from 0.41.0 to 0.43.0 in /exporters/trace/jaeger (1741)
    4d25b6a2 Bump github.com/prometheus/client_golang from 1.9.0 to 1.10.0 in /exporters/metric/prometheus (1740)
    0a47b66f Bump google.golang.org/grpc from 1.36.0 to 1.36.1 in /exporters/otlp (1739)
    26f006b8 Reinstate @paivagustavo as an Approver (1734)
    382c7ced Remove hasRemoteParent field from SDK span (1728)
    862a5a68 Remove setting error status while recording error with Span from oteltest package (1729)
    6defcfdf Remove links on NewRoot spans (1726)
    a9b2f851 upgrade thrift to v0.14.1 in jaeger exporter (1712)
    5a6a854d Bump google.golang.org/protobuf from 1.25.0 to 1.26.0 in /exporters/otlp (1724)
    23486213 Migrate to using go.opentelemetry.io/proto/otlp (1713)
    5d559b40 Remove makeSamplingDecision func (1711)
    e24702da Update the TraceContext.Extract docs (1720)
    9d4eb1f6 Update dates in CHANGELOG.md for 2021 releases (1723)
    2b4fa968 Release v0.19.0 (1710)
    4beb7041 sdk/trace: removing ApplyConfig and Config (1693)
    1d42be16 Rename WithDefaultSampler TracerProvider option to WithSampler and update docs (1702)
    860d5d86 Add flag to determine whether SpanContext is remote (1701)
    0fe65e6b Comply with OpenTelemetry attributes specification (1703)
    88884351 Bump google.golang.org/api from 0.40.0 to 0.41.0 in /exporters/trace/jaeger (1700)
    345f264a breaking(zipkin): removes servicName from zipkin exporter. (1697)
    62cbf0f2 Populate Jaeger's Span.Process from Resource (1673)
    28eaaa9a Add a test to prove the Tracer is safe for concurrent calls (1665)
    8b1be11a Rename resource pkg label vars and methods (1692)
    a1539d44 OpenCensus metric exporter bridge (1444)
    77aa218d Fix issue #1490, apply same logic as in the SDK (1687)
    9d3416cc Fix synchronization issues in global trace delegate implementation (1686)
    58f69f09 Span status from HTTP code: Do not set status message if it can be inferred (1681)
    9c305bde Flush metric events prior to shutdown in OTLP example (1678)
    66b1135a Fix CHANGELOG (1680)
    90bd4ab5 Update employer information for maintainers (1683)
    36841913 Remove WithRecord() option from trace.SpanOption when starting a span (1660)
    65c7de20 Remove trace prefix from NoOp src files. (1679)
    e88a091a Make SpanContext Immutable (1573)
    d75e2680 Avoid overriding configuration of tracer provider (1633)
    2b4d5ac3 Bump github.com/golangci/golangci-lint in /internal/tools (1671)
    150b868d Bump github.com/google/go-cmp from 0.5.4 to 0.5.5 (1667)
    76aa924e Fix the examples target info messaging (1676)
    a3aa9fda Bump github.com/itchyny/gojq from 0.12.1 to 0.12.2 in /internal/tools (1672)
    a5edd79e Removed setting error status while recording err as span event (1663)
    e9814758 chore(zipkin): improves zipkin example to not to depend on timeouts. (1566)
    3dc91f2d Add ForceFlush method to TracerProvider (1608)
    bd0bba43 exporter: swap pusher for exporter (1656)
    56904859 Update the SimpleSpanProcessor (1612)
    a7f7abac  SpanStatus description set only when status code is set to Error (1662)
    05252f40 Jaeger Exporter: Fix minor mapping discrepancies (1626)
    238e7c61 Add non-empty string check for attribute keys (1659)
    e9b9aca8 Add tests for propagation of Sampler Tracestate changes (1655)
    875a2583 Add docs on when reviews should be cleared (1556)
    7153ef2d Add HTTP/JSON to the otlp exporter (1586)
    62e2a0f7 Unexport the simple and batch SpanProcessors (1638)
    992837f1 Add TracerProvider tests to oteltest harness (1607)
    bb4c297e Pre release v0.18.0 (1635)
    712c3dcc Fix makefile ci target and coverage test packages (1634)
    841d2a58 Rename local var new to not collide with builtin (1610)
    13938ab5 Update SpanProcessor docs (1611)
    e25503a0 Add compatibility tests to CI (1567)
    1519d959 Use reasonable interval in sdktrace.WithBatchTimeout (1621)
    7d4496e0 Pass metric labels when transforming to gaugeArray (1570)
    6d4a5e0d Bump google.golang.org/grpc from 1.35.0 to 1.36.0 in /exporters/otlp (1619)
    a93393a0 Bump google.golang.org/grpc in /example/prom-collector (1620)
    e499ca86 Fix validation for tracestate with vendor and add tests (1581)
    43886e52 Make timestamps sequential in lastvalue agg check (1579)
    37688ef6 revent end-users from implementing some interfaces (1575)
    85e696d2 Updating documentation with an working example for creating NewExporter (1513)
    562eb28b Unify the Added sections of the unreleased changes (1580)
    c4cf1aff Fix Windows build of Jaeger tests (1577)
    4a163bea Fix stdout TestStdoutTimestamp failure with sleep (1572)
    bd4701eb Stagger timestamps in exact aggregator tests (1569)
    b94cd4b2 add code attributes to semconv package (1558)
    78c06cef Update docs from gitter to slack for communication (1554)
    1307c911 Remove vendor exclude from license-check (1552)
    5d2636e5 Bump github.com/golangci/golangci-lint in /internal/tools (1565)
    d7aff473 Vendor Thrift dependency (1551)
    298c5a14 Update span limits to conform with OpenTelemetry specification (1535)
    ecf65d79 Rename otel/label -> otel/attribute (1541)
    1b5b6621 Remove resampling on span.SetName (1545)
    8da52996 fix: grpc reconnection  (1521)
    3bce9c97 Add Keys() method to propagation.TextMapCarrier (1544)
    0b1a1c72 Make oteltest.SpanRecorder into a concrete type (1542)
    7d0e3e52 SDK span no modification after ended (1543)
    7de3b58c Remove extra labels types (1314)
    73194e44 Bump google.golang.org/api from 0.39.0 to 0.40.0 in /exporters/trace/jaeger (1536)
    8fae0a64 Create resource.Default() with required attributes/default values (1507)
    76f93422 Release v0.17.0 (1534)
    9b242bc4 Organize API into Go modules based on stability and dependencies (1528)
    e50a1c8c Bump actions/cache from v2 to v2.1.4 (1518)
    a6aa7f00 Bump google.golang.org/api from 0.38.0 to 0.39.0 in /exporters/trace/jaeger (1517)
    38efc875 Code Improvement - Error strings should not be capitalized (1488)
    6b340501 Update default branch name (1505)
    b39fd052 nit: Fix comment to be up-to-date (1510)
    186c2953 Fix golint error of package comment form (1487)
    9308d662 Bump google.golang.org/api from 0.37.0 to 0.38.0 in /exporters/trace/jaeger (1506)
    1952d7b6 Reverse order of attribute precedence when merging two Resources (1501)
    ad7b4715 Remove build flags for runtime/trace support (1498)
    4bf4b690 Remove inaccurate and unnecessary import comment (1481)
    7e19eb6a Bump google.golang.org/api from 0.36.0 to 0.37.0 in /exporters/trace/jaeger (1504)
    c6a4406a Bump github.com/golangci/golangci-lint in /internal/tools (1503)
    9524ac09 Update workflows to include main branch as trigger (1497)
    c066f15e Bump github.com/gogo/protobuf from 1.3.1 to 1.3.2 in /internal/tools (1478)
    894e0240 Bump github.com/golangci/golangci-lint in /internal/tools (1477)
    71ffba39 Bump google.golang.org/grpc from 1.34.0 to 1.35.0 in /exporters/otlp (1471)
    515809a8 Bump github.com/itchyny/gojq from 0.12.0 to 0.12.1 in /internal/tools (1472)
    3e96ad1e gitignore: remove unused example path (1474)
    c5622777 Histogram aggregator functional options (1434)
    0df8cd62 Rename Makefile.proto to avoid interpretation as proto file (1468)
    979ff51f Bump github.com/stretchr/testify from 1.6.1 to 1.7.0 (1453)
    1df8b3b8 Bump github.com/gogo/protobuf from 1.3.1 to 1.3.2 in /exporters/otlp (1456)
    4c30a90a Bump github.com/stretchr/testify from 1.6.1 to 1.7.0 in /sdk (1455)
    5a9f8f6e Bump github.com/stretchr/testify from 1.6.1 to 1.7.0 in /exporters/stdout (1454)
    7786f34c Bump github.com/stretchr/testify from 1.6.1 to 1.7.0 in /exporters/trace/zipkin (1457)
    4352a7a6 Bump github.com/stretchr/testify from 1.6.1 to 1.7.0 in /exporters/otlp (1460)
    6990b3b3 Bump github.com/stretchr/testify from 1.6.1 to 1.7.0 in /exporters/metric/prometheus (1461)
    7af40d22 Bump github.com/stretchr/testify from 1.6.1 to 1.7.0 in /exporters/trace/jaeger (1463)
    f16f1892 Bump google.golang.org/grpc in /example/otel-collector (1465)
    fe363be3 Move Span Event to API (1452)
    43922240 Bump google.golang.org/grpc in /example/prom-collector (1466)
    0aadfb27 Prepare release v0.16.0 (1464)
    207587b6 Metric histogram aggregator: Swap in SynchronizedMove to avoid allocations (1435)
    c29c6fd1 Shutdown underlying span exporter while shutting down BatchSpanProcessor (1443)
    dfece3d2 Combine the Push and Pull metric controllers (1378)
    74deeddd Handle tracestate in TraceContext propagator  (1447)
    49f699d6 Remove Quantile aggregation, DDSketch aggregator; add Exact timestamps (1412)
    9c949411 Rename internal/testing to internal/internaltest (1449)
    8d809814 Move gRPC driver to a subpackage and add an HTTP driver (1420)
    9332af1b Bump github.com/golangci/golangci-lint in /internal/tools (1445)
    5ed96e92 Update exporters/otlp Readme.md (1441)
    bc9cb5e3 Switch CircleCI badge to GitHub Actions (1440)
    716ad082 Remove CircleCI config (1439)
    0682db1e Adding Security Workflows to GitHub Actions (2/2): gosec workflow (1429)
    11f732b8 Adding Security Workflows to GitHub Actions (1/2): codeql workflow (1428)
    40f1c003 Add Tracestate into the SamplingResult struct (1432)
    db06c8d1 Flush metric events before shutdown in collector example (1438)
    f6f458e1 Fix golint issue caused by typo in trace.go (1436)
    fe9d1f7e Use uint64 Count consistently in metric aggregation (1430)
    3a337d0b Bump github.com/golangci/golangci-lint in /internal/tools (1433)
    1e4c8321 cleanup: drop the removed examples in gitignore (1427)
    5c9221cf Unify endpoint API that related to OTel exporter (1401)
    045c3ffe Build scripts: Replace mapfile with read loop for old bash versions (1425)
    2def8c3d Add Versioning Documentation (1388)
    6bcd1085 Bump github.com/itchyny/gojq from 0.11.2 to 0.12.0 in /internal/tools (1424)
    38e76efe Add a split protocol driver for otlp exporter (1418)
    439cd313 Add TraceState to SpanContext in API (1340)
    35215264 Split connection management away from exporter (1369)
    add9d933 Bump github.com/prometheus/client_golang from 1.8.0 to 1.9.0 in /exporters/metric/prometheus (1414)
    93d426a1 Add @dashpole as a project Approver (1410)
    6fe20ef3 Fix small typo (1409)
    b22d0d70 Mention the getting started guide (1406)
    3fb80fb2 Fix duplicate checkout action in GitHub workflow (1407)
    2051927b Correct CI workflow syntax (1403)
    f11a86f7 Fix typo in comment (1402)
    bdf87a78 Migrate CircleCI ci.yml workflow to GitHub Actions (1382)
    4e59dd1f Bump google.golang.org/grpc from 1.32.0 to 1.34.0 in /example/otel-collector (1400)
    83513f70 Bump google.golang.org/api from 0.32.0 to 0.36.0 in /exporters/trace/jaeger (1398)
    a354fc41 Bump github.com/prometheus/client_golang from 1.7.1 to 1.8.0 in /exporters/metric/prometheus (1397)
    3528e42c Bump google.golang.org/grpc from 1.32.0 to 1.34.0 in /exporters/otlp (1396)
    af114baf Call otel.Handle with non-nil errors (1384)
    c3c4273e Add RO/RW span interfaces (1360)

Fixes #2591

Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2021-11-04 12:30:45 -07:00
Chelsea Mafrica
b5cfb73466 Merge pull request #2931 from YchauWang/wyc-runtime-shim2
runtime# make sure the "Shutdown" trace span have a correct end
2021-11-04 11:33:22 -07:00
Chelsea Mafrica
02181cb7d8 Merge pull request #2620 from cmaf/tracing-fix-addtag
tracing: Accept multiple dynamic tags
2021-11-04 11:33:06 -07:00
Chelsea Mafrica
09d5d8836b runtime: tracing: Change method for adding tags
In later versions of OpenTelemetry label.Any() is deprecated. Create
addTag() to handle type assertions of values. Change AddTag() to
variadic function that accepts multiple keys and values.

Fixes #2547

Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2021-11-04 10:19:05 -07:00
GabyCT
f611785fdc Merge pull request #2967 from jodh-intel/enable-debug-logs
logging: Enable agent debug output for release builds
2021-11-04 10:04:59 -06:00
GabyCT
86b5bb5801 Merge pull request #2940 from ManaSugi/seccomp-aarch64
agent: "Revert agent: Disable seccomp feature on aarch64 temporarily"
2021-11-04 09:38:45 -06:00
James O. D. Hunt
bcf3e82cf0 logging: Enable agent debug output for release builds
Raise the `slog` maximum log level feature for release code from `info`
to `debug` by changing the `slog` maximum level features in the shared
`logging` crate. This allows the consumers of the `logging` crate (the
agent, the `trace-forwarder` and the `agent-ctl` tool) to produce debug
output when their debug options are enabled. Currently, those options
will essentially be a NOP (unless using a debug version of the code).

Testing showed that setting the `slog` maximum level features in the
rust manifest files for the consumers of the `logging` crate has no
impact: those values are ignored, so they have been removed and replaced
with a comment stating the levels are set in the `logging` crate.

Fixes: #2966.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-11-04 11:42:47 +00:00
Bin Liu
a7a47bd7d4 Merge pull request #2943 from liubin/fix/2942-add-golint-for-makefile
runtime: Enhancement for Makefile
2021-11-04 11:37:21 +08:00
GabyCT
fbe27d9097 Merge pull request #2962 from wainersm/image-builder-fix-1
osbuilder: build image-builder image from Fedora 34
2021-11-03 14:04:02 -06:00
Wainer dos Santos Moschetta
a239a38f45 osbuilder: build image-builder image from Fedora 34
Currently the image-builder image is built from `fedora:latest` and
this is error-prone as any update of the base image can lead to
breakage. Instead let's create the image from Fedora 34, which is the
last known version to build fine.

Fixes #2960
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-11-03 13:07:42 -04:00
bin
375ad2b2b6 runtime: Enhancement for Makefile
There are some issues with Makefile for runtime:

- default target can't be used as a dependent of other targets.
- empty target `check`

Also add two targets for local development/tests.

- lint: run golangci-lint
- pre-commit: run lint and test

Fixes: #2942

Signed-off-by: bin <bin@hyper.sh>
2021-11-03 17:36:55 +08:00
Manabu Sugimoto
b468dc500a agent: Use dup3 system call in unit tests of seccomp
Use `dup3` system call instead of `dup2` in unit tests of seccomp
because `dup2` is obsolete on aarch64.

Fixes: #2939

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2021-11-03 15:49:23 +09:00
Tim Zhang
5e230a1cba Merge pull request #2945 from liubin/fix/2944-refactor-process-io
agent: refactor process IO processing
2021-11-03 14:35:04 +08:00
Manabu Sugimoto
1aaa0599d9 agent: "Revert agent: Disable seccomp feature on aarch64 temporarily"
Re-enable seccomp feature on aarch64 because CI is ready
by https://github.com/kata-containers/tests/pull/4124.

This reverts commit 42add7f201.

Fixes: #2939

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2021-11-02 22:53:38 +09:00
bin
1e331f7542 agent: refactor process IO processing
Move closing IO into process.rs and use a macro
to reduce code.

Fixes: #2944

Signed-off-by: bin <bin@hyper.sh>
2021-11-02 15:49:11 +08:00
wangyongchao.bj
9d3ec58370 runtime: make sure the "Shutdown" trace span have a correct end
We only added span.End() in the main process of the shim2 Shutdown method.
The "Shutdown" span would stay alive when the number of containers is not 0.
This PR makes sure the "Shutdown" trace span has a correct end.

Fixes: #2930

Signed-off-by: wangyongchao.bj <wangyongchao.bj@inspur.com>
2021-11-02 14:24:31 +08:00
Fupan Li
1c81d7e0b6 Merge pull request #2915 from jodh-intel/agent-ctl-handle-hybrid-vsock
agent-ctl: Update for Hybrid VSOCK
2021-11-02 09:55:16 +08:00
GabyCT
3bc25e684e Merge pull request #2631 from Bevisy/main-2630
docs: Fix outdated links
2021-11-01 11:22:45 -06:00
Wainer Moschetta
415f5a9a67 Merge pull request #2935 from wainersm/fix_install_libseccomp-1
ci/install_libseccomp: Fix libseccomp build and misc improvement
2021-11-01 12:04:46 -03:00
Jianyong Wu
e15c8460db Merge pull request #2265 from rapiz1/simple-ro-mount
virtcontainers: simplify read-only mount handling
2021-11-01 10:43:16 +08:00
Bin Liu
51e9038ad5 Merge pull request #1998 from liubin/1997/add-fastfail-test
runtime: add fast-test to let test exit on error
2021-10-30 15:38:27 +08:00
bin
3f21af9c5c runtime: add fast-test to let test exit on error
Add the -failfast option to make tests exit on error. Because -failfast
does not work across packages, a for loop is used to test all packages
in src/runtime, and the parallel number is set to 1; this may make the
tests slow.

Fixes: #1997

Signed-off-by: bin <bin@hyper.sh>
2021-10-30 11:09:54 +08:00
GabyCT
c8553ea427 Merge pull request #2046 from littlejawa/issue_2042
test: Fix random failure for TestIoCopy
2021-10-29 17:29:31 -05:00
GabyCT
969b78b01f Merge pull request #2496 from rapiz1/show-guest-protection
cli: Show available guest protection in env output
2021-10-29 17:28:47 -05:00
GabyCT
39ab5f4bea Merge pull request #2435 from fidencio/wip/update-k8s-and-crio-to-1.22
Update k8s, critools, and CRI-O to their 1.22 release
2021-10-29 17:27:51 -05:00
GabyCT
e009b58c93 Merge pull request #2629 from Kvasscn/kata_dev_kbuild
package: assign proper value to redefined_string  in build-kernel.sh
2021-10-29 17:26:40 -05:00
GabyCT
7b406d5561 Merge pull request #2037 from c3d/issue/2036-is-not-exist
agent: Make wording of error message match CRI-O test suite
2021-10-29 17:25:06 -05:00
Wainer dos Santos Moschetta
9b270d72d1 ci/install_libseccomp: use a temporary work directory
It is safer to download the tarballs and work on a temporary directory
which can be properly cleaned up when the script finishes.

Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-10-29 13:00:27 -03:00
Wainer dos Santos Moschetta
98b4406196 ci/install_libseccomp: Fix fail when DESTDIR is set
If DESTDIR is set in the environment then gperf will be installed
in an unexpected directory, resulting in libseccomp's configure
not being able to find it. To avoid that issue this changed the
ci/install_libseccomp.sh so that PREFIX and DESTDIR are unset
inside the script.

Fixes #2932
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-10-29 12:58:09 -03:00
Chelsea Mafrica
53a9f9460f Merge pull request #2383 from wzshiming/patch-1
docs: Moving from EOT to EOF
2021-10-29 08:44:52 -07:00
James O. D. Hunt
2551179e43 Merge pull request #2929 from YchauWang/vc-docs-api
virtcontainers: api: update the functions in the api.md docs
2021-10-29 16:01:31 +01:00
Julio Montes
2751a13bbd Merge pull request #2923 from ManaSugi/add-libseccomp-info
release: Upload libseccomp sources with notice to release page
2021-10-29 09:44:24 -05:00
James O. D. Hunt
4e2dd41eb6 Merge pull request #1791 from wainersm/virtcontainers-1
virtcontainers: check that both initrd and image are not set
2021-10-29 14:51:07 +01:00
wangyongchao.bj
338ac87516 virtcontainers: api: update the functions in the api.md docs
The functions in the virtcontainers API document were not in sync with the Sandbox and VCImpl code.
We also have two functions named `CreateSandbox` that differ by one parameter,
which is very confusing. So this PR syncs the API documents with the code.

Fixes: #2928

Signed-off-by: wangyongchao.bj <wangyongchao.bj@inspur.com>
2021-10-29 15:36:53 +08:00
Bin Liu
71b69c36d5 Merge pull request #2917 from sameo/topic/agent-config-sample
agent: Fix the configuration sample file
2021-10-29 11:51:58 +08:00
Bin Liu
eb248b0c66 Merge pull request #2750 from liubin/fix/2749-remove-fixme
runtime: set tags for trace span
2021-10-29 11:42:49 +08:00
Manabu Sugimoto
23496f94be release: Upload libseccomp sources with notice to release page
The `kata-agent` binaries inside the Kata Containers images provided
with release are statically linked with the GNU LGPL-2.1 licensed
libseccomp library by default.
Therefore, we attach the complete source code of the libseccomp
to the release page in order to comply with the LGPL-2.1 (6(a)).
In addition, we add the description about the libseccomp license
to the release page.

Fixes: #2922

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2021-10-29 12:38:14 +09:00
Bin Liu
00a20c840b Merge pull request #2716 from liudalibj/linux_oci_spec
agent-ctl: Implement Linux OCI spec handling
2021-10-29 10:53:04 +08:00
GabyCT
29f5ff5304 Merge pull request #2925 from GabyCT/topic/fixclhconfig
runtime: Remove comments about unsupported features in config for clh
2021-10-28 14:42:52 -05:00
Gabriela Cervantes
e610fc82ff runtime: Remove comments about unsupported features in config for clh
Cloud hypervisor is only supporting virtio-blk, this PR removes comments
that make a wrong reference of other features that are not supported
by clh.

Fixes #2924

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2021-10-28 15:14:49 +00:00
Julio Montes
caa6e19b5d Merge pull request #2919 from dgibson/viommu
tools/packaging: Add options for VFIO to guest kernel
2021-10-28 08:23:55 -05:00
James O. D. Hunt
7e401952f8 agent-ctl: Add stub for AddSwap API
Add a basic implementation for the `AddSwap` agent API call.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-10-28 09:22:52 +01:00
James O. D. Hunt
82de838e5f agent-ctl: Update for Hybrid VSOCK
Allow the `agent-ctl` tool to connect to a Hybrid VSOCK hypervisor such
as Cloud Hypervisor or Firecracker.

Fixes: #2914.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-10-28 09:22:35 +01:00
James O. D. Hunt
d1bcf105ff forwarder: Remove quotes from socket path in doc
Update the trace forwarder README to remove the quotes around the socket
path, which makes manipulating that path easier.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-10-28 09:20:38 +01:00
Yujia Qiao
e66d0473be virtcontainers: simplify read-only mount handling
Current handling of read-only mounts is a little tricky.
However, a clearer solution can be used here:
  1. make a private ro bind mount at privateDest to the mount source
  2. make a bind mount at mountDest to the mount created in step 1
  3. umount the private bind mount created in step 1
One important aspect is that the mount in step 2 is duplicated from
the one we created in step 1. So the MS_RDONLY flag is properly
preserved in all mounts created in the propagation.

Fixes: #2205

Depends-on: github.com/kata-containers/tests#4106

Signed-off-by: Yujia Qiao <rapiz3142@gmail.com>
2021-10-28 15:48:41 +08:00
David Gibson
bdf4824145 tools/packaging: Add options for VFIO to guest kernel
Pull #2795 recently added support for a closer-to-OCI behaviour for
VFIO devices, in which they appear to the container as VFIO devices,
rather than being interpreted by the guest kernel.  However, in order
to use this, the Kata guest kernel needs to include the VFIO PCI
driver, along with dependencies like the Intel IOMMU driver.

The kernel as built by the scripts within Kata doesn't currently include
those, so this patch adds them.

fixes #2913

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-10-28 11:38:51 +11:00
Da Li Liu
c509a204f3 agent-ctl: Implement Linux OCI spec handling
- convert linux field from oci spec to grpc spec
- include all the fields below linux oci spec

Fixes: #2715

Signed-off-by: Da Li Liu <liudali@cn.ibm.com>
2021-10-27 13:41:57 +00:00
Bin Liu
b85edbfa00 Merge pull request #1788 from ManaSugi/add-seccomp-feature
agent/runtime: Add seccomp feature
2021-10-27 21:00:04 +08:00
Manabu Sugimoto
42add7f201 agent: Disable seccomp feature on aarch64 temporarily
In order to pass CI test of aarch64, it is necessary to run
`ci/install_libseccomp.sh` before running unit tests in
`jenkins_job_build.sh`.
However, `ci/install_libseccomp.sh` is not available
until PR #1788 including this commit is merged in the mainline.
Therefore, we disable seccomp feature on aarch64 temporarily.
After #1788 lands and CI is fixed, this commit will be reverted.

Fixes: #1476

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2021-10-27 19:06:13 +09:00
Manabu Sugimoto
5dfedc2b19 docs: Add explanation about seccomp
This adds explanation about how to enable seccomp in the kata-runtime and
build the kata-agent with seccomp capability.

Fixes: #1476

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2021-10-27 19:06:13 +09:00
Manabu Sugimoto
45e7c2cab1 static-checks: Add step for installing libseccomp
This adds a step for installing libseccomp because the kata-agent
supports seccomp feature.

Fixes: #1476

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2021-10-27 19:06:13 +09:00
Manabu Sugimoto
a3647e3486 osbuilder: Set up libseccomp library
The osbuilder needs to set up libseccomp library to build the kata-agent
because the kata-agent supports seccomp currently.
The library is built from the sources to create a static library for musl libc.
In addition, environment variables for the libseccomp crate are set to
link the library statically.

Fixes: #1476

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2021-10-27 19:06:13 +09:00
Manabu Sugimoto
3be50adab9 agent: Add support for Seccomp
The kata-agent supports seccomp feature based on the OCI runtime specification.
This seccomp capability in the kata-agent is enabled by default.
However, it is not enforced by default: users need to enable that by setting
`disable_guest_seccomp` to `false` in the main configuration file.

Fixes: #1476

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2021-10-27 19:06:13 +09:00
James O. D. Hunt
4d4a15d6ce Merge pull request #2057 from wainersm/fix_kata-deploy-ci
ci: test-kata-deploy: Get rid of slash-command-action action
2021-10-27 10:08:12 +01:00
Peng Tao
03a9411884 Merge pull request #2878 from eadamsintel/update-qat-dockerfile
This is to bump the OOT QAT 1.7 driver version to the latest version.…
2021-10-27 17:00:04 +08:00
Samuel Ortiz
4280415149 agent: Fix the configuration sample file
All endpoint names share the `Request` suffix.
Also, the current list is based on functions, not requests.

Fixes #2916

Reported-by: Jakob Naucke <jakob.naucke@ibm.com>
Signed-off-by: Samuel Ortiz <s.ortiz@apple.com>
2021-10-27 06:02:33 +02:00
Bo Chen
bf5f42d411 Merge pull request #2906 from jodh-intel/trace-forwarder-drop-privs
forwarder: Drop privileges when using hybrid VSOCK
2021-10-26 13:24:01 -07:00
Chelsea Mafrica
8f33e6f593 Merge pull request #2896 from Jakob-Naucke/static
packaging/static-build: s390x fixes
2021-10-26 11:53:34 -07:00
Wainer dos Santos Moschetta
b0bc71f463 ci: test-kata-deploy: Get rid of slash-command-action action
There is a problem with slash-command-action: in the absence of a slash command
the job fails (instead of simply being ignored, i.e., skipped). This is documented on
https://github.com/xt0rted/slash-command-action/issues/124. There is a workaround
also documented on that issue, but here instead let's get rid of the action.

In this new implementation all comments sent to the pull request are parsed, if any
starts with "/test_kata-deploy" then the job is triggered.

Fixes #2836
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-10-26 11:36:13 -04:00
Wainer dos Santos Moschetta
309dae631a virtcontainers: check that both initrd and image are not set
This changed valid() in hypervisor to check the case where both
initrd and image path are set; in this case it returns an error.

Fixes #1868
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-10-26 10:44:23 -04:00
James O. D. Hunt
3120b489e3 Merge pull request #2687 from genjuro214/improve-oci-to-grpc
agent-ctl: improve the oci_to_grpc code
2021-10-26 13:00:02 +01:00
James O. D. Hunt
a10cfffdff forwarder: Fix changing log level
Fix `-l <log-level>` for the trace forwarder which didn't work
previously as it lacked the magic Cargo configuration.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-10-26 11:02:06 +01:00
James O. D. Hunt
6abccb92ce forwarder: Drop privileges when using hybrid VSOCK
Hybrid VSOCK requires `root` privileges to access the sandbox-specific
host-side AF_UNIX socket created by the hypervisor (CLH or FC). However,
once the socket has been bound, privileges can be dropped, allowing the
forwarder to run as user `nobody`.

Fixes: #2905.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-10-26 11:01:58 +01:00
Bin Liu
8d8604e10f Merge pull request #2893 from liubin/fix/2892-print-error-instead-of-return
agent: do not return error but print it if task wait failed
2021-10-26 17:48:17 +08:00
Lei Li
bf00b8df87 agent-ctl: improve the oci_to_grpc code
The oci_to_grpc function just handles part of oci fields,
and others are not copied from oci spec to grpc spec,
such as process.env, process.capabilities, mounts and so on.
Try to implement more handling to convert those fields.

Fixes #2686

Signed-off-by: Lei Li <cdlleili@cn.ibm.com>
2021-10-26 16:54:28 +08:00
James O. D. Hunt
b67fa9e450 forwarder: Make explicit root check
Rather than generating a potentially misleading error message if the
socket bind fails, perform an explicit check for `root` for Hybrid
VSOCK.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-10-26 09:28:26 +01:00
James O. D. Hunt
e377578e08 forwarder: Fix docs socket path
Updated the trace forwarder README to ensure the real socket path is
created, not the template socket path returned by `kata-runtime env`.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-10-26 09:28:26 +01:00
James O. D. Hunt
d1d9e84e9f Merge pull request #2902 from liubin/fix/2901-delete-duplicated-line
virtcontainers: delete duplicated notify in watchHypervisor function
2021-10-26 08:22:11 +01:00
bin
5f306330f4 virtcontainers: delete duplicated notify in watchHypervisor function
When hypervisor check failed, the notify function is called twice.

Fixes: #2901

Signed-off-by: bin <bin@hyper.sh>
2021-10-26 11:58:26 +08:00
bin
5f5eca6b8e agent: do not return error but print it if task wait failed
Do not return error but print it if task wait failed
and let program continue to run the next code.

Fixes: #2892

Signed-off-by: bin <bin@hyper.sh>
2021-10-26 11:43:39 +08:00
Jakob Naucke
d2a7b6ff4a packaging/static-build: s390x fixes
- Install OpenSSL for key generation in kernel build
- Do not install libpmem
- Do not exclude `*/share/*/*.img` files in QEMU tarball since among
  them are boot loader files critical for IPLing.

Fixes: #2895
Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-10-25 18:47:35 +02:00
Yujia Qiao
6cc8000cae cli: Show available guest protection in env output
Show available guest protections in the
`kata-runtime env` output. Also bump the formatVersion.

Fixes: #1982

Signed-off-by: Yujia Qiao <rapiz3142@gmail.com>
2021-10-25 21:44:56 +08:00
Yujia Qiao
2063b13805 virtcontainers: Add func AvailableGuestProtections
Add functions to return guestProtection as a string slice, which
can be then used in `kata-runtime env` output.

Signed-off-by: Yujia Qiao <rapiz3142@gmail.com>
2021-10-25 21:44:01 +08:00
Fupan Li
3d0fe433c6 Merge pull request #2889 from lht/handle-uevent-remove-actions
agent: Handle uevent remove actions
2021-10-25 19:08:20 +08:00
James O. D. Hunt
ec3aa1694b Merge pull request #2844 from jongwu/unit_test
enable unit test on arm
2021-10-25 10:58:21 +01:00
Bin Liu
01fdeb7641 Merge pull request #2891 from ManaSugi/fix/unify-form
rustjail: Consistent coding style of LinuxDevice type
2021-10-25 14:03:03 +08:00
Bin Liu
ded864f862 Merge pull request #2568 from Bevisy/main-2254
cli: Fix outdated kata-runtime bash completion
2021-10-25 14:02:13 +08:00
Haitao Li
a13e2f77b8 agent: Handle uevent remove actions
uevents with action=remove were ignored, causing the agent to reuse stale
data in the device map. This patch adds handling of such uevents.

Fixes #2405

Signed-off-by: Haitao Li <lihaitao@gmail.com>
2021-10-25 14:41:32 +11:00
David Gibson
a0825badf6 Merge pull request #2795 from dgibson/vfio-as-vfio
Allow VFIO devices to be used as VFIO devices in the container
2021-10-25 14:25:26 +11:00
Peng Tao
e709f11229 Merge pull request #2881 from mcastelino/topic/hypervisor-rename
Expose top level hypervisor methods -
2021-10-25 10:25:49 +08:00
David Gibson
34273da98f runtime/device: Allow VFIO devices to be presented to guest as VFIO devices
On a conventional (e.g. runc) container, passing in a VFIO group device,
/dev/vfio/NN, will result in the same VFIO group device being available
within the container.

With Kata, however, the VFIO device will be bound to the guest kernel's
driver (if it has one), possibly appearing as some other device (or a
network interface) within the guest.

This adds a new `vfio_mode` option to alter this.  If set to "vfio" it will
instruct the agent to remap VFIO devices to the VFIO driver within the
guest as well, meaning they will appear as VFIO devices within the
container.

Unlike a runc container, the VFIO devices will have different names to the
host, since the names correspond to the IOMMU groups of the guest and those
can't be remapped with namespaces.

For now we keep 'guest-kernel' as the value in the default configuration
files, to maintain current Kata behaviour.  In future we should change this
to 'vfio' as the default.  That will make Kata's default behaviour more
closely resemble OCI specified behaviour.

fixes #693

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-10-25 12:29:31 +11:00
David Gibson
68696e051d runtime: Add parameter to constrainGRPCSpec to control VFIO handling
Currently constrainGRPCSpec always removes VFIO devices from the OCI
container spec which will be used for the inner container.  For
upcoming support for VFIO devices in DPDK usecases we'll need to not
do that.

As a preliminary to that, add an extra parameter to the function to
control whether or not it will remove the VFIO devices from the spec.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-10-25 12:29:31 +11:00
David Gibson
d9e2e9edb2 runtime: Rename constraintGRPCSpec to improve grammar
"constraint" is a noun, "constrain" is the associated verb, which makes
more sense in this context.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-10-25 12:29:31 +11:00
David Gibson
57ab408576 runtime: Introduce "vfio_mode" config variable and annotation
In order to support DPDK workloads, we need to change the way VFIO devices
will be handled in Kata containers.  However, the current method, although
it is not remotely OCI compliant, has real uses.  Therefore, introduce a new
runtime configuration field "vfio_mode" to control how VFIO devices will be
presented to the container.

We also add a new sandbox annotation -
io.katacontainers.config.runtime.vfio_mode - to override this on a
per-sandbox basis.

For now, the only allowed value is "guest-kernel" which refers to the
current behaviour where VFIO devices added to the container will be bound
to whatever driver in the VM kernel claims them.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-10-25 12:29:29 +11:00
David Gibson
730b9c433f agent/device: Create device nodes for VFIO devices
Add and adjust the vfio devices in the inner container spec so that
rustjail will create device nodes for them.

In order to do that, we also need to make sure the VFIO device node is
ready within the guest VM first.  That may take (slightly) longer than
just the underlying PCI device(s) being ready, because vfio-pci needs
to initialize.  So, add a helper function that will wait for a
specific VFIO device node to be ready, using the existing uevent
listening mechanism.  It also returns the device node name for the
device (though in practice it will always be /dev/vfio/NN where NN is the
group number).

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-10-25 12:28:33 +11:00
David Gibson
175f9b06e9 rustjail: Allow container devices in subdirectories
Many device nodes go directly under /dev, however some are conventionally
placed in subdirectories under /dev.  For example /dev/vfio/vfio or
/dev/pts/ptmx.

Currently, attempting to pass such a device into a Kata container will fail
because mknod() will get an ENOENT because the parent directory is missing
(or an equivalent error for bind_dev()).

Correct that by making subdirectories as necessary in create_devices().

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-10-25 12:28:33 +11:00
David Gibson
9891efc61f rustjail: Correct sanity checks on device path
For each user supplied device, create_devices() checks that the given path
actually is in /dev, by checking that its path starts with /dev and does
not contain "..".

However, this has subtle errors because it's interpreting the path as a raw
string without considering separators.  It will accept the path /devfoo
which it should not, while it will not accept the valid (though weird)
paths /dev/... and /dev/a..b.

Correct this by using std::path::Path methods designed for the purpose.
Having done this, it's trivial to also generate the relative path that
mknod_dev() or bind_dev() will need, so do that at the same time.

We also move this logic into a helper function so that we can add some unit
tests for it.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-10-25 12:28:33 +11:00
David Gibson
d6b62c029e rustjail: Change mknod_dev() and bind_dev() to take relative device path
Both these functions take the absolute path from LinuxDevice and drop the
leading '/' to make a relative path.  They do that with a simple
&dev.path[1..].  That can be technically incorrect in some edge cases such
as a path with redundant /s like "//dev//sda".

To handle cases like that, have the explicit relative path passed into
these functions.  For now we calculate it in the same buggy way, but we'll
fix that shortly.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-10-25 12:28:33 +11:00
David Gibson
2680c0bfee rustjail: Provide useful context on device node creation errors
create_devices() within the rustjail module is responsible for creating
device nodes within the (inner) containers.  Errors that occur here will
be propagated up, but are likely to be low level failures of mknod() - e.g.
ENOENT or EACCES - which won't be very useful when reported all the way up
to the runtime without the context of what we were trying to do.

Add some anyhow context information giving the details of the device we
were trying to create when it failed.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-10-25 12:28:33 +11:00
David Gibson
42b92b2b05 agent/device: Allow container devname to differ from the host
Currently, update_spec_device() assumes that the proper device path in the
(inner) container is the same as the device path specified in the outer OCI
spec on the host.

Usually that's correct.  However for VFIO group devices we actually need
the container to see the VM's device path, since it's normal to correlate
that with IOMMU group information from sysfs which will be different in the
guest and which we can't namespace away.

So, add an extra "final_path" parameter to update_spec_device() to allow
callers to choose the device path that should be used for the inner
container.  All current callers pass the same thing as container_path, but
that will change in future.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-10-25 12:28:33 +11:00
David Gibson
827a41f973 agent/device: Refactor update_spec_device_list()
update_spec_device_list() is used to update the container configuration to
change device major/minor numbers configured by the Kata client based on
host details to values suitable for the sandbox VM, which may differ.  It
takes a 'device' object, but the only things it actually uses from there
are container_path and vm_path.

Refactor this as update_spec_device(), taking the host and guest paths to
the device as explicit parameters.  This makes the function more
self-contained and will enable some future extensions.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-10-25 12:28:33 +11:00
David Gibson
8ceadcc5a9 agent/device: Sanity check guest IOMMU groups
Each VFIO device passed into the guest could represent a whole IOMMU group
of devices on the host.  Since these devices aren't DMA isolated from each
other, they must appear as the same IOMMU group in the guest as well.

The VMM should enforce that for us, but double check it, since things can't
work otherwise.  This also means we determine the guest IOMMU group for the
VFIO device, which we'll be needing later.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-10-25 12:28:33 +11:00
David Gibson
ff59db7534 agent/device: Add function to get IOMMU group for a PCI device
For upcoming VFIO extensions we'll need to work with the IOMMU groups of
VFIO devices.  This helps us towards that by adding pci_iommu_group() to
retrieve the IOMMU group (if any) of a given PCI device.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-10-25 12:28:33 +11:00
David Gibson
13b06a35d5 agent/device: Rebind VFIO devices to VFIO driver inside guest
VFIO devices can be added to a Kata container and they will be passed
through to the sandbox guest.  However, inside the guest those devices
will bind to a native guest driver, so they will no longer appear as VFIO
devices within the guest.  This behaviour differs from runc or other
conventional container runtimes.

This code allows the agent to match the behaviour of other runtimes,
if instructed to by kata-runtime.  VFIO devices it's informed about
with the "vfio" type instead of the existing "vfio-gk" type will be
rebound to the vfio-pci driver within the guest.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-10-25 12:28:33 +11:00
David Gibson
e22bd78249 agent/device: Add helper function for binding a guest device to a driver
For better VFIO support, we're going to need to take control of which guest
driver controls specific guest devices.  To assist with that, add the
pci_driver_override() function to force a specific guest device to be
bound to a specific guest driver.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-10-25 12:28:33 +11:00
Manabu Sugimoto
b40eedc9f7 rustjail: Consistent coding style of LinuxDevice type
Use `"c".to_string` in the device type of `dev/full`
in order to be consistent with the coding style of other devices.

Fixes: #2890

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2021-10-25 09:15:59 +09:00
Jianyong Wu
57c0f93f54 agent: fix race condition when test watcher
create_tmpfs won't pass due to the race condition in watcher umount. To quote
James's words here:

1. Rust runs all tests in parallel.
2. Mounts are a process-wide, not a per-thread resource.
The only test that calls watcher.mount() is create_tmpfs().
However, other tests create BindWatcher objects.
3. BindWatcher's drop() implementation calls self.cleanup(),
which calls unmount for the mountpoint create_tmpfs() asserts.
4. The other tests are calling unmount whenever a BindWatcher goes
out of scope.

To avoid that issue, let the tests using BindWatcher in watcher and
sandbox.rs run sequentially.

Fixes: #2809
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2021-10-24 17:31:53 +08:00
Jianyong Wu
1a96b8ba35 template: disable template unit test on arm
Template is broken on arm. here we disable the template unit test
temporarily.

Fixes: #2809
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2021-10-23 15:07:25 +08:00
Jianyong Wu
43b13a4a6d runtime: DefaultMaxVCPUs should not greater than defaultMaxQemuVCPUs
DefaultMaxVCPUs may be larger than defaultMaxQemuVCPUs; this should
be checked and avoided.

Fixes: #2809
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2021-10-23 15:07:25 +08:00
Jianyong Wu
c59c36732b runtime: current vcpu number should be limited
The physical current vcpu number should not be used directly as the
largest vcpu number is limited to defaultMaxQemuVCPUs.
Here, a new helper is introduced in pkg/katautils/config.go to get
current vcpu number.

Fixes: #2809
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2021-10-23 15:07:25 +08:00
Jianyong Wu
fa922517d9 runtime: kernel version with '+' as suffix panic in parse
The current kernel version parse lib can't process suffix '+', as the
modified kernel version will add '+' as suffix, thus panic will occur.

For example, if the current kernel version is "5.14.0-rc4+", test
TestHostNetworkingRequested will panic:
--- FAIL: TestHostNetworkingRequested (0.00s)
panic: &{DistroName:ubuntu DistroVersion:18.04
KernelVersion:5.11.0-rc3+ Issue: Passed:[] Failed:[] Debug:true
ActualEUID:0}: failed to check test constraints: error: Build meta data
is empty

Here, remove the suffix '+' in kernel version fix helper.

Fixes: #2809
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2021-10-23 15:07:25 +08:00
Manohar Castelino
52268d0ece hypervisor: Expose the hypervisor itself
Export the top level hypervisor type

s/hypervisor/Hypervisor

Fixes: #2880

Signed-off-by: Manohar Castelino <mcastelino@apple.com>
Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-10-22 16:46:02 -07:00
Eric Ernst
a72bed5b34 hypervisor: update tests based on createSandbox->CreateVM change
Fixup a couple of broken tests.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-10-22 16:45:35 -07:00
Manohar Castelino
f434bcbf6c hypervisor: createSandbox is CreateVM
Last of a series of commits to export the top level
hypervisor generic methods.

s/createSandbox/CreateVM

Fixes #2880

Signed-off-by: Manohar Castelino <mcastelino@apple.com>
Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-10-22 16:45:35 -07:00
Manohar Castelino
76f1ce9e30 hypervisor: startSandbox is StartVM
s/startSandbox/StartVM

Signed-off-by: Manohar Castelino <mcastelino@apple.com>
Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-10-22 16:45:35 -07:00
Manohar Castelino
fd24a695bf hypervisor: waitSandbox is waitVM
renaming...

Signed-off-by: Manohar Castelino <mcastelino@apple.com>
2021-10-22 16:45:35 -07:00
Manohar Castelino
a6385c8fde hypervisor: stopSandbox is StopVM
Renaming. There is no Sandbox specific logic except tracing.

Signed-off-by: Manohar Castelino <mcastelino@apple.com>
2021-10-22 16:45:35 -07:00
Manohar Castelino
f989078cd2 hypervisor: resumeSandbox is ResumeVM
renaming...

Signed-off-by: Manohar Castelino <mcastelino@apple.com>
2021-10-22 16:45:35 -07:00
Manohar Castelino
73b4f27c46 hypervisor: saveSandbox is SaveVM
rename

Signed-off-by: Manohar Castelino <mcastelino@apple.com>
2021-10-22 16:45:35 -07:00
Manohar Castelino
7308610c41 hypervisor: pauseSandbox is nothing but PauseVM
renaming

Signed-off-by: Manohar Castelino <mcastelino@apple.com>
2021-10-22 16:45:35 -07:00
Manohar Castelino
8f78e1cc19 hypervisor: The SandboxConsole is the VM's console
update naming

Signed-off-by: Manohar Castelino <mcastelino@apple.com>
2021-10-22 16:45:35 -07:00
Manohar Castelino
4d47aeef2e hypervisor: Export generic interface methods
This is in preparation for creating a separate hypervisor package.
Non functional change.

Signed-off-by: Manohar Castelino <mcastelino@apple.com>
2021-10-22 16:45:35 -07:00
Manohar Castelino
6baf2586ee hypervisor: Minimal exports of generic hypervisor internal fields
Export commonly used hypervisor fields and utility functions.
These need to be exposed to allow the hypervisor to be consumed
externally.

Note: This does not change the hypervisor interface definition.
Those changes will be separate commits.

Signed-off-by: Manohar Castelino <mcastelino@apple.com>
2021-10-22 16:45:35 -07:00
Eric Adams
37fa453dd2 osbuilder: Update QAT driver in Dockerfile
This is to bump the OOT QAT 1.7 driver version to the
latest version. I did a test on my QAT enabled system and
everything functioned as expected.

Fixes: #2877

Signed-off-by: Eric Adams <eric.adams@intel.com>
2021-10-22 00:08:24 +00:00
GabyCT
03877f3479 Merge pull request #2872 from likebreath/1020/clh_v19.0
Upgrade to Cloud Hypervisor v19.0
2021-10-21 10:26:55 -05:00
James O. D. Hunt
8c8bcb7b00 Merge pull request #2810 from mythi/sgx-doc
docs: use-cases: Update Intel SGX use case
2021-10-21 12:28:29 +01:00
James O. D. Hunt
09741272bc Merge pull request #2783 from likebreath/1001/clh_enable_seccomp
virtcontainers: clh: Enable the `seccomp` feature
2021-10-21 09:21:33 +01:00
Bo Chen
8030b6caf0 virtcontainers: clh: Re-generate the client code
This patch re-generates the client code for Cloud Hypervisor v19.0.
Note: The client code of cloud-hypervisor's (CLH) OpenAPI is
automatically generated by openapi-generator [1-2].

[1] https://github.com/OpenAPITools/openapi-generator
[2] https://github.com/kata-containers/kata-containers/blob/main/src/runtime/virtcontainers/pkg/cloud-hypervisor/README.md

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-10-20 15:48:55 -07:00
Bo Chen
8296754e07 versions: Upgrade to Cloud Hypervisor v19.0
Highlights from the Cloud Hypervisor release v19.0: 1) Improved PTY
handling for serial and virtio-console; 2) PCI boot time optimisations;
3) Improved TDX support; 4) Live migration enhancements (support with
virtio-mem and virtio-balloon); 5) virtio-mem support with vfio-user; 6)
AArch64 for virtio-iommu; 7) Various bug fixes for live-migration and
VFIO passthrough.

Details can be found: https://github.com/cloud-hypervisor/cloud-hypervisor/releases/tag/v19.0

Fixes: #2871

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-10-20 15:39:53 -07:00
James O. D. Hunt
de45c783ca Merge pull request #2864 from liubin/fix/2791-delete-cri-containerd-from-versions
runtime: delete cri containerd plugin from versions.yaml
2021-10-20 13:21:34 +01:00
James O. D. Hunt
c1adb075ad Merge pull request #1937 from jodh-intel/add-tracing-docs
docs: Write tracing documentation
2021-10-20 10:14:46 +01:00
Binbin Zhang
2b13944964 docs: Fix outdated links
fix outdated links which were checked out by workflow/docs-url-alive-check

Fixes #2630

Signed-off-by: Binbin Zhang <binbin36520@gmail.com>
2021-10-20 16:54:39 +08:00
Archana Shinde
6abc70725f Merge pull request #2523 from Bevisy/main-2295
runtime: delete useless src/runtime/cli/exit.go
2021-10-20 01:37:20 -07:00
Mikko Ylinen
4f75ccb903 docs: use-cases: Update Intel SGX use case
The upstream kernel SGX support has changed drastically since
the initial version of the Intel SGX use case doc was written.

The updated use case documents how to easily setup SGX with
Kata Containers running in a Kubernetes cluster.

Fixes: #2811
Depends-on: github.com/kata-containers/tests#4079

Signed-off-by: Mikko Ylinen <mikko.ylinen@intel.com>
Co-authored-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-10-20 09:20:57 +03:00
Binbin Zhang
4f018b5287 runtime: delete useless src/runtime/cli/exit.go
simply use os.Exit() replace exit()
delete useless ci/go-no-os-exit.sh;

Fixes: #2295

Signed-off-by: Binbin Zhang <binbin36520@gmail.com>
2021-10-20 11:42:37 +08:00
Shiming Zhang
7a80aeb0b8 docs: Moving from EOT to EOF
Only this uses EOT, the others are EOF, uniformly changed to EOF to
avoid confusion

Fixes: #2550

Signed-off-by: Shiming Zhang <wzshiming@foxmail.com>
2021-10-20 01:27:23 +08:00
James O. D. Hunt
09a5e03f4a docs: Write tracing documentation
Add documentation explaining how to trace the runtime and agent.

Fixes: #1892.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-10-19 17:33:01 +01:00
Julio Montes
448fe0a5ed Merge pull request #2853 from devimc/2021-10-15/snap/clh+expKernel
snap: add cloud-hypervisor and experimental kernel
2021-10-19 11:19:11 -05:00
bin
b625f62d4b runtime: delete cri containerd plugin from versions.yaml
Delete cri containerd plugin from versions.yaml.

Related to:
- https://github.com/kata-containers/tests/issues/4061
- https://github.com/kata-containers/kata-containers/issues/2791

Fixes: #2791

Signed-off-by: bin <bin@hyper.sh>
2021-10-19 21:36:08 +08:00
Julio Montes
24fff57c23 snap: make curl commands consistent
remove -k and -Ssf from curl commands

Signed-off-by: Julio Montes <julio.montes@intel.com>
2021-10-19 08:36:06 -05:00
Julio Montes
2b9f79cfc9 snap: add cloud-hypervisor and experimental kernel
Add cloud-hypervisor and experimental kernel as part of the kata snap

fixes #2852

Signed-off-by: Julio Montes <julio.montes@intel.com>
2021-10-19 08:35:06 -05:00
James O. D. Hunt
9db56ffd85 Merge pull request #2863 from wainersm/osbuilder_dracut_rust
osbuilder: Call detect_rust_version() right before install_rust.sh
2021-10-19 11:48:39 +01:00
Archana Shinde
1ba069b303 Merge pull request #2860 from davidhay1969/update_developer_guide
docs: Updating Developer Guide re qemu-img
2021-10-19 01:40:52 -07:00
Bin Liu
29234c6d45 Merge pull request #2859 from ManaSugi/add-libseccomp-dep
versions: Add libseccomp and gperf version
2021-10-19 13:05:00 +08:00
Chelsea Mafrica
4ce2b14e60 Merge pull request #2817 from jodh-intel/clh+fc-agent-tracing
Enable agent tracing for hybrid VSOCK hypervisors
2021-10-18 22:01:52 -07:00
Bin Liu
72d1a04cf1 Merge pull request #2761 from liubin/fix/2752-optimize-test-code
runtime: optimize test code
2021-10-19 12:21:04 +08:00
Bin Liu
78d3f319e2 Merge pull request #2792 from liubin/fix/2791-remove-cri-containerd-from-source
runtime: use containerd package instead of cri-containerd
2021-10-19 10:39:25 +08:00
bin
273a1a9ac6 runtime: optimize test code
This PR includes these optimize changes:

- Remove the dependency on the container engine.
  The old code uses runc to generate config.json and
  Docker to export rootfs, that will be heavy and need
  additional dependency.
  Using a fixed config for busybox image can avoid
  the heavy processing above.

- Moved duplicate code to pkg/katatestutils package

Fixes: #2752

Signed-off-by: bin <bin@hyper.sh>
2021-10-19 09:54:49 +08:00
bin
76f16fd1a7 runtime: use containerd package instead of cri-containerd
cri-containerd project has been merged into containerd repo, and
we should not reference it any more in code and docs.

This commit will use containerd package instead of cri-containerd
package.

Fixes: #2791

Signed-off-by: bin <bin@hyper.sh>
2021-10-19 09:40:20 +08:00
bin
6d55b1bafa docs: use containerd to replace cri-containerd
cri-containerd plugin is deprecated, use containerd instead.

Fixes: #2791

Signed-off-by: bin <bin@hyper.sh>
2021-10-19 09:38:56 +08:00
bin
ed02bc9041 packaging: add containerd to versions.yaml
This commit will add containerd to versions.yaml.

Please note that for now there are both containerd and cri-containerd
in the versions.yaml.

After updating of kata-containers/tests repo, the cri-containerd
should be removed.

Fixes: #2791

Signed-off-by: bin <bin@hyper.sh>
2021-10-19 09:38:56 +08:00
Wainer dos Santos Moschetta
50da26d3e6 osbuilder: Call detect_rust_version() right before install_rust.sh
When building with dracut method the build_rootfs_distro() is not called, in turn
detect_rust_version() isn't either, so the install_rust.sh script is given a null
rust version. This changed the script to call detect_rust_version() right before
install_rust.sh.

Related to commit: f34f67d610
Fixes #2862
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-10-18 16:54:53 -04:00
James O. D. Hunt
41c49a7bf5 Merge pull request #2771 from fengwang666/debug-pid
runtime: update sandbox root dir cleanup behavior in rootless hypervisor
2021-10-18 17:47:47 +01:00
Dave Hay
b4fadc9456 docs: Updating Developer Guide re qemu-img
Adding notes re `qemu-img` dependency for non-Docker builds of image

Fixes #2477

Signed-off-by: Dave Hay <david_hay@uk.ibm.com>
2021-10-18 15:39:15 +01:00
Manabu Sugimoto
b8e69ce5bd versions: Add libseccomp and gperf version
Add `libseccomp` and `gperf` version information to support
for seccomp feature in Kata agent: #1788.

Fixes: #2858

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2021-10-18 23:21:02 +09:00
James O. D. Hunt
d0e5e55e55 Merge pull request #2569 from Bevisy/main-2188
utils: kata-manager: Update kata-manager.sh for new containerd config
2021-10-18 14:50:16 +01:00
Julien Ropé
17a8c5c685 runtime: Fix random failure for TestIoCopy
When running the TestIoCopy test, on some occasions, the test
runs too quickly, and closes the stdin pipe before the ioCopy()
routine starts to read from it. This causes a SIGSEGV error.

To fix this issue, I am adding additional read/write tests before
closing the pipes. As the read operation waits for the writer to
be done, this actually synchronizes the threads and makes sure
the final tests (with closed pipes) work as expected.

Fixes: #2042

Signed-off-by: Julien Ropé <jrope@redhat.com>
2021-10-18 15:25:57 +02:00
James O. D. Hunt
f16a99603c Merge pull request #2399 from Jakob-Naucke/container-osbuilder-respin
osbuilder: Re-enable building the agent in Docker
2021-10-18 12:06:37 +01:00
Bin Liu
1cb38ecbe7 Merge pull request #2843 from zhaojizhuang/fixroute
agent: Do not fail when trying to adding existing routes
2021-10-18 15:52:29 +08:00
Bin Liu
c2be2dfb61 Merge pull request #2848 from c3d/bug/2847-tag-typo
tracing: Fix typo in "package" tag name
2021-10-18 14:50:47 +08:00
Fabiano Fidêncio
681b80473f Merge pull request #2846 from fidencio/wip/kata-deploy-add-dockerignore-file
kata-deploy: add .dockerignore file
2021-10-16 10:39:07 +02:00
Chelsea Mafrica
6ffe9e5afe Merge pull request #2816 from cmaf/add-var-name-kata
runtime: change name in config settings back to "kata"
2021-10-15 14:09:41 -07:00
Jakob Naucke
f34f67d610 osbuilder: Specify version when installing Rust
and update the script in `ci/` accordingly.
When only parts of the Kata Containers repositories are checked out
(e.g. when building with Snap) and no Rust version is provided in
calling `install_rust.sh`, the scripts will attempt to clone the
appropriate repos to read the version, which will fail because the
directories already exist. Since we have read the version already, we
can just specify it.

Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-10-15 16:27:40 +02:00
Jakob Naucke
135a0802c5 osbuilder: Pass CI env to container agent build
The agent build inside a Docker or Podman container has been re-enabled,
but we have since introduced the `$CI` environment variable. Pass it to
avoid checking out the tests repo to main when there is a dependency.

Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-10-15 16:27:39 +02:00
Jakob Naucke
eb5dd76e9d osbuilder: Re-enable building the agent in Docker
or Podman. This is a partial revert of
76c18aa345. The rationale behind that
commit was the fact that the agent could not be built on Alpine, and
then this capability was removed altogether. The issue in Alpine has
since been resolved (see
https://github.com/kata-containers/osbuilder/issues/386). At the same
time, this ensures being able to run a glibc agent on hosts with distros
more recent than the osbuilder distro used (i.e. as of now, when you
build the agent on the host, and its glibc is newer than the one used in
the guest, the agent may encounter unresolved symbols).

Fixes #2398
Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-10-15 16:27:37 +02:00
Christophe de Dinechin
bcffa26305 tracing: Fix typo in "package" tag name
The tracing tags for api.go contain `"packages"` as a tag name,
whereas all other tags contain `"package"`.

Fixes: #2847

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2021-10-15 14:48:00 +02:00
James O. D. Hunt
e61f5e2931 runtime: Show socket path in kata-env output
Display a pseudo path to the sandbox socket in the output of
`kata-runtime env` for those hypervisors that use Hybrid VSOCK.

The path is not a real path since the command does not create a sandbox.
The output includes a `{ID}` tag which would be replaced with the real
sandbox ID (name) when the sandbox was created.

This feature is only useful for agent tracing with the trace forwarder
where the configured hypervisor uses Hybrid VSOCK.

Note that the feature required a new `setConfig()` method to be added
to the `hypervisor` interface. This isn't normally needed as the
specified hypervisor configuration passed to `setConfig()` is also
passed to `createSandbox()`. However the new call is required by
`kata-runtime env` to display the correct socket path for Firecracker.
The new method isn't wholly redundant for the main code path though as
it's now used by each hypervisor's `createSandbox()` call.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-10-15 11:45:29 +01:00
James O. D. Hunt
5b3a349db5 trace-forwarder: Support Hybrid VSOCK
Add support for Hybrid VSOCK. Unlike standard vsock (`vsock(7)`), under
hybrid VSOCK, the hypervisor creates a "master" *UNIX* socket on the
host. For guest-initiated VSOCK connections (such as the Kata agent uses
for agent tracing), the hypervisor will then attempt to open a VSOCK
port-specific variant of the socket which it expects a server to be
listening on. Running the trace forwarder with the new `--socket-path`
option and passing it the Hypervisor specific master UNIX socket path,
the trace forwarder will listen on the VSOCK port-specific socket path
to handle Kata agent traces.

For further details and examples, see the README or run the
trace forwarder with `--help`.

Fixes: #2786.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-10-15 11:45:29 +01:00
James O. D. Hunt
baf4784a29 Merge pull request #2353 from jodh-intel/rm-trace-type-and-mode
tracing: Remove trace mode and trace type
2021-10-15 11:44:44 +01:00
Fabiano Fidêncio
e42bc05c8a kata-deploy: add .dockerignore file
.dockerignore file is similar to .gitignore and serves the purpose to
simply ignore paths in the build context.

For now, let me just use it to fix the following problem:
```
docker build --build-arg KATA_ARTIFACTS=kata-static.tar.xz .
error checking context: 'no permission to read from
'(...)/local-build/build/firecracker/builddir/firecracker/(...)/crc64-1.0.0/.gitignore''.
```

Fixes: #2845

Signed-off-by: Fabiano Fidêncio <fabiano@fidencio.org>
2021-10-15 12:00:14 +02:00
James O. D. Hunt
321be0f794 tracing: Remove trace mode and trace type
Remove the `trace_mode` and `trace_type` agent tracing options as
decided in the Architecture Committee meeting.

See:

- https://github.com/kata-containers/kata-containers/pull/2062

Fixes: #2352.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-10-15 10:09:38 +01:00
zhaojizhuang
7d0b616cf3 agent: Do not fail when trying to add existing routes
Adding a route that already exists should not be a reason for the agent to fail
booting, thus preventing the sandbox from starting.

Fixes #2712

Signed-off-by: zhaojizhuang <571130360@qq.com>
2021-10-14 18:38:26 +02:00
Fabiano Fidêncio
119edcc443 Merge pull request #2837 from fidencio/2.3.0-alpha2-branch-bump
# Kata Containers 2.3.0-alpha2
2021-10-14 09:52:37 +02:00
Fabiano Fidêncio
8873ddab9e release: Kata Containers 2.3.0-alpha2
- kata-monitor: add index page
- clh: Refine the usage of guest console and kernel parameters with Cloud Hypervisor
- agent: exec should inherit container process capabilities
- GitHubActions: fix invalid format of require-pr-porting-labels.yaml
- agent: flush root span before process finish
- Extend PCI submodules to represent non-zero functions and addresses
- packaging/kernel: Add CONFIG_PCI_MMCONFIG to x86 guest kernel configuration
- runtime: don't start shim management server in tests
- qemu: use GitLab repos instead of qemu.org
- runtime: optimize code for managing temp users for rootless mode
- Agent configuration file and API restriction
- Delete file virtcontainers-setup.sh
- vendor: Update containerd to v1.5.7
- runtime: Optimize func noNeedForOutput and add test cases
- runtime: Fix !x86 static checks
- #2676: fixing centos gpg key url for ppc64le
- Pass the host route IP family to the guest
- cmd: get return value for setCPUtype
- packaging: Configure QEMU with --enable-pie
- clh: Enable guest userland output
- cmd: Fix mismatched types in testModuleData
- runtime: update .gitignore to ignore monitor_address file
- runtime: fix the make check-go-static command error
- virtcontainers: clean up useless code
- Remove forced PCI rescans from agent
- kernel: Enable SGX in experimental kernel.
- runtime: fix nil reference in cleanup rootless user
- qemu: prepare to upgrade qemu version to 6.1.0 for arm
- kata-monitor (minor) improvements
- virtcontainers: Fix incorrect scripts path
- runtime: clear virtcontainers cgroup duplicated function
- Kata monitor: cache improvements
- virtiofs: fix error report in TestVirtiofsdStart when go test running

176dee6f agent: exec should inherit container process capabilities
7b2bfd4e virtcontainers: clh: Use 'quiet' as the default kernel parameter
3e24e46c virtcontainers: clh: Turn-off serial and virtio-console by default
2d7b65e8 agent: flush root span before process finish
5c77cc2c runtime: don't start shim management server in tests
72044180 agent/device: Return PCI address from wait_for_pci_device()
e50b05d9 agent/pci: Add type to represent PCI addresses
8528157b agent/pci:  Extend Slot type to represent PCI function as well
bf8f582c runtime: optimize code for managing temp users for rootless mode
a9c2a4ba GitHubActions: fix invalid format of require-pr-porting-labels.yaml
c4236cb2 packaging/kernel: Add CONFIG_PCI_MMCONFIG to x86 guest kernel configuration
08360c98 agent: Add an agent configuration file example
8a4e69d2 agent: rpc: Return UNIMPLEMENTED for not allowed endpoints
0ea2e3af agent: config: Allow for building the configuration from a file
63539dc9 agent: config: Add allowed endpoints
a953fea3 agent: config: Simplify configuration creation
b888edc2 agent: config: Implement Default
7eac2ec7 protection: add confidential compute frame for arm
8acfc154 check: fix typecheck failure in qemu_arm64_test.go
5b02d54e virtcontainers: fix lint failure on ppc64le
ff9728f0 virtcontainers: nolint guestProtection
5c138c8f runtime: Fix field alignment on s390x
191d0016 vendor: Update containerd to v1.5.7
f7f6bd01 kata-monitor: add index page
a44cde7e agent: netlink: Use the grpc IP family field when updating the route
71ce6cfe runtime: Pass the route IP family to the agent
99450bd1 agent: protos: Add a Family field to the Route payload
f85fe702 runtime: vendor: Bump the netlink package dependency
e439cec7 cmd: fix field alignment on ppc64le
e5159ea7 cmd: get return value for setCPUtype
2ce8d426 clh: Suppress hypervisor output to make guest output visible
cd1064b1 packaging: Configure QEMU with --enable-pie
762922a5 runtime: delete func ConstraintsToVCPUs
4f485430 runtime: delete virtcontainers-setup.sh
80f6b977 osbuilder: fixing centos gpg key url for ppc64le
bb99bfb4 runtime: fix the make check-go-static command error
870771d7 runtime: update .gitignore to ignore monitor_address file
18bff584 runtime: Optimize func noNeedForOutput and add test cases
e5fe53f0 runtime: fix nil reference in cleanup rootless user
2304a596 runtime: set the sandbox storage path static
315295e0 runtime: rename GetSanboxesStoragePath() --> GetSandboxesStoragePath()
13e65f2e cmd: Fix mismatched types in testModuleData
da42cbc0 actions: Build experimental kernel on kata-deploy push action
dffc5092 kernel: Enable SGX in experimental kernel.
ff6a677d kernel-build: Enable multiple config types.
90046964 experimental-kernel: bump 5.13.10
1fbb7304 build: kata-deploy kernel experimental
907459c1 agent/device: Don't force PCI rescans
75f426dd agent: Simplify do_add_swap()
aad1a873 runtime/device: Give the agent information about VFIO devices
ebd7b618 runtime: Don't repeat GetDeviceByID between appendDevices() and append*()
ad45c52f runtime/device: Record guest PCI path for VFIO devices
5c2af3e3 runtime/device: Refactor hotplugVFIODevice() to have common exit path
8bc71105 agent/device: Add device type for VFIO devices
f7a27075 agent: Move driver type constants into device.rs
5b1eb08b agent/uevent: Improve logging of wait_for_uevent()
cf36fd87 runtime: Fix some leftover go fmt errors
6d94957a kernel: reduce alignment size of memory hotplug to 128M
48090f62 qemu: disable plug on arm64 when pie is added
57e3712d virtiofs: fix error report in TestVirtiofsdStart when go test running
8b0bc1f4 kata-monitor: bump version to 0.2.0
bfb556d5 kata-monitor: refresh kata sandbox list on fs events
0e854f3b kata-monitor: improve detection of kata workloads
80463b44 qemu: use GitLab repos instead of qemu.org
3b0c4bf9 runtime: clear virtcontainers cgroup duplicated function
afad910d kata-monitor: add getSandboxFS()
e38686f7 runtime: add GetSandboxesStoragePath()
245a12bb kata-monitor: improve sandbox caching
fc067d61 kata-monitor: warn when unable to retrieve the lower level runtime
53ec4df9 kata-monitor: minor fixes
47516988 virtcontainers: Fix incorrect scripts path
814cea96 virtcontainers: clean up useless code

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-10-14 06:56:30 +02:00
Bin Liu
8be85fda4f Merge pull request #2775 from fgiudici/kata-monitor_issue2292
kata-monitor: add index page
2021-10-14 09:12:57 +08:00
GabyCT
5c7e1b457c Merge pull request #2821 from likebreath/1011/clh_console
clh: Refine the usage of guest console and kernel parameters with Cloud Hypervisor
2021-10-13 13:36:32 -05:00
Eric Ernst
6cc4d6b54e Merge pull request #2829 from bergwolf/capability
agent: exec should inherit container process capabilities
2021-10-13 09:02:03 -07:00
Peng Tao
176dee6f37 agent: exec should inherit container process capabilities
Otherwise rustjail would not set its capabilities and it ends up getting
all capabilities.

Fixes: #2828
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-10-13 17:24:52 +08:00
Bo Chen
7b2bfd4eca virtcontainers: clh: Use 'quiet' as the default kernel parameter
The 'quiet' kernel parameter can avoid guest kernel logs while booting,
which can reduce boot time.

Fix: #2820

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-10-11 22:06:27 -07:00
Bo Chen
3e24e46c70 virtcontainers: clh: Turn-off serial and virtio-console by default
We will need to have console output from the guest only for debugging
purposes. As a result, we can turn-off both the serial and
virtio-console devices by default for better boot time.

Fixes: #2820

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-10-11 22:06:23 -07:00
GabyCT
88affdb7b7 Merge pull request #2799 from liubin/fix/github-action-format
GitHubActions: fix invalid format of require-pr-porting-labels.yaml
2021-10-11 09:36:04 -05:00
Bin Liu
b7cd4ca2b8 Merge pull request #2813 from liubin/fix/2812-flush-root-span
agent: flush root span before process finish
2021-10-11 18:46:09 +08:00
bin
2d7b65e8eb agent: flush root span before process finish
Variables in rust will be dropped at the end of the function.

In function real_main the trace will be shut down by `tracer::end_tracing()`,
but at this time the root span is in an active state, so this root span
will not be sent to the trace collector.

This can be fixed by dropping the root span manually.

Fixes: #2812

Signed-off-by: bin <bin@hyper.sh>
2021-10-11 17:14:37 +08:00
Chelsea Mafrica
3f95469a78 runtime: logging: Add variable for syslog tag
The variable for 'name' in config-settings.go.in was previously
hardcoded as "kata". In e7c42fb it was changed to the runtime name,
which is "kata-runtime". Add a variable to specify a syslog identifier
for consistency for tests and documentation that use it.

Fixes #2806

Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2021-10-11 02:12:13 -07:00
Marcel Apfelbaum
06f4ab10b4 Merge pull request #2764 from dgibson/more-pci
Extend PCI submodules to represent non-zero functions and addresses
2021-10-10 15:57:54 +03:00
Marcel Apfelbaum
9796babd92 Merge pull request #2311 from dgibson/mmconfig
packaging/kernel: Add CONFIG_PCI_MMCONFIG to x86 guest kernel configuration
2021-10-10 15:11:33 +03:00
Feng Wang
adc9e0baaf runtime: fix two bugs in rootless hypervisor
Update the sandbox dir clean up logic to be more appropriate
Add different seeds for randInt() method

Fixes #2770

Signed-off-by: Feng Wang <feng.wang@databricks.com>
2021-10-08 15:52:42 -07:00
Bo Chen
51cbe14584 runtime: Add option "disable_seccomp" to config hypervisor.clh
This patch adds an option "disable_seccomp" to the config
hypervisor.clh, from which users can disable the `seccomp`
feature from Cloud Hypervisor when needed (for debugging purposes).

Fixes: #2782

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-10-08 15:10:30 -07:00
Bo Chen
98b7350a1b virtcontainers: clh: Enable the seccomp feature
This patch enables the `seccomp` feature from Cloud Hypervisor which
provides fine-grained allowed syscalls for each of its worker
threads. It brings important security benefits, though it would increase
the memory footprint.

Fixes: #2782

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-10-08 15:07:43 -07:00
Bin Liu
0300e91cd0 Merge pull request #2808 from liubin/fix/2805-fix-test-leak-of-monitor-socket
runtime: don't start shim management server in tests
2021-10-08 19:42:09 +08:00
bin
5c77cc2c49 runtime: don't start shim management server in tests
The shim management server runs in a goroutine; in test mode
this causes the directory containing the listen socket
file (/run/vc/sbs/777-77-77777777/shim-monitor.sock) to leak
after the tests have finished.

Fixes: #2805

Signed-off-by: bin <bin@hyper.sh>
2021-10-08 18:41:53 +08:00
Jakob Naucke
4152c45e4c Merge pull request #2706 from yuanzhe-liu0/qemu_link
qemu: use GitLab repos instead of qemu.org
2021-10-08 12:03:55 +02:00
David Gibson
72044180e4 agent/device: Return PCI address from wait_for_pci_device()
wait_for_pci_device() waits for the PCI device at the given path to become
ready, but it doesn't currently give you any meaningful handle on that
device.

Change the signature, so that it returns the PCI address of the device.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-10-08 16:52:49 +11:00
David Gibson
e50b05d93c agent/pci: Add type to represent PCI addresses
Add a new pci::Address type which represents a guest PCI address in
DDDD:BB:SS.F form.

fixes #2745

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-10-08 16:52:49 +11:00
David Gibson
8528157b9b agent/pci: Extend Slot type to represent PCI function as well
pci::Slot represents a PCI slot.  However, in all cases where we use it, we
actually care about addressing a specific PCI function.  So, at the moment
we can only refer to function 0 in each slot.

Replace pci::Slot with pci::SlotFn to represent both the slot and function.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-10-08 16:52:49 +11:00
Fupan Li
988eb95621 Merge pull request #2760 from liubin/fix/2759-optimize-code-for-managing-temp-users
runtime: optimize code for managing temp users for rootless mode
2021-10-08 13:49:14 +08:00
bin
bf8f582c1d runtime: optimize code for managing temp users for rootless mode
This commit does two changes:

- move code for managing temp users to rootless.go.
- use common function in qemu.go when shutdown the VM.

Fixes: #2759

Signed-off-by: bin <bin@hyper.sh>
2021-10-08 11:04:21 +08:00
Eric Ernst
011c58d626 Merge pull request #2517 from sameo/topic/agent-config
Agent configuration file and API restriction
2021-10-07 08:54:51 -07:00
Bin Liu
10ec4b133c Merge pull request #2742 from liubin/fix/2741-delete-file-code
Delete file virtcontainers-setup.sh
2021-10-07 11:54:47 +08:00
bin
a9c2a4ba8e GitHubActions: fix invalid format of require-pr-porting-labels.yaml
The yaml file has an indent issue from line 15.

And the branches filter should be under pull_request_target but
not the pull_request trigger.

Also actions/checkout@v2 does not need the token parameter.

Fixes: #2798

Signed-off-by: bin <bin@hyper.sh>
2021-10-07 10:23:44 +08:00
David Gibson
c4236cb2d1 packaging/kernel: Add CONFIG_PCI_MMCONFIG to x86 guest kernel configuration
The guest kernel configuration suggested for Kata, and which is used by the
CI didn't include CONFIG_PCI_MMCONFIG.  That's kind of weird, MMCONFIG is
the modern normal way of handling configuration cycles.

In addition, due to a complex set of interactions through the ACPI code,
disabling MMCONFIG means that SHPC hotplug doesn't work: the driver is
included in the guest kernel, but will fail to probe on PCI to PCI bridges,
meaning it won't actually be activated.

Enable MMCONFIG so that we suggest and test a more typical guest kernel
configuration.

fixes #2288

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-10-07 13:21:48 +11:00
Samuel Ortiz
08360c981d agent: Add an agent configuration file example
With all endpoints allowed.

Signed-off-by: Samuel Ortiz <s.ortiz@apple.com>
2021-10-07 04:04:52 +02:00
Samuel Ortiz
8a4e69d237 agent: rpc: Return UNIMPLEMENTED for not allowed endpoints
From the endpoints string described through the configuration file, we
build a hash set of allowed endpoints. If a configuration file does not
include an endpoints section, we assume all endpoints are not allowed.
If there is no configuration file, then all endpoints are allowed.

Then for every ttrpc request, we check if the name of the endpoint is
part of the hashset. If it is not, then we return ttrpc::UNIMPLEMENTED.

Fixes: #1837

Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
2021-10-07 04:04:32 +02:00
Samuel Ortiz
0ea2e3af07 agent: config: Allow for building the configuration from a file
When the kernel command line includes an agent.config_file=<path> entry,
then we will try to override the default configuration values with the
ones we parse from a TOML file at <path>.

As the configuration file overrides the default values, we need to go
through a simplified builder that converts a set of Option<> fields into
the actual AgentConfig structure.

Fixes: #1837

Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
2021-10-07 00:37:40 +02:00
Samuel Ortiz
63539dc9fd agent: config: Add allowed endpoints
They will define the list of endpoints that an agent supports.
They're empty and non actionable for now.

Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
2021-10-07 00:37:40 +02:00
Samuel Ortiz
a953fea324 agent: config: Simplify configuration creation
We don't need a constructor and derive directly from the command line
parsing.

Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
2021-10-07 00:37:40 +02:00
Samuel Ortiz
b888edc2fc agent: config: Implement Default
A single constructor setting default value is a typical pattern for a
Default implementation.

Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
2021-10-07 00:37:40 +02:00
Fabiano Fidêncio
4cde619c68 Merge pull request #2797 from fidencio/wip/upgrade-vendored-containerd
vendor: Update containerd to v1.5.7
2021-10-06 21:05:44 +02:00
Chelsea Mafrica
6e3fcce2a2 Merge pull request #2748 from liubin/fix/2747-add-test
runtime: Optimize func noNeedForOutput and add test cases
2021-10-06 11:24:57 -07:00
Fabiano Fidêncio
04cdf5b1f0 Merge pull request #2774 from Jakob-Naucke/fix-s390x-alignment
runtime: Fix !x86 static checks
2021-10-06 19:57:00 +02:00
Jianyong Wu
7eac2ec786 protection: add confidential compute frame for arm
Even though CCA, which is the confidential compute architecture, is not yet
ready, add an empty implementation to avoid static check errors.

Fixes: #2789
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
Suggested-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-10-06 15:53:36 +02:00
Jianyong Wu
8acfc154de check: fix typecheck failure in qemu_arm64_test.go
fix typecheck failure in qemu_arm64_test.go

Fixes: #2789
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2021-10-06 15:53:35 +02:00
Amulya Meka
5b02d54e23 virtcontainers: fix lint failure on ppc64le
Add nolint for arch specific code to exclude
from lint check.

Fixes: #2773

Signed-off-by: Amulya Meka <amulmek1@in.ibm.com>
2021-10-06 15:53:35 +02:00
Jakob Naucke
ff9728f032 virtcontainers: nolint guestProtection
Exclude from lint checking for it is ultimately only used in
architecture-specific code.

Fixes: #2273
Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-10-06 15:53:35 +02:00
Jakob Naucke
5c138c8f12 runtime: Fix field alignment on s390x
Follow-up of #2237 for s390x -- field alignment isn't always minimal

Fixes: #2773
Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-10-06 15:53:35 +02:00
Fabiano Fidêncio
191d001610 vendor: Update containerd to v1.5.7
Bump containerd to v1.5.7 in order to bring in a fix for CVE-2021-41103,
"insufficiently restricted permissions on plugin directories
(https://github.com/advisories/GHSA-c2h3-6mxw-7mvq)".

dependabot found a potential security vulnerability and raised a PR to
fix it.  However, dependabot neither properly follows nor understands
the needs of our CIs (mainly related to formatting the PR and whatnot),
thus I'm re-raising it.

Fixes: #2796
Supersedes: #2787

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-10-06 10:40:43 +02:00
Chelsea Mafrica
1f6a551570 Merge pull request #2755 from paleozogt/centos-ppc64le-gpg
#2676: fixing centos gpg key url for ppc64le
2021-10-05 09:37:58 -07:00
Eric Ernst
2bc7561561 Merge pull request #2769 from sameo/topic/agent-route
Pass the host route IP family to the guest
2021-10-05 07:20:33 -07:00
Chelsea Mafrica
db7d3b91bd Merge pull request #2780 from Amulyam24/checks
cmd: get return value for setCPUtype
2021-10-04 22:19:59 -07:00
Bin Liu
f7f6bd0142 kata-monitor: add index page
Add an index page to the kata-monitor endpoint.

Porting of https://github.com/liubin/kata-containers/commit/a45aa0696d55

Fixes: #2292
Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-10-04 18:13:56 +02:00
Archana Shinde
5fd963530d Merge pull request #2756 from gkurz/fix-enable-pie
packaging: Configure QEMU with --enable-pie
2021-10-04 03:50:00 -07:00
Samuel Ortiz
a44cde7e8d agent: netlink: Use the grpc IP family field when updating the route
Not all routes have either a gateway or a destination IP.
Interface routes, where the source, destination and gateway are undefined,
will default to IP v4 with the current is_ipv6() check even when they
are v6 routes.

We use the provided gRPC Route.Family field instead. This field is built
from the host netlink messages, and is a reliable way of finding out
a route's IP family.

Fixes: #2768

Signed-off-by: Samuel Ortiz <s.ortiz@apple.com>
2021-10-01 14:39:46 +02:00
Samuel Ortiz
71ce6cfe9e runtime: Pass the route IP family to the agent
When updating the guest routing table, we should forward the IP family
information up to the guest.

Signed-off-by: Samuel Ortiz <s.ortiz@apple.com>
2021-10-01 14:35:17 +02:00
Samuel Ortiz
99450bd1f7 agent: protos: Add a Family field to the Route payload
Our check for the IP family is working as long as we have either a
gateway or a destination IP. Some routes are missing both.
The RT netlink messages provide the IP family information for each
route, so we can carry that piece of information up to the guest. That
will allow for a more reliable route IP family determination.

Signed-off-by: Samuel Ortiz <s.ortiz@apple.com>
2021-10-01 14:35:17 +02:00
Samuel Ortiz
f85fe70231 runtime: vendor: Bump the netlink package dependency
We need to be able to get the IP family from the netlink route messages,
and the Route.Family field only got recently added to the netlink
package.

The update generates static check warnings about the call for
nethandler.Delete() being deprecated in favor of a Close() call instead.
So we include the s/Delete()/Close()/ change as part of this PR.

Signed-off-by: Samuel Ortiz <s.ortiz@apple.com>
2021-10-01 14:35:01 +02:00
Amulya Meka
e439cec7c5 cmd: fix field alignment on ppc64le
Optimising structure field alignment.

Fixes: #2779

Signed-off-by: Amulya Meka <amulmek1@in.ibm.com>
2021-10-01 11:45:27 +00:00
Amulya Meka
e5159ea755 cmd: get return value for setCPUtype
Accept and assert the return value in testSetCPUTypeGeneric.

Fixes: #2779

Signed-off-by: Amulya Meka <amulmek1@in.ibm.com>
2021-10-01 11:44:14 +00:00
Bo Chen
fd5c858390 Merge pull request #2751 from jodh-intel/clh-fix-guest-output
clh: Enable guest userland output
2021-09-30 09:05:30 -07:00
James O. D. Hunt
2ce8d4263c clh: Suppress hypervisor output to make guest output visible
Reduce the cloud-hypervisor log level from `Debug` to `Info` when hypervisor
debug is enabled. This is required since `Debug` level:

- Is overkill for debugging hypervisor failures.
- Effectively hides the output from the guest kernel and userland: CLH
  generates so much output that the output from the guest gets "lost in
  the noise" (experiments show that for each full CLH debug message, at most
  1 _byte_ of guest output is displayed).

Fixes: #2726.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-09-30 14:22:09 +01:00
Greg Kurz
cd1064b16f packaging: Configure QEMU with --enable-pie
We explicitly set the Position Independent Executable (PIE) options
in the extra CFLAGS and LDFLAGS that are passed to the QEMU configure
script for all archs. This means that these options are used pretty
much everywhere, including when building the sample plugins under the
test directory. These cannot be linked with -pie and break the build,
as experienced recently on ARM (see PR #2732).

This only broke on ARM because other archs are configured with
--disable-tcg : this disables plugins which are built by default
otherwise.

The --enable-pie option is all that is needed. The QEMU build system
knows which binaries should be created as PIE, e.g. the important
bits like QEMU and virtiofsd, and which ones should not, e.g. the
sample plugins that aren't used in production.

Rely on --enable-pie only, for all archs. This allows to drop the
workaround that was put in place in PR #2732.

Fixes: #2757
Signed-off-by: Greg Kurz <groug@kaod.org>
2021-09-30 11:17:41 +02:00
Jakob Naucke
8739a73dd3 Merge pull request #2736 from Amulyam24/kata-check-test
cmd: Fix mismatched types in testModuleData
2021-09-30 10:20:19 +02:00
bin
762922a521 runtime: delete func ConstraintsToVCPUs
ConstraintsToVCPUs is not used any more.

Fixes: #2741

Signed-off-by: bin <bin@hyper.sh>
2021-09-30 14:44:41 +08:00
bin
4f4854308a runtime: delete virtcontainers-setup.sh
This file is not used anymore.

Fixes: #2741

Signed-off-by: bin <bin@hyper.sh>
2021-09-30 14:44:30 +08:00
Chelsea Mafrica
96c033ba6c Merge pull request #2763 from liubin/fix/2762-update-gitignore
runtime: update .gitignore to ignore monitor_address file
2021-09-29 09:45:57 -07:00
Carlos Venegas
7183de47df Merge pull request #2766 from YchauWang/wyc-runtime-cmd
runtime: fix the make check-go-static command error
2021-09-29 10:53:02 -05:00
Aaron Simmons
80f6b97710 osbuilder: fixing centos gpg key url for ppc64le
The centos ppc64le gpg key at mirror.centos.org doesn't exist (link rot?).
Replacing it with url from CentOS/sig-core-AltArch on github.

Fixes: #2676

Signed-off-by: Aaron Simmons <paleozogt@gmail.com>
2021-09-29 09:20:51 -06:00
Bin Liu
4ac7199282 Merge pull request #2494 from rapiz1/clean-up-code
virtcontainers: clean up useless code
2021-09-29 22:56:13 +08:00
wangyongchao.bj
bb99bfb45d runtime: fix the make check-go-static command error
modify the make script of the check-go-static, changing the `./cli` path to `./cmd/kata-runtime`

Fixes: #2765

Signed-off-by: wangyongchao.bj <wangyongchao.bj@inspur.com>
2021-09-29 15:37:25 +08:00
David Gibson
b57613f53e Merge pull request #1682 from dgibson/rescan
Remove forced PCI rescans from agent
2021-09-29 13:03:55 +10:00
bin
870771d76d runtime: update .gitignore to ignore monitor_address file
Running tests sometimes generates pkg/containerd-shim-v2/monitor_address,
and `git status` will treat it as a new file.

Package containerd-shim-v2 has moved to pkg/containerd-shim-v2,
the monitor_address in .gitignore should be updated too.

Fixes: #2762

Signed-off-by: bin <bin@hyper.sh>
2021-09-29 09:24:14 +08:00
Chelsea Mafrica
20f4c252b8 Merge pull request #2519 from jcvenegas/kernel-experimental-5.13.10
kernel: Enable SGX in experimental kernel.
2021-09-28 11:00:46 -07:00
Fupan Li
823818cfbc Merge pull request #2744 from fengwang666/nil-bug
runtime: fix nil reference in cleanup rootless user
2021-09-28 22:43:24 +08:00
Fabiano Fidêncio
f9ecaaa6be Merge pull request #2732 from jongwu/plugin
qemu: prepare to upgrade qemu version to 6.1.0 for arm
2021-09-28 12:12:48 +02:00
bin
46720c61c1 runtime: set tags for trace span
Set tags for trace span in hook.go and remove FIXME.

Fixes: #2749

Signed-off-by: bin <bin@hyper.sh>
2021-09-28 18:05:03 +08:00
bin
18bff58487 runtime: Optimize func noNeedForOutput and add test cases
Optimize func noNeedForOutput and add test cases for this func.

Fixes: #2747

Signed-off-by: bin <bin@hyper.sh>
2021-09-28 16:58:44 +08:00
Feng Wang
e5fe53f0a9 runtime: fix nil reference in cleanup rootless user
It seems the client (crio) can send multiple requests to stop the Kata VM,
resulting in a nil reference if the uid has already been cleaned up by a different thread.

Fixes #2743

Signed-off-by: Feng Wang <feng.wang@databricks.com>
2021-09-27 21:28:47 -07:00
Chelsea Mafrica
0b087a873d Merge pull request #2739 from fgiudici/kata-monitor_improvements3
kata-monitor (minor) improvements
2021-09-27 15:45:21 -07:00
Francesco Giudici
2304a59601 runtime: set the sandbox storage path static
Since we now have "unix://" kind of socket returned by the
SocketAddress() function, there is no more need to build the sandbox
storage path dynamically to keep OS compatibility.

Fixes: #2738
Suggested-by: Christophe de Dinechin <dinechin@redhat.com>
Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-27 15:57:34 +02:00
Francesco Giudici
315295e0ef runtime: rename GetSanboxesStoragePath() --> GetSandboxesStoragePath()
Add the missing 'd'.

Fixes: #2738
Suggested-by: Jakob Naucke <jakob.naucke@ibm.com>
Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-27 15:56:14 +02:00
Bin Liu
3217b03b17 Merge pull request #2522 from Bevisy/main-2515
virtcontainers: Fix incorrect scripts path
2021-09-27 21:14:40 +08:00
Bin Liu
39df808f6a Merge pull request #2695 from YchauWang/wyc-vc-cgroup
runtime: clear virtcontainers cgroup duplicated function
2021-09-27 21:12:39 +08:00
Amulya Meka
13e65f2ee8 cmd: Fix mismatched types in testModuleData
Rectify the values of testModuleData with the correct
types in TestCCCheckCLiFunction in kata-check_(!x86)_test.go

Fixes: #2735

Signed-off-by: Amulya Meka <amulmek1@in.ibm.com>
2021-09-27 07:17:07 +00:00
Peng Tao
05995632c3 Merge pull request #2566 from fgiudici/kata-monitor_improvements
Kata monitor: cache improvements
2021-09-27 12:29:13 +08:00
Carlos Venegas
da42cbc0a7 actions: Build experimental kernel on kata-deploy push action
Build experimental kernel on kata-deploy push action.

Signed-off-by: Carlos Venegas <jose.carlos.venegas.munoz@intel.com>
2021-09-27 03:39:50 +00:00
Carlos Venegas
dffc50928a kernel: Enable SGX in experimental kernel.
Enable Intel SGX support in experimental kernel.

Fixes: #2518

Signed-off-by: Carlos Venegas <jose.carlos.venegas.munoz@intel.com>
2021-09-27 03:39:50 +00:00
Carlos Venegas
ff6a677d16 kernel-build: Enable multiple config types.
Optional build types are common for early adoption.
Let's add a flag to build an optional config.

e.g.
kernel-build.sh -b experimental

In the future, instead of adding more flags, just add a new build type.

Signed-off-by: Carlos Venegas <jose.carlos.venegas.munoz@intel.com>
2021-09-27 03:39:50 +00:00
Carlos Venegas
90046964ef experimental-kernel: bump 5.13.10
Upgrade Linux kernel to latest stable release.

Signed-off-by: Carlos Venegas <jose.carlos.venegas.munoz@intel.com>
2021-09-27 03:39:50 +00:00
Carlos Venegas
1fbb73041b build: kata-deploy kernel experimental
Allow building the experimental kernel from kata-deploy.

Signed-off-by: Carlos Venegas <jose.carlos.venegas.munoz@intel.com>
2021-09-27 02:56:59 +00:00
David Gibson
907459c1c1 agent/device: Don't force PCI rescans
The agent initiates a PCI rescan from two places.  One is triggered
for each virtio-blk PCI device, and one is triggered unconditionally
when we start a new container.

The PCI bus rescan code was added a long time ago in Clear Containers due to
lack of ACPI support in QEMU 2.9 + q35.  Since Kata routinely plugs devices
under a PCIe-to-PCI bridge, that left SHPC as the only available hotplug
mechanism.

However, while Kata was using SHPC on the qemu side, it wasn't actually
using it on the guest side.  Due to a quirk of our guest kernel
configuration, the SHPC driver never bound to the bridge, and *no* hotplug
was working at all.  To work around that, Kata was forcing the rescan
manually, which would discover the new device.  That was very fragile (we
were arguably relying on a kernel bug).  Even if we were using SHPC
properly, it includes a mandatory 5s delay during plug operations
(designed for physical cards and human operators), which makes it
unsuitable for quick start up.

Worse, the forced PCI rescans could race with either SHPC or PCIe native
hotplug sequences, causing several problems.  In some cases this could put
the device into an entirely broken state where it wouldn't respond to
config space accesses at all.

Since pull request #2323 was merged, we have instead used ACPI hotplug
which is both fast, and more solid in terms of semantics and races.  So,
the forced PCI rescans are no longer necessary.  Remove them all.

fixes #683

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
David Gibson
75f426dd1e agent: Simplify do_add_swap()
do_add_swap() has some mildly complex code to translate the PCI path of
a virtio-blk device (where the swap will reside) into a /dev path. However,
the device module already has get_virtio_blk_pci_device_name() which does
exactly that.  The existing code has some further advantages: it uses
more precise matching of the sysfs paths, and if necessary it will wait for
the device to be added to the guest.

While we're there, remove an unnecessary 'as u8' from the PCI path
construction: pci::Path::new() already accepts anything which implements
TryInto<u8>, which u32 certainly does.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
David Gibson
aad1a8734f runtime/device: Give the agent information about VFIO devices
We send information about several kinds of devices to the agent so
that it can apply specific handling.  We don't currently do this with
VFIO devices.  However we need to do that so that the agent can
properly wait for VFIO devices to be ready (previously it did that
using a PCI rescan which may not be reliable and has some very bad
side effects).

This patch collates and sends the relevant information.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
David Gibson
ebd7b61884 runtime: Don't repeat GetDeviceByID between appendDevices() and append*()
Both appendBlockDevice and appendVhostUserBlkDevice start by using
GetDeviceByID to lookup the api.Device object corresponding to their
ContainerDevice object.  However their common caller, appendDevices() has
already done this.

This changes it so the looked up api.Device is passed to the individual
append*Device() functions.  This slightly reduces duplicated work, but more
importantly it makes it clearer that append*Device() don't need to check
for a nil result from GetDeviceByID, since the caller has already done
that.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
David Gibson
ad45c52fbe runtime/device: Record guest PCI path for VFIO devices
For several device types which correspond to a PCI device in the guest
we record the device's PCI path in the guest.  We don't currently do
that for VFIO devices, but we're going to need to for better handling
of SR-IOV devices.

To accomplish this, we have to determine the guest PCI path from the
information the VMM gives us:

For qemu, we query the slot of the device and its bridge from QMP.

For cloud-hypervisor, the device add interface gives us a guest PCI
address.  In fact this represents a design error in the clh API -
there's no way it can really know the guest PCI address in general.
It works in this case, because clh doesn't use PCI bridges, so the
device will always be on the root bus.  Based on that, the PCI path is
simply the device's slot number.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
David Gibson
5c2af3e308 runtime/device: Refactor hotplugVFIODevice() to have common exit path
hotplugVFIODevice() has several different paths depending if we're
plugging into a root port or a PCIE<->PCI bridge and if we're using a
regular or mediated VFIO device.

We're going to want some common code on the successful exit path here,
so refactor the function to allow that without duplication.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
David Gibson
8bc71105f4 agent/device: Add device type for VFIO devices
Currently, VFIO devices attached to a Kata container aren't described to
the agent at all.  We essentially just hope they're ready by the time
we've entered the container proper, which is usually the case because of
the PCI rescan - but that causes other problems.

This adds a new device type to the agent representing VFIO devices.  The
agent will use its existing uevent watching mechanisms to wait for the
associated guest PCI device to appear before proceeding.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
David Gibson
f7a2707505 agent: Move driver type constants into device.rs
Currently the constants giving the names for each device/driver type in
the protocol are in mount.rs, and used in device.rs.  Since these constants
are inherently related to, well, devices, it makes more sense to put them
in device.rs and use them from mount.rs.

This will become even more so with planned extensions which will add some
device types that will not be used in mount.rs at all.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
David Gibson
5b1eb08bde agent/uevent: Improve logging of wait_for_uevent()
These messages will help when debugging matchers not matching properly.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
David Gibson
cf36fd87ad runtime: Fix some leftover go fmt errors
A few "go fmt" errors appear to have crept in.  Clean them up with
"go fmt ./..." in the src/runtime directory.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
Jianyong Wu
6d94957a14 kernel: reduce alignment size of memory hotplug to 128M
After 5.11-rc4, memory hotplug alignment size is reduced to 128M for 4K
page.
It works better for memory hotplug and nvdimm plug in kata on arm.
Without this patch, memory hotplug will fail because the current memory
hotplug alignment is 1G but the nvdimm size aligns with 128M in kata.
After porting it here, we can avoid a fix on the qemu side.

Note: if you change the page size to a size other than 4K, memory hotplug
will have no effect.

Fixes: #2707
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2021-09-26 15:33:33 +08:00
Jianyong Wu
48090f624a qemu: disable plug on arm64 when pie is added
For qemu 6.1.0 build on arm64, compile error occurs when "-pie" is added
 to ldflag.
tests/plugins/empty.c won't be linked as a symbol is missing.
I think there may be a bug.
Until we figure it out, we should disable plugins for qemu 6.1.0 on arm64.

Fixes: #2707
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2021-09-26 15:33:33 +08:00
Fabiano Fidêncio
c811dd7484 Merge pull request #2720 from Kvasscn/kata_dev_virtiofsd_ctx
virtiofs: fix error report in TestVirtiofsdStart when go test running
2021-09-25 12:17:00 +02:00
Fabiano Fidêncio
ed705482a2 Merge pull request #2730 from fidencio/wip/release-fix-using-vendored-sources
workflows: Fix the config file path for using vendored sources
2021-09-24 23:37:10 +02:00
Fabiano Fidêncio
a525991c2c workflows: Fix the config file path for using vendored sources
There's a typo in the file that should receive the output of `cargo
vendor`.  We should forward the output to `.cargo/config` instead of
`.cargo/vendor`.

This was introduced by 21c8511630.

Fixes: #2729

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-09-24 20:26:27 +02:00
Fabiano Fidêncio
9ad44750e8 Merge pull request #2727 from fidencio/wip/fix-wrong-tags-attribution
workflows: Fix tag attribution
2021-09-24 19:28:33 +02:00
Fabiano Fidêncio
39dcbaa672 workflows: Fix tag attribution
While releasing kata-containers 2.3.0-alpha1 we've hit some issues as
the tags attribution is done incorrectly.  We want an array of tags to
iterate over, but the current code is just lost in the parentheses.

This issue was introduced in a156288c1f.

Fixes: #2725

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-09-24 17:58:50 +02:00
Fabiano Fidêncio
832d57c960 Merge pull request #2722 from fidencio/2.3.0-alpha1-branch-bump
# Kata Containers 2.3.0-alpha1
2021-09-24 15:03:06 +02:00
Fabiano Fidêncio
04139ba686 release: Kata Containers 2.3.0-alpha1
- virtiofs: Create shared directory with 0700 mode, not 0750
- watcher: ensure we create target mount point for storage
- packaging: fix qemu build on ppc64le
- runtime: tracing: Use root context to stop tracing
- Replace SHPC with ACPI PCI hotplug for Kata guests
- kata-deploy: Also provide "stable" & "latest" tags
- runtime: tracing: Fix logger passed in newContainer
- virtcontainers: update VC SandboxConfig API add SandboxBindMounts field
- sandbox: Allow the device to be accessed,such as /dev/null and /dev/u…
- qemu: add v5.1.0 dir under tag_patches
- threat-model: Add missing threat-model document
- docs: documentation for running non-root VMM
- workflows,release: Upload the vendored cargo code
- runtime: run the QEMU VMM process with a non-root user
- runtime: update .gitignore file cleare the vc shim config
- runtime: fix empty cgroup path validation error
- ci: Call agent shutdown test only in the correspondent CI_JOB
- runtime: Remove outdated TestStoreContainer
- runtime: refactor commandline code directory
- virtcontainers: update VC HypervisorConfig API add three lost fields
- virtcontainers: add unit tests for container.go
- runtime: clh: Enable hugepages support
- agent: Simplify mount point creation
- versions: Allow newer Rust versions
- runtime/qemu: Move from query-cpus to query-cpus-fast
- Update Kata to use qemu-6.1
- Host cgroups improvements and simplifications
- Add doc for guest swap
- versions: Upgrade to Cloud Hypervisor v18.0
- runtime: Fix README link
- qemu: remove default config for arm64.
- sandbox: Add device permissions such as /dev/null to cgroup
- virtcontainers: fc: parse vcpuID correctly
- kata-tarball: Build and test fixes
- test: enable running tests under root user
- osbuilder: Change to "=" operator to make script more portable
- makefile: Fix error exit status code
- osbuilder: fix inconsistent calculation of fs size
- virtcontainers: Remove NewStoreFeature
- snap: Test variable instead of executing "branch"
- license: drop redundent license files
- Fix swap fail insert fail issue

272771dc watcher: ensure we create target mount point for storage
439e5ac3 packaging: fix qemu build on ppc64le
8bbcb06a qemu: Disable SHPC hotplug
cc4983ee runtime: Remove unused qemuArchBase.appendBridges definition
e248de46 vendor: Update govmm
0ca8c272 qemu: add v5.1.0 dir under tag_patches
3bdcfaa6 kata-deploy: Add more info about the stable tag
41c590fa kata-deploy: Improve README
debf3c9f kata-deploy: Remove qemu-virtiofs runtime class
43a72d76 release: update the kata-deploy yaml files accordingly
ea9b2f9c kata-deploy: Add "stable" info to the README
e5411056 kata-deploy: Update the README
9acf4e5d kata-deploy: Add `stable` yaml files
a86babe0 kata-deploy: Point to the `latest` release
a156288c workflows: Add "stable" & "latest" tags to kata-deploy
305afc8b docs: documentation for running non-root VMM
1fe080fd threat-model: Add missing threat-model document
21c85116 workflows,release: Upload the vendored cargo code
9a6d56f1 runtime: fix empty cgroup path validation error
90e63887 ci: Call agent shutdown test only in the correspondent CI_JOB
48fb1d92 virtiofs: Create shared directory with 0700 mode, not 0750
077b77c1 runtime: tracing: Fix logger passed in newContainer
39cd05e0 runtime: tracing: Use root context to stop tracing
1cfe5930 runtime: Run QEMU using a non-root user/group
fd983738 runtime: update .gitignore file cleare the vc shim config
067c44d0 runtime: fix UT build failure
9353cd77 runtime: Remove outdated TestStoreContainer
9a311a2b docs: fix invalid kernel dax doc url
e7c42fbc runtime: unify generated config
4f7cc186 runtime: refactor commandline code directory
9d3cd984 agent/mount: Remove unused ensure_destination_exists()
64aa5623 agent: Correct mount point creation
08d7aebc agent/mount: Split out regular file case from ensure_destination_exists()
9fa3beff agent: Remove unnecessary BareMount structure
49282854 agent: Simplify BareMount::mount by using nix::mount::mount
d00decc9 runtime: clh: Enable hugepages support
64bb803f runtime/qemu: Move from query-cpus to query-cpus-fast
25ac3524 versions: Allow newer Rust versions
851d5f86 tests: Correct heading in static checks test
4b7e4a4c runtime: Vendoring update
8d9d6e6a docs: Host cgroups documentation update
9bed2ade virtcontainers: Convert to the new cgroups package API
b42ed393 virtcontainers: cgroups: Add a containerd API based cgroups package
f17752b0 virtcontainers: container: Do not create and manage container host cgroups
dc7e9bce virtcontainers: sandbox: Host cgroups partitioning
f811026c virtcontainers: Unconditionally create the sandbox cgroup manager
a6066404 virtcontainers: update VC HypervisorConfig API add three lost fields
bb18cd47 virtcontainers: update VC SandboxConfig API add SandboxBindMounts field
58e77a3c sandbox: Allow the device to be accessed,such as /dev/null and /dev/urandom
d67a414b src/runtime/README.md: Fix URL of Licence
13b8bb0c runtime: Fix README link
25670d30 packaging/qemu: Update qemu-exerimental version to v6.1.0
041a513f versions: Update qemu to v6.1.0
62baa48e virtcontainers: fc: parse vcpuID correctly
81de2d47 packaging: Correct error message in apply_patches.sh
f785ff0b virtcontainers: clh: Revert the workaround incorrect default values
0e0e59dc virtcontainers: clh: Re-generate the client code
f0b53314 versions: Upgrade to Cloud Hypervisor v18.0
11652136 actions: test make kata-tarball
626d659f actions: kata-deploy on PRs and use makefile
78d99f51 kata-deploy: Make verbose single builds
59486b85 kata-deploy: Add tarball suffix to makefile targets
96e1246b makefile: Include kata-deploy targets
74d645cd how-to: Add how-to-setup-swap-devices-in-guest-kernel.md
d865c809 virtcontainers: add unit tests for container.go
71f915c6 sandbox: Add device permissions such as /dev/null to cgroup
2174fee4 docs: Add swap annotations introduction
2abc450a test: enable running tests under root user
924a68d0 osbuilder: Change to "=" operator to make script more portable
1fff9be7 qemu: remove default config for arm64.
e2a9e78c virtcontainers: Remove NewStoreFeature
bfcee911 osbuilder: fix inconsistent calculation of fs size
4996f9b7 snap: Test variable instead of executing "branch"
256c3b27 license: drop redundent license files
bcc9fa3b hotplugAddBlockDevice: Use ExecuteBlockdevAddWithDriverCache with swap
bd85da04 vendor: Update vendor/github.com/kata-containers/govmm
d422789f makefile: Fix error exit status code

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-09-24 12:32:26 +02:00
zhanghj
57e3712dbd virtiofs: fix error report in TestVirtiofsdStart when go test running
Initialize ctx with context.Background() instead of nil value.

Fixes: #2718

Signed-off-by: zhanghj <zhanghj.lc@inspur.com>
2021-09-24 16:06:06 +08:00
Fabiano Fidêncio
279f8e9d03 Merge pull request #2590 from c3d/issue/2589-virtiofsd-perms
virtiofs: Create shared directory with 0700 mode, not 0750
2021-09-24 09:16:40 +02:00
Eric Ernst
fa44e5c1e5 Merge pull request #2703 from egernst/watcher-fixup
watcher: ensure we create target mount point for storage
2021-09-23 21:59:08 -07:00
Chelsea Mafrica
e987632deb Merge pull request #2693 from Amulyam24/qemu-build
packaging: fix qemu build on ppc64le
2021-09-23 10:31:34 -07:00
Julio Montes
1766c93b08 Merge pull request #2662 from cmaf/tracing-stop-rootctx
runtime: tracing: Use root context to stop tracing
2021-09-23 11:50:35 -05:00
Eric Ernst
272771dcf9 watcher: ensure we create target mount point for storage
We would only create the target when updating files. We need to make
sure that we create the target if the source is a directory. Without
this, we'll fail to start a container that utilizes an empty configmap,
for example.

Add unit tests for this.

Fixes: #2638

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-09-23 08:29:28 -07:00
Julio Montes
5d2a82fbf9 Merge pull request #2323 from dgibson/acpi-pcihp
Replace SHPC with ACPI PCI hotplug for Kata guests
2021-09-23 09:55:31 -05:00
Francesco Giudici
8b0bc1f45e kata-monitor: bump version to 0.2.0
We now support any container engine CRI compliant. Let's bump the
kata-monitor version to 0.2.0.

Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-23 14:32:09 +02:00
Francesco Giudici
bfb556d56a kata-monitor: refresh kata sandbox list on fs events
This commit stops the container engine polling in favor of
the kata sandbox storage path monitoring.
The pod cache list is now refreshed based on fs events and synced with
the container engine only when needed.

Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-23 14:32:09 +02:00
Francesco Giudici
0e854f3b80 kata-monitor: improve detection of kata workloads
When the container engine is different than containerd or CRI-O we
lack proper detection of kata workloads and consider all the pods as
kata ones.
Instead of querying the container engine for the lower level runtime
used in each pod, check if a directory matching the pod exists in
the virtualcontainers sandboxes storage path.
This provides a container engine independent way to check for kata pods.

Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-23 14:32:09 +02:00
Yuanzhe Liu
80463b445a qemu: use GitLab repos instead of qemu.org
arm is using qemu 5.1.0, and thus is affected by the weird submodules
link.

Fixes: #2705
Signed-off-by: Yuanzhe Liu <yuanzheliu09@gmail.com>
2021-09-23 12:07:44 +00:00
Samuel Ortiz
3276f3b5b6 Merge pull request #2453 from fidencio/wip/kata-deploy-use-stable-and-latest-tags
kata-deploy: Also provide "stable" & "latest" tags
2021-09-23 13:54:01 +02:00
Fabiano Fidêncio
0ececc630f Merge pull request #2666 from cmaf/tracing-newContainer-logger
runtime: tracing: Fix logger passed in newContainer
2021-09-23 13:07:19 +02:00
Fabiano Fidêncio
e33c26ba18 Merge pull request #2622 from YchauWang/wyc-vc-api
virtcontainers: update VC SandboxConfig API add SandboxBindMounts field
2021-09-23 13:05:33 +02:00
Fabiano Fidêncio
47170e302a Merge pull request #2616 from Bevisy/main-2615
sandbox: Allow the device to be accessed,such as /dev/null and /dev/u…
2021-09-23 13:04:18 +02:00
Amulya Meka
439e5ac3b0 packaging: fix qemu build on ppc64le
Since the qemu upgrade to v6.1.0, the build fails
with a linking issue. Adding --disable-tcg to fix
it.

Fixes: #2710

Signed-off-by: Amulya Meka <amulmek1@in.ibm.com>
2021-09-23 06:27:15 +00:00
David Gibson
8bbcb06af5 qemu: Disable SHPC hotplug
Under certain circumstances[0] Kata will attempt to use SHPC hotplug
for PCI devices on the guest.  In fact we explicitly enable SHPC on
our PCI to PCI bridges, regardless of the qemu default.

SHPC was designed a long, long time ago for physical hotplugging and
works very poorly for a virtual environment. In particular it has a
mandatory 5s delay to allow a (real, human) operator to back out the
operation if they press a button by mistake. This alone makes it
unusable for a fast start up application like Kata.

Worse, the agent forces a PCI rescan during startup.  That will race
with the SHPC hotplug operation causing the device to go into a bad
state where config space can't be accessed from the guest at all.

The only reason we've sort of gotten away with this is that our
default guest kernel configuration triggers what's arguably a kernel
bug effectively disabling SHPC.  That makes the agent rescan the only
reason we see the new device.

Now that we require a qemu >=6.1, which includes ACPI PCI hotplug on
the q35 machine, we can explicitly disable SHPC in all cases.  It's
nothing but trouble.

fixes #2174

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-23 10:27:26 +10:00
David Gibson
cc4983eeac runtime: Remove unused qemuArchBase.appendBridges definition
qemuArchBase.appendBridges is never actually used, because the bare
qemuArchBase type is itself never used (outside of unit tests).  Instead
*all* the subclasses of qemuArchBase override appendBridges() to call
the very similar, but not identical genericAppendBridges.  So, we can
remove the qemuArchBase.appendBridges implementation.

Furthermore, all those subclasses override appendBridges() in exactly
the same way, and so we can remove *those* definitions and replace the
base class qemuArchBase appendBridges() with that version, calling
genericAppendBridges().

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-23 10:15:08 +10:00
David Gibson
e248de4616 vendor: Update govmm
Update to commit 1b60b536f3, in particular to get extensions to
allow IO and memory window reservations to be set on PCI bridges.

https://github.com/kata-containers/govmm/pull/201

Git log:

de039da govmm/qemu: Let IO/memory reservations be specified for bridge devices

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-23 10:14:29 +10:00
Chelsea Mafrica
b6ff23d21b Merge pull request #2697 from jongwu/patch_qemu
qemu: add v5.1.0 dir under tag_patches
2021-09-22 09:08:05 -07:00
Jianyong Wu
0ca8c27241 qemu: add v5.1.0 dir under tag_patches
A related dir is needed when applying qemu patches using the script. As qemu 5.1
is used for arm, a dir of "v5.1.0" is needed under tag_patches.

Fixes: #2696
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2021-09-22 18:07:24 +08:00
Archana Shinde
771a934fc5 Merge pull request #2341 from amshinde/add-threat-model
threat-model: Add missing threat-model document
2021-09-22 01:17:05 -07:00
wangyongchao.bj
3b0c4bf9a0 runtime: clear virtcontainers cgroup duplicated function
There are two functions, `DeviceToDeviceCgroup` and `deviceToDeviceCgroup`,
both creating a `specs.LinuxDeviceCgroup` object. Remove the newer function, `deviceToDeviceCgroup`.

Fixes: #2694

Signed-off-by: wangyongchao.bj <wangyongchao.bj@inspur.com>
2021-09-22 15:13:34 +08:00
Fabiano Fidêncio
3bdcfaa658 kata-deploy: Add more info about the stable tag
Let's make it as clear as possible for the user that if they go for a
tagged version of kata-deploy, eg, 2.2.1, they'll have the kata runtime
2.2.1 deployed on their cluster.

Suggested-by: Eric Adams <eric.adams@intel.com>
Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-09-21 23:13:45 +02:00
Fabiano Fidêncio
41c590fa0a kata-deploy: Improve README
Let's add more instructions in the README in order to make clear to the
reader what they can do to check whether kata-deploy is ready, or
whether they have to wait till proceeding with the next instruction.

Suggested-by: Eric Adams <eric.adams@intel.com>
Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-09-21 23:13:45 +02:00
Fabiano Fidêncio
debf3c9fe9 kata-deploy: Remove qemu-virtiofs runtime class
There's only one QEMU runtime class deployed as part of kata-deploy, and
that includes virtiofs support (which is the default for quite some time
already).  Knowing this, let's just remove the `qemu-virtiofs` runtime
class definition.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-09-21 22:48:04 +02:00
Fabiano Fidêncio
43a72d76e2 release: update the kata-deploy yaml files accordingly
Let's teach our `update-repository-version.sh` script to properly update
the kata-deploy tags on both kata-deploy and kata-cleanup yaml files.

The 3 scenarios that we're dealing with, based on which branch we're
targeting, are:
```
 1) [main] ------> [main]        NO-OP
   "alpha0"       "alpha1"

                   +----------------+----------------+
                   |      from      |       to       |
  -----------------+----------------+----------------+
  kata-deploy      | "latest"       | "latest"       |
  -----------------+----------------+----------------+
  kata-deploy-base | "stable"       | "stable"       |
  -----------------+----------------+----------------+

 2) [main] ------> [stable] Update kata-deploy and
   "alpha2"         "rc0"   get rid of kata-deploy-base

                   +----------------+----------------+
                   |      from      |       to       |
  -----------------+----------------+----------------+
  kata-deploy      | "latest"       | "rc0"          |
  -----------------+----------------+----------------+
  kata-deploy-base | "stable"       | REMOVED        |
  -----------------+----------------+----------------+

 3) [stable] ------> [stable]    Update kata-deploy
    "x.y.z"         "x.y.(z+1)"

                   +----------------+----------------+
                   |      from      |       to       |
  -----------------+----------------+----------------+
  kata-deploy      | "x.y.z"        | "x.y.(z+1)"    |
  -----------------+----------------+----------------+
  kata-deploy-base | NON-EXISTENT   | NON-EXISTENT   |
  -----------------+----------------+----------------+
```

And we can easily cover those 3 cases only with the information about
the "${target_branch}" and the "${new_version}", where:
* case 1) if "${target_branch}" is "main" *and* "${new_version}"
  contains "alpha", do nothing
* case 2) if "${target_branch}" is "main" *and* "${new_version}"
  contains "rc":
  * change the kata-deploy & kata-cleanup tags from "latest" to
    "${new_version}".
  * delete the kata-deploy-stable & kata-cleanup-stable files.
* case 3) if the "${target_branch}" contains "stable":
  * change the kata-deploy & kata-cleanup tags from "${current_version}"
    to "${new_version}".

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-09-21 22:48:04 +02:00
Fabiano Fidêncio
ea9b2f9c92 kata-deploy: Add "stable" info to the README
Similar to the instructions we have for the "latest" images, let's also
add instructions about the "stable" images.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-09-21 22:48:04 +02:00
Fabiano Fidêncio
e541105680 kata-deploy: Update the README
Let's just point to our repo URLs rather than assume users using
kata-deploy will have our repo cloned.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-09-21 22:48:04 +02:00
Fabiano Fidêncio
9acf4e5d32 kata-deploy: Add stable yaml files
This is **not** the nicest patch of my career, and I know it adds code
duplication.  However, I've decided to take this approach in order to
have easier / better instructions for users who're consuming
kata-deploy.

Having both stable & latest yaml on `main` will let us point to just one
place, without having to update the instructions.

I know, would be better to have those generated from a .in file,
wouldn't it?  For sure, but then we'd lose the ability to just point to
those files from kata-deploy pages (either on dockerhub or quay.io).

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-09-21 22:48:04 +02:00
Fabiano Fidêncio
a86babe0d0 kata-deploy: Point to the latest release
Instead of pointing to a specific release number, let's point to the
`latest` tag on the main branch.

There's still some work needed in order to point to the `stable` tag on
the stable-x.y branches, as this is something that should be done
automagically as part of the release process.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-09-21 22:48:04 +02:00
Fabiano Fidêncio
a156288c1f workflows: Add "stable" & "latest" tags to kata-deploy
When releasing a tarball, let's *also* add the "stable" & "latest" tags
to the kata-deploy image.

The "stable" tag refers to any official release, while the "latest" tag
refers to any pre-release / release candidate.

Fixes: #2302

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-09-21 22:48:04 +02:00
Fabiano Fidêncio
32c3fb71f2 Merge pull request #2546 from fengwang666/rootless-qemu-doc
docs: documentation for running non-root VMM
2021-09-21 22:45:33 +02:00
Fabiano Fidêncio
08e55a279a Merge pull request #2573 from fidencio/wip/upload-cargo-vendored-tarball
workflows,release: Upload the vendored cargo code
2021-09-21 21:45:59 +02:00
Fabiano Fidêncio
2bee8bc6bd Merge pull request #2432 from fengwang666/qemu-rootless
runtime: run the QEMU VMM process with a non-root user
2021-09-21 21:37:02 +02:00
Feng Wang
305afc8b70 docs: documentation for running non-root VMM
Documentation for running non-root QEMU VMM in Kata runtime

Fixes: #2545

Signed-off-by: Feng Wang <feng.wang@databricks.com>
2021-09-21 11:20:37 -07:00
Archana Shinde
1fe080fd24 threat-model: Add missing threat-model document
This was added in the 1.x repo and is missing in the 2.x repo.
Copying over the document from 1.x.
This is a starting point and focuses on the devices / interfaces
with the virtual machine, and ultimately to the container itself.

We then discuss how these devices/interfaces vary by VMM/hypervisor.

The threat model drawing is created via gdocs, located here:
https://docs.google.com/drawings/d/1dPi9DG9bcCUXlayxrR2OUa1miEZXewtW7YCt4r_VDmA/edit?usp=sharing

For Kata 2.x, the block named as `kata-runtime` has been changed to
`kata-shim`.

Fixes: #2340

Signed-off-by: Eric Ernst <eric.ernst@intel.com>
Signed-off-by: Archana Shinde <archana.m.shinde@intel.com>
2021-09-21 20:20:39 +05:30
Samuel Ortiz
3a4aca4d67 Merge pull request #2671 from YchauWang/wyc-runtime-config
runtime: update .gitignore file cleare the vc shim config
2021-09-21 15:15:09 +02:00
Fabiano Fidêncio
21c8511630 workflows,release: Upload the vendored cargo code
As part of the release, let's also upload a tarball with the vendored
cargo code.  By doing this we allow distros, which usually don't have
access to the internet while performing the builds, to just add the
vendored code as a second source, making the life of the downstream
maintainers slightly easier*.

Fixes: #1203

*: The current workflow requires the downstream maintainer to download
the tarball, unpack it, run `cargo vendor`, create the tarball, etc.
Although this doesn't look like a ridiculous amount of work, it's better
if we can have it in an automated fashion.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-09-21 09:28:16 +02:00
Fabiano Fidêncio
9ea78ac386 Merge pull request #2675 from fengwang666/cgroup-bug-fix
runtime: fix empty cgroup path validation error
2021-09-21 08:48:22 +02:00
Feng Wang
9a6d56f1ab runtime: fix empty cgroup path validation error
An empty cgroup path shouldn't fail cgroup creation

Fixes #2674

Signed-off-by: Feng Wang <feng.wang@databricks.com>
2021-09-20 13:48:09 -07:00
GabyCT
c4bafc4e68 Merge pull request #2684 from GabyCT/topic/remoteagenttest
ci: Call agent shutdown test only in the correspondent CI_JOB
2021-09-20 14:13:36 -05:00
Gabriela Cervantes
90e6388726 ci: Call agent shutdown test only in the correspondent CI_JOB
The agent shutdown test should only run on the CI JOB of CRI_CONTAINERD_K8S_MINIMAL
which is the only one where testing tracing is being enabled, however, this
test is being triggered in multiple CI jobs where it should not run. This PR
fixes that issue.

Fixes #2683

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2021-09-20 17:04:48 +00:00
Julio Montes
45d40179c2 Merge pull request #2655 from Jakob-Naucke/no-store-container
runtime: Remove outdated TestStoreContainer
2021-09-20 11:22:50 -05:00
Christophe de Dinechin
48fb1d9203 virtiofs: Create shared directory with 0700 mode, not 0750
A discussion on the Linux kernel mailing list [1] exposed that virtiofsd makes a
core assumption that the file systems being shared are not accessible by any
non-privileged user. We currently create the `shared` directory in the sandbox
with the default `0750` permissions, which gives read and directory traversal
access to the group. There is no real good reason for a non-root user to access
the shared directory, and this is potentially dangerous.

Fixes: #2589

[1]: https://lore.kernel.org/linux-fsdevel/YTI+k29AoeGdX13Q@redhat.com/

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2021-09-20 10:47:18 +02:00
Francesco Giudici
afad910d0e kata-monitor: add getSandboxFS()
Retrieve the absolute sandbox storage path. We will soon need this to
monitor the creation/deletion of new kata sandboxes.

Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-20 10:37:55 +02:00
Francesco Giudici
e38686f74d runtime: add GetSandboxesStoragePath()
The storage path we use to collect the sandbox files is defined in the
virtcontainers/persist/fs package.
We create the runtime socket in that storage path, by hardcoding the
full path in the SocketAddress() function in the runtime package.
This commit splits the hardcoded path by the socket address path so that
the runtime package will be able to provide the storage path to all the
components that may need it.

Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-20 10:37:55 +02:00
Francesco Giudici
245a12bbb7 kata-monitor: improve sandbox caching
In order to retrieve the list of sandboxes, we poll the container engine
every 15 seconds via the CRI. Once we have the list we have to inspect
each pod to find out the kata ones.
This commit extends the sandbox cache to keep track of all the pods,
marking the kata ones, so that during the next polling only the new
sandboxes should be inspected to figure out which ones are using the
kata runtime.

Fixes: #2563
Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-20 10:37:55 +02:00
Francesco Giudici
fc067d61d4 kata-monitor: warn when unable to retrieve the lower level runtime
this is an unexpected event (likely a change in how containerd/cri-o
record the lower level runtime in the pod) and should be more visible:
raise the log level to "warning".

Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-20 10:37:54 +02:00
Francesco Giudici
53ec4df953 kata-monitor: minor fixes
fix comment and use literals

Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-20 10:37:54 +02:00
Chelsea Mafrica
077b77c178 runtime: tracing: Fix logger passed in newContainer
Change logger in Trace call in newContainer from sandbox.Logger() to
nil. Passing nil will cause an error to be logged by kataTraceLogger
instead of the sandbox logger, which will avoid having the log message
report it as part of the sandbox subsystem when it is part of the
container subsystem.

The kataTraceLogger will not log it as related to the container
subsystem, but since the container logger has not been created at this
point, and we already use the kataTraceLogger in other instances where a
subsystem's logger has not been created yet, this PR makes the call
consistent with other code.

Fixes #2665

Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2021-09-17 11:41:04 -07:00
Chelsea Mafrica
39cd05e0bb runtime: tracing: Use root context to stop tracing
Call StopTracing with s.rootCtx, which is the root context for tracing,
instead of s.ctx, which is parent to a subset of trace spans.

Fixes #2661

Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2021-09-17 11:39:13 -07:00
Feng Wang
1cfe59304d runtime: Run QEMU using a non-root user/group
A random generated user/group is used to start QEMU VMM process.
The /dev/kvm group owner is also added to the QEMU process to grant it access.

Fixes #2444

Signed-off-by: Feng Wang <feng.wang@databricks.com>
2021-09-17 11:28:44 -07:00
wangyongchao.bj
fd98373850 runtime: update .gitignore file to clear the vc shim config
update .gitignore file, remove the following configurations:
/virtcontainers/shim/mock/cc-shim/cc-shim
/virtcontainers/shim/mock/kata-shim/kata-shim
/virtcontainers/shim/mock/shim

Fixes: #2670

Signed-off-by: wangyongchao.bj <wangyongchao.bj@inspur.com>
2021-09-17 15:25:28 +08:00
Hui Zhu
fff82b4ef5 Merge pull request #2628 from bergwolf/runtime-reorg
runtime: refactor commandline code directory
2021-09-17 10:37:22 +08:00
Chelsea Mafrica
6159ef3499 Merge pull request #2626 from YchauWang/wyc-vc-api02
virtcontainers: update VC HypervisorConfig API add three lost fields
2021-09-16 16:46:27 -07:00
Peng Tao
067c44d0b6 runtime: fix UT build failure
storeContainer has been removed.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-09-16 19:42:02 +08:00
Jakob Naucke
9353cd77fd runtime: Remove outdated TestStoreContainer
Due to #2332 being merged after running tests for #2604, and the latter
being merged now, a test for the now removed `storeContainer` was added.
Remove it.

Fixes: #2652
Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-09-16 12:26:37 +02:00
Peng Tao
9a311a2b58 docs: fix invalid kernel dax doc url
And use a released version instead of the master branch so that it no
longer gets invalidated.

Depends-on: github.com/kata-containers/kata-containers#2645
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-09-16 17:19:18 +08:00
Peng Tao
e7c42fbc76 runtime: unify generated config
We don't need to maintain two generated config.go and even have
duplicates between them.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-09-16 17:19:18 +08:00
Peng Tao
4f7cc18622 runtime: refactor commandline code directory
Move all command line code to `cmd` and move containerd-shim-v2 to pkg.

Fixes: #2627
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-09-16 17:19:18 +08:00
Samuel Ortiz
7bf96d2457 Merge pull request #2604 from Amulyam24/container_tests
virtcontainers: add unit tests for container.go
2021-09-16 11:02:16 +02:00
Samuel Ortiz
9ed024e0bf Merge pull request #2649 from likebreath/0916/clh_hugepages
runtime: clh: Enable hugepages support
2021-09-16 10:57:34 +02:00
David Gibson
b46adbc527 Merge pull request #2428 from dgibson/simplify-mount-storage
agent: Simplify mount point creation
2021-09-16 14:43:29 +10:00
David Gibson
9d3cd9841f agent/mount: Remove unused ensure_destination_exists()
The only remaining callers of ensure_destination_exists() are in its own
unit tests.  So, just remove it.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-16 12:24:47 +10:00
David Gibson
64aa562355 agent: Correct mount point creation
mount_storage() first makes sure the mount point for the storage volume
exists.  It uses fs::create_dir_all() in the case of 9p or virtiofs volumes
otherwise ensure_destination_exists().  But.. ensure_destination_exists()
boils down to an fs::create_dir_all() in most cases anyway.  The only case
it doesn't is for a bind fstype, where it creates a file instead of a
directory.  But, that's not correct anyway because we need to create either
a file or a directory depending on the source of the bind mount, which
ensure_destination_exists() doesn't know.

The 9p/virtiofs paths also check if the mountpoint exists before calling
fs::create_dir_all(), which is unnecessary (fs::create_dir_all already
handles that case).

mount_storage() does have the information to know what we need to create,
so have it explicitly call ensure_destination_file_exists() for the bind
mount to a non-directory case, and fs::create_dir_all() in all other cases.

fixes #2390

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-16 12:24:47 +10:00
David Gibson
08d7aebc28 agent/mount: Split out regular file case from ensure_destination_exists()
ensure_destination_exists() can create either a directory or a regular file
depending on the arguments.  This patch extracts the regular file specific
option into its own helper: ensure_destination_file_exists().  This:
 - Avoids doing some steps in the directory case (they're already handled
   by create_dir_all())
 - Enables some further future cleanups

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-16 12:24:47 +10:00
David Gibson
9fa3beff4f agent: Remove unnecessary BareMount structure
struct Baremount contains the information necessary to make a new mount.
As a datastructure, however, it's pointless, since every user just
constructs it, immediately calls the BareMount::mount() method then
discards the structure.

Simplify the code by making this a direct function call baremount().

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-16 12:24:47 +10:00
David Gibson
49282854f1 agent: Simplify BareMount::mount by using nix::mount::mount
BareMount::mount does some complicated marshalling and uses unsafe code to
call into the mount(2) system call.  However, we're already using the nix
crate which provides a more Rust-like wrapper for mount(2).  We're even
already using nix::mount::umount and nix::mount::MsFlags from the same
module.

In the same way, we can replace the direct usage of libc::umount() with
nix::mount::umount() in one of the tests.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-16 12:24:47 +10:00
David Gibson
bac849ecba Merge pull request #2634 from dgibson/newer-rust
versions: Allow newer Rust versions
2021-09-16 12:23:37 +10:00
Bo Chen
d00decc97d runtime: clh: Enable hugepages support
This patch adds the configuration option that allows to use hugepages
with Cloud Hypervisor guests.

Fixes: #2648

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-09-15 10:43:57 -07:00
GabyCT
2a26c2397d Merge pull request #2645 from dgibson/query-cpus
runtime/qemu: Move from query-cpus to query-cpus-fast
2021-09-15 10:35:03 -05:00
David Gibson
64bb803fcf runtime/qemu: Move from query-cpus to query-cpus-fast
We recently updated to using qemu-6.1 (from qemu 5.2).  Unfortunately one
breaking change in qemu 6.0 wasn't caught by the CI.

The query-cpus QMP command has been removed, replaced by query-cpus-fast
(which has been available since qemu 2.12).  govmm already had support for
query-cpus-fast, we just weren't using it, so the change is quite easy.

fixes #2643

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-15 16:41:26 +10:00
David Gibson
e7deee948a Merge pull request #2502 from dgibson/qemu-6.1
Update Kata to use qemu-6.1
2021-09-15 11:06:14 +10:00
David Gibson
25ac3524c9 versions: Allow newer Rust versions
Rust 1.47.0 which is the latest we note as tested in versions.yaml is now
getting fairly old - many current distros have newer versions (e.g.
Rust 1.54.0 in Fedora 34).  Bring this more up to date.

Note that this is only updating the 'newest-version', not the minimum
required version.

The new version changes the name of the 'clippy::unknown_clippy_lints'
option to simply 'unknown_lints' so we need to change that as well to avoid
warnings.

fixes #2633

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-15 08:58:28 +10:00
Eric Ernst
e4cb6cbfbb Merge pull request #2332 from sameo/topic/host-cgroups
Host cgroups improvements and simplifications
2021-09-14 09:09:10 -07:00
David Gibson
851d5f8613 tests: Correct heading in static checks test
The github static checks action has a section heading called "Building
rust".  It doesn't actually build rust, though, just installs it with
rustup.  Correct the misleading message.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-14 20:17:27 +10:00
Tim Zhang
842c76cb40 Merge pull request #2359 from teawater/swap_doc
Add doc for guest swap
2021-09-14 15:54:10 +08:00
zhanghj
d789b42937 package: assign proper value to redefined_string
Fixes: #2624

Signed-off-by: zhanghj <zhanghj.lc@inspur.com>
2021-09-14 14:38:36 +08:00
Samuel Ortiz
4b7e4a4c70 runtime: Vendoring update
Due to the libcontainer dependencies removal.

Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
2021-09-14 07:09:34 +02:00
Samuel Ortiz
8d9d6e6af0 docs: Host cgroups documentation update
Update according to the new sandbox/overhead cgroup split.

Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
2021-09-14 07:09:34 +02:00
Samuel Ortiz
9bed2ade0f virtcontainers: Convert to the new cgroups package API
The new API is based on containerd's cgroups package.
With that conversion we can simplify the virtcontainers sandbox code and
also uniformize our cgroups external API dependency. We now only depend
on containerd/cgroups for everything cgroups related.

Depends-on: github.com/kata-containers/tests#3805
Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-09-14 07:09:34 +02:00
Samuel Ortiz
b42ed39349 virtcontainers: cgroups: Add a containerd API based cgroups package
Eventually, we will convert the virtcontainers and the whole Kata
runtime code base to only rely on that package.

This will make Kata depend only on the simpler containerd cgroups API.

Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
2021-09-14 07:09:34 +02:00
Samuel Ortiz
f17752b0dc virtcontainers: container: Do not create and manage container host cgroups
The only process we are adding there is the container host one, and
there is no such thing anymore.

Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
2021-09-14 07:09:33 +02:00
Samuel Ortiz
dc7e9bce73 virtcontainers: sandbox: Host cgroups partitioning
This is a simplification of the host cgroup handling by partitioning the
host cgroups into 2: A sandbox cgroup and an overhead cgroup.

The sandbox cgroup is always created and initialized. The overhead
cgroup is only available when sandbox_cgroup_only is unset, and is
unconstrained on all controllers. The goal of having an overhead cgroup
is to be more flexible on how we manage a pod overhead. Having such
cgroup will allow for setting a fixed overhead per pod, for a subset of
controllers, while at the same time not having the pod being accounted
for those resources.

When sandbox_cgroup_only is not set, we move all non vCPU threads
to the overhead cgroup and let them run unconstrained. When it is set,
all pod related processes and threads will run in the sandbox cgroup.

Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
2021-09-14 07:09:29 +02:00
Samuel Ortiz
f811026c77 virtcontainers: Unconditionally create the sandbox cgroup manager
Regardless of the sandbox_cgroup_only setting, we create the sandbox
cgroup manager and set the sandbox cgroup path at the same time.

Without doing this, the hypervisor constraint routine is mostly a NOP as
the sandbox state cgroup path is not initialized.

Fixes #2184

Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
2021-09-14 07:05:57 +02:00
wangyongchao.bj
a6066404f7 virtcontainers: update VC HypervisorConfig API add three lost fields
Sync the virtcontainers api.md document, add `ConfidentialGuest` `EntropySourceList` `GuestSwap` three
 fields to the HypervisorConfig API.

Fixes #2625

Signed-off-by: wangyongchao.bj <wangyongchao.bj@inspur.com>
2021-09-14 10:42:54 +08:00
wangyongchao.bj
bb18cd475c virtcontainers: update VC SandboxConfig API add SandboxBindMounts field
sync the virtcontainers api.md document, add SandboxBindMounts field to the SandboxConfig API.
And update the order of the SandboxConfig API fields.

Fixes #2621

Signed-off-by: wangyongchao.bj <wangyongchao.bj@inspur.com>
2021-09-14 09:56:47 +08:00
Eric Ernst
967db0cbcc Merge pull request #2544 from likebreath/0831/upgrade_clh_v18.0
versions: Upgrade to Cloud Hypervisor v18.0
2021-09-13 11:27:45 -07:00
Fabiano Fidêncio
9381f23ccf Merge pull request #2613 from sameo/topic/runtime-readme
runtime: Fix README link
2021-09-13 17:44:56 +02:00
Binbin Zhang
58e77a3c13 sandbox: Allow the device to be accessed,such as /dev/null and /dev/urandom
If the device has no permission, such as /dev/null, /dev/urandom,
it needs to be added into cgroup.

Fixes: #2615

Signed-off-by: Binbin Zhang <binbin36520@gmail.com>
2021-09-13 20:47:16 +08:00
Samuel Ortiz
057eb80ac9 Merge pull request #2596 from jongwu/qemu_mak
qemu: remove default config for arm64.
2021-09-13 11:23:35 +02:00
Samuel Ortiz
75ef8c243a Merge pull request #2603 from Bevisy/main-2539
sandbox: Add device permissions such as /dev/null to cgroup
2021-09-13 11:04:51 +02:00
Samuel Ortiz
62a1a6f827 Merge pull request #2593 from nubificus/fix_fc_vcpu_thread
virtcontainers: fc: parse vcpuID correctly
2021-09-13 09:23:53 +02:00
Hui Zhu
d67a414b2b src/runtime/README.md: Fix URL of Licence
Fix URL of Licence of src/runtime/README.md.

Fixes: #2326

Signed-off-by: Hui Zhu <teawater@gmail.com>
2021-09-13 09:11:42 +08:00
Samuel Ortiz
13b8bb0c74 runtime: Fix README link
The LICENSE file lives in the project's root.

Fixes #2612

Signed-off-by: Samuel Ortiz <s.ortiz@apple.com>
2021-09-11 09:44:40 +02:00
David Gibson
25670d3058 packaging/qemu: Update qemu-experimental version to v6.1.0
This brings it back into line with the normal qemu version.  We refer to
v6.1.0 by full SHA in versions.yaml, rather than the tag, so that
apply_patches.sh sees it as different and applies the virtiofs DAX patches
which is what the experimental version is actually about having.

The virtiofs DAX patches themselves are updated to the version from
https://gitlab.com/virtio-fs/qemu, virtio-fs-dev branch as of commit
3620cb0a.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-11 16:43:26 +10:00
David Gibson
041a513f80 versions: Update qemu to v6.1.0
We need qemu-6.1 for ACPI PCI hotplug support for the q35 machine.  At the
moment qemu will use SHPC hotplug under the PCIe to PCI bridge on q35.
SHPC is too slow to use for our purposes (it requires a 5s delay).

Update the qemu version to v6.1.0.  This leaves the experimental version
*older* than the normal version, but we'll fix that up later.

We also need to tweak the snapcraft.yaml, since the location for configs
has changed in the new qemu version.

fixes #1691

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-11 16:43:22 +10:00
Anastassios Nanos
62baa48ef5 virtcontainers: fc: parse vcpuID correctly
In getThreadIDs(), the cpuID variable is derived from a string that
already contains a whitespace. As a result, strings.SplitAfter returns
the cpuID with a leading space. This makes any go variant of string to int
fail (strconv.ParseInt() in our case). This patch makes sure that the
leading space character is removed so the string passed to
strconv.ParseInt() is "CPUID" and not " CPUID".

This has been caused by a change in the naming scheme of vcpu threads
for Firecracker after v0.19.1.

Fixes: #2592

Signed-off-by: Anastassios Nanos <ananos@nubificus.co.uk>
2021-09-10 09:39:56 +00:00
David Gibson
81de2d476b packaging: Correct error message in apply_patches.sh
If the script doesn't find a patches directory it expects, it gives an
error saying to create a dummy 'no_patches' file if you really don't want
any patches applied for that version.

But actual practice in the tree is to call the dummy file 'no_patches.txt'
rather than simply 'no_patches'.  Correct the message to match existing
practice.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-10 11:19:10 +10:00
Bo Chen
f785ff0bf2 virtcontainers: clh: Revert the workaround incorrect default values
Given the fix to the bugs of the openapi spec file is included in the
Cloud Hypervisor v18.0 [1], this patch reverts the workaround we carried
in the CLH driver.

This reverts commit 932ee41b3f.

[1] https://github.com/cloud-hypervisor/cloud-hypervisor/pull/3029

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-09-09 14:52:53 -07:00
Bo Chen
0e0e59dc5f virtcontainers: clh: Re-generate the client code
This patch re-generates the client code for Cloud Hypervisor v18.0.
Note: The client code of cloud-hypervisor's (CLH) OpenAPI is
automatically generated by openapi-generator [1-2].

[1] https://github.com/OpenAPITools/openapi-generator
[2] https://github.com/kata-containers/kata-containers/blob/main/src/runtime/virtcontainers/pkg/cloud-hypervisor/README.md

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-09-09 14:51:55 -07:00
Bo Chen
f0b5331430 versions: Upgrade to Cloud Hypervisor v18.0
Highlights from the Cloud Hypervisor release v18.0: 1) Experimental User
Device (vfio-user) support; 2) Migration support for vhost-user devices;
3) VHDX disk image support; 4) Device pass through on MSHV hypervisor;
5) AArch64 support for virtio-mem; 6) Live migration on MSHV hypervisor;
7) AArch64 CPU topology support; 8) Power button support on AArch64; 9)
Various bug fixes on PTY, TTY, signal handling, and live-migration on
AArch64.

Details can be found: https://github.com/cloud-hypervisor/cloud-hypervisor/releases/tag/v18.0

Fixes: #2543

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-09-09 14:44:04 -07:00
Carlos Venegas
230eae3ff3 Merge pull request #2417 from jcvenegas/docker-build-fixes
kata-tarball: Build and test fixes
2021-09-09 14:14:26 -05:00
Carlos Venegas
116521367e actions: test make kata-tarball
make kata-tarball is the main way to
build a kata in a single host. Lets
test it to make sure it works on every PR.

Fixes: #2416

Signed-off-by: Carlos Venegas <jose.carlos.venegas.munoz@intel.com>
2021-09-09 14:32:08 +00:00
Carlos Venegas
626d659fd9 actions: kata-deploy on PRs and use makefile
- Run kata-deploy tarball generation action on every PR.

- Use kata-deploy makefile targets.

Signed-off-by: Carlos Venegas <jose.carlos.venegas.munoz@intel.com>
2021-09-09 14:32:08 +00:00
Carlos Venegas
78d99f5129 kata-deploy: Make verbose single builds
If a binary tarball for a single component is done,
the logs will be shown in stdout.

e.g.

make kernel-tarball

To build all at the same time still store logs in files.

make kata-tarball

Signed-off-by: Carlos Venegas <jose.carlos.venegas.munoz@intel.com>
2021-09-09 14:32:08 +00:00
Carlos Venegas
59486b855a kata-deploy: Add tarball suffix to makefile targets
Now that local-build kata-deploy makefile is included in toplevel
makefile, let's use the suffix `-tarball` to avoid name collisions
and identify the tarball related targets.

Signed-off-by: Carlos Venegas <jose.carlos.venegas.munoz@intel.com>
2021-09-09 14:32:08 +00:00
Carlos Venegas
96e1246bce makefile: Include kata-deploy targets
Use kata-deploy targets from toplevel.
This will help if want to build and
reinstall just one single kata component.

Signed-off-by: Carlos Venegas <jose.carlos.venegas.munoz@intel.com>
2021-09-09 14:32:08 +00:00
Fupan Li
a2db68e347 Merge pull request #2565 from liubin/fix/2446-run-tests-by-root
test: enable running tests under root user
2021-09-09 17:45:35 +08:00
Hui Zhu
74d645cd21 how-to: Add how-to-setup-swap-devices-in-guest-kernel.md
Add how-to-setup-swap-devices-in-guest-kernel.md to how-to to introduce
how to setup swap device in guest kernel.

Fixes: #2326

Signed-off-by: Hui Zhu <teawater@antfin.com>
2021-09-09 17:24:24 +08:00
Fabiano Fidêncio
f28e6e506f Merge pull request #2585 from Bevisy/main-2584
osbuilder: Change to "=" operator to make script more portable
2021-09-09 10:53:47 +02:00
Amulyam24
d865c80986 virtcontainers: add unit tests for container.go
Fixes: #268

Signed-off-by: Amulyam24 <amulmek1@in.ibm.com>
2021-09-09 13:09:38 +05:30
Binbin Zhang
71f915c63f sandbox: Add device permissions such as /dev/null to cgroup
adds the default devices for unix such as /dev/null, /dev/urandom to
the container's resource cgroup spec

Fixes: #2539

Signed-off-by: Binbin Zhang <binbin36520@gmail.com>
2021-09-09 15:33:24 +08:00
Hui Zhu
2174fee48d docs: Add swap annotations introduction
Add swap annotations introduction of
`io.katacontainers.config.hypervisor.enable_guest_swap`,
`io.katacontainers.container.resource.swappiness"` and
`io.katacontainers.container.resource.swap_in_bytes"` to
how-to-set-sandbox-config-kata.md.

Fixes: #2326

Signed-off-by: Hui Zhu <teawater@antfin.com>
2021-09-09 15:28:51 +08:00
bin
2abc450a4d test: enable running tests under root user
Add tests that run under root user to test special cases.

Fixes: #2446

Signed-off-by: bin <bin@hyper.sh>
2021-09-09 14:21:34 +08:00
Binbin Zhang
924a68d08d osbuilder: Change to "=" operator to make script more portable
zsh doesn't support "==" as equal comparison operator, so
replace "==" with "=" to make the script more portable

Fixes: #2584

Signed-off-by: Binbin Zhang <binbin36520@gmail.com>
2021-09-08 16:57:34 +08:00
Jianyong Wu
1fff9be707 qemu: remove default config for arm64.
The current default config in qemu for arm64 doesn't suit for qemu
version 5.1+, so remove them here.

Fixes: #2595
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2021-09-08 16:25:22 +08:00
Julio Montes
9bbaa66f39 Merge pull request #2480 from Bevisy/main
makefile: Fix error exit status code
2021-09-06 07:28:15 -05:00
Binbin Zhang
4d7ddffe6f utils: kata-manager: Update kata-manager.sh for new containerd config
update script for new containerd config

Fixes: #2188

Signed-off-by: Binbin Zhang <binbin36520@gmail.com>
2021-09-05 17:15:15 +08:00
Binbin Zhang
f5172d1c36 cli: Fix outdated kata-runtime bash completion
adapt to the latest kata-runtime version

Fixes: #2254

Signed-off-by: Binbin Zhang <binbin36520@gmail.com>
2021-09-04 22:26:44 +08:00
Bin Liu
3c9170ea0d Merge pull request #2561 from rapiz1/patch-4
osbuilder: fix inconsistent calculation of fs size
2021-09-04 15:14:07 +08:00
Bin Liu
103fdd3f6c Merge pull request #2564 from Bevisy/main-2296
virtcontainers: Remove NewStoreFeature
2021-09-03 10:41:21 +08:00
James O. D. Hunt
c64867ad34 Merge pull request #2559 from dgibson/snapbranch
snap: Test variable instead of executing "branch"
2021-09-02 15:42:28 +01:00
James O. D. Hunt
f3a1bf3b45 Merge pull request #2552 from bergwolf/license
license: drop redundant license files
2021-09-02 14:31:18 +01:00
Binbin Zhang
e2a9e78c9e virtcontainers: Remove NewStoreFeature
remove NewStoreFeature

Fixes: #2296

Signed-off-by: Binbin Zhang <binbin36520@gmail.com>
2021-09-02 21:28:36 +08:00
Yujia Qiao
bfcee91164 osbuilder: fix inconsistent calculation of fs size
This patch fixes inconsistent calculations of the rootfs size.
For `du` and `df`, `-B 1MB` is different from `-BM`. The
former is the power of 1000, and the latter is the power of
1024. So comparing them doesn't make sense. The bug may result
in a larger image than needed.

Fixes: #2560

Signed-off-by: Yujia Qiao <rapiz3142@gmail.com>
2021-09-02 16:00:29 +08:00
David Gibson
4996f9b7da snap: Test variable instead of executing "branch"
In snapcraft.yaml we have a case statement on $(branch) - that is on the
output of executing a command "branch".  From the selections it appears
that what it actually wants is to simply select on the contents of the
$branch variable, which should be ${branch} instead.

fixes #2558

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-02 14:38:15 +10:00
Julio Montes
716ab32acf Merge pull request #2551 from teawater/fix_swap
Fix swap fail insert fail issue
2021-09-01 08:25:27 -05:00
Peng Tao
256c3b2747 license: drop redundant license files
There is no need to keep multiple copies of the license file in
different directory. We can just use the top level one for the project.

Fixes: #2553
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-09-01 15:10:04 +08:00
Hui Zhu
bcc9fa3b35 hotplugAddBlockDevice: Use ExecuteBlockdevAddWithDriverCache with swap
Use ExecuteBlockdevAddWithDriverCache with swap in
hotplugAddBlockDevice to handle the issue that a swap file cannot
work with ExecuteBlockdevAddWithCache.

Fixes: #2548

Signed-off-by: Hui Zhu <teawater@antfin.com>
2021-09-01 14:13:11 +08:00
Hui Zhu
bd85da0461 vendor: Update vendor/github.com/kata-containers/govmm
Update vendor/github.com/kata-containers/govmm for
ExecuteBlockdevAddWithDriverCache.

Fixes: #2548

Signed-off-by: Hui Zhu <teawater@antfin.com>
2021-09-01 13:59:19 +08:00
Binbin Zhang
4751698829 virtcontainers: Fix incorrect scripts path
modify to the correct relative path

Fixes: #2515

Signed-off-by: Binbin Zhang <binbin36520@gmail.com>
2021-08-27 19:16:53 +00:00
Yujia Qiao
814cea9601 virtcontainers: clean up useless code
Fixes: #2275

Signed-off-by: Yujia Qiao <rapiz3142@gmail.com>
2021-08-24 16:04:34 +08:00
Binbin Zhang
d422789fac makefile: Fix error exit status code
Generate `config-generated.go` file under src/runtime/cli/containerd-shim-kata-v2 before executing test or coverage.

Fixes #2479

Signed-off-by: Binbin Zhang <binbin36520@gmail.com>
2021-08-23 11:31:33 +08:00
Fabiano Fidêncio
d45c86de29 versions: Update CRI-O to its 1.22 release
As kubernetes version has been bumped to 1.22, let's bump the CRI-O
version accordingly.

Related: #2434

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-08-12 11:39:17 +02:00
Fabiano Fidêncio
c4a642636b versions: Update k8s & critools to v1.22
Let's test our `main` branch against the latest version of k8s.  In
order to do the bump, let's also update critools version accordingly.

Depends-on: github.com/kata-containers/tests#3818

Fixes: #2433

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-08-12 11:38:37 +02:00
Christophe de Dinechin
881b996443 agent: Make wording of error message match CRI-O test suite
The CRI-O integration test suite has two tests that fail because they search for
"not found" in the error message, but we emit "is not exist".

Change the error message to match the expectations of the test suite.

Fixes: #2036

Reported-by: Julien Ropé <jrope@redhat.com>
Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2021-08-04 09:33:09 +02:00
861 changed files with 38726 additions and 45668 deletions

View File

@@ -1,6 +1,6 @@
name: kata-deploy-build
name: kata deploy build
on: push
on: [push, pull_request]
jobs:
build-asset:
@@ -9,6 +9,7 @@ jobs:
matrix:
asset:
- kernel
- kernel-experimental
- shim-v2
- qemu
- cloud-hypervisor
@@ -24,7 +25,7 @@ jobs:
- name: Build ${{ matrix.asset }}
run: |
./tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh --build="${KATA_ASSET}"
make "${KATA_ASSET}-tarball"
build_dir=$(readlink -f build)
# store-artifact does not work with symlink
sudo cp -r --preserve=all "${build_dir}" "kata-build"
@@ -47,12 +48,21 @@ jobs:
uses: actions/download-artifact@v2
with:
name: kata-artifacts
path: kata-artifacts
path: build
- name: merge-artifacts
run: |
./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-artifacts
make merge-builds
- name: store-artifacts
uses: actions/upload-artifact@v2
with:
name: kata-static-tarball
path: kata-static.tar.xz
make-kata-tarball:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: make kata-tarball
run: |
make kata-tarball
sudo make install-tarball

View File

@@ -5,60 +5,121 @@ on:
name: test-kata-deploy
jobs:
check_comments:
if: ${{ github.event.issue.pull_request }}
check-comment-and-membership:
runs-on: ubuntu-latest
if: |
github.event.issue.pull_request
&& github.event_name == 'issue_comment'
&& github.event.action == 'created'
&& startsWith(github.event.comment.body, '/test_kata_deploy')
steps:
- name: Check for Command
id: command
uses: kata-containers/slash-command-action@v1
- name: Check membership
uses: kata-containers/is-organization-member@1.0.1
id: is_organization_member
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
command: "test_kata_deploy"
reaction: "true"
reaction-type: "eyes"
allow-edits: "false"
permission-level: admin
- name: verify command arg is kata-deploy
organization: kata-containers
username: ${{ github.event.comment.user.login }}
token: ${{ secrets.GITHUB_TOKEN }}
- name: Fail if not member
run: |
echo "The command was '${{ steps.command.outputs.command-name }}' with arguments '${{ steps.command.outputs.command-arguments }}'"
result=${{ steps.is_organization_member.outputs.result }}
if [ $result == false ]; then
user=${{ github.event.comment.user.login }}
echo Either ${user} is not part of the kata-containers organization
echo or ${user} has its Organization Visibility set to Private at
echo https://github.com/orgs/kata-containers/people?query=${user}
echo
echo Ensure you change your Organization Visibility to Public and
echo trigger the test again.
exit 1
fi
create-and-test-container:
needs: check_comments
build-asset:
runs-on: ubuntu-latest
needs: check-comment-and-membership
strategy:
matrix:
asset:
- cloud-hypervisor
- firecracker
- kernel
- qemu
- rootfs-image
- rootfs-initrd
- shim-v2
steps:
- name: get-PR-ref
id: get-PR-ref
- uses: actions/checkout@v2
- name: Install docker
run: |
ref=$(cat $GITHUB_EVENT_PATH | jq -r '.issue.pull_request.url' | sed 's#^.*\/pulls#refs\/pull#' | sed 's#$#\/merge#')
echo "reference for PR: " ${ref}
echo "##[set-output name=pr-ref;]${ref}"
curl -fsSL https://test.docker.com -o test-docker.sh
sh test-docker.sh
- name: check out
uses: actions/checkout@v2
with:
ref: ${{ steps.get-PR-ref.outputs.pr-ref }}
- name: build-container-image
id: build-container-image
- name: Build ${{ matrix.asset }}
run: |
PR_SHA=$(git log --format=format:%H -n1)
VERSION="2.0.0"
ARTIFACT_URL="https://github.com/kata-containers/kata-containers/releases/download/${VERSION}/kata-static-${VERSION}-x86_64.tar.xz"
wget "${ARTIFACT_URL}" -O tools/packaging/kata-deploy/kata-static.tar.xz
docker build --build-arg KATA_ARTIFACTS=kata-static.tar.xz -t katadocker/kata-deploy-ci:${PR_SHA} -t quay.io/kata-containers/kata-deploy-ci:${PR_SHA} ./tools/packaging/kata-deploy
docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }}
docker push katadocker/kata-deploy-ci:$PR_SHA
docker login -u ${{ secrets.QUAY_DEPLOYER_USERNAME }} -p ${{ secrets.QUAY_DEPLOYER_PASSWORD }} quay.io
docker push quay.io/kata-containers/kata-deploy-ci:$PR_SHA
echo "##[set-output name=pr-sha;]${PR_SHA}"
- name: test-kata-deploy-ci-in-aks
uses: ./tools/packaging/kata-deploy/action
with:
packaging-sha: ${{ steps.build-container-image.outputs.pr-sha }}
make "${KATA_ASSET}-tarball"
build_dir=$(readlink -f build)
# store-artifact does not work with symlink
sudo cp -r "${build_dir}" "kata-build"
env:
PKG_SHA: ${{ steps.build-container-image.outputs.pr-sha }}
KATA_ASSET: ${{ matrix.asset }}
TAR_OUTPUT: ${{ matrix.asset }}.tar.gz
- name: store-artifact ${{ matrix.asset }}
uses: actions/upload-artifact@v2
with:
name: kata-artifacts
path: kata-build/kata-static-${{ matrix.asset }}.tar.xz
if-no-files-found: error
create-kata-tarball:
runs-on: ubuntu-latest
needs: build-asset
steps:
- uses: actions/checkout@v2
- name: get-artifacts
uses: actions/download-artifact@v2
with:
name: kata-artifacts
path: kata-artifacts
- name: merge-artifacts
run: |
./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-artifacts
- name: store-artifacts
uses: actions/upload-artifact@v2
with:
name: kata-static-tarball
path: kata-static.tar.xz
kata-deploy:
needs: create-kata-tarball
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: get-kata-tarball
uses: actions/download-artifact@v2
with:
name: kata-static-tarball
- name: build-and-push-kata-deploy-ci
id: build-and-push-kata-deploy-ci
run: |
tag=$(echo $GITHUB_REF | cut -d/ -f3-)
pushd $GITHUB_WORKSPACE
git checkout $tag
pkg_sha=$(git rev-parse HEAD)
popd
mv kata-static.tar.xz $GITHUB_WORKSPACE/tools/packaging/kata-deploy/kata-static.tar.xz
docker build --build-arg KATA_ARTIFACTS=kata-static.tar.xz -t quay.io/kata-containers/kata-deploy-ci:$pkg_sha $GITHUB_WORKSPACE/tools/packaging/kata-deploy
docker login -u ${{ secrets.QUAY_DEPLOYER_USERNAME }} -p ${{ secrets.QUAY_DEPLOYER_PASSWORD }} quay.io
docker push quay.io/kata-containers/kata-deploy-ci:$pkg_sha
mkdir -p packaging/kata-deploy
ln -s $GITHUB_WORKSPACE/tools/packaging/kata-deploy/action packaging/kata-deploy/action
echo "::set-output name=PKG_SHA::${pkg_sha}"
- name: test-kata-deploy-ci-in-aks
uses: ./packaging/kata-deploy/action
with:
packaging-sha: ${{steps.build-and-push-kata-deploy-ci.outputs.PKG_SHA}}
env:
PKG_SHA: ${{steps.build-and-push-kata-deploy-ci.outputs.PKG_SHA}}
AZ_APPID: ${{ secrets.AZ_APPID }}
AZ_PASSWORD: ${{ secrets.AZ_PASSWORD }}
AZ_SUBSCRIPTION_ID: ${{ secrets.AZ_SUBSCRIPTION_ID }}

View File

@@ -1,295 +0,0 @@
name: Publish release tarball
on:
push:
tags:
- '1.*'
jobs:
get-artifact-list:
runs-on: ubuntu-latest
steps:
- name: get the list
run: |
pushd $GITHUB_WORKSPACE
tag=$(echo $GITHUB_REF | cut -d/ -f3-)
git checkout $tag
popd
$GITHUB_WORKSPACE/tools/packaging/artifact-list.sh > artifact-list.txt
- name: save-artifact-list
uses: actions/upload-artifact@master
with:
name: artifact-list
path: artifact-list.txt
build-kernel:
runs-on: ubuntu-16.04
needs: get-artifact-list
env:
buildstr: "install_kernel"
steps:
- uses: actions/checkout@v1
- name: get-artifact-list
uses: actions/download-artifact@master
with:
name: artifact-list
- run: |
sudo apt-get update && sudo apt install -y flex bison libelf-dev bc iptables
- name: build-kernel
run: |
if grep -q $buildstr ./artifact-list/artifact-list.txt; then
$GITHUB_WORKSPACE/.github/workflows/generate-artifact-tarball.sh $buildstr
echo "artifact-built=true" >> $GITHUB_ENV
else
echo "artifact-built=false" >> $GITHUB_ENV
fi
- name: store-artifacts
if: ${{ env.artifact-built }} == 'true'
uses: actions/upload-artifact@master
with:
name: kata-artifacts
path: kata-static-kernel.tar.gz
build-experimental-kernel:
runs-on: ubuntu-16.04
needs: get-artifact-list
env:
buildstr: "install_experimental_kernel"
steps:
- uses: actions/checkout@v1
- name: get-artifact-list
uses: actions/download-artifact@master
with:
name: artifact-list
- run: |
sudo apt-get update && sudo apt install -y flex bison libelf-dev bc iptables
- name: build-experimental-kernel
run: |
if grep -q $buildstr ./artifact-list/artifact-list.txt; then
$GITHUB_WORKSPACE/.github/workflows/generate-artifact-tarball.sh $buildstr
echo "artifact-built=true" >> $GITHUB_ENV
else
echo "artifact-built=false" >> $GITHUB_ENV
fi
- name: store-artifacts
if: ${{ env.artifact-built }} == 'true'
uses: actions/upload-artifact@master
with:
name: kata-artifacts
path: kata-static-experimental-kernel.tar.gz
build-qemu:
runs-on: ubuntu-16.04
needs: get-artifact-list
env:
buildstr: "install_qemu"
steps:
- uses: actions/checkout@v1
- name: get-artifact-list
uses: actions/download-artifact@master
with:
name: artifact-list
- name: build-qemu
run: |
if grep -q $buildstr ./artifact-list/artifact-list.txt; then
$GITHUB_WORKSPACE/.github/workflows/generate-artifact-tarball.sh $buildstr
echo "artifact-built=true" >> $GITHUB_ENV
else
echo "artifact-built=false" >> $GITHUB_ENV
fi
- name: store-artifacts
if: ${{ env.artifact-built }} == 'true'
uses: actions/upload-artifact@master
with:
name: kata-artifacts
path: kata-static-qemu.tar.gz
# Job for building the image
build-image:
runs-on: ubuntu-16.04
needs: get-artifact-list
env:
buildstr: "install_image"
steps:
- uses: actions/checkout@v1
- name: get-artifact-list
uses: actions/download-artifact@master
with:
name: artifact-list
- name: build-image
run: |
if grep -q $buildstr ./artifact-list/artifact-list.txt; then
$GITHUB_WORKSPACE/.github/workflows/generate-artifact-tarball.sh $buildstr
echo "artifact-built=true" >> $GITHUB_ENV
else
echo "artifact-built=false" >> $GITHUB_ENV
fi
- name: store-artifacts
if: ${{ env.artifact-built }} == 'true'
uses: actions/upload-artifact@master
with:
name: kata-artifacts
path: kata-static-image.tar.gz
# Job for building firecracker hypervisor
build-firecracker:
runs-on: ubuntu-16.04
needs: get-artifact-list
env:
buildstr: "install_firecracker"
steps:
- uses: actions/checkout@v1
- name: get-artifact-list
uses: actions/download-artifact@master
with:
name: artifact-list
- name: build-firecracker
run: |
if grep -q $buildstr ./artifact-list/artifact-list.txt; then
$GITHUB_WORKSPACE/.github/workflows/generate-artifact-tarball.sh $buildstr
echo "artifact-built=true" >> $GITHUB_ENV
else
echo "artifact-built=false" >> $GITHUB_ENV
fi
- name: store-artifacts
if: ${{ env.artifact-built }} == 'true'
uses: actions/upload-artifact@master
with:
name: kata-artifacts
path: kata-static-firecracker.tar.gz
# Job for building cloud-hypervisor
build-clh:
runs-on: ubuntu-16.04
needs: get-artifact-list
env:
buildstr: "install_clh"
steps:
- uses: actions/checkout@v1
- name: get-artifact-list
uses: actions/download-artifact@master
with:
name: artifact-list
- name: build-clh
run: |
if grep -q $buildstr ./artifact-list/artifact-list.txt; then
$GITHUB_WORKSPACE/.github/workflows/generate-artifact-tarball.sh $buildstr
echo "artifact-built=true" >> $GITHUB_ENV
else
echo "artifact-built=false" >> $GITHUB_ENV
fi
- name: store-artifacts
if: ${{ env.artifact-built }} == 'true'
uses: actions/upload-artifact@master
with:
name: kata-artifacts
path: kata-static-clh.tar.gz
# Job for building kata components
build-kata-components:
runs-on: ubuntu-16.04
needs: get-artifact-list
env:
buildstr: "install_kata_components"
steps:
- uses: actions/checkout@v1
- name: get-artifact-list
uses: actions/download-artifact@master
with:
name: artifact-list
- name: build-kata-components
run: |
if grep -q $buildstr ./artifact-list/artifact-list.txt; then
$GITHUB_WORKSPACE/.github/workflows/generate-artifact-tarball.sh $buildstr
echo "artifact-built=true" >> $GITHUB_ENV
else
echo "artifact-built=false" >> $GITHUB_ENV
fi
- name: store-artifacts
if: ${{ env.artifact-built }} == 'true'
uses: actions/upload-artifact@master
with:
name: kata-artifacts
path: kata-static-kata-components.tar.gz
gather-artifacts:
runs-on: ubuntu-16.04
needs: [build-experimental-kernel, build-kernel, build-qemu, build-image, build-firecracker, build-kata-components, build-clh]
steps:
- uses: actions/checkout@v1
- name: get-artifacts
uses: actions/download-artifact@master
with:
name: kata-artifacts
- name: colate-artifacts
run: |
$GITHUB_WORKSPACE/.github/workflows/gather-artifacts.sh
- name: store-artifacts
uses: actions/upload-artifact@master
with:
name: release-candidate
path: kata-static.tar.xz
kata-deploy:
needs: gather-artifacts
runs-on: ubuntu-latest
steps:
- name: get-artifacts
uses: actions/download-artifact@master
with:
name: release-candidate
- name: build-and-push-kata-deploy-ci
id: build-and-push-kata-deploy-ci
run: |
tag=$(echo $GITHUB_REF | cut -d/ -f3-)
git clone https://github.com/kata-containers/packaging
pushd packaging
git checkout $tag
pkg_sha=$(git rev-parse HEAD)
popd
mv release-candidate/kata-static.tar.xz ./packaging/kata-deploy/kata-static.tar.xz
docker build --build-arg KATA_ARTIFACTS=kata-static.tar.xz -t katadocker/kata-deploy-ci:$pkg_sha -t quay.io/kata-containers/kata-deploy-ci:$pkg_sha ./packaging/kata-deploy
docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }}
docker push katadocker/kata-deploy-ci:$pkg_sha
docker login -u ${{ secrets.QUAY_DEPLOYER_USERNAME }} -p ${{ secrets.QUAY_DEPLOYER_PASSWORD }} quay.io
docker push quay.io/kata-containers/kata-deploy-ci:$pkg_sha
echo "::set-output name=PKG_SHA::${pkg_sha}"
- name: test-kata-deploy-ci-in-aks
uses: ./packaging/kata-deploy/action
with:
packaging-sha: ${{steps.build-and-push-kata-deploy-ci.outputs.PKG_SHA}}
env:
PKG_SHA: ${{steps.build-and-push-kata-deploy-ci.outputs.PKG_SHA}}
AZ_APPID: ${{ secrets.AZ_APPID }}
AZ_PASSWORD: ${{ secrets.AZ_PASSWORD }}
AZ_SUBSCRIPTION_ID: ${{ secrets.AZ_SUBSCRIPTION_ID }}
AZ_TENANT_ID: ${{ secrets.AZ_TENANT_ID }}
- name: push-tarball
run: |
# tag the container image we created and push to DockerHub
tag=$(echo $GITHUB_REF | cut -d/ -f3-)
docker tag katadocker/kata-deploy-ci:${{steps.build-and-push-kata-deploy-ci.outputs.PKG_SHA}} katadocker/kata-deploy:${tag}
docker push katadocker/kata-deploy:${tag}
upload-static-tarball:
needs: kata-deploy
runs-on: ubuntu-latest
steps:
- name: download-artifacts
uses: actions/download-artifact@master
with:
name: release-candidate
- name: install hub
run: |
HUB_VER=$(curl -s "https://api.github.com/repos/github/hub/releases/latest" | jq -r .tag_name | sed 's/^v//')
wget -q -O- https://github.com/github/hub/releases/download/v$HUB_VER/hub-linux-amd64-$HUB_VER.tgz | \
tar xz --strip-components=2 --wildcards '*/bin/hub' && sudo mv hub /usr/local/bin/hub
- name: push static tarball to github
run: |
tag=$(echo $GITHUB_REF | cut -d/ -f3-)
tarball="kata-static-$tag-x86_64.tar.xz"
repo="https://github.com/kata-containers/runtime.git"
mv release-candidate/kata-static.tar.xz "release-candidate/${tarball}"
git clone "${repo}"
cd runtime
echo "uploading asset '${tarball}' to '${repo}' tag: ${tag}"
GITHUB_TOKEN=${{ secrets.GIT_UPLOAD_TOKEN }} hub release edit -m "" -a "../release-candidate/${tarball}" "${tag}"

View File

@@ -100,10 +100,14 @@ jobs:
run: |
# tag the container image we created and push to DockerHub
tag=$(echo $GITHUB_REF | cut -d/ -f3-)
docker tag katadocker/kata-deploy-ci:${{steps.build-and-push-kata-deploy-ci.outputs.PKG_SHA}} katadocker/kata-deploy:${tag}
docker tag quay.io/kata-containers/kata-deploy-ci:${{steps.build-and-push-kata-deploy-ci.outputs.PKG_SHA}} quay.io/kata-containers/kata-deploy:${tag}
docker push katadocker/kata-deploy:${tag}
docker push quay.io/kata-containers/kata-deploy:${tag}
tags=($tag)
tags+=($([[ "$tag" =~ "alpha"|"rc" ]] && echo "latest" || echo "stable"))
for tag in ${tags[@]}; do \
docker tag katadocker/kata-deploy-ci:${{steps.build-and-push-kata-deploy-ci.outputs.PKG_SHA}} katadocker/kata-deploy:${tag} && \
docker tag quay.io/kata-containers/kata-deploy-ci:${{steps.build-and-push-kata-deploy-ci.outputs.PKG_SHA}} quay.io/kata-containers/kata-deploy:${tag} && \
docker push katadocker/kata-deploy:${tag} && \
docker push quay.io/kata-containers/kata-deploy:${tag}; \
done
upload-static-tarball:
needs: kata-deploy
@@ -127,3 +131,49 @@ jobs:
pushd $GITHUB_WORKSPACE
echo "uploading asset '${tarball}' for tag: ${tag}"
GITHUB_TOKEN=${{ secrets.GIT_UPLOAD_TOKEN }} hub release edit -m "" -a "${tarball}" "${tag}"
popd
upload-cargo-vendored-tarball:
needs: upload-static-tarball
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: generate-and-upload-tarball
run: |
pushd $GITHUB_WORKSPACE/src/agent
cargo vendor >> .cargo/config
popd
tag=$(echo $GITHUB_REF | cut -d/ -f3-)
tarball="kata-containers-$tag-vendor.tar.gz"
pushd $GITHUB_WORKSPACE
tar -cvzf "${tarball}" src/agent/.cargo/config src/agent/vendor
GITHUB_TOKEN=${{ secrets.GIT_UPLOAD_TOKEN }} hub release edit -m "" -a "${tarball}" "${tag}"
popd
upload-libseccomp-tarball:
needs: upload-cargo-vendored-tarball
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: download-and-upload-tarball
env:
GITHUB_TOKEN: ${{ secrets.GIT_UPLOAD_TOKEN }}
GOPATH: ${HOME}/go
run: |
pushd $GITHUB_WORKSPACE
./ci/install_yq.sh
tag=$(echo $GITHUB_REF | cut -d/ -f3-)
versions_yaml="versions.yaml"
version=$(${GOPATH}/bin/yq read ${versions_yaml} "externals.libseccomp.version")
repo_url=$(${GOPATH}/bin/yq read ${versions_yaml} "externals.libseccomp.url")
download_url="${repo_url}/releases/download/v${version}"
tarball="libseccomp-${version}.tar.gz"
asc="${tarball}.asc"
curl -sSLO "${download_url}/${tarball}"
curl -sSLO "${download_url}/${asc}"
# "-m" option should be empty to re-use the existing release title
# without opening a text editor.
# For the details, check https://hub.github.com/hub-release.1.html.
hub release edit -m "" -a "${tarball}" "${tag}"
hub release edit -m "" -a "${asc}" "${tag}"
popd

View File

@@ -12,8 +12,7 @@ on:
- reopened
- labeled
- unlabeled
pull_request:
branches:
branches:
- main
jobs:
@@ -32,8 +31,6 @@ jobs:
- name: Checkout code to allow hub to communicate with the project
uses: actions/checkout@v2
with:
token: ${{ secrets.KATA_GITHUB_ACTIONS_TOKEN }}
- name: Install porting checker script
run: |

View File

@@ -13,7 +13,7 @@ jobs:
test:
strategy:
matrix:
go-version: [1.15.x, 1.16.x]
go-version: [1.16.x, 1.17.x]
os: [ubuntu-20.04]
runs-on: ${{ matrix.os }}
env:
@@ -60,13 +60,21 @@ jobs:
cd ${GOPATH}/src/github.com/${{ github.repository }} && ./ci/setup.sh
env:
GOPATH: ${{ runner.workspace }}/kata-containers
- name: Building rust
- name: Installing rust
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
run: |
cd ${GOPATH}/src/github.com/${{ github.repository }} && ./ci/install_rust.sh
PATH=$PATH:"$HOME/.cargo/bin"
rustup target add x86_64-unknown-linux-musl
rustup component add rustfmt clippy
- name: Setup seccomp
run: |
libseccomp_install_dir=$(mktemp -d -t libseccomp.XXXXXXXXXX)
gperf_install_dir=$(mktemp -d -t gperf.XXXXXXXXXX)
cd ${GOPATH}/src/github.com/${{ github.repository }} && ./ci/install_libseccomp.sh "${libseccomp_install_dir}" "${gperf_install_dir}"
echo "Set environment variables for the libseccomp crate to link the libseccomp library statically"
echo "LIBSECCOMP_LINK_TYPE=static" >> $GITHUB_ENV
echo "LIBSECCOMP_LIB_PATH=${libseccomp_install_dir}/lib" >> $GITHUB_ENV
# Check whether the vendored code is up-to-date & working as the first thing
- name: Check vendored code
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
@@ -84,3 +92,7 @@ jobs:
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
run: |
cd ${GOPATH}/src/github.com/${{ github.repository }} && make test
- name: Run Unit Tests As Root User
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
run: |
cd ${GOPATH}/src/github.com/${{ github.repository }} && sudo -E PATH="$PATH" make test

View File

@@ -18,6 +18,7 @@ TOOLS += agent-ctl
STANDARD_TARGETS = build check clean install test vendor
include utils.mk
include ./tools/packaging/kata-deploy/local-build/Makefile
all: build
@@ -33,10 +34,4 @@ generate-protocols:
static-checks: build
bash ci/static-checks.sh
binary-tarball:
make -f ./tools/packaging/kata-deploy/local-build/Makefile
install-binary-tarball:
make -f ./tools/packaging/kata-deploy/local-build/Makefile install
.PHONY: all default static-checks binary-tarball install-binary-tarball

View File

@@ -1 +1 @@
2.3.0-alpha0
2.3.1

View File

@@ -1,30 +0,0 @@
#!/bin/bash
# Copyright (c) 2018 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
# Check there are no os.Exit() calls creeping into the code.
# We don't use that exit path in the Kata codebase.
#
# Usage: <this script> [go-packages]
#
# The package pattern to check can be overridden with the first argument.
# It defaults to the current directory.
go_packages=${1:-.}

echo "Checking for no os.Exit() calls for package [${go_packages}]"

# `go list` prints one "<package dir>/*.go" glob per package.
# ${go_packages} is left unquoted so several patterns can be passed in $1.
candidates=$(go list -f '{{.Dir}}/*.go' $go_packages)

# Collect the files in an array so paths are passed to grep as-is.
files=()

# ${candidates} is deliberately unquoted: the shell has to expand the globs.
for f in $candidates; do
	filename=$(basename "$f")

	# Skip all go test files.
	[[ $filename == *_test.go ]] && continue

	# Skip exit.go, the only file we should call os.Exit() from.
	[[ $filename == "exit.go" ]] && continue

	files+=("$f")
done

if [ "${#files[@]}" -eq 0 ]; then
	echo "No files to check, skipping"
	exit 0
fi

# `grep -E` replaces the deprecated `egrep` alias.
if grep -En '\<os\.Exit\>' "${files[@]}"; then
	echo "Direct calls to os.Exit() are forbidden, please use exit() so atexit() works"
	exit 1
fi

109
ci/install_libseccomp.sh Executable file
View File

@@ -0,0 +1,109 @@
#!/bin/bash
#
# Copyright 2021 Sony Group Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
# Abort the script as soon as any command fails.
set -o errexit
# Directory holding this script; used to locate the sibling lib.sh.
cidir=$(dirname "$0")
source "${cidir}/lib.sh"
# NOTE(review): clone_tests_repo and tests_repo_dir are not defined in this
# file; presumably both come from the lib.sh sourced above -- confirm.
clone_tests_repo
source "${tests_repo_dir}/.ci/lib.sh"
# The following variables if set on the environment will change the behavior
# of gperf and libseccomp configure scripts, that may lead this script to
# fail. So let's ensure they are unset here.
unset PREFIX DESTDIR
# Machine architecture, used below to pick the CFLAGS for the build.
arch=$(uname -m)
# Scratch directory for downloads and builds; removed by finish() on exit.
workdir="$(mktemp -d --tmpdir build-libseccomp.XXXXX)"
# Variables for libseccomp
# Currently, specify the libseccomp version directly without using `versions.yaml`
# because the current Snap workflow is incomplete.
# After solving the issue, replace this code by using the `versions.yaml`.
# libseccomp_version=$(get_version "externals.libseccomp.version")
# libseccomp_url=$(get_version "externals.libseccomp.url")
libseccomp_version="2.5.1"
libseccomp_url="https://github.com/seccomp/libseccomp"
libseccomp_tarball="libseccomp-${libseccomp_version}.tar.gz"
libseccomp_tarball_url="${libseccomp_url}/releases/download/v${libseccomp_version}/${libseccomp_tarball}"
cflags="-O2"
# Variables for gperf
# Currently, specify the gperf version directly without using `versions.yaml`
# because the current Snap workflow is incomplete.
# After solving the issue, replace this code by using the `versions.yaml`.
# gperf_version=$(get_version "externals.gperf.version")
# gperf_url=$(get_version "externals.gperf.url")
gperf_version="3.1"
gperf_url="https://ftp.gnu.org/gnu/gperf"
gperf_tarball="gperf-${gperf_version}.tar.gz"
gperf_tarball_url="${gperf_url}/${gperf_tarball}"
# We need to build the libseccomp library from sources to create a static library for the musl libc.
# However, ppc64le and s390x have no musl targets in Rust. Hence, we do not set cflags for the musl libc
# on those two architectures and keep the default cflags there.
#
# The two tests are chained directly with `&&`: wrapping them in `( ... )`
# would only fork a needless subshell.
if [ "${arch}" != "ppc64le" ] && [ "${arch}" != "s390x" ]; then
	# Use _FORTIFY_SOURCE=1 because musl libc lacks some of the functions
	# that _FORTIFY_SOURCE=2 relies on.
	cflags="-U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=1 -O2"
fi
# Print all arguments, joined by spaces and prefixed with "[Error] ", to
# stderr, then terminate the script with exit status 1.
die() {
	msg="$*"
	printf '%s\n' "[Error] ${msg}" >&2
	exit 1
}
# Exit handler: delete the temporary build directory ${workdir} and
# everything inside it.
finish() {
	rm -fr "${workdir}"
}

# Run the cleanup whenever the script exits.
trap finish EXIT
# Download, build and install gperf into ${gperf_install_dir}, then append
# its bin directory to PATH.
#
# Reads the script-level variables gperf_version, gperf_tarball and
# gperf_tarball_url, plus gperf_install_dir, which main() declares as a local
# before calling this function. The tarball is downloaded to and unpacked in
# the current directory.
build_and_install_gperf() {
	echo "Build and install gperf version ${gperf_version}"
	mkdir -p "${gperf_install_dir}"
	# --fail makes curl exit non-zero on an HTTP error instead of saving the
	# server's error page as the tarball; -S still reports the error despite -s.
	curl -fsSLO "${gperf_tarball_url}"
	tar -xf "${gperf_tarball}"
	pushd "gperf-${gperf_version}"
	./configure --prefix="${gperf_install_dir}"
	make
	make install
	# Make the freshly installed gperf binary reachable for later build steps.
	export PATH=$PATH:"${gperf_install_dir}"/bin
	popd
	echo "Gperf installed successfully"
}
# Download, build and install libseccomp (including the static library) into
# ${libseccomp_install_dir}.
#
# Reads the script-level variables libseccomp_version, libseccomp_tarball,
# libseccomp_tarball_url and cflags, plus libseccomp_install_dir, which
# main() declares as a local before calling this function. The tarball is
# downloaded to and unpacked in the current directory.
build_and_install_libseccomp() {
	echo "Build and install libseccomp version ${libseccomp_version}"
	mkdir -p "${libseccomp_install_dir}"
	# --fail makes curl exit non-zero on an HTTP error instead of saving the
	# server's error page as the tarball; -S still reports the error despite -s.
	curl -fsSLO "${libseccomp_tarball_url}"
	tar -xf "${libseccomp_tarball}"
	pushd "libseccomp-${libseccomp_version}"
	./configure --prefix="${libseccomp_install_dir}" CFLAGS="${cflags}" --enable-static
	make
	make install
	popd
	echo "Libseccomp installed successfully"
}
# Entry point.
#
# Arguments:
#   $1 - directory libseccomp is installed into
#   $2 - directory gperf is installed into
#
# Both arguments are mandatory; the script dies with a usage message when
# either one is missing or empty. The two locals keep their names because
# the build helpers read them.
main() {
	local libseccomp_install_dir="${1:-}"
	local gperf_install_dir="${2:-}"

	if [[ -z "${libseccomp_install_dir}" || -z "${gperf_install_dir}" ]]; then
		die "Usage: ${0} <libseccomp-install-dir> <gperf-install-dir>"
	fi

	# Do all downloading and building inside the temporary work directory.
	pushd "$workdir"

	# gperf is required for building the libseccomp.
	build_and_install_gperf
	build_and_install_libseccomp

	popd
}
main "$@"

View File

@@ -12,5 +12,5 @@ source "${cidir}/lib.sh"
clone_tests_repo
pushd ${tests_repo_dir}
.ci/install_rust.sh
.ci/install_rust.sh ${1:-}
popd

View File

@@ -6,4 +6,9 @@
#
FROM registry.centos.org/centos:8
RUN yum -y update && yum -y install git sudo wget
RUN yum -y update && \
yum -y install \
git \
sudo \
wget && \
yum clean all

View File

@@ -8,11 +8,14 @@
set -e
cidir=$(dirname "$0")
source "${cidir}/lib.sh"
export CI_JOB="${CI_JOB:-}"
clone_tests_repo
pushd ${tests_repo_dir}
.ci/run.sh
# temporary fix, see https://github.com/kata-containers/tests/issues/3878
[ "$(uname -m)" != "s390x" ] && tracing/test-agent-shutdown.sh
if [ "$(uname -m)" != "s390x" ] && [ "$CI_JOB" == "CRI_CONTAINERD_K8S_MINIMAL" ]; then
tracing/test-agent-shutdown.sh
fi
popd

View File

@@ -86,6 +86,16 @@ One of the `initrd` and `image` options in Kata runtime config file **MUST** be
The main difference between the options is that the size of `initrd`(10MB+) is significantly smaller than
rootfs `image`(100MB+).
## Enable seccomp
Enable seccomp as follows:
```
$ sudo sed -i '/^disable_guest_seccomp/ s/true/false/' /etc/kata-containers/configuration.toml
```
This will pass container seccomp profiles to the kata agent.
## Enable full debug
Enable full debug as follows:
@@ -216,6 +226,18 @@ $ go get -d -u github.com/kata-containers/kata-containers
$ cd $GOPATH/src/github.com/kata-containers/kata-containers/src/agent && make
```
The agent is built with seccomp capability by default.
If you want to build the agent without the seccomp capability, you need to run `make` with `SECCOMP=no` as follows.
```
$ make -C $GOPATH/src/github.com/kata-containers/kata-containers/src/agent SECCOMP=no
```
> **Note:**
>
> - If you enable seccomp in the main configuration file but build the agent without seccomp capability,
> the runtime exits conservatively with an error message.
## Get the osbuilder
```
@@ -234,9 +256,21 @@ the following example.
$ export ROOTFS_DIR=${GOPATH}/src/github.com/kata-containers/kata-containers/tools/osbuilder/rootfs-builder/rootfs
$ sudo rm -rf ${ROOTFS_DIR}
$ cd $GOPATH/src/github.com/kata-containers/kata-containers/tools/osbuilder/rootfs-builder
$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true SECCOMP=no ./rootfs.sh ${distro}'
$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true ./rootfs.sh ${distro}'
```
You MUST choose a distribution (e.g., `ubuntu`) for `${distro}`.
You can list the distributions supported by Kata Containers by running the following command.
```
$ ./rootfs.sh -l
```
If you want to build the agent without seccomp capability, you need to run the `rootfs.sh` script with `SECCOMP=no` as follows.
```
$ script -fec 'sudo -E GOPATH=$GOPATH AGENT_INIT=yes USE_DOCKER=true SECCOMP=no ./rootfs.sh ${distro}'
```
You MUST choose one of `alpine`, `centos`, `clearlinux`, `debian`, `euleros`, `fedora`, `suse`, and `ubuntu` for `${distro}`. By default `seccomp` packages are not included in the rootfs image. Set `SECCOMP` to `yes` to include them.
> **Note:**
>
@@ -272,6 +306,7 @@ $ script -fec 'sudo -E USE_DOCKER=true ./image_builder.sh ${ROOTFS_DIR}'
> - If you do *not* wish to build under Docker, remove the `USE_DOCKER`
> variable in the previous command and ensure the `qemu-img` command is
> available on your system.
> - If `qemu-img` is not installed, you will likely see errors such as `ERROR: File /dev/loop19p1 is not a block device` and `losetup: /tmp/tmp.bHz11oY851: Warning: file is smaller than 512 bytes; the loop device may be useless or invisible for system tools`. These can be mitigated by installing the `qemu-img` command (available in the `qemu-img` package on Fedora or the `qemu-utils` package on Debian).
### Install the rootfs image
@@ -290,12 +325,23 @@ $ (cd /usr/share/kata-containers && sudo ln -sf "$image" kata-containers.img)
$ export ROOTFS_DIR="${GOPATH}/src/github.com/kata-containers/kata-containers/tools/osbuilder/rootfs-builder/rootfs"
$ sudo rm -rf ${ROOTFS_DIR}
$ cd $GOPATH/src/github.com/kata-containers/kata-containers/tools/osbuilder/rootfs-builder
$ script -fec 'sudo -E GOPATH=$GOPATH AGENT_INIT=yes USE_DOCKER=true SECCOMP=no ./rootfs.sh ${distro}'
$ script -fec 'sudo -E GOPATH=$GOPATH AGENT_INIT=yes USE_DOCKER=true ./rootfs.sh ${distro}'
```
`AGENT_INIT` controls if the guest image uses the Kata agent as the guest `init` process. When you create an initrd image,
always set `AGENT_INIT` to `yes`. By default `seccomp` packages are not included in the initrd image. Set `SECCOMP` to `yes` to include them.
always set `AGENT_INIT` to `yes`.
You MUST choose one of `alpine`, `centos`, `clearlinux`, `euleros`, and `fedora` for `${distro}`.
You MUST choose a distribution (e.g., `ubuntu`) for `${distro}`.
You can list the distributions supported by Kata Containers by running the following command.
```
$ ./rootfs.sh -l
```
If you want to build the agent without seccomp capability, you need to run the `rootfs.sh` script with `SECCOMP=no` as follows.
```
$ script -fec 'sudo -E GOPATH=$GOPATH AGENT_INIT=yes USE_DOCKER=true SECCOMP=no ./rootfs.sh ${distro}'
```
> **Note:**
>

View File

@@ -11,6 +11,10 @@ For details of the other Kata Containers repositories, see the
* [Installation guides](./install/README.md): Install and run Kata Containers with Docker or Kubernetes
## Tracing
See the [tracing documentation](tracing.md).
## More User Guides
* [Upgrading](Upgrading.md): how to upgrade from [Clear Containers](https://github.com/clearcontainers) and [runV](https://github.com/hyperhq/runv) to [Kata Containers](https://github.com/kata-containers) and how to upgrade an existing Kata Containers system to the latest version.
@@ -40,6 +44,7 @@ Documents that help to understand and contribute to Kata Containers.
* [Kata Containers Architecture](design/architecture.md): Architectural overview of Kata Containers
* [Kata Containers E2E Flow](design/end-to-end-flow.md): The entire end-to-end flow of Kata Containers
* [Kata Containers design](./design/README.md): More Kata Containers design documents
* [Kata Containers threat model](./threat-model/threat-model.md): Kata Containers threat model
### How to Contribute

View File

@@ -64,7 +64,7 @@
### Check GitHub Actions
We make use of [GitHub actions](https://github.com/features/actions) in this [file](https://github.com/kata-containers/kata-containers/blob/main/.github/workflows/main.yaml) in the `kata-containers/kata-containers` repository to build and upload release artifacts. This action is auto triggered with the above step when a new tag is pushed to the `kata-containers/kata-containers` repository.
We make use of [GitHub actions](https://github.com/features/actions) in this [file](https://github.com/kata-containers/kata-containers/blob/main/.github/workflows/release.yaml) in the `kata-containers/kata-containers` repository to build and upload release artifacts. This action is auto triggered with the above step when a new tag is pushed to the `kata-containers/kata-containers` repository.
Check the [actions status page](https://github.com/kata-containers/kata-containers/actions) to verify all steps in the actions workflow have completed successfully. On success, a static tarball containing Kata release artifacts will be uploaded to the [Release page](https://github.com/kata-containers/kata-containers/releases).

View File

@@ -14,7 +14,7 @@ through the [CRI-O\*](https://github.com/kubernetes-incubator/cri-o) and
Kata Containers creates a QEMU\*/KVM virtual machine for each pod that `kubelet` (Kubernetes) creates.
The [`containerd-shim-kata-v2` (shown as `shimv2` from this point onwards)](../../src/runtime/containerd-shim-v2)
The [`containerd-shim-kata-v2` (shown as `shimv2` from this point onwards)](../../src/runtime/cmd/containerd-shim-kata-v2/)
is the Kata Containers entrypoint, which
implements the [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2) for Kata.
@@ -259,7 +259,7 @@ With `RuntimeClass`, users can define Kata Containers as a `RuntimeClass` and th
## DAX
Kata Containers utilizes the Linux kernel DAX [(Direct Access filesystem)](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/filesystems/dax.txt)
Kata Containers utilizes the Linux kernel DAX [(Direct Access filesystem)](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/filesystems/dax.rst?h=v5.14)
feature to efficiently map some host-side files into the guest VM space.
In particular, Kata Containers uses the QEMU NVDIMM feature to provide a
memory-mapped virtual device that can be used to DAX map the virtual machine's

View File

@@ -12,187 +12,244 @@ The OCI [runtime specification][linux-config] provides guidance on where the con
> [`cgroupsPath`][cgroupspath]: (string, OPTIONAL) path to the cgroups. It can be used to either control the cgroups
> hierarchy for containers or to run a new process in an existing container
cgroups are hierarchical, and this can be seen with the following pod example:
Cgroups are hierarchical, and this can be seen with the following pod example:
- Pod 1: `cgroupsPath=/kubepods/pod1`
- Container 1:
`cgroupsPath=/kubepods/pod1/container1`
- Container 2:
`cgroupsPath=/kubepods/pod1/container2`
- Container 1: `cgroupsPath=/kubepods/pod1/container1`
- Container 2: `cgroupsPath=/kubepods/pod1/container2`
- Pod 2: `cgroupsPath=/kubepods/pod2`
- Container 1:
`cgroupsPath=/kubepods/pod2/container1`
- Container 2:
`cgroupsPath=/kubepods/pod2/container2`
- Container 1: `cgroupsPath=/kubepods/pod2/container1`
- Container 2: `cgroupsPath=/kubepods/pod2/container2`
Depending on the upper-level orchestrator, the cgroup under which the pod is placed is
managed by the orchestrator. In the case of Kubernetes, the pod-cgroup is created by Kubelet,
while the container cgroups are to be handled by the runtime. Kubelet will size the pod-cgroup
based on the container resource requirements.
Depending on the upper-level orchestration layers, the cgroup under which the pod is placed is
managed by the orchestrator or not. In the case of Kubernetes, the pod cgroup is created by Kubelet,
while the container cgroups are to be handled by the runtime.
Kubelet will size the pod cgroup based on the container resource requirements, to which it may add
a configured set of [pod resource overheads](https://kubernetes.io/docs/concepts/scheduling-eviction/pod-overhead/).
Kata Containers introduces a non-negligible overhead for running a sandbox (pod). Based on this, two scenarios are possible:
1) The upper-layer orchestrator takes the overhead of running a sandbox into account when sizing the pod-cgroup, or
2) Kata Containers do not fully constrain the VMM and associated processes, instead placing a subset of them outside of the pod-cgroup.
Kata Containers introduces a non-negligible resource overhead for running a sandbox (pod). Typically, the Kata shim,
through its underlying VMM invocation, will create many additional threads compared to process based container runtimes:
the para-virtualized I/O back-ends, the VMM instance or even the Kata shim process, all of those host processes consume
memory and CPU time not directly tied to the container workload, and introduce a sandbox resource overhead.
In order for a Kata workload to run without significant performance degradation, its sandbox overhead must be
provisioned accordingly. Two scenarios are possible:
Kata Containers provides two options for how cgroups are handled on the host. Selection of these options is done through
the `SandboxCgroupOnly` flag within the Kata Containers [configuration](../../src/runtime/README.md#configuration)
file.
1) The upper-layer orchestrator takes the overhead of running a sandbox into account when sizing the pod cgroup.
For example, Kubernetes [`PodOverhead`](https://kubernetes.io/docs/concepts/scheduling-eviction/pod-overhead/)
feature lets the orchestrator add a configured sandbox overhead to the sum of all its containers' resources. In
that case, the pod sandbox is properly sized and all Kata created processes will run under the pod cgroup
defined constraints and limits.
2) The upper-layer orchestrator does **not** take the sandbox overhead into account and the pod cgroup is not
sized to properly run all Kata created processes. With that scenario, attaching all the Kata processes to the sandbox
cgroup may lead to non-negligible workload performance degradations. As a consequence, Kata Containers will move
all processes but the vCPU threads into a dedicated overhead cgroup under `/kata_overhead`. The Kata runtime will
not apply any constraints or limits to that cgroup, it is up to the infrastructure owner to optionally set it up.
## `SandboxCgroupOnly` enabled
Those 2 scenarios are not dynamically detected by the Kata Containers runtime implementation, and thus the
infrastructure owner must configure the runtime according to how the upper-layer orchestrator creates and sizes the
pod cgroup. That configuration selection is done through the `sandbox_cgroup_only` flag within the Kata Containers
[configuration](../../src/runtime/README.md#configuration) file.
With `SandboxCgroupOnly` enabled, it is expected that the parent cgroup is sized to take the overhead of running
a sandbox into account. This is ideal, as all the applicable Kata Containers components can be placed within the
given cgroup-path.
## `sandbox_cgroup_only = true`
In the context of Kubernetes, Kubelet will size the pod-cgroup to take the overhead of running a Kata-based sandbox
into account. This will be feasible in the 1.16 Kubernetes release through the `PodOverhead` feature.
Setting `sandbox_cgroup_only` to `true` from the Kata Containers configuration file means that the pod cgroup is
properly sized and takes the pod overhead into account. This is ideal, as all the applicable Kata Containers processes
can simply be placed within the given cgroup path.
In the context of Kubernetes, Kubelet can size the pod cgroup to take the overhead of running a Kata-based sandbox
into account. This has been supported since the 1.16 Kubernetes release, through the
[`PodOverhead`](https://kubernetes.io/docs/concepts/scheduling-eviction/pod-overhead/) feature.
```
+----------------------------------------------------------+
| +---------------------------------------------------+ |
| | +---------------------------------------------+ | |
| | | +--------------------------------------+ | | |
| | | | kata-shimv2, VMM and threads: | | | |
| | | | (VMM, IO-threads, vCPU threads, etc)| | | |
| | | | | | | |
| | | | kata_<sandbox-id> | | | |
| | | +--------------------------------------+ | | |
| | | | | |
| | |Pod 1 | | |
| | +---------------------------------------------+ | |
| | | |
| | +---------------------------------------------+ | |
| | | +--------------------------------------+ | | |
| | | | kata-shimv2, VMM and threads: | | | |
| | | | (VMM, IO-threads, vCPU threads, etc)| | | |
| | | | | | | |
| | | | kata_<sandbox-id> | | | |
| | | +--------------------------------------+ | | |
| | |Pod 2 | | |
| | +---------------------------------------------+ | |
| |kubepods | |
| +---------------------------------------------------+ |
| |
|Node |
+----------------------------------------------------------+
┌─────────────────────────────────────────┐
│ │
┌──────────────────────────────────┐ │
│ │
│ ┌─────────────────────────────┐ │ │
│ │ │ │ │
│ │ ┌─────────────────────┐
│ │ │ vCPU threads
│ │ │ I/O threads │ │ │ │
│ │ │ │ VMM
│ │ │ Kata Shim
│ │ │ │ │ │ │
│ │ │ /kata_<sandbox_id>
│ │ │ └─────────────────────┘ │ │
│ │Pod 1 │ │ │
│ └─────────────────────────────┘ │ │
│ │
│ │ ┌─────────────────────────────┐
│ │ │ │
│ │ ┌─────────────────────┐ │ │ │
│ │ │ vCPU threads
│ │ │ │ I/O threads │ │ │
│ │ │ VMM
│ │ │ │ Kata Shim │ │
│ │ │
│ │ │ │ /kata_<sandbox_id> │
│ │ │ └─────────────────────┘ │ │ │
│ │ │Pod 2 │ │ │
│ │ └─────────────────────────────┘ │ │
│ │ │ │
│ │/kubepods │ │
│ └──────────────────────────────────┘ │
│ │
│ Node │
└─────────────────────────────────────────┘
```
### What does Kata do in this configuration?
1. Given a `PodSandbox` container creation, let:
### Implementation details
```
podCgroup=Parent(container.CgroupsPath)
KataSandboxCgroup=<podCgroup>/kata_<PodSandboxID>
```
When `sandbox_cgroup_only` is enabled, the Kata shim will create a per pod
sub-cgroup under the pod's dedicated cgroup. For example, in the Kubernetes context,
it will create a `/kata_<PodSandboxID>` under the `/kubepods` cgroup hierarchy.
On a typical cgroup v1 hierarchy mounted under `/sys/fs/cgroup/`, the memory cgroup
subsystem for a pod with sandbox ID `12345678` would live under
`/sys/fs/cgroup/memory/kubepods/kata_12345678`.
2. Create the cgroup, `KataSandboxCgroup`
In most cases, the `/kata_<PodSandboxID>` created cgroup is unrestricted and inherits and shares all
constraints and limits from the parent cgroup (`/kubepods` in the Kubernetes case). The exception is
for the `cpuset` and `devices` cgroup subsystems, which are managed by the Kata shim.
3. Join the `KataSandboxCgroup`
After creating the `/kata_<PodSandboxID>` cgroup, the Kata Containers shim will move itself to it, **before** starting
the virtual machine. As a consequence all processes subsequently created by the Kata Containers shim (the VMM itself, and
all vCPU and I/O related threads) will be created in the `/kata_<PodSandboxID>` cgroup.
Any process created by the runtime will be created in `KataSandboxCgroup`.
The runtime will limit the cgroup in the host only if the sandbox doesn't have a
container type annotation, but the caller is free to set the proper limits for the `podCgroup`.
### Why create a kata-cgroup under the parent cgroup?
In the example above the pod cgroups are `/kubepods/pod1` and `/kubepods/pod2`.
Kata creates the unrestricted sandbox cgroup under the pod cgroup.
And why not add the per-sandbox shim directly to the pod cgroup (e.g.
`/kubepods` in the Kubernetes context)?
### Why create a Kata-cgroup under the parent cgroup?
The Kata Containers shim implementation creates a per-sandbox cgroup
(`/kata_<PodSandboxID>`) to support the `Docker` use case. Although `Docker` does not
have a notion of pods, Kata Containers still creates a sandbox to support the pod-less,
single container use case that `Docker` implements. Since `Docker` does not create any
cgroup hierarchy to place a container into, it would be very complex for Kata to map
a particular container to its sandbox without placing it under a `/kata_<containerID>`
sub-cgroup first.
`Docker` does not have a notion of pods, and will not create a cgroup directory
to place a particular container in (i.e., all containers would be in a path like
`/docker/container-id`). To simplify the implementation and continue to support `Docker`,
Kata Containers creates the sandbox-cgroup, in the case of Kubernetes, or a container cgroup, in the case
of docker.
### Advantages
### Improvements
Keeping all Kata Containers processes under a properly sized pod cgroup is ideal
and makes for a simpler Kata Containers implementation. It also helps with gathering
accurate statistics and preventing Kata workloads from being noisy neighbors.
- Get statistics about pod resources
#### Pod resources statistics
If the Kata caller wants to know the resource usage on the host it can get
statistics from the pod cgroup. All cgroups stats in the hierarchy will include
the Kata overhead. This gives the possibility of gathering usage statistics at the
pod level and the container level.
- Better host resource isolation
#### Better host resource isolation
Because the Kata runtime will place all the Kata processes in the pod cgroup,
the resource limits that the caller applies to the pod cgroup will affect all
processes that belong to the Kata sandbox in the host. This will improve the
isolation in the host, preventing Kata from becoming a noisy neighbor.
## `SandboxCgroupOnly` disabled (default, legacy)
## `sandbox_cgroup_only = false` (Default setting)
If the cgroup provided to Kata is not sized appropriately, Kata components will
consume resources that the actual container workloads expect to see and use.
This can cause instability and performance degradations.
To avoid that situation, Kata Containers creates an unconstrained overhead
cgroup and moves all non workload related processes (Anything but the virtual CPU
threads) to it. The name of this overhead cgroup is `/kata_overhead` and a per
sandbox sub cgroup will be created under it for each sandbox Kata Containers creates.
Kata Containers does not add any constraints or limitations on the overhead cgroup. It is up to the infrastructure
owner to either:
- Provision nodes with a pre-sized `/kata_overhead` cgroup. Kata Containers will
load that existing cgroup and move all non workload related processes to it.
- Let Kata Containers create the `/kata_overhead` cgroup, leave it
unconstrained or resize it a-posteriori.
If the cgroup provided to Kata is not sized appropriately, instability will be
introduced when fully constraining Kata components, and the user-workload will
see a subset of resources that were requested. Based on this, the default
handling for Kata Containers is to not fully constrain the VMM and Kata
components on the host.
```
+----------------------------------------------------------+
| +---------------------------------------------------+ |
| | +---------------------------------------------+ | |
| | | +--------------------------------------+ | | |
| | | |Container 1 |-|Container 2 | | | |
| | | | |-| | | | |
| | | | Shim+container1 |-| Shim+container2 | | | |
| | | +--------------------------------------+ | | |
| | | | | |
| | |Pod 1 | | |
| | +---------------------------------------------+ | |
| | | |
| | +---------------------------------------------+ | |
| | | +--------------------------------------+ | | |
| | | |Container 1 |-|Container 2 | | | |
| | | | |-| | | | |
| | | | Shim+container1 |-| Shim+container2 | | | |
| | | +--------------------------------------+ | | |
| | | | | |
| | |Pod 2 | | |
| | +---------------------------------------------+ | |
| |kubepods | |
| +---------------------------------------------------+ |
| +---------------------------------------------------+ |
| | Hypervisor | |
| |Kata | |
| +---------------------------------------------------+ |
| |
|Node |
+----------------------------------------------------------+
┌────────────────────────────────────────────────────────────────────┐
│ ┌─────────────────────────────┐ ┌───────────────────────────┐ │
│ │ │ │
┌─────────────────────────┼────┼─────────────────────────┐ │ │
│ │ │ │ │
│ ┌─────────────────────┐ │ │ ┌─────────────────────┐ │ │ │
│ │ vCPU threads │ │ │ │ VMM │ │ │ │
│ │ │ │ │ │ │ │ I/O threads │ │ │ │
│ │ │ │ │ │ │ │ Kata Shim │ │ │ │
│ │ │ │ │ │ │ │ │ │
│ │ /kata_<sandbox_id> │ │ │ │ /<sandbox_id> │ │ │ │
│ └─────────────────────┘ │ │ └─────────────────────┘ │ │ │
│ │ │ │ │
│ Pod 1 │ │ │ │ │
└─────────────────────────┼────┼─────────────────────────┘ │ │
│ │ │ │
│ │ │ │
┌─────────────────────────┼────┼─────────────────────────┐ │ │
│ │ │ │ │
│ ┌─────────────────────┐ │ │ ┌─────────────────────┐ │ │ │
│ │ vCPU threads │ │ │ │ VMM │ │ │ │
│ │ │ │ │ │ I/O threads │ │ │ │
│ │ │ │ │ │ Kata Shim │ │ │ │
│ │ │ │ │ │ │ │ │ │
│ │ /kata_<sandbox_id> │ │ │ │ /<sandbox_id> │ │ │ │
│ └─────────────────────┘ │ │ └─────────────────────┘ │ │ │
│ │ │ │ │ │
Pod 2 │ │ │
│ │ └─────────────────────────┼────┼─────────────────────────┘ │ │
│ │ │ │ │ │
│ │ /kubepods │ │ /kata_overhead │ │
│ └─────────────────────────────┘ └───────────────────────────┘ │
│ │
│ │
│ Node │
└────────────────────────────────────────────────────────────────────┘
```
### What does this method do?
### Implementation Details
1. Given a container creation let `containerCgroupHost=container.CgroupsPath`
1. Rename `containerCgroupHost` path to add `kata_`
1. Let `PodCgroupPath=PodSanboxContainerCgroup` where `PodSanboxContainerCgroup` is the cgroup of a container of type `PodSandbox`
1. Limit the `PodCgroupPath` with the sum of all the container limits in the Sandbox
1. Move only vCPU threads of hypervisor to `PodCgroupPath`
1. Per each container, move its `kata-shim` to its own `containerCgroupHost`
1. Move hypervisor and applicable threads to memory cgroup `/kata`
When `sandbox_cgroup_only` is disabled, the Kata Containers shim will create a per pod
sub-cgroup under the pod's dedicated cgroup, and another one under the overhead cgroup.
For example, in the Kubernetes context, it will create a `/kata_<PodSandboxID>` under
the `/kubepods` cgroup hierarchy, and a `/<PodSandboxID>` under the `/kata_overhead` one.
_Note_: the Kata Containers runtime will not add all the hypervisor threads to
the cgroup path requested, only vCPUs. These threads are run unconstrained.
On a typical cgroup v1 hierarchy mounted under `/sys/fs/cgroup/`, for a pod whose sandbox
ID is `12345678`, created with `sandbox_cgroup_only` disabled, the 2 memory subsystems
for the sandbox cgroup and the overhead cgroup would respectively live under
`/sys/fs/cgroup/memory/kubepods/kata_12345678` and `/sys/fs/cgroup/memory/kata_overhead/12345678`.
This mitigates the risk of the VMM and other threads receiving an out of memory scenario (`OOM`).
Unlike when `sandbox_cgroup_only` is enabled, the Kata Containers shim will move itself
to the overhead cgroup first, and then move the vCPU threads to the sandbox cgroup as
they're created. All Kata processes and threads will run under the overhead cgroup except for
the vCPU threads.
With `sandbox_cgroup_only` disabled, Kata Containers assumes the pod cgroup is only sized
to accommodate the actual container workload processes. For Kata, this maps
to the VMM created virtual CPU threads and so they are the only ones running under the pod
cgroup. This mitigates the risk of the VMM, the Kata shim and the I/O threads going through
a catastrophic out of memory scenario (`OOM`).
#### Impact
#### Pros and Cons
If resources are reserved at a system level to account for the overheads of
running sandbox containers, this configuration can be utilized with adequate
stability. In this scenario, non-negligible amounts of CPU and memory will be
utilized unaccounted for on the host.
Running all non vCPU threads under an unconstrained overhead cgroup could lead to workloads
potentially consuming a large amount of host resources.
[linux-config]: https://github.com/opencontainers/runtime-spec/blob/master/config-linux.md
[cgroupspath]: https://github.com/opencontainers/runtime-spec/blob/master/config-linux.md#cgroups-path
On the other hand, running all non vCPU threads under a dedicated overhead cgroup can provide
accurate metrics on the actual Kata Container pod overhead, allowing for tuning the overhead
cgroup size and constraints accordingly.
[linux-config]: https://github.com/opencontainers/runtime-spec/blob/main/config-linux.md
[cgroupspath]: https://github.com/opencontainers/runtime-spec/blob/main/config-linux.md#cgroups-path
# Supported cgroups
Kata Containers supports cgroups `v1` and `v2`. In the following sections each cgroup is
described briefly and what changes are needed in Kata Containers to support it.
Kata Containers currently only supports cgroups `v1`.
In the following sections each cgroup is described briefly.
## Cgroups V1
@@ -244,7 +301,7 @@ diagram:
A process can join a cgroup by writing its process id (`pid`) to `cgroup.procs` file,
or join a cgroup partially by writing the task (thread) id (`tid`) to the `tasks` file.
Kata Containers supports `v1` by default and no change in the configuration file is needed.
Kata Containers only supports `v1`.
To know more about `cgroups v1`, see [cgroupsv1(7)][2].
## Cgroups V2
@@ -297,22 +354,13 @@ Same as `cgroups v1`, a process can join the cgroup by writing its process id (`
`cgroup.procs` file, or join a cgroup partially by writing the task (thread) id (`tid`) to
`cgroup.threads` file.
For backwards compatibility, Kata Containers defaults to supporting cgroups v1.
To change this to `v2`, set `sandbox_cgroup_only=true` in the `configuration.toml` file.
To know more about `cgroups v2`, see [cgroupsv2(7)][3].
Kata Containers does not support cgroups `v2` on the host.
### Distro Support
Many Linux distributions do not yet support `cgroups v2`, as it is quite a recent addition.
For more information about the status of this feature see [issue #2494][4].
# Summary
| cgroup option | default? | status | pros | cons | cgroups
|-|-|-|-|-|-|
| `SandboxCgroupOnly=false` | yes | legacy | Easiest to make Kata work | Unaccounted for memory and resource utilization | v1
| `SandboxCgroupOnly=true` | no | recommended | Complete tracking of Kata memory and CPU utilization. In Kubernetes, the Kubelet can fully constrain Kata via the pod cgroup | Requires upper layer orchestrator which sizes sandbox cgroup appropriately | v1, v2
[1]: http://man7.org/linux/man-pages/man5/tmpfs.5.html
[2]: http://man7.org/linux/man-pages/man7/cgroups.7.html#CGROUPS_VERSION_1

View File

@@ -207,7 +207,7 @@ Metrics for Firecracker vmm.
| `kata_firecracker_uart`: <br> Metrics specific to the UART device. | `GAUGE` | | <ul><li>`item`<ul><li>`error_count`</li><li>`flush_count`</li><li>`missed_read_count`</li><li>`missed_write_count`</li><li>`read_count`</li><li>`write_count`</li></ul></li><li>`sandbox_id`</li></ul> | 2.0.0 |
| `kata_firecracker_vcpu`: <br> Metrics specific to VCPUs' mode of functioning. | `GAUGE` | | <ul><li>`item`<ul><li>`exit_io_in`</li><li>`exit_io_out`</li><li>`exit_mmio_read`</li><li>`exit_mmio_write`</li><li>`failures`</li><li>`filter_cpuid`</li></ul></li><li>`sandbox_id`</li></ul> | 2.0.0 |
| `kata_firecracker_vmm`: <br> Metrics specific to the machine manager as a whole. | `GAUGE` | | <ul><li>`item`<ul><li>`device_events`</li><li>`panic_count`</li></ul></li><li>`sandbox_id`</li></ul> | 2.0.0 |
| `kata_firecracker_vsock`: <br> Vsock-related metrics. | `GAUGE` | | <ul><li>`item`<ul><li>`activate_fails`</li><li>`cfg_fails`</li><li>`conn_event_fails`</li><li>`conns_added`</li><li>`conns_killed`</li><li>`conns_removed`</li><li>`ev_queue_event_fails`</li><li>`killq_resync`</li><li>`muxer_event_fails`</li><li>`rx_bytes_count`</li><li>`rx_packets_count`</li><li>`rx_queue_event_count`</li><li>`rx_queue_event_fails`</li><li>`rx_read_fails`</li><li>`tx_bytes_count`</li><li>`tx_flush_fails`</li><li>`tx_packets_count`</li><li>`tx_queue_event_count`</li><li>`tx_queue_event_fails`</li><li>`tx_write_fails`</li></ul></li><li>`sandbox_id`</li></ul> | 2.0.0 |
| `kata_firecracker_vsock`: <br> VSOCK-related metrics. | `GAUGE` | | <ul><li>`item`<ul><li>`activate_fails`</li><li>`cfg_fails`</li><li>`conn_event_fails`</li><li>`conns_added`</li><li>`conns_killed`</li><li>`conns_removed`</li><li>`ev_queue_event_fails`</li><li>`killq_resync`</li><li>`muxer_event_fails`</li><li>`rx_bytes_count`</li><li>`rx_packets_count`</li><li>`rx_queue_event_count`</li><li>`rx_queue_event_fails`</li><li>`rx_read_fails`</li><li>`tx_bytes_count`</li><li>`tx_flush_fails`</li><li>`tx_packets_count`</li><li>`tx_queue_event_count`</li><li>`tx_queue_event_fails`</li><li>`tx_write_fails`</li></ul></li><li>`sandbox_id`</li></ul> | 2.0.0 |
### Kata guest OS metrics

View File

@@ -30,7 +30,7 @@ The Kata Containers runtime **MUST** implement the following command line option
The Kata Containers project **MUST** provide two interfaces for CRI shims to manage hardware
virtualization based Kubernetes pods and containers:
- An OCI and `runc` compatible command line interface, as described in the previous section.
This interface is used by implementations such as [`CRI-O`](http://cri-o.io) and [`cri-containerd`](https://github.com/containerd/cri-containerd), for example.
This interface is used by implementations such as [`CRI-O`](http://cri-o.io) and [`containerd`](https://github.com/containerd/containerd), for example.
- A hardware virtualization runtime library API for CRI shims to consume and provide a more
CRI native implementation. The [`frakti`](https://github.com/kubernetes/frakti) CRI shim is an example of such a consumer.

View File

@@ -5,7 +5,7 @@
- [Run Kata containers with `crictl`](run-kata-with-crictl.md)
- [Run Kata Containers with Kubernetes](run-kata-with-k8s.md)
- [How to use Kata Containers and Containerd](containerd-kata.md)
- [How to use Kata Containers and CRI (containerd plugin) with Kubernetes](how-to-use-k8s-with-cri-containerd-and-kata.md)
- [How to use Kata Containers and CRI (containerd) with Kubernetes](how-to-use-k8s-with-cri-containerd-and-kata.md)
- [Kata Containers and service mesh for Kubernetes](service-mesh.md)
- [How to import Kata Containers logs into Fluentd](how-to-import-kata-logs-with-fluentd.md)
@@ -34,3 +34,5 @@
- [How to set sandbox Kata Containers configurations with pod annotations](how-to-set-sandbox-config-kata.md)
- [How to monitor Kata Containers in K8s](how-to-set-prometheus-in-k8s.md)
- [How to use hotplug memory on arm64 in Kata Containers](how-to-hotplug-memory-arm64.md)
- [How to setup swap devices in guest kernel](how-to-setup-swap-devices-in-guest-kernel.md)
- [How to run rootless vmm](how-to-run-rootless-vmm.md)

View File

@@ -39,7 +39,7 @@ use `RuntimeClass` instead of the deprecated annotations.
### Containerd Runtime V2 API: Shim V2 API
The [`containerd-shim-kata-v2` (short as `shimv2` in this documentation)](../../src/runtime/containerd-shim-v2)
The [`containerd-shim-kata-v2` (short as `shimv2` in this documentation)](../../src/runtime/cmd/containerd-shim-kata-v2/)
implements the [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2) for Kata.
With `shimv2`, Kubernetes can launch Pod and OCI-compatible containers with one shim per Pod. Prior to `shimv2`, `2N+1`
shims (i.e. a `containerd-shim` and a `kata-shim` for each container and the Pod sandbox itself) and no standalone `kata-proxy`

View File

@@ -0,0 +1,33 @@
## Introduction
To improve security, Kata Containers supports running the VMM process (currently only QEMU) as a non-`root` user.
This document describes how to enable the rootless VMM mode and its limitations.
## Pre-requisites
The permission and ownership of the `kvm` device node (`/dev/kvm`) need to be configured to:
```
$ crw-rw---- 1 root kvm
```
use the following commands:
```
$ sudo groupadd kvm -r
$ sudo chown root:kvm /dev/kvm
$ sudo chmod 660 /dev/kvm
```
## Configure rootless VMM
By default, the VMM process still runs as the root user. There are two ways to enable rootless VMM:
1. Set the `rootless` flag to `true` in the hypervisor section of `configuration.toml`.
2. Set the Kubernetes annotation `io.katacontainers.hypervisor.rootless` to `true`.
## Implementation details
When the `rootless` flag is enabled, upon a request to create a Pod, the Kata Containers runtime creates a random user and group (e.g. `kata-123`), and uses them to start the hypervisor process.
The `kvm` group is also given to the hypervisor process as a supplemental group to give the hypervisor process access to the `/dev/kvm` device.
Another necessary change is to move the hypervisor runtime files (e.g. `vhost-fs.sock`, `qmp.sock`) to a directory (under `/run/user/[uid]/`) that only the non-root hypervisor can access.
## Limitations
1. Only the VMM process is running as a non-root user. Other processes such as the Kata Containers shimv2 and `virtiofsd` still run as the root user.
2. Currently, this feature is only supported in QEMU. Support for Firecracker and Cloud Hypervisor still needs to be added (see https://github.com/kata-containers/kata-containers/issues/2567).
3. Certain features will not work when rootless VMM is enabled, including:
1. Passing devices to the guest (`virtio-blk`, `virtio-scsi`) will not work if the non-privileged user does not have permission to access it (leading to a permission denied error). A more permissive permission (e.g. 666) may overcome this issue. However, you need to be aware of the potential security implications of reducing the security on such devices.
2. `vfio` device will also not work because of permission denied error.

View File

@@ -34,8 +34,6 @@ There are several kinds of Kata configurations and they are listed below.
| `io.katacontainers.config.agent.enable_tracing` | `boolean` | enable tracing for the agent |
| `io.katacontainers.config.agent.container_pipe_size` | uint32 | specify the size of the std(in/out) pipes created for containers |
| `io.katacontainers.config.agent.kernel_modules` | string | the list of kernel modules and their parameters that will be loaded in the guest kernel. Semicolon separated list of kernel modules and their parameters. These modules will be loaded in the guest kernel using `modprobe`(8). E.g., `e1000e InterruptThrottleRate=3000,3000,3000 EEE=1; i915 enable_ppgtt=0` |
| `io.katacontainers.config.agent.trace_mode` | string | the trace mode for the agent |
| `io.katacontainers.config.agent.trace_type` | string | the trace type for the agent |
## Hypervisor Options
| Key | Value Type | Comments |
@@ -91,6 +89,13 @@ There are several kinds of Kata configurations and they are listed below.
| `io.katacontainers.config.hypervisor.virtio_fs_cache` | string | the cache mode for virtio-fs, valid values are `always`, `auto` and `none` |
| `io.katacontainers.config.hypervisor.virtio_fs_daemon` | string | virtio-fs `vhost-user` daemon path |
| `io.katacontainers.config.hypervisor.virtio_fs_extra_args` | string | extra options passed to `virtiofs` daemon |
| `io.katacontainers.config.hypervisor.enable_guest_swap` | `boolean` | enable swap in the guest |
## Container Options
| Key | Value Type | Comments |
|-------| ----- | ----- |
| `io.katacontainers.container.resource.swappiness` | `uint64` | specify the `Resources.Memory.Swappiness` |
| `io.katacontainers.container.resource.swap_in_bytes` | `uint64` | specify the `Resources.Memory.Swap` |
# CRI-O Configuration
@@ -100,11 +105,12 @@ In case of CRI-O, all annotations specified in the pod spec are passed down to K
For containerd, annotations specified in the pod spec are passed down to Kata
starting with version `1.3.0` of containerd. Additionally, extra configuration is
needed for containerd, by providing a `pod_annotations` field in the containerd config
file. The `pod_annotations` field is a list of annotations that can be passed down to
Kata as OCI annotations. It supports golang match patterns. Since annotations supported
by Kata follow the pattern `io.katacontainers.*`, the following configuration would work
for passing annotations to Kata from containerd:
needed for containerd, by providing `pod_annotations` field and
`container_annotations` field in the containerd config
file. The `pod_annotations` field and `container_annotations` field are two lists of
annotations that can be passed down to Kata as OCI annotations. They support golang match
patterns. Since annotations supported by Kata follow the pattern `io.katacontainers.*`,
the following configuration would work for passing annotations to Kata from containerd:
```
$ cat /etc/containerd/config
@@ -113,6 +119,7 @@ $ cat /etc/containerd/config
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.kata]
runtime_type = "io.containerd.kata.v2"
pod_annotations = ["io.katacontainers.*"]
container_annotations = ["io.katacontainers.*"]
....
```

View File

@@ -0,0 +1,59 @@
# Setup swap device in guest kernel
## Introduction
Setting up a swap device in the guest kernel can help to increase memory capacity, handle some memory issues and sometimes increase file access speed.
Kata Containers can insert a raw file to the guest as the swap device.
## Requisites
The swap config of the containers should be set by [annotations](how-to-set-sandbox-config-kata.md#container-options). So [extra configuration is needed for containerd](how-to-set-sandbox-config-kata.md#containerd-configuration).
Kata Containers only supports setting up a swap device in the guest kernel with QEMU.
Install and setup Kata Containers as shown [here](../install/README.md).
Enable setup swap device in guest kernel as follows:
```
$ sudo sed -i -e 's/^#enable_guest_swap.*$/enable_guest_swap = true/g' /etc/kata-containers/configuration.toml
```
## Run a Kata Container utilizing swap device
Use the following commands to start a Kata Container with swappiness 60 and a 1GB swap device (swap_in_bytes - memory_limit_in_bytes).
```
$ pod_yaml=pod.yaml
$ container_yaml=container.yaml
$ image="quay.io/prometheus/busybox:latest"
$ cat << EOF > "${pod_yaml}"
metadata:
name: busybox-sandbox1
EOF
$ cat << EOF > "${container_yaml}"
metadata:
name: busybox-test-swap
annotations:
io.katacontainers.container.resource.swappiness: "60"
io.katacontainers.container.resource.swap_in_bytes: "2147483648"
linux:
resources:
memory_limit_in_bytes: 1073741824
image:
image: "$image"
command:
- top
EOF
$ sudo crictl pull $image
$ podid=$(sudo crictl runp $pod_yaml)
$ cid=$(sudo crictl create $podid $container_yaml $pod_yaml)
$ sudo crictl start $cid
```
Kata Containers sets up a swap device for this container only when `io.katacontainers.container.resource.swappiness` is set.
The following table shows how the swap size is decided when `io.katacontainers.container.resource.swappiness` is set.
|`io.katacontainers.container.resource.swap_in_bytes`|`memory_limit_in_bytes`|swap size|
|---|---|---|
|set|set| `io.katacontainers.container.resource.swap_in_bytes` - `memory_limit_in_bytes`|
|not set|set| `memory_limit_in_bytes`|
|not set|not set| `io.katacontainers.config.hypervisor.default_memory`|
|set|not set|cgroup doesn't support this usage|

View File

@@ -3,7 +3,7 @@
This document describes how to set up a single-machine Kubernetes (k8s) cluster.
The Kubernetes cluster will use the
[CRI containerd plugin](https://github.com/containerd/containerd/tree/main/pkg/cri) and
[CRI containerd](https://github.com/containerd/containerd/) and
[Kata Containers](https://katacontainers.io) to launch untrusted workloads.
## Requirements
@@ -71,12 +71,12 @@ $ for service in ${services}; do
service_dir="/etc/systemd/system/${service}.service.d/"
sudo mkdir -p ${service_dir}
cat << EOT | sudo tee "${service_dir}/proxy.conf"
cat << EOF | sudo tee "${service_dir}/proxy.conf"
[Service]
Environment="HTTP_PROXY=${http_proxy}"
Environment="HTTPS_PROXY=${https_proxy}"
Environment="NO_PROXY=${no_proxy}"
EOT
EOF
done
$ sudo systemctl daemon-reload
@@ -172,7 +172,7 @@ If a pod has the `runtimeClassName` set to `kata`, the CRI plugin runs the pod w
- Create a pod configuration that uses the Kata Containers runtime
```bash
$ cat << EOT | tee nginx-kata.yaml
$ cat << EOF | tee nginx-kata.yaml
apiVersion: v1
kind: Pod
metadata:
@@ -183,7 +183,7 @@ If a pod has the `runtimeClassName` set to `kata`, the CRI plugin runs the pod w
- name: nginx
image: nginx
EOT
EOF
```
- Create the pod

View File

@@ -22,7 +22,7 @@ This document requires the presence of the ACRN hypervisor and Kata Containers o
- ACRN supported [Hardware](https://projectacrn.github.io/latest/hardware.html#supported-hardware).
> **Note:** Please make sure to have a minimum of 4 logical processors (HT) or cores.
- ACRN [software](https://projectacrn.github.io/latest/tutorials/kbl-nuc-sdc.html#use-the-script-to-set-up-acrn-automatically) setup.
- ACRN [software](https://projectacrn.github.io/latest/tutorials/run_kata_containers.html) setup.
- For networking, ACRN supports either MACVTAP or TAP. If MACVTAP is not enabled in the Service OS, please follow the below steps to update the kernel:
```sh

View File

@@ -16,9 +16,9 @@ from the host, a potentially undesirable side-effect that decreases the security
The following sections document how to configure this behavior in different container runtimes.
#### Containerd and CRI
#### Containerd
The Containerd CRI allows configuring the privileged host devices behavior for each runtime in the CRI config. This is
The Containerd allows configuring the privileged host devices behavior for each runtime in the containerd config. This is
done with the `privileged_without_host_devices` option. Setting this to `true` will disable hot plugging of the host
devices into the guest, even when privileged is enabled.
@@ -41,7 +41,7 @@ See below example config:
```
- [Kata Containers with Containerd and CRI documentation](how-to-use-k8s-with-cri-containerd-and-kata.md)
- [Containerd CRI config documentation](https://github.com/containerd/cri/blob/master/docs/config.md)
- [Containerd CRI config documentation](https://github.com/containerd/containerd/blob/main/docs/cri/config.md)
#### CRI-O

View File

@@ -9,7 +9,7 @@ Kubernetes CRI (Container Runtime Interface) implementations allow using any
OCI-compatible runtime with Kubernetes, such as the Kata Containers runtime.
Kata Containers support both the [CRI-O](https://github.com/kubernetes-incubator/cri-o) and
[CRI-containerd](https://github.com/containerd/cri) CRI implementations.
[containerd](https://github.com/containerd/containerd) CRI implementations.
After choosing one CRI implementation, you must make the appropriate configuration
to ensure it integrates with Kata Containers.
@@ -20,7 +20,7 @@ required to spawn pods and containers, and this is the preferred way to run Kata
An equivalent shim implementation for CRI-O is planned.
### CRI-O
For CRI-O installation instructions, refer to the [CRI-O Tutorial](https://github.com/kubernetes-incubator/cri-o/blob/master/tutorial.md) page.
For CRI-O installation instructions, refer to the [CRI-O Tutorial](https://github.com/cri-o/cri-o/blob/main/tutorial.md) page.
The following sections show how to set up the CRI-O configuration file (default path: `/etc/crio/crio.conf`) for Kata.
@@ -30,7 +30,7 @@ Unless otherwise stated, all the following settings are specific to the `crio.ru
# runtime used and options for how to set up and manage the OCI runtime.
[crio.runtime]
```
A comprehensive documentation of the configuration file can be found [here](https://github.com/cri-o/cri-o/blob/master/docs/crio.conf.5.md).
A comprehensive documentation of the configuration file can be found [here](https://github.com/cri-o/cri-o/blob/main/docs/crio.conf.5.md).
> **Note**: After any change to this file, the CRI-O daemon has to be restarted with:
>````
@@ -111,11 +111,7 @@ manage_ns_lifecycle = true
```
### containerd with CRI plugin
If you select containerd with `cri` plugin, follow the "Getting Started for Developers"
instructions [here](https://github.com/containerd/cri#getting-started-for-developers)
to properly install it.
### containerd
To customize containerd to select Kata Containers runtime, follow our
"Configure containerd to use Kata Containers" internal documentation
@@ -160,7 +156,7 @@ $ sudo systemctl restart kubelet
# If using CRI-O
$ sudo kubeadm init --ignore-preflight-errors=all --cri-socket /var/run/crio/crio.sock --pod-network-cidr=10.244.0.0/16
# If using CRI-containerd
# If using containerd
$ sudo kubeadm init --ignore-preflight-errors=all --cri-socket /run/containerd/containerd.sock --pod-network-cidr=10.244.0.0/16
$ export KUBECONFIG=/etc/kubernetes/admin.conf

View File

@@ -34,7 +34,7 @@ as the proxy starts.
Follow the [instructions](../install/README.md)
to get Kata Containers properly installed and configured with Kubernetes.
You can choose between CRI-O and CRI-containerd, both are supported
You can choose between CRI-O and containerd, both are supported
through this document.
For both cases, select the workloads as _trusted_ by default. This way,
@@ -159,7 +159,7 @@ containers with `privileged: true` to `privileged: false`.
There is no difference between Istio and Linkerd in this section. It is
about which CRI implementation you use.
For both CRI-O and CRI-containerd, you have to add an annotation indicating
For both CRI-O and containerd, you have to add an annotation indicating
the workload for this deployment is not _trusted_, which will trigger
`kata-runtime` to be called instead of `runc`.
@@ -193,9 +193,9 @@ spec:
...
```
__CRI-containerd:__
__containerd:__
Add the following annotation for CRI-containerd
Add the following annotation for containerd
```yaml
io.kubernetes.cri.untrusted-workload: "true"
```

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 150 KiB

View File

@@ -0,0 +1,137 @@
# Kata Containers threat model
This document discusses threat models associated with the Kata Containers project.
Kata was designed to provide additional isolation of container workloads, protecting
the host infrastructure from potentially malicious container users or workloads. Since
Kata Containers adds a level of isolation on top of traditional containers, the focus
is on the additional layer provided, not on traditional container security.
This document provides a brief background on containers and layered security, describes
the interface to Kata from CRI runtimes, a review of utilized virtual machine interfaces, and then
a review of threats.
## Kata security objective
Kata seeks to prevent an untrusted container workload, or a user of that container workload, from gaining
control of, obtaining information from, or tampering with the host infrastructure.
In our scenario, an asset is anything on the host system, or elsewhere in the cluster
infrastructure. The attacker is assumed to be either a malicious user or the workload itself
running within the container. The goal of Kata is to prevent attacks which would allow
any access to the defined assets.
## Background on containers, layered security
Traditional containers leverage several key Linux kernel features to provide isolation and
a view that the container workload is the only entity running on the host. Key features include
`Namespaces`, `cgroups`, `capabilities`, `SELinux` and `seccomp`. The canonical runtime for creating such
a container is `runc`. In the remainder of the document, the term `traditional-container` will be used
to describe a container workload created by runc.
Kata Containers provides a second layer of isolation on top of those provided by traditional-containers.
The hardware virtualization interface is the basis of this additional layer. Kata launches a lightweight
virtual machine, and uses the guest's Linux kernel to create a container workload, or workloads in the case
of multi-container pods. In Kubernetes and in the Kata implementation, the sandbox is carried out at the
pod level. In Kata, this sandbox is created using a virtual machine.
## Interface to Kata Containers: CRI, v2-shim, OCI
A typical Kata Containers deployment uses Kubernetes with a CRI implementation.
On every node, Kubelet will interact with a CRI implementor, which will in turn interface with
an OCI based runtime, such as Kata Containers. Typical CRI implementors are `cri-o` and `containerd`.
The CRI API, as defined at the Kubernetes [CRI-API repo](https://github.com/kubernetes/cri-api/),
results in a few constructs being supported by the CRI implementation, and ultimately in the OCI
runtime creating the workloads.
In order to run a container inside of the Kata sandbox, several virtual machine devices and interfaces
are required. Kata translates sandbox and container definitions to underlying virtualization technologies provided
by a set of virtual machine monitors (VMMs) and hypervisors. These devices and their underlying
implementations are discussed in detail in the following section.
## Interface to the Kata sandbox/virtual machine
In case of Kata, today the devices which we need in the guest are:
- Storage: In the current design of Kata Containers, we are reliant on the CRI implementor to
assist in image handling and volume management on the host. As a result, we need to support a way of passing to the sandbox the container rootfs, volumes requested
by the workload, and any other volumes created to facilitate sharing of secrets and `configmaps` with the containers. Depending on how these are managed, a block based device or file-system
sharing is required. Kata Containers does this by way of `virtio-blk` and/or `virtio-fs`.
- Networking: A method for enabling network connectivity with the workload is required. Typically this will be done by providing a `TAP` device
to the VMM, and this will be exposed to the guest as a `virtio-net` device. It is feasible to pass in a NIC device directly, in which case `VFIO` is leveraged
and the device itself will be exposed to the guest.
- Control: In order to interact with the guest agent and retrieve `STDIO` from containers, a medium of communication is required.
This is available via `virtio-vsock`.
- Devices: `VFIO` is utilized when devices are passed directly to the virtual machine and exposed to the container.
- Dynamic Resource Management: `ACPI` is utilized to allow for dynamic VM resource management (for example: CPU, memory, device hotplug). This is required when containers are resized,
or more generally when containers are added to a pod.
How these devices are utilized varies depending on the VMM utilized. We clarify the default settings provided when integrating Kata
with the QEMU, Firecracker and Cloud Hypervisor VMMs in the following sections.
### Devices
Each virtio device is implemented by a backend, which may execute within userspace on the host (vhost-user), the VMM itself, or within the host kernel (vhost). While they may provide enhanced performance,
vhost devices are often seen as higher risk since an exploit would already be running within the kernel space. While VMM and vhost-user are both in userspace on the host, `vhost-user` generally allows for the back-end process to require fewer system calls and capabilities compared to a full VMM.
#### `virtio-blk` and `virtio-scsi`
The backends for `virtio-blk` and `virtio-scsi` are based in the VMM itself (ring3 in the context of x86) by default for Cloud Hypervisor, Firecracker and QEMU.
While `vhost` based back-ends are available for QEMU, they are not recommended. `vhost-user` back-ends are being added for Cloud Hypervisor, but they are not utilized in Kata today.
#### `virtio-fs`
`virtio-fs` is supported in Cloud Hypervisor and QEMU. `virtio-fs`'s interaction with the host filesystem is done through a vhost-user daemon, `virtiofsd`.
The `virtio-fs` client, running in the guest, will generate requests to access files. `virtiofsd` will receive requests, open the file, and request the VMM
to `mmap` it into the guest. When DAX is utilized, the guest will access the host's page cache, avoiding the need for copy and duplication. DAX is still an experimental feature,
and is not enabled by default.
From the `virtiofsd` [documentation](https://qemu-project.gitlab.io/qemu/tools/virtiofsd.html):
```This program must be run as the root user. Upon startup the program will switch into a new file system namespace with the shared directory tree as its root. This prevents “file system escapes” due to symlinks and other file system objects that might lead to files outside the shared directory. The program also sandboxes itself using seccomp(2) to prevent ptrace(2) and other vectors that could allow an attacker to compromise the system after gaining control of the virtiofsd process.```
DAX-less support for `virtio-fs` is available as of the 5.4 Linux kernel. QEMU VMM supports virtio-fs as of v4.2. Cloud Hypervisor
supports `virtio-fs`.
#### `virtio-net`
`virtio-net` has many options, depending on the VMM and Kata configurations.
##### QEMU networking
While QEMU has options for `vhost`, `virtio-net` and `vhost-user`, the `virtio-net` backend
for Kata defaults to `vhost-net` for performance reasons. The default configuration is being
reevaluated.
##### Firecracker networking
For Firecracker, the `virtio-net` backend is within Firecracker's VMM.
##### Cloud Hypervisor networking
For Cloud Hypervisor, the current backend default is within the VMM. `vhost-user-net` support
is being added (written in rust, Cloud Hypervisor specific).
#### virtio-vsock
##### QEMU vsock
In QEMU, vsock is backed by `vhost_vsock`, which runs within the kernel itself.
##### Firecracker and Cloud Hypervisor
In Firecracker and Cloud Hypervisor, vsock is backed by a unix-domain-socket in the host's userspace.
#### VFIO
Utilizing VFIO, devices can be passed through to the virtual machine. We will assess this separately. Exposure to
host is limited to gaps in device pass-through handling. This is supported in QEMU and Cloud Hypervisor, but not
Firecracker.
#### ACPI
ACPI is necessary for hotplug of CPU, memory and devices. ACPI is available in QEMU and Cloud Hypervisor. Device, CPU and memory hotplug
are not available in Firecracker.
## Devices and threat model
![Threat model](threat-model-boundaries.svg "threat-model")

213
docs/tracing.md Normal file
View File

@@ -0,0 +1,213 @@
# Overview
This document explains how to trace Kata Containers components.
# Introduction
The Kata Containers runtime and agent are able to generate
[OpenTelemetry][opentelemetry] trace spans, which allow the administrator to
observe what those components are doing and how much time they are spending on
each operation.
# OpenTelemetry summary
An OpenTelemetry-enabled application creates a number of trace "spans". A span
contains the following attributes:
- A name
- A pair of timestamps (recording the start time and end time of some operation)
- A reference to the span's parent span
All spans need to be *finished*, or *completed*, to allow the OpenTelemetry
framework to generate the final trace information (by effectively closing the
transaction encompassing the initial (root) span and all its children).
For Kata, the root span represents the total amount of time taken to run a
particular component from startup to its shutdown (the "run time").
# Architecture
## Runtime tracing architecture
The runtime, which runs in the host environment, has been modified to
optionally generate trace spans which are sent to a trace collector on the
host.
## Agent tracing architecture
An OpenTelemetry system (such as [Jaeger][jaeger-tracing]) uses a collector to
gather up trace spans from the application for viewing and processing. For an
application to use the collector, it must run in the same context as
the collector.
This poses a problem for tracing the Kata Containers agent since it does not
run in the same context as the collector: it runs inside a virtual machine (VM).
To allow spans from the agent to be sent to the trace collector, Kata provides
a [trace forwarder][trace-forwarder] component. This runs in the same context
as the collector (generally on the host system) and listens on a
[`VSOCK`][vsock] channel for traces generated by the agent, forwarding them on
to the trace collector.
> **Note:**
>
> This design supports agent tracing without having to make changes to the
> image, and means that [custom images][osbuilder] can also benefit from
> agent tracing.
The following diagram summarises the architecture used to trace the Kata
Containers agent:
```
+--------------------------------------------+
| Host |
| |
| +---------------+ |
| | OpenTelemetry | |
| | Trace | |
| | Collector | |
| +---------------+ |
| ^ +---------------+ |
| | spans | Kata VM | |
| +-----+-----+ | | |
| | Kata | spans o +-------+ | |
| | Trace |<-----------------| Kata | | |
| | Forwarder | VSOCK o | Agent | | |
| +-----------+ Channel | +-------+ | |
| +---------------+ |
+--------------------------------------------+
```
# Agent tracing prerequisites
- You must have a trace collector running.
Although the collector normally runs on the host, it can also be run from
inside a Docker image configured to expose the appropriate host ports to the
collector.
The [Jaeger "all-in-one" Docker image][jaeger-all-in-one] method
is the quickest and simplest way to run the collector for testing.
- If you wish to trace the agent, you must start the
[trace forwarder][trace-forwarder].
> **Notes:**
>
> - If agent tracing is enabled but the forwarder is not running,
> the agent will log an error (signalling that it cannot generate trace
> spans), but continue to work as normal.
>
> - The trace forwarder requires a trace collector (such as Jaeger) to be
> running before it is started. If a collector is not running, the trace
> forwarder will exit with an error.
# Enable tracing
By default, tracing is disabled for all components. To enable _any_ form of
tracing an `enable_tracing` option must be enabled for at least one component.
> **Note:**
>
> Enabling this option will only allow tracing for subsequently
> started containers.
## Enable runtime tracing
To enable runtime tracing, set the tracing option as shown:
```toml
[runtime]
enable_tracing = true
```
## Enable agent tracing
To enable agent tracing, set the tracing option as shown:
```toml
[agent.kata]
enable_tracing = true
```
> **Note:**
>
> If both agent tracing and runtime tracing are enabled, the resulting trace
> spans will be "collated": expanding individual runtime spans in the Jaeger
> web UI will show the agent trace spans resulting from the runtime
> operation.
# Appendices
## Agent tracing requirements
### Host environment
- The host kernel must support the VSOCK socket type.
This will be available if the kernel is built with the
`CONFIG_VHOST_VSOCK` configuration option.
- The VSOCK kernel module must be loaded:
```
$ sudo modprobe vhost_vsock
```
### Guest environment
- The guest kernel must support the VSOCK socket type.
This will be available if the kernel is built with the
`CONFIG_VIRTIO_VSOCKETS` configuration option.
> **Note:** The default Kata Containers guest kernel provides this feature.
## Agent tracing limitations
- Agent tracing is only "completed" when the workload and the Kata agent
process have exited.
Although trace information *can* be inspected before the workload and agent
have exited, it is incomplete. This is shown as `<trace-without-root-span>`
in the Jaeger web UI.
If the workload is still running, the trace transaction -- which spans the entire
runtime of the Kata agent -- will not have been completed. To view the complete
trace details, wait for the workload to end, or stop the container.
## Performance impact
[OpenTelemetry][opentelemetry] is designed for high performance. It combines
the best of two previous generation projects (OpenTracing and OpenCensus) and
uses a very efficient mechanism to capture trace spans. Further, the trace
points inserted into the agent are generated dynamically at compile time. This
is advantageous since new versions of the agent will automatically benefit
from improvements in the tracing infrastructure. Overall, the impact of
enabling runtime and agent tracing should be extremely low.
## Agent shutdown behaviour
In normal operation, the Kata runtime manages the VM shutdown and performs
certain optimisations to speed up this process. However, if agent tracing is
enabled, the agent itself is responsible for shutting down the VM. This is to
ensure all agent trace transactions are completed. This means there will be a
small performance impact for container shutdown when agent tracing is enabled
as the runtime must wait for the VM to shut down fully.
## Set up a tracing development environment
If you want to debug, further develop, or test tracing,
[enabling full debug][enable-full-debug]
is highly recommended. For working with the agent, you may also wish to
[enable a debug console][setup-debug-console]
to allow you to access the VM environment.
[enable-full-debug]: https://github.com/kata-containers/kata-containers/blob/main/docs/Developer-Guide.md#enable-full-debug
[jaeger-all-in-one]: https://www.jaegertracing.io/docs/getting-started/
[jaeger-tracing]: https://www.jaegertracing.io
[opentelemetry]: https://opentelemetry.io
[osbuilder]: https://github.com/kata-containers/kata-containers/blob/main/tools/osbuilder
[setup-debug-console]: https://github.com/kata-containers/kata-containers/blob/main/docs/Developer-Guide.md#set-up-a-debug-console
[trace-forwarder]: /src/trace-forwarder
[vsock]: https://wiki.qemu.org/Features/VirtioVsock

View File

@@ -67,7 +67,7 @@ To use large BARs devices (for example, Nvidia Tesla P100), you need Kata versio
The following configuration in the Kata `configuration.toml` file as shown below can work:
Hotplug for PCI devices by `shpchp` (Linux's SHPC PCI Hotplug driver):
Hotplug for PCI devices by `acpi_pcihp` (Linux's ACPI PCI Hotplug driver):
```
machine_type = "q35"
@@ -91,7 +91,6 @@ The following kernel config options need to be enabled:
```
# Support PCI/PCIe device hotplug (Required for large BARs device)
CONFIG_HOTPLUG_PCI_PCIE=y
CONFIG_HOTPLUG_PCI_SHPC=y
# Support for loading modules (Required for load Nvidia drivers)
CONFIG_MODULES=y

View File

@@ -1,107 +1,113 @@
# Kata Containers with SGX
Intel® Software Guard Extensions (SGX) is a set of instructions that increases the security
Intel Software Guard Extensions (SGX) is a set of instructions that increases the security
of applications code and data, giving them more protections from disclosure or modification.
> **Note:** At the time of writing this document, SGX patches have not landed on the Linux kernel
> project, so specific versions for guest and host kernels must be installed to enable SGX.
This document guides you to run containers with SGX enclaves with Kata Containers in Kubernetes.
## Check if SGX is enabled
## Preconditions
Run the following command to check if your host supports SGX.
* Intel SGX capable bare metal nodes
* Host kernel Linux 5.13 or later with SGX and SGX KVM enabled:
```sh
$ grep -o sgx /proc/cpuinfo
$ grep SGX /boot/config-`uname -r`
CONFIG_X86_SGX=y
CONFIG_X86_SGX_KVM=y
```
Continue to the following section if the output of the above command is empty,
otherwise continue to section [Install Guest kernel with SGX support](#install-guest-kernel-with-sgx-support)
* Kubernetes cluster configured with:
* [`kata-deploy`](https://github.com/kata-containers/kata-containers/tree/main/tools/packaging/kata-deploy) based Kata Containers installation
* [Intel SGX Kubernetes device plugin](https://github.com/intel/intel-device-plugins-for-kubernetes/tree/main/cmd/sgx_plugin#deploying-with-pre-built-images)
## Install Host kernel with SGX support
> Note: Kata Containers supports creating VM sandboxes with Intel® SGX enabled
> using [cloud-hypervisor](https://github.com/cloud-hypervisor/cloud-hypervisor/) VMM only. QEMU support is waiting to get the
> Intel SGX enabled QEMU upstream release.
The following commands were tested on Fedora 32, they might work on other distros too.
## Installation
### Kata Containers Guest Kernel
Follow the instructions to [setup](../../tools/packaging/kernel/README.md#setup-kernel-source-code) and [build](../../tools/packaging/kernel/README.md#build-the-kernel) the experimental guest kernel. Then, install as:
```sh
$ git clone --depth=1 https://github.com/intel/kvm-sgx
$ pushd kvm-sgx
$ cp /boot/config-$(uname -r) .config
$ yes "" | make oldconfig
$ # In the following step, enable: INTEL_SGX and INTEL_SGX_VIRTUALIZATION
$ make menuconfig
$ make -j$(($(nproc)-1)) bzImage
$ make -j$(($(nproc)-1)) modules
$ sudo make modules_install
$ sudo make install
$ popd
$ sudo reboot
$ sudo cp kata-linux-experimental-*/vmlinux /opt/kata/share/kata-containers/vmlinux.sgx
$ sudo sed -i 's|vmlinux.container|vmlinux.sgx|g' \
/opt/kata/share/defaults/kata-containers/configuration-clh.toml
```
> **Notes:**
> * Run: `mokutil --sb-state` to check whether secure boot is enabled, if so, you will need to sign the kernel.
> * You'll lose SGX support when a new distro kernel is installed and the system rebooted.
Once you have restarted your system with the new brand Linux Kernel with SGX support, run
the following command to make sure it's enabled. If the output is empty, go to the BIOS
setup and enable SGX manually.
```sh
$ grep -o sgx /proc/cpuinfo
```
## Install Guest kernel with SGX support
Install the guest kernel in the Kata Containers directory, this way it can be used to run
Kata Containers.
```sh
$ curl -LOk https://github.com/devimc/kvm-sgx/releases/download/v0.0.1/kata-virtiofs-sgx.tar.gz
$ sudo tar -xf kata-virtiofs-sgx.tar.gz -C /usr/share/kata-containers/
$ sudo sed -i 's|kernel =|kernel = "/usr/share/kata-containers/vmlinux-virtiofs-sgx.container"|g' \
/usr/share/defaults/kata-containers/configuration.toml
```
## Run Kata Containers with SGX enabled
### Kata Containers Configuration
Before running a Kata Container make sure that your version of `crio` or `containerd`
supports annotations.
For `containerd` check in `/etc/containerd/config.toml` that the list of `pod_annotations` passed
to the `sandbox` are: `["io.katacontainers.*", "sgx.intel.com/epc"]`.
> `sgx.yaml`
## Usage
With the following sample job deployed using `kubectl apply -f`:
```yaml
apiVersion: v1
kind: Pod
apiVersion: batch/v1
kind: Job
metadata:
name: sgx
annotations:
sgx.intel.com/epc: "32Mi"
name: oesgx-demo-job
labels:
jobgroup: oesgx-demo
spec:
terminationGracePeriodSeconds: 0
runtimeClassName: kata
containers:
- name: c1
image: busybox
command:
- sh
stdin: true
tty: true
volumeMounts:
- mountPath: /dev/sgx/
name: test-volume
volumes:
- name: test-volume
hostPath:
path: /dev/sgx/
type: Directory
template:
metadata:
labels:
jobgroup: oesgx-demo
spec:
runtimeClassName: kata-clh
initContainers:
- name: init-sgx
image: busybox
command: ['sh', '-c', 'mkdir /dev/sgx; ln -s /dev/sgx_enclave /dev/sgx/enclave; ln -s /dev/sgx_provision /dev/sgx/provision']
volumeMounts:
- mountPath: /dev
name: dev-mount
restartPolicy: Never
containers:
-
name: eosgx-demo-job-1
image: oeciteam/oe-helloworld:latest
imagePullPolicy: IfNotPresent
securityContext:
readOnlyRootFilesystem: true
capabilities:
add: ["IPC_LOCK"]
resources:
limits:
sgx.intel.com/epc: "512Ki"
volumes:
- name: dev-mount
hostPath:
path: /dev
```
You'll see the enclave output:
```sh
$ kubectl apply -f sgx.yaml
$ kubectl exec -ti sgx ls /dev/sgx/
enclave provision
$ kubectl logs oesgx-demo-job-wh42g
Hello world from the enclave
Enclave called into host to print: Hello World!
```
The output of the latest command shouldn't be empty, otherwise check
your system environment to make sure SGX is fully supported.
### Notes
[1]: github.com/cloud-hypervisor/cloud-hypervisor/
* The Kata VM's SGX Encrypted Page Cache (EPC) memory size is based on the sum of `sgx.intel.com/epc`
resource requests within the pod.
* `init-sgx` can be removed from the YAML configuration file if the Kata rootfs is modified with the
necessary udev rules.
See the [note on SGX backwards compatibility](https://github.com/intel/intel-device-plugins-for-kubernetes/tree/main/cmd/sgx_plugin#backwards-compatibility-note).
* Intel SGX DCAP attestation is known to work from Kata sandboxes but it comes with one limitation: If
the Intel SGX `aesm` daemon runs on the bare metal node and DCAP `out-of-proc` attestation is used,
containers within the Kata sandbox cannot get access to the host's `/var/run/aesmd/aesm.sock`
because socket passthrough is not supported. An alternative is to deploy the `aesm` daemon as a side-car
container.
* Projects like [Gramine Shielded Containers (GSC)](https://gramine-gsc.readthedocs.io/en/latest/) are
also known to work. For GSC specifically, the Kata guest kernel needs to have `CONFIG_NUMA=y`
enabled and at least one CPU online when running the GSC container.

View File

@@ -7,15 +7,15 @@ edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
serde_json = "1.0.39"
serde_json = "1.0.73"
# slog:
# - Dynamic keys required to allow HashMap keys to be slog::Serialized.
# - The 'max_*' features allow changing the log level at runtime
# (by stopping the compiler from removing log calls).
slog = { version = "2.5.2", features = ["dynamic-keys", "max_level_trace", "release_max_level_info"] }
slog-json = "2.3.0"
slog-async = "2.3.0"
slog-scope = "4.1.2"
slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug"] }
slog-json = "2.4.0"
slog-async = "2.7.0"
slog-scope = "4.4.0"
[dev-dependencies]
tempfile = "3.1.0"
tempfile = "3.2.0"

View File

@@ -59,7 +59,7 @@ parts:
yq_version=3.4.1
yq_url="https://${yq_pkg}/releases/download/${yq_version}/yq_${goos}_${goarch}"
curl -o "${yq_path}" -LSsf "${yq_url}"
curl -o "${yq_path}" -L "${yq_url}"
chmod +x "${yq_path}"
kata_dir=gopath/src/github.com/${SNAPCRAFT_PROJECT_NAME}/${SNAPCRAFT_PROJECT_NAME}
@@ -139,7 +139,7 @@ parts:
cp kata-containers*.img ${kata_image_dir}
runtime:
after: [godeps, image]
after: [godeps, image, cloud-hypervisor]
plugin: nil
build-attributes: [no-patchelf]
override-build: |
@@ -185,6 +185,7 @@ parts:
- flex
override-build: |
yq=${SNAPCRAFT_STAGE}/yq
export PATH="${PATH}:${SNAPCRAFT_STAGE}"
export GOPATH=${SNAPCRAFT_STAGE}/gopath
kata_dir=${GOPATH}/src/github.com/${SNAPCRAFT_PROJECT_NAME}/${SNAPCRAFT_PROJECT_NAME}
versions_file="${kata_dir}/versions.yaml"
@@ -199,10 +200,17 @@ parts:
kata_dir=${GOPATH}/src/github.com/${SNAPCRAFT_PROJECT_NAME}/${SNAPCRAFT_PROJECT_NAME}
cd ${kata_dir}/tools/packaging/kernel
kernel_dir_prefix="kata-linux-"
# Setup and build kernel
./build-kernel.sh -v ${kernel_version} -d setup
kernel_dir_prefix="kata-linux-"
if [ "$(uname -m)" = "x86_64" ]; then
kernel_version="$(${yq} r $versions_file assets.kernel-experimental.tag)"
kernel_version=${kernel_version#v}
kernel_dir_prefix="kata-linux-experimental-"
./build-kernel.sh -e -v ${kernel_version} -d setup
else
./build-kernel.sh -v ${kernel_version} -d setup
fi
cd ${kernel_dir_prefix}*
make -j $(($(nproc)-1)) EXTRAVERSION=".container"
@@ -299,13 +307,13 @@ parts:
| xargs ./configure
# Copy QEMU configurations (Kconfigs)
case "$(branch)" in
case "${branch}" in
"v5.1.0")
cp -a ${kata_dir}/tools/packaging/qemu/default-configs/* default-configs
;;
*)
cp -a ${kata_dir}/tools/packaging/qemu/default-configs/* default-configs/devices/
cp -a ${kata_dir}/tools/packaging/qemu/default-configs/* configs/devices/
;;
esac
@@ -327,6 +335,22 @@ parts:
# Hack: move qemu to /
"snap/kata-containers/current/": "./"
cloud-hypervisor:
plugin: nil
after: [godeps]
override-build: |
export GOPATH=${SNAPCRAFT_STAGE}/gopath
yq=${SNAPCRAFT_STAGE}/yq
kata_dir=${GOPATH}/src/github.com/${SNAPCRAFT_PROJECT_NAME}/${SNAPCRAFT_PROJECT_NAME}
versions_file="${kata_dir}/versions.yaml"
version="$(${yq} r ${versions_file} assets.hypervisor.cloud_hypervisor.version)"
url="https://github.com/cloud-hypervisor/cloud-hypervisor/releases/download/${version}"
curl -L ${url}/cloud-hypervisor-static -o cloud-hypervisor
curl -LO ${url}/clh-remote
install -D cloud-hypervisor ${SNAPCRAFT_PART_INSTALL}/usr/bin/cloud-hypervisor
install -D clh-remote ${SNAPCRAFT_PART_INSTALL}/usr/bin/clh-remote
apps:
runtime:
command: usr/bin/kata-runtime

756
src/agent/Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -6,7 +6,6 @@ edition = "2018"
[dependencies]
oci = { path = "oci" }
logging = { path = "../../pkg/logging" }
rustjail = { path = "rustjail" }
protocols = { path = "protocols" }
lazy_static = "1.3.0"
@@ -20,11 +19,12 @@ scan_fmt = "0.2.3"
scopeguard = "1.0.0"
thiserror = "1.0.26"
regex = "1"
serial_test = "0.5.1"
# Async helpers
async-trait = "0.1.42"
async-recursion = "0.3.2"
futures = "0.3.12"
futures = "0.3.17"
# Async runtime
tokio = { version = "1", features = ["full"] }
@@ -35,21 +35,20 @@ rtnetlink = "0.8.0"
netlink-packet-utils = "0.4.1"
ipnetwork = "0.17.0"
# slog:
# - Dynamic keys required to allow HashMap keys to be slog::Serialized.
# - The 'max_*' features allow changing the log level at runtime
# (by stopping the compiler from removing log calls).
slog = { version = "2.5.2", features = ["dynamic-keys", "max_level_trace", "release_max_level_info"] }
# Note: this crate sets the slog 'max_*' features, which allow the log level
# to be modified at runtime.
logging = { path = "../../pkg/logging" }
slog = "2.5.2"
slog-scope = "4.1.2"
# Redirect ttrpc log calls
slog-stdlog = "4.0.0"
log = "0.4.11"
prometheus = { version = "0.9.0", features = ["process"] }
procfs = "0.7.9"
prometheus = { version = "0.13.0", features = ["process"] }
procfs = "0.12.0"
anyhow = "1.0.32"
cgroups = { package = "cgroups-rs", version = "0.2.5" }
cgroups = { package = "cgroups-rs", version = "0.2.8" }
# Tracing
tracing = "0.1.26"
@@ -58,6 +57,10 @@ tracing-opentelemetry = "0.13.0"
opentelemetry = { version = "0.14.0", features = ["rt-tokio-current-thread"]}
vsock-exporter = { path = "vsock-exporter" }
# Configuration
serde = { version = "1.0.129", features = ["derive"] }
toml = "0.5.8"
[dev-dependencies]
tempfile = "3.1.0"
@@ -70,3 +73,6 @@ members = [
[profile.release]
lto = true
[features]
seccomp = ["rustjail/seccomp"]

View File

@@ -1,202 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@@ -27,6 +27,20 @@ COMMIT_MSG = $(if $(COMMIT),$(COMMIT),unknown)
# Exported to allow cargo to see it
export VERSION_COMMIT := $(if $(COMMIT),$(VERSION)-$(COMMIT),$(VERSION))
EXTRA_RUSTFEATURES :=
##VAR SECCOMP=yes|no define if agent enables seccomp feature
SECCOMP := yes
# Enable seccomp feature of rust build
ifeq ($(SECCOMP),yes)
override EXTRA_RUSTFEATURES += seccomp
endif
ifneq ($(EXTRA_RUSTFEATURES),)
override EXTRA_RUSTFEATURES := --features $(EXTRA_RUSTFEATURES)
endif
include ../../utils.mk
TARGET_PATH = target/$(TRIPLE)/$(BUILD_TYPE)/$(TARGET)
@@ -90,15 +104,14 @@ default: $(TARGET) show-header
$(TARGET): $(GENERATED_CODE) $(TARGET_PATH)
$(TARGET_PATH): $(SOURCES) | show-summary
@RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE)
@RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE) $(EXTRA_RUSTFEATURES)
$(GENERATED_FILES): %: %.in
@sed $(foreach r,$(GENERATED_REPLACEMENTS),-e 's|@$r@|$($r)|g') "$<" > "$@"
##TARGET optimize: optimized build
optimize: $(SOURCES) | show-summary show-header
@RUSTFLAGS="-C link-arg=-s $(EXTRA_RUSTFLAGS) --deny-warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE)
@RUSTFLAGS="-C link-arg=-s $(EXTRA_RUSTFLAGS) --deny-warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE) $(EXTRA_RUSTFEATURES)
##TARGET clippy: run clippy linter
clippy: $(GENERATED_CODE)
@@ -127,7 +140,7 @@ vendor:
#TARGET test: run cargo tests
test:
@cargo test --all --target $(TRIPLE)
@cargo test --all --target $(TRIPLE) $(EXTRA_RUSTFEATURES) -- --nocapture
##TARGET check: run test
check: clippy format

View File

@@ -19,6 +19,7 @@ After that, we drafted the initial code here, and any contributions are welcome.
| I/O stream | :white_check_mark: |
| Cgroups | :white_check_mark: |
| Capabilities, `rlimit`, readonly path, masked path, users | :white_check_mark: |
| Seccomp | :white_check_mark: |
| container stats (`stats_container`) | :white_check_mark: |
| Hooks | :white_check_mark: |
| **Agent Features & APIs** |

View File

@@ -5,7 +5,7 @@ authors = ["The Kata Containers community <kata-dev@lists.katacontainers.io>"]
edition = "2018"
[dependencies]
serde = "1.0.91"
serde_derive = "1.0.91"
serde_json = "1.0.39"
libc = "0.2.58"
serde = "1.0.131"
serde_derive = "1.0.131"
serde_json = "1.0.73"
libc = "0.2.112"

View File

@@ -46,6 +46,7 @@ message Route {
string device = 3;
string source = 4;
uint32 scope = 5;
IPFamily family = 6;
}
message ARPNeighbor {

View File

@@ -23,14 +23,18 @@ scan_fmt = "0.2"
regex = "1.1"
path-absolutize = "1.2.0"
anyhow = "1.0.32"
cgroups = { package = "cgroups-rs", version = "0.2.5" }
cgroups = { package = "cgroups-rs", version = "0.2.8" }
rlimit = "0.5.3"
tokio = { version = "1.2.0", features = ["sync", "io-util", "process", "time", "macros"] }
futures = "0.3"
async-trait = "0.1.31"
inotify = "0.9.2"
libseccomp = { version = "0.1.3", optional = true }
[dev-dependencies]
serial_test = "0.5.0"
tempfile = "3.1.0"
[features]
seccomp = ["libseccomp"]

View File

@@ -25,6 +25,8 @@ use crate::cgroups::mock::Manager as FsManager;
use crate::cgroups::Manager;
use crate::log_child;
use crate::process::Process;
#[cfg(feature = "seccomp")]
use crate::seccomp;
use crate::specconv::CreateOpts;
use crate::{mount, validator};
@@ -151,7 +153,7 @@ lazy_static! {
},
LinuxDevice {
path: "/dev/full".to_string(),
r#type: String::from("c"),
r#type: "c".to_string(),
major: 1,
minor: 7,
file_mode: Some(0o666),
@@ -593,11 +595,22 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
})?;
}
// NoNewPeiviledges, Drop capabilities
// NoNewPrivileges
if oci_process.no_new_privileges {
capctl::prctl::set_no_new_privs().map_err(|_| anyhow!("cannot set no new privileges"))?;
}
// Without NoNewPrivileges, seccomp must be set up before dropping
// capabilities, because the calling thread needs CAP_SYS_ADMIN to
// load the filter.
#[cfg(feature = "seccomp")]
if !oci_process.no_new_privileges {
if let Some(ref scmp) = linux.seccomp {
seccomp::init_seccomp(scmp)?;
}
}
// Drop capabilities
if oci_process.capabilities.is_some() {
let c = oci_process.capabilities.as_ref().unwrap();
capabilities::drop_privileges(cfd_log, c)?;
@@ -641,7 +654,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
let exec_file = Path::new(&args[0]);
log_child!(cfd_log, "process command: {:?}", &args);
if !exec_file.exists() {
find_file(exec_file).ok_or_else(|| anyhow!("the file {} is not exist", &args[0]))?;
find_file(exec_file).ok_or_else(|| anyhow!("the file {} was not found", &args[0]))?;
}
// notify parent that the child's ready to start
@@ -669,6 +682,16 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
unistd::read(fd, &mut buf)?;
}
// With NoNewPrivileges, we should set seccomp as close to
// do_exec as possible in order to reduce the amount of
// system calls in the seccomp profiles.
#[cfg(feature = "seccomp")]
if oci_process.no_new_privileges {
if let Some(ref scmp) = linux.seccomp {
seccomp::init_seccomp(scmp)?;
}
}
do_exec(&args);
}
@@ -833,6 +856,20 @@ impl BaseContainer for LinuxContainer {
}
let linux = spec.linux.as_ref().unwrap();
if p.oci.capabilities.is_none() {
// No capabilities, inherit from container process
let process = spec
.process
.as_ref()
.ok_or_else(|| anyhow!("no process config"))?;
p.oci.capabilities = Some(
process
.capabilities
.clone()
.ok_or_else(|| anyhow!("missing process capabilities"))?,
);
}
let (pfd_log, cfd_log) = unistd::pipe().context("failed to create pipe")?;
let _ = fcntl::fcntl(pfd_log, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC))

View File

@@ -34,6 +34,8 @@ pub mod container;
pub mod mount;
pub mod pipestream;
pub mod process;
#[cfg(feature = "seccomp")]
pub mod seccomp;
pub mod specconv;
pub mod sync;
pub mod sync_with_async;

View File

@@ -3,7 +3,7 @@
// SPDX-License-Identifier: Apache-2.0
//
use anyhow::{anyhow, bail, Context, Result};
use anyhow::{anyhow, Context, Result};
use libc::uid_t;
use nix::errno::Errno;
use nix::fcntl::{self, OFlag};
@@ -19,7 +19,7 @@ use std::fs::{self, OpenOptions};
use std::mem::MaybeUninit;
use std::os::unix;
use std::os::unix::io::RawFd;
use std::path::{Path, PathBuf};
use std::path::{Component, Path, PathBuf};
use path_absolutize::*;
use std::fs::File;
@@ -745,7 +745,7 @@ fn mount_from(
let _ = fs::create_dir_all(&dir).map_err(|e| {
log_child!(
cfd_log,
"creat dir {}: {}",
"create dir {}: {}",
dir.to_str().unwrap(),
e.to_string()
)
@@ -828,18 +828,35 @@ fn default_symlinks() -> Result<()> {
}
Ok(())
}
// dev_rel_path validates a container device path and converts it into a
// path relative to the root directory.
//
// Returns None unless the path lies strictly below "/dev" (so "/dev" itself
// is rejected) and contains no ".." component. Otherwise the leading "/" is
// stripped and the remainder is returned.
fn dev_rel_path(path: &str) -> Option<&Path> {
    let dev_path = Path::new(path);

    // Component-wise checks: "/devfoo" does not start with "/dev", and
    // names such as "a..b" are not parent-directory references.
    let below_dev = dev_path.starts_with("/dev") && dev_path != Path::new("/dev");
    let escapes = dev_path
        .components()
        .any(|part| part == Component::ParentDir);

    if below_dev && !escapes {
        dev_path.strip_prefix("/").ok()
    } else {
        None
    }
}
fn create_devices(devices: &[LinuxDevice], bind: bool) -> Result<()> {
let op: fn(&LinuxDevice) -> Result<()> = if bind { bind_dev } else { mknod_dev };
let op: fn(&LinuxDevice, &Path) -> Result<()> = if bind { bind_dev } else { mknod_dev };
let old = stat::umask(Mode::from_bits_truncate(0o000));
for dev in DEFAULT_DEVICES.iter() {
op(dev)?;
let path = Path::new(&dev.path[1..]);
op(dev, path).context(format!("Creating container device {:?}", dev))?;
}
for dev in devices {
if !dev.path.starts_with("/dev") || dev.path.contains("..") {
let path = dev_rel_path(&dev.path).ok_or_else(|| {
let msg = format!("{} is not a valid device path", dev.path);
bail!(anyhow!(msg));
anyhow!(msg)
})?;
if let Some(dir) = path.parent() {
fs::create_dir_all(dir).context(format!("Creating container device {:?}", dev))?;
}
op(dev)?;
op(dev, path).context(format!("Creating container device {:?}", dev))?;
}
stat::umask(old);
Ok(())
@@ -861,21 +878,21 @@ lazy_static! {
};
}
fn mknod_dev(dev: &LinuxDevice) -> Result<()> {
fn mknod_dev(dev: &LinuxDevice, relpath: &Path) -> Result<()> {
let f = match LINUXDEVICETYPE.get(dev.r#type.as_str()) {
Some(v) => v,
None => return Err(anyhow!("invalid spec".to_string())),
};
stat::mknod(
&dev.path[1..],
relpath,
*f,
Mode::from_bits_truncate(dev.file_mode.unwrap_or(0)),
nix::sys::stat::makedev(dev.major as u64, dev.minor as u64),
)?;
unistd::chown(
&dev.path[1..],
relpath,
Some(Uid::from_raw(dev.uid.unwrap_or(0) as uid_t)),
Some(Gid::from_raw(dev.gid.unwrap_or(0) as uid_t)),
)?;
@@ -883,9 +900,9 @@ fn mknod_dev(dev: &LinuxDevice) -> Result<()> {
Ok(())
}
fn bind_dev(dev: &LinuxDevice) -> Result<()> {
fn bind_dev(dev: &LinuxDevice, relpath: &Path) -> Result<()> {
let fd = fcntl::open(
&dev.path[1..],
relpath,
OFlag::O_RDWR | OFlag::O_CREAT,
Mode::from_bits_truncate(0o644),
)?;
@@ -894,7 +911,7 @@ fn bind_dev(dev: &LinuxDevice) -> Result<()> {
mount(
Some(&*dev.path),
&dev.path[1..],
relpath,
None::<&str>,
MsFlags::MS_BIND,
None::<&str>,
@@ -1258,11 +1275,12 @@ mod tests {
uid: Some(unistd::getuid().as_raw()),
gid: Some(unistd::getgid().as_raw()),
};
let path = Path::new("fifo");
let ret = mknod_dev(&dev);
let ret = mknod_dev(&dev, path);
assert!(ret.is_ok(), "Should pass. Got: {:?}", ret);
let ret = stat::stat("fifo");
let ret = stat::stat(path);
assert!(ret.is_ok(), "Should pass. Got: {:?}", ret);
}
#[test]
@@ -1379,4 +1397,26 @@ mod tests {
assert!(result == t.result, "{}", msg);
}
}
// Checks that dev_rel_path() accepts only absolute paths strictly below
// "/dev" without ".." components, and that it returns them relative to "/".
#[test]
fn test_dev_rel_path() {
// Valid device paths
assert_eq!(dev_rel_path("/dev/sda").unwrap(), Path::new("dev/sda"));
assert_eq!(dev_rel_path("//dev/sda").unwrap(), Path::new("dev/sda"));
assert_eq!(
dev_rel_path("/dev/vfio/99").unwrap(),
Path::new("dev/vfio/99")
);
// Names that merely contain dots are not parent-directory references
assert_eq!(dev_rel_path("/dev/...").unwrap(), Path::new("dev/..."));
assert_eq!(dev_rel_path("/dev/a..b").unwrap(), Path::new("dev/a..b"));
assert_eq!(dev_rel_path("/dev//foo").unwrap(), Path::new("dev/foo"));
// Bad device paths
assert!(dev_rel_path("/devfoo").is_none());
assert!(dev_rel_path("/etc/passwd").is_none());
assert!(dev_rel_path("/dev/../etc/passwd").is_none());
assert!(dev_rel_path("dev/foo").is_none());
assert!(dev_rel_path("").is_none());
assert!(dev_rel_path("/dev").is_none());
}
}

View File

@@ -24,6 +24,16 @@ use tokio::io::{split, ReadHalf, WriteHalf};
use tokio::sync::Mutex;
use tokio::sync::Notify;
// close_process_stream closes one optional stream field of a process.
//
// Arguments:
// - $self: the value owning the stream field.
// - $stream: name of the Option field holding the file descriptor.
// - $stream_type: the StreamType variant that corresponds to the field.
//
// If the field is set, close_stream() is called for the stream type, the
// file descriptor is closed (an error from close is deliberately ignored)
// and the field is reset to None. If the field is None nothing happens.
macro_rules! close_process_stream {
($self: ident, $stream:ident, $stream_type: ident) => {
if $self.$stream.is_some() {
$self.close_stream(StreamType::$stream_type);
let _ = unistd::close($self.$stream.unwrap());
$self.$stream = None;
}
};
}
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub enum StreamType {
Stdin,
@@ -147,6 +157,22 @@ impl Process {
notify.notify_one();
}
// close_stdin closes the term_master and parent_stdin streams, if they
// are set, and then calls notify_term_close().
pub fn close_stdin(&mut self) {
close_process_stream!(self, term_master, TermMaster);
close_process_stream!(self, parent_stdin, ParentStdin);
self.notify_term_close();
}
// cleanup_process_stream closes every parent-side stream that is still set
// (parent_stdin, parent_stdout, parent_stderr and term_master) and then
// calls notify_term_close().
pub fn cleanup_process_stream(&mut self) {
close_process_stream!(self, parent_stdin, ParentStdin);
close_process_stream!(self, parent_stdout, ParentStdout);
close_process_stream!(self, parent_stderr, ParentStderr);
close_process_stream!(self, term_master, TermMaster);
self.notify_term_close();
}
fn get_fd(&self, stream_type: &StreamType) -> Option<RawFd> {
match stream_type {
StreamType::Stdin => self.stdin,

View File

@@ -0,0 +1,237 @@
// Copyright 2021 Sony Group Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
use anyhow::{anyhow, Result};
use libseccomp::*;
use oci::{LinuxSeccomp, LinuxSeccompArg};
use std::str::FromStr;
// get_filter_attr_from_flag maps a seccomp flag name to the matching
// libseccomp filter attribute. Unknown flag names yield an error.
fn get_filter_attr_from_flag(flag: &str) -> Result<ScmpFilterAttr> {
    let attr = match flag {
        "SECCOMP_FILTER_FLAG_TSYNC" => ScmpFilterAttr::CtlTsync,
        "SECCOMP_FILTER_FLAG_LOG" => ScmpFilterAttr::CtlLog,
        "SECCOMP_FILTER_FLAG_SPEC_ALLOW" => ScmpFilterAttr::CtlSsb,
        _ => return Err(anyhow!("Invalid seccomp flag")),
    };

    Ok(attr)
}
// get_rule_conditions gets rule conditions for a system call from the args.
//
// Each seccomp argument is converted into one libseccomp argument
// comparison, preserving the order of `args`.
//
// Errors:
// - an argument with an empty operator string is rejected;
// - a failure to parse the operator string is propagated to the caller.
fn get_rule_conditions(args: &[LinuxSeccompArg]) -> Result<Vec<ScmpArgCompare>> {
    // Exactly one condition is produced per argument.
    let mut conditions: Vec<ScmpArgCompare> = Vec::with_capacity(args.len());

    for arg in args {
        if arg.op.is_empty() {
            return Err(anyhow!("seccomp operator is required"));
        }

        let cond = ScmpArgCompare::new(
            arg.index,
            ScmpCompareOp::from_str(&arg.op)?,
            arg.value,
            Some(arg.value_two),
        );

        conditions.push(cond);
    }

    Ok(conditions)
}
// init_seccomp creates a seccomp filter and loads it for the current process
// including all the child processes.
//
// The filter is built from the seccomp configuration `scmp`: its default
// action, extra architectures, per-syscall rules and flags, in that order.
// Any error while parsing the configuration or while calling into
// libseccomp is returned to the caller.
pub fn init_seccomp(scmp: &LinuxSeccomp) -> Result<()> {
// EPERM is supplied as the errno argument for the default action.
let def_action = ScmpAction::from_str(scmp.default_action.as_str(), Some(libc::EPERM as u32))?;
// Create a new filter context
let mut filter = ScmpFilterContext::new_filter(def_action)?;
// Add extra architectures
for arch in &scmp.architectures {
let scmp_arch = ScmpArch::from_str(arch)?;
filter.add_arch(scmp_arch)?;
}
// Unset no new privileges bit
// NOTE(review): presumably so that loading the filter does not itself
// enable no_new_privs, leaving that decision to the caller - confirm.
filter.set_no_new_privs_bit(false)?;
// Add a rule for each system call
for syscall in &scmp.syscalls {
if syscall.names.is_empty() {
return Err(anyhow!("syscall name is required"));
}
let action = ScmpAction::from_str(&syscall.action, Some(syscall.errno_ret))?;
// A rule whose action equals the default action is skipped.
if action == def_action {
continue;
}
for name in &syscall.names {
let syscall_num = get_syscall_from_name(name, None)?;
// Without argument conditions the rule applies to every call of
// the syscall; otherwise the conditions are attached to the rule.
if syscall.args.is_empty() {
filter.add_rule(action, syscall_num, None)?;
} else {
let conditions = get_rule_conditions(&syscall.args)?;
filter.add_rule(action, syscall_num, Some(&conditions))?;
}
}
}
// Set filter attributes for each seccomp flag
for flag in &scmp.flags {
let scmp_attr = get_filter_attr_from_flag(flag)?;
filter.set_filter_attr(scmp_attr, 1)?;
}
// Load the filter
filter.load()?;
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use crate::skip_if_not_root;
use libc::{dup3, process_vm_readv, EPERM, O_CLOEXEC};
use std::io::Error;
use std::ptr::null;
// syscall_assert evaluates the syscall expression $e1 and asserts on its
// outcome: $e2 must be 0 when $e1 is non-negative, or the negated OS error
// number reported by Error::last_os_error() when $e1 is negative.
macro_rules! syscall_assert {
($e1: expr, $e2: expr) => {
let mut errno: i32 = 0;
if $e1 < 0 {
errno = -Error::last_os_error().raw_os_error().unwrap();
}
assert_eq!(errno, $e2);
};
}
// Checks the flag-name to filter-attribute mapping. The lookup is a pure
// function that touches no kernel state, so the test does not need root
// and is never skipped.
#[test]
fn test_get_filter_attr_from_flag() {
    // Every supported flag maps to its filter attribute
    assert_eq!(
        get_filter_attr_from_flag("SECCOMP_FILTER_FLAG_TSYNC").unwrap(),
        ScmpFilterAttr::CtlTsync
    );
    assert_eq!(
        get_filter_attr_from_flag("SECCOMP_FILTER_FLAG_LOG").unwrap(),
        ScmpFilterAttr::CtlLog
    );
    assert_eq!(
        get_filter_attr_from_flag("SECCOMP_FILTER_FLAG_SPEC_ALLOW").unwrap(),
        ScmpFilterAttr::CtlSsb
    );

    // Unknown flags are rejected
    assert!(get_filter_attr_from_flag("ERROR").is_err());
}
// Loads a seccomp profile into the test process with init_seccomp() and
// then checks, via syscall_assert!, which syscalls are allowed and which
// fail with the configured errno. Guarded by skip_if_not_root!().
#[test]
fn test_init_seccomp() {
skip_if_not_root!();
let data = r#"{
"defaultAction": "SCMP_ACT_ALLOW",
"architectures": [
],
"flags": [
"SECCOMP_FILTER_FLAG_LOG"
],
"syscalls": [
{
"names": [
"dup3"
],
"action": "SCMP_ACT_ERRNO"
},
{
"names": [
"process_vm_readv"
],
"action": "SCMP_ACT_ERRNO",
"errnoRet": 111,
"args": [
{
"index": 0,
"value": 10,
"op": "SCMP_CMP_EQ"
}
]
},
{
"names": [
"process_vm_readv"
],
"action": "SCMP_ACT_ERRNO",
"errnoRet": 111,
"args": [
{
"index": 0,
"value": 20,
"op": "SCMP_CMP_EQ"
}
]
},
{
"names": [
"process_vm_readv"
],
"action": "SCMP_ACT_ERRNO",
"errnoRet": 222,
"args": [
{
"index": 0,
"value": 30,
"op": "SCMP_CMP_EQ"
},
{
"index": 2,
"value": 40,
"op": "SCMP_CMP_EQ"
}
]
}
]
}"#;
let mut scmp: oci::LinuxSeccomp = serde_json::from_str(data).unwrap();
// The architecture list is left empty in the JSON above and is filled in
// here according to the endianness of the build target.
let mut arch: Vec<oci::Arch>;
if cfg!(target_endian = "little") {
// For little-endian architectures
arch = vec![
"SCMP_ARCH_X86".to_string(),
"SCMP_ARCH_X32".to_string(),
"SCMP_ARCH_X86_64".to_string(),
"SCMP_ARCH_AARCH64".to_string(),
"SCMP_ARCH_ARM".to_string(),
"SCMP_ARCH_PPC64LE".to_string(),
];
} else {
// For big-endian architectures
arch = vec!["SCMP_ARCH_S390X".to_string()];
}
scmp.architectures.append(&mut arch);
// From here on the filter is active for the calling test process.
init_seccomp(&scmp).unwrap();
// Basic syscall with simple rule
syscall_assert!(unsafe { dup3(0, 1, O_CLOEXEC) }, -EPERM);
// Syscall with permitted arguments
syscall_assert!(unsafe { process_vm_readv(1, null(), 0, null(), 0, 0) }, 0);
// Multiple arguments with OR rules with ERRNO
syscall_assert!(
unsafe { process_vm_readv(10, null(), 0, null(), 0, 0) },
-111
);
syscall_assert!(
unsafe { process_vm_readv(20, null(), 0, null(), 0, 0) },
-111
);
// Multiple arguments with AND rules with ERRNO
syscall_assert!(unsafe { process_vm_readv(30, null(), 0, null(), 0, 0) }, 0);
syscall_assert!(
unsafe { process_vm_readv(30, null(), 40, null(), 0, 0) },
-222
);
}
}

View File

@@ -0,0 +1,41 @@
# This is an agent configuration file example.
dev_mode = true
server_addr = 'vsock://8:2048'
[endpoints]
# All endpoints are allowed
allowed = [
"AddARPNeighborsRequest",
"AddSwapRequest",
"CloseStdinRequest",
"CopyFileRequest",
"CreateContainerRequest",
"CreateSandboxRequest",
"DestroySandboxRequest",
"ExecProcessRequest",
"GetMetricsRequest",
"GetOOMEventRequest",
"GuestDetailsRequest",
"ListInterfacesRequest",
"ListRoutesRequest",
"MemHotplugByProbeRequest",
"OnlineCPUMemRequest",
"PauseContainerRequest",
"PullImageRequest",
"ReadStreamRequest",
"RemoveContainerRequest",
"ReseedRandomDevRequest",
"ResumeContainerRequest",
"SetGuestDateTimeRequest",
"SignalProcessRequest",
"StartContainerRequest",
"StartTracingRequest",
"StatsContainerRequest",
"StopTracingRequest",
"TtyWinResizeRequest",
"UpdateContainerRequest",
"UpdateInterfaceRequest",
"UpdateRoutesRequest",
"WaitProcessRequest",
"WriteStreamRequest"
]

View File

@@ -2,10 +2,13 @@
//
// SPDX-License-Identifier: Apache-2.0
//
use crate::tracer;
use crate::rpc;
use anyhow::{bail, ensure, Context, Result};
use serde::Deserialize;
use std::collections::HashSet;
use std::env;
use std::fs;
use std::str::FromStr;
use std::time;
use tracing::instrument;
@@ -19,6 +22,7 @@ const DEBUG_CONSOLE_VPORT_OPTION: &str = "agent.debug_console_vport";
const LOG_VPORT_OPTION: &str = "agent.log_vport";
const CONTAINER_PIPE_SIZE_OPTION: &str = "agent.container_pipe_size";
const UNIFIED_CGROUP_HIERARCHY_OPTION: &str = "agent.unified_cgroup_hierarchy";
const CONFIG_FILE: &str = "agent.config_file";
const DEFAULT_LOG_LEVEL: slog::Level = slog::Level::Info;
const DEFAULT_HOTPLUG_TIMEOUT: time::Duration = time::Duration::from_secs(3);
@@ -29,7 +33,7 @@ const VSOCK_PORT: u16 = 1024;
// Environment variables used for development and testing
const SERVER_ADDR_ENV_VAR: &str = "KATA_AGENT_SERVER_ADDR";
const LOG_LEVEL_ENV_VAR: &str = "KATA_AGENT_LOG_LEVEL";
const TRACE_TYPE_ENV_VAR: &str = "KATA_AGENT_TRACE_TYPE";
const TRACING_ENV_VAR: &str = "KATA_AGENT_TRACING";
const ERR_INVALID_LOG_LEVEL: &str = "invalid log level";
const ERR_INVALID_LOG_LEVEL_PARAM: &str = "invalid log level parameter";
@@ -47,6 +51,17 @@ const ERR_INVALID_CONTAINER_PIPE_SIZE_PARAM: &str = "unable to parse container p
const ERR_INVALID_CONTAINER_PIPE_SIZE_KEY: &str = "invalid container pipe size key name";
const ERR_INVALID_CONTAINER_PIPE_NEGATIVE: &str = "container pipe size should not be negative";
// EndpointsConfig is the deserializable form of the endpoints
// configuration: a list of allowed endpoint names.
#[derive(Debug, Default, Deserialize)]
pub struct EndpointsConfig {
// Names of the allowed endpoints.
pub allowed: Vec<String>,
}
// AgentEndpoints holds the endpoint restrictions stored in AgentConfig.
#[derive(Debug, Default)]
pub struct AgentEndpoints {
// Set of allowed endpoint names.
pub allowed: HashSet<String>,
// NOTE(review): presumably true when every endpoint is allowed, making
// lookups in `allowed` unnecessary - confirm against the code that sets it.
pub all_allowed: bool,
}
#[derive(Debug)]
pub struct AgentConfig {
pub debug_console: bool,
@@ -58,7 +73,38 @@ pub struct AgentConfig {
pub container_pipe_size: i32,
pub server_addr: String,
pub unified_cgroup_hierarchy: bool,
pub tracing: tracer::TraceType,
pub tracing: bool,
pub endpoints: AgentEndpoints,
pub supports_seccomp: bool,
}
// AgentConfigBuilder is the deserialization target used when parsing an
// agent configuration string (see the FromStr implementation for
// AgentConfig). Every field is optional: a field left as None keeps the
// corresponding AgentConfig default, a Some value overrides it.
#[derive(Debug, Deserialize)]
pub struct AgentConfigBuilder {
pub debug_console: Option<bool>,
pub dev_mode: Option<bool>,
// Kept as a string here; it is converted with logrus_to_slog_level()
// when the override is applied.
pub log_level: Option<String>,
pub hotplug_timeout: Option<time::Duration>,
pub debug_console_vport: Option<i32>,
pub log_vport: Option<i32>,
pub container_pipe_size: Option<i32>,
pub server_addr: Option<String>,
pub unified_cgroup_hierarchy: Option<bool>,
pub tracing: Option<bool>,
// Optional list of allowed endpoints.
pub endpoints: Option<EndpointsConfig>,
}
// config_override copies one optional field from a builder value into the
// field of the same name on a config value.
//
// Both arms do nothing when the builder field is None.
macro_rules! config_override {
// Three-argument form: assign the builder value as-is.
($builder:ident, $config:ident, $field:ident) => {
if let Some(v) = $builder.$field {
$config.$field = v;
}
};
// Four-argument form: pass a reference to the builder value through
// `$func` first. The expansion uses `?`, so it can only be invoked inside
// a function whose return type can carry `$func`'s error.
($builder:ident, $config:ident, $field:ident, $func: ident) => {
if let Some(v) = $builder.$field {
$config.$field = $func(&v)?;
}
};
}
// parse_cmdline_param parses command-line parameters.
@@ -91,8 +137,8 @@ macro_rules! parse_cmdline_param {
};
}
impl AgentConfig {
pub fn new() -> AgentConfig {
impl Default for AgentConfig {
fn default() -> Self {
AgentConfig {
debug_console: false,
dev_mode: false,
@@ -103,34 +149,84 @@ impl AgentConfig {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: format!("{}:{}", VSOCK_ADDR, VSOCK_PORT),
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
tracing: false,
endpoints: Default::default(),
supports_seccomp: rpc::have_seccomp(),
}
}
}
impl FromStr for AgentConfig {
type Err = anyhow::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let agent_config_builder: AgentConfigBuilder =
toml::from_str(s).map_err(anyhow::Error::new)?;
let mut agent_config: AgentConfig = Default::default();
// Overwrite default values with the configuration files ones.
config_override!(agent_config_builder, agent_config, debug_console);
config_override!(agent_config_builder, agent_config, dev_mode);
config_override!(
agent_config_builder,
agent_config,
log_level,
logrus_to_slog_level
);
config_override!(agent_config_builder, agent_config, hotplug_timeout);
config_override!(agent_config_builder, agent_config, debug_console_vport);
config_override!(agent_config_builder, agent_config, log_vport);
config_override!(agent_config_builder, agent_config, container_pipe_size);
config_override!(agent_config_builder, agent_config, server_addr);
config_override!(agent_config_builder, agent_config, unified_cgroup_hierarchy);
config_override!(agent_config_builder, agent_config, tracing);
// Populate the allowed endpoints hash set, if we got any from the config file.
if let Some(endpoints) = agent_config_builder.endpoints {
for ep in endpoints.allowed {
agent_config.endpoints.allowed.insert(ep);
}
}
Ok(agent_config)
}
}
impl AgentConfig {
#[instrument]
pub fn parse_cmdline(&mut self, file: &str) -> Result<()> {
pub fn from_cmdline(file: &str) -> Result<AgentConfig> {
let mut config: AgentConfig = Default::default();
let cmdline = fs::read_to_string(file)?;
let params: Vec<&str> = cmdline.split_ascii_whitespace().collect();
for param in params.iter() {
// If we get a configuration file path from the command line, we
// generate our config from it.
// The agent will fail to start if the configuration file is not present,
// or if it can't be parsed properly.
if param.starts_with(format!("{}=", CONFIG_FILE).as_str()) {
let config_file = get_string_value(param)?;
return AgentConfig::from_config_file(&config_file);
}
// parse cmdline flags
parse_cmdline_param!(param, DEBUG_CONSOLE_FLAG, self.debug_console);
parse_cmdline_param!(param, DEV_MODE_FLAG, self.dev_mode);
parse_cmdline_param!(param, DEBUG_CONSOLE_FLAG, config.debug_console);
parse_cmdline_param!(param, DEV_MODE_FLAG, config.dev_mode);
// Support "bare" tracing option for backwards compatibility with
// Kata 1.x.
if param == &TRACE_MODE_OPTION {
self.tracing = tracer::TraceType::Isolated;
config.tracing = true;
continue;
}
parse_cmdline_param!(param, TRACE_MODE_OPTION, self.tracing, get_trace_type);
parse_cmdline_param!(param, TRACE_MODE_OPTION, config.tracing, get_bool_value);
// parse cmdline options
parse_cmdline_param!(param, LOG_LEVEL_OPTION, self.log_level, get_log_level);
parse_cmdline_param!(param, LOG_LEVEL_OPTION, config.log_level, get_log_level);
parse_cmdline_param!(
param,
SERVER_ADDR_OPTION,
self.server_addr,
config.server_addr,
get_string_value
);
@@ -138,7 +234,7 @@ impl AgentConfig {
parse_cmdline_param!(
param,
HOTPLUG_TIMOUT_OPTION,
self.hotplug_timeout,
config.hotplug_timeout,
get_hotplug_timeout,
|hotplug_timeout: time::Duration| hotplug_timeout.as_secs() > 0
);
@@ -147,14 +243,14 @@ impl AgentConfig {
parse_cmdline_param!(
param,
DEBUG_CONSOLE_VPORT_OPTION,
self.debug_console_vport,
config.debug_console_vport,
get_vsock_port,
|port| port > 0
);
parse_cmdline_param!(
param,
LOG_VPORT_OPTION,
self.log_vport,
config.log_vport,
get_vsock_port,
|port| port > 0
);
@@ -162,34 +258,47 @@ impl AgentConfig {
parse_cmdline_param!(
param,
CONTAINER_PIPE_SIZE_OPTION,
self.container_pipe_size,
config.container_pipe_size,
get_container_pipe_size
);
parse_cmdline_param!(
param,
UNIFIED_CGROUP_HIERARCHY_OPTION,
self.unified_cgroup_hierarchy,
config.unified_cgroup_hierarchy,
get_bool_value
);
}
if let Ok(addr) = env::var(SERVER_ADDR_ENV_VAR) {
self.server_addr = addr;
config.server_addr = addr;
}
if let Ok(addr) = env::var(LOG_LEVEL_ENV_VAR) {
if let Ok(level) = logrus_to_slog_level(&addr) {
self.log_level = level;
config.log_level = level;
}
}
if let Ok(value) = env::var(TRACE_TYPE_ENV_VAR) {
if let Ok(result) = value.parse::<tracer::TraceType>() {
self.tracing = result;
}
if let Ok(value) = env::var(TRACING_ENV_VAR) {
let name_value = format!("{}={}", TRACING_ENV_VAR, value);
config.tracing = get_bool_value(&name_value)?;
}
Ok(())
// We did not get a configuration file: allow all endpoints.
config.endpoints.all_allowed = true;
Ok(config)
}
// Read the file at `file` and parse its contents as an agent
// configuration. Fails if the file cannot be read or cannot be parsed.
#[instrument]
pub fn from_config_file(file: &str) -> Result<AgentConfig> {
    let contents = fs::read_to_string(file)?;
    contents.parse::<AgentConfig>()
}
// Report whether the endpoint named `ep` may be used: either every
// endpoint is allowed, or `ep` is in the explicit allowed set.
pub fn is_allowed_endpoint(&self, ep: &str) -> bool {
    let endpoints = &self.endpoints;
    if endpoints.all_allowed {
        return true;
    }
    endpoints.allowed.contains(ep)
}
}
@@ -236,25 +345,6 @@ fn get_log_level(param: &str) -> Result<slog::Level> {
logrus_to_slog_level(fields[1])
}
#[instrument]
fn get_trace_type(param: &str) -> Result<tracer::TraceType> {
ensure!(!param.is_empty(), "invalid trace type parameter");
let fields: Vec<&str> = param.split('=').collect();
ensure!(
fields[0] == TRACE_MODE_OPTION,
"invalid trace type key name"
);
if fields.len() == 1 {
return Ok(tracer::TraceType::Isolated);
}
let result = fields[1].parse::<tracer::TraceType>()?;
Ok(result)
}
#[instrument]
fn get_hotplug_timeout(param: &str) -> Result<time::Duration> {
let fields: Vec<&str> = param.split('=').collect();
@@ -339,10 +429,6 @@ mod tests {
use std::time;
use tempfile::tempdir;
const ERR_INVALID_TRACE_TYPE_PARAM: &str = "invalid trace type parameter";
const ERR_INVALID_TRACE_TYPE: &str = "invalid trace type";
const ERR_INVALID_TRACE_TYPE_KEY: &str = "invalid trace type key name";
// Parameters:
//
// 1: expected Result
@@ -371,7 +457,7 @@ mod tests {
#[test]
fn test_new() {
let config = AgentConfig::new();
let config: AgentConfig = Default::default();
assert!(!config.debug_console);
assert!(!config.dev_mode);
assert_eq!(config.log_level, DEFAULT_LOG_LEVEL);
@@ -379,7 +465,7 @@ mod tests {
}
#[test]
fn test_parse_cmdline() {
fn test_from_cmdline() {
const TEST_SERVER_ADDR: &str = "vsock://-1:1024";
#[derive(Debug)]
@@ -393,7 +479,7 @@ mod tests {
container_pipe_size: i32,
server_addr: &'a str,
unified_cgroup_hierarchy: bool,
tracing: tracer::TraceType,
tracing: bool,
}
impl Default for TestData<'_> {
@@ -408,7 +494,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
tracing: false,
}
}
}
@@ -667,64 +753,121 @@ mod tests {
},
TestData {
contents: "trace",
tracing: tracer::TraceType::Disabled,
tracing: false,
..Default::default()
},
TestData {
contents: ".trace",
tracing: tracer::TraceType::Disabled,
tracing: false,
..Default::default()
},
TestData {
contents: "agent.tracer",
tracing: tracer::TraceType::Disabled,
tracing: false,
..Default::default()
},
TestData {
contents: "agent.trac",
tracing: tracer::TraceType::Disabled,
tracing: false,
..Default::default()
},
TestData {
contents: "agent.trace",
tracing: tracer::TraceType::Isolated,
tracing: true,
..Default::default()
},
TestData {
contents: "agent.trace=isolated",
tracing: tracer::TraceType::Isolated,
contents: "agent.trace=true",
tracing: true,
..Default::default()
},
TestData {
contents: "agent.trace=disabled",
tracing: tracer::TraceType::Disabled,
contents: "agent.trace=false",
tracing: false,
..Default::default()
},
TestData {
contents: "agent.trace=0",
tracing: false,
..Default::default()
},
TestData {
contents: "agent.trace=1",
tracing: true,
..Default::default()
},
TestData {
contents: "agent.trace=a",
tracing: false,
..Default::default()
},
TestData {
contents: "agent.trace=foo",
tracing: false,
..Default::default()
},
TestData {
contents: "agent.trace=.",
tracing: false,
..Default::default()
},
TestData {
contents: "agent.trace=,",
tracing: false,
..Default::default()
},
TestData {
contents: "",
env_vars: vec!["KATA_AGENT_TRACE_TYPE=isolated"],
tracing: tracer::TraceType::Isolated,
env_vars: vec!["KATA_AGENT_TRACING="],
tracing: false,
..Default::default()
},
TestData {
contents: "",
env_vars: vec!["KATA_AGENT_TRACE_TYPE=disabled"],
tracing: tracer::TraceType::Disabled,
env_vars: vec!["KATA_AGENT_TRACING=''"],
tracing: false,
..Default::default()
},
TestData {
contents: "",
env_vars: vec!["KATA_AGENT_TRACING=0"],
tracing: false,
..Default::default()
},
TestData {
contents: "",
env_vars: vec!["KATA_AGENT_TRACING=."],
tracing: false,
..Default::default()
},
TestData {
contents: "",
env_vars: vec!["KATA_AGENT_TRACING=,"],
tracing: false,
..Default::default()
},
TestData {
contents: "",
env_vars: vec!["KATA_AGENT_TRACING=foo"],
tracing: false,
..Default::default()
},
TestData {
contents: "",
env_vars: vec!["KATA_AGENT_TRACING=1"],
tracing: true,
..Default::default()
},
TestData {
contents: "",
env_vars: vec!["KATA_AGENT_TRACING=true"],
tracing: true,
..Default::default()
},
];
let dir = tempdir().expect("failed to create tmpdir");
// First, check a missing file is handled
let file_path = dir.path().join("enoent");
let filename = file_path.to_str().expect("failed to create filename");
let mut config = AgentConfig::new();
let result = config.parse_cmdline(&filename.to_owned());
assert!(result.is_err());
// Now, test various combinations of file contents and environment
// variables.
for (i, d) in tests.iter().enumerate() {
@@ -753,22 +896,7 @@ mod tests {
vars_to_unset.push(name);
}
let mut config = AgentConfig::new();
assert!(!config.debug_console, "{}", msg);
assert!(!config.dev_mode, "{}", msg);
assert!(!config.unified_cgroup_hierarchy, "{}", msg);
assert_eq!(
config.hotplug_timeout,
time::Duration::from_secs(3),
"{}",
msg
);
assert_eq!(config.container_pipe_size, 0, "{}", msg);
assert_eq!(config.server_addr, TEST_SERVER_ADDR, "{}", msg);
assert_eq!(config.tracing, tracer::TraceType::Disabled, "{}", msg);
let result = config.parse_cmdline(filename);
assert!(result.is_ok(), "{}", msg);
let config = AgentConfig::from_cmdline(filename).expect("Failed to parse command line");
assert_eq!(d.debug_console, config.debug_console, "{}", msg);
assert_eq!(d.dev_mode, config.dev_mode, "{}", msg);
@@ -1220,60 +1348,33 @@ Caused by:
}
#[test]
fn test_get_trace_type() {
#[derive(Debug)]
struct TestData<'a> {
param: &'a str,
result: Result<tracer::TraceType>,
}
fn test_config_builder_from_string() {
let config = AgentConfig::from_str(
r#"
dev_mode = true
server_addr = 'vsock://8:2048'
let tests = &[
TestData {
param: "",
result: Err(anyhow!(ERR_INVALID_TRACE_TYPE_PARAM)),
},
TestData {
param: "agent.tracer",
result: Err(anyhow!(ERR_INVALID_TRACE_TYPE_KEY)),
},
TestData {
param: "agent.trac",
result: Err(anyhow!(ERR_INVALID_TRACE_TYPE_KEY)),
},
TestData {
param: "agent.trace=",
result: Err(anyhow!(ERR_INVALID_TRACE_TYPE)),
},
TestData {
param: "agent.trace==",
result: Err(anyhow!(ERR_INVALID_TRACE_TYPE)),
},
TestData {
param: "agent.trace=foo",
result: Err(anyhow!(ERR_INVALID_TRACE_TYPE)),
},
TestData {
param: "agent.trace",
result: Ok(tracer::TraceType::Isolated),
},
TestData {
param: "agent.trace=isolated",
result: Ok(tracer::TraceType::Isolated),
},
TestData {
param: "agent.trace=disabled",
result: Ok(tracer::TraceType::Disabled),
},
];
[endpoints]
allowed = ["CreateContainer", "StartContainer"]
"#,
)
.unwrap();
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
// Verify that the all_allowed flag is false
assert!(!config.endpoints.all_allowed);
let result = get_trace_type(d.param);
// Verify that the override worked
assert!(config.dev_mode);
assert_eq!(config.server_addr, "vsock://8:2048");
assert_eq!(
config.endpoints.allowed,
vec!["CreateContainer".to_string(), "StartContainer".to_string()]
.iter()
.cloned()
.collect()
);
let msg = format!("{}: result: {:?}", msg, result);
assert_result!(d.result, result, msg);
}
// Verify that the default values are valid
assert_eq!(config.hotplug_timeout, DEFAULT_HOTPLUG_TIMEOUT);
}
}

View File

@@ -7,7 +7,10 @@ use libc::{c_uint, major, minor};
use nix::sys::stat;
use regex::Regex;
use std::collections::HashMap;
use std::ffi::OsStr;
use std::fmt;
use std::fs;
use std::os::unix::ffi::OsStrExt;
use std::os::unix::fs::MetadataExt;
use std::path::Path;
use std::str::FromStr;
@@ -17,10 +20,6 @@ use tokio::sync::Mutex;
#[cfg(target_arch = "s390x")]
use crate::ccw;
use crate::linux_abi::*;
use crate::mount::{
DRIVER_BLK_CCW_TYPE, DRIVER_BLK_TYPE, DRIVER_MMIO_BLK_TYPE, DRIVER_NVDIMM_TYPE,
DRIVER_SCSI_TYPE,
};
use crate::pci;
use crate::sandbox::Sandbox;
use crate::uevent::{wait_for_uevent, Uevent, UeventMatcher};
@@ -38,6 +37,22 @@ macro_rules! sl {
const VM_ROOTFS: &str = "/";
pub const DRIVER_9P_TYPE: &str = "9p";
pub const DRIVER_VIRTIOFS_TYPE: &str = "virtio-fs";
pub const DRIVER_BLK_TYPE: &str = "blk";
pub const DRIVER_BLK_CCW_TYPE: &str = "blk-ccw";
pub const DRIVER_MMIO_BLK_TYPE: &str = "mmioblk";
pub const DRIVER_SCSI_TYPE: &str = "scsi";
pub const DRIVER_NVDIMM_TYPE: &str = "nvdimm";
pub const DRIVER_EPHEMERAL_TYPE: &str = "ephemeral";
pub const DRIVER_LOCAL_TYPE: &str = "local";
pub const DRIVER_WATCHABLE_BIND_TYPE: &str = "watchable-bind";
// VFIO device to be bound to a guest kernel driver
pub const DRIVER_VFIO_GK_TYPE: &str = "vfio-gk";
// VFIO device to be bound to vfio-pci and made available inside the
// container as a VFIO device node
pub const DRIVER_VFIO_TYPE: &str = "vfio";
#[derive(Debug)]
struct DevIndexEntry {
idx: usize,
@@ -47,17 +62,89 @@ struct DevIndexEntry {
#[derive(Debug)]
struct DevIndex(HashMap<String, DevIndexEntry>);
// Trigger a PCI bus rescan by passing SYSFS_PCI_BUS_RESCAN_FILE to
// online_device(). Any error from online_device() is returned unchanged.
#[instrument]
pub fn rescan_pci_bus() -> Result<()> {
online_device(SYSFS_PCI_BUS_RESCAN_FILE)
}
// Write the string "1" to the file at `path`, returning any I/O error
// encountered while doing so.
#[instrument]
pub fn online_device(path: &str) -> Result<()> {
    Ok(fs::write(path, "1")?)
}
// Force the PCI device `dev` to bind to the driver `drv`; roughly what
//    driverctl set-override <PCI address> <driver>
// does.
//
// `syspci` is the sysfs PCI bus directory. The steps are:
//   1. write the driver name to <syspci>/devices/<dev>/driver_override
//   2. if the device is already bound to that driver, stop
//   3. if it is bound to a different driver, unbind it
//   4. write the device address to <syspci>/drivers_probe
#[instrument]
pub fn pci_driver_override<T, U>(syspci: T, dev: pci::Address, drv: U) -> Result<()>
where
    T: AsRef<OsStr> + std::fmt::Debug,
    U: AsRef<OsStr> + std::fmt::Debug,
{
    let syspci = Path::new(&syspci);
    let drv = drv.as_ref();
    info!(sl!(), "rebind_pci_driver: {} => {:?}", dev, drv);

    let bdf = dev.to_string();
    let device_dir = syspci.join("devices").join(&bdf);

    fs::write(device_dir.join("driver_override"), drv.as_bytes())?;

    let driver_link = device_dir.join("driver");
    match fs::read_link(&driver_link) {
        Ok(current) => {
            if current.file_name() == Some(drv) {
                // Already bound to the requested driver: nothing to do
                return Ok(());
            }
            // Bound to some other driver, which must let go first
            fs::write(driver_link.join("unbind"), &bdf)?;
        }
        // No driver currently bound, so nothing to unbind
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
        Err(e) => return Err(anyhow!("Error checking driver on {}: {}", dev, e)),
    }

    fs::write(syspci.join("drivers_probe"), &bdf)?;
    Ok(())
}
// An IOMMU group, identified by its numeric group id.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct IommuGroup(u32);

// An IommuGroup is displayed as its bare group number.
impl fmt::Display for IommuGroup {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let IommuGroup(id) = self;
        write!(f, "{}", id)
    }
}
// Determine the IOMMU group of a PCI device
#[instrument]
fn pci_iommu_group<T>(syspci: T, dev: pci::Address) -> Result<Option<IommuGroup>>
where
T: AsRef<OsStr> + std::fmt::Debug,
{
let syspci = Path::new(&syspci);
let grouppath = syspci
.join("devices")
.join(dev.to_string())
.join("iommu_group");
match fs::read_link(&grouppath) {
// Device has no group
Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(None),
Err(e) => Err(anyhow!("Error reading link {:?}: {}", &grouppath, e)),
Ok(group) => {
if let Some(group) = group.file_name() {
if let Some(group) = group.to_str() {
if let Ok(group) = group.parse::<u32>() {
return Ok(Some(IommuGroup(group)));
}
}
}
Err(anyhow!(
"Unexpected IOMMU group link {:?} => {:?}",
grouppath,
group
))
}
}
}
// pcipath_to_sysfs fetches the sysfs path for a PCI path, relative to
// the sysfs path for the PCI host bridge, based on the PCI path
// provided.
@@ -67,7 +154,7 @@ pub fn pcipath_to_sysfs(root_bus_sysfs: &str, pcipath: &pci::Path) -> Result<Str
let mut relpath = String::new();
for i in 0..pcipath.len() {
let bdf = format!("{}:{}.0", bus, pcipath[i]);
let bdf = format!("{}:{}", bus, pcipath[i]);
relpath = format!("{}/{}", relpath, bdf);
@@ -162,8 +249,6 @@ pub async fn get_virtio_blk_pci_device_name(
let sysfs_rel_path = pcipath_to_sysfs(&root_bus_sysfs, pcipath)?;
let matcher = VirtioBlkPciMatcher::new(&sysfs_rel_path);
rescan_pci_bus()?;
let uev = wait_for_uevent(sandbox, matcher).await?;
Ok(format!("{}/{}", SYSTEM_DEV_PATH, &uev.devname))
}
@@ -255,6 +340,72 @@ pub async fn wait_for_pmem_device(sandbox: &Arc<Mutex<Sandbox>>, devpath: &str)
Ok(())
}
// Uevent matcher for a PCI device identified by its full device path
// (PCI root bus path followed by a relative path).
#[derive(Debug)]
struct PciMatcher {
    devpath: String,
}

impl PciMatcher {
    // Build a matcher for the device at `relpath` below the PCI root bus.
    fn new(relpath: &str) -> Result<PciMatcher> {
        let devpath = format!("{}{}", create_pci_root_bus_path(), relpath);
        Ok(Self { devpath })
    }
}

impl UeventMatcher for PciMatcher {
    // A uevent matches only when its devpath equals ours exactly.
    fn is_match(&self, uev: &Uevent) -> bool {
        self.devpath == uev.devpath
    }
}
// Wait for a uevent for the PCI device at `pcipath` and return the PCI
// address parsed from the last '/'-separated component of the uevent's
// devpath.
pub async fn wait_for_pci_device(
    sandbox: &Arc<Mutex<Sandbox>>,
    pcipath: &pci::Path,
) -> Result<pci::Address> {
    let root_bus_sysfs = format!("{}{}", SYSFS_DIR, create_pci_root_bus_path());
    let relpath = pcipath_to_sysfs(&root_bus_sysfs, pcipath)?;

    let uev = wait_for_uevent(sandbox, PciMatcher::new(&relpath)?).await?;

    match uev.devpath.rsplit('/').next() {
        Some(bdf) => Ok(pci::Address::from_str(bdf)?),
        None => Err(anyhow!("Bad device path {:?} in uevent", &uev.devpath)),
    }
}
// Uevent matcher for the VFIO group device of a given IOMMU group.
#[derive(Debug)]
struct VfioMatcher {
    syspath: String,
}

impl VfioMatcher {
    // Build a matcher for the devpath "/devices/virtual/vfio/<group>".
    fn new(grp: IommuGroup) -> VfioMatcher {
        let syspath = format!("/devices/virtual/vfio/{}", grp);
        Self { syspath }
    }
}

impl UeventMatcher for VfioMatcher {
    // A uevent matches only when its devpath equals the group's path.
    fn is_match(&self, uev: &Uevent) -> bool {
        self.syspath == uev.devpath
    }
}
// Wait for the uevent of the VFIO group device for `grp` and return the
// path of its device node: SYSTEM_DEV_PATH joined with the uevent devname.
#[instrument]
async fn get_vfio_device_name(sandbox: &Arc<Mutex<Sandbox>>, grp: IommuGroup) -> Result<String> {
    let uev = wait_for_uevent(sandbox, VfioMatcher::new(grp)).await?;
    let devname = &uev.devname;

    Ok(format!("{}/{}", SYSTEM_DEV_PATH, devname))
}
/// Scan SCSI bus for the given SCSI address(SCSI-Id and LUN)
#[instrument]
fn scan_scsi_bus(scsi_addr: &str) -> Result<()> {
@@ -285,24 +436,27 @@ fn scan_scsi_bus(scsi_addr: &str) -> Result<()> {
Ok(())
}
// update_spec_device_list takes a device description provided by the caller,
// trying to find it on the guest. Once this device has been identified, the
// "real" information that can be read from inside the VM is used to update
// the same device in the list of devices provided through the OCI spec.
// This is needed to update information about minor/major numbers that cannot
// be predicted from the caller.
// update_spec_device updates the device list in the OCI spec to make
// it include details appropriate for the VM, instead of the host. It
// is given the host path to the device (to locate the device in the
// original OCI spec) and the VM path which it uses to determine the
// VM major/minor numbers, and the final path with which to present
// the device in the (inner) container
#[instrument]
fn update_spec_device_list(device: &Device, spec: &mut Spec, devidx: &DevIndex) -> Result<()> {
fn update_spec_device(
spec: &mut Spec,
devidx: &DevIndex,
host_path: &str,
vm_path: &str,
final_path: &str,
) -> Result<()> {
let major_id: c_uint;
let minor_id: c_uint;
// If no container_path is provided, we won't be able to match and
// update the device in the OCI spec device list. This is an error.
if device.container_path.is_empty() {
return Err(anyhow!(
"container_path cannot empty for device {:?}",
device
));
if host_path.is_empty() {
return Err(anyhow!("Host path cannot empty for device"));
}
let linux = spec
@@ -310,11 +464,11 @@ fn update_spec_device_list(device: &Device, spec: &mut Spec, devidx: &DevIndex)
.as_mut()
.ok_or_else(|| anyhow!("Spec didn't container linux field"))?;
if !Path::new(&device.vm_path).exists() {
return Err(anyhow!("vm_path:{} doesn't exist", device.vm_path));
if !Path::new(vm_path).exists() {
return Err(anyhow!("vm_path:{} doesn't exist", vm_path));
}
let meta = fs::metadata(&device.vm_path)?;
let meta = fs::metadata(vm_path)?;
let dev_id = meta.rdev();
unsafe {
major_id = major(dev_id);
@@ -323,24 +477,27 @@ fn update_spec_device_list(device: &Device, spec: &mut Spec, devidx: &DevIndex)
info!(
sl!(),
"got the device: dev_path: {}, major: {}, minor: {}\n", &device.vm_path, major_id, minor_id
"update_spec_device(): vm_path={}, major: {}, minor: {}\n", vm_path, major_id, minor_id
);
if let Some(idxdata) = devidx.0.get(device.container_path.as_str()) {
if let Some(idxdata) = devidx.0.get(host_path) {
let dev = &mut linux.devices[idxdata.idx];
let host_major = dev.major;
let host_minor = dev.minor;
dev.major = major_id as i64;
dev.minor = minor_id as i64;
dev.path = final_path.to_string();
info!(
sl!(),
"change the device from major: {} minor: {} to vm device major: {} minor: {}",
"change the device from path: {} major: {} minor: {} to vm device path: {} major: {} minor: {}",
host_path,
host_major,
host_minor,
major_id,
minor_id
dev.path,
dev.major,
dev.minor,
);
// Resources must be updated since they are used to identify
@@ -361,7 +518,7 @@ fn update_spec_device_list(device: &Device, spec: &mut Spec, devidx: &DevIndex)
} else {
Err(anyhow!(
"Should have found a matching device {} in the spec",
device.vm_path
vm_path
))
}
}
@@ -379,7 +536,13 @@ async fn virtiommio_blk_device_handler(
return Err(anyhow!("Invalid path for virtio mmio blk device"));
}
update_spec_device_list(device, spec, devidx)
update_spec_device(
spec,
devidx,
&device.container_path,
&device.vm_path,
&device.container_path,
)
}
// device.Id should be a PCI path string
@@ -395,7 +558,13 @@ async fn virtio_blk_device_handler(
dev.vm_path = get_virtio_blk_pci_device_name(sandbox, &pcipath).await?;
update_spec_device_list(&dev, spec, devidx)
update_spec_device(
spec,
devidx,
&dev.container_path,
&dev.vm_path,
&dev.container_path,
)
}
// device.id should be a CCW path string
@@ -410,7 +579,13 @@ async fn virtio_blk_ccw_device_handler(
let mut dev = device.clone();
let ccw_device = ccw::Device::from_str(&device.id)?;
dev.vm_path = get_virtio_blk_ccw_device_name(sandbox, &ccw_device).await?;
update_spec_device_list(&dev, spec, devidx)
update_spec_device(
spec,
devidx,
&dev.container_path,
&dev.vm_path,
&dev.container_path,
)
}
#[cfg(not(target_arch = "s390x"))]
@@ -434,7 +609,13 @@ async fn virtio_scsi_device_handler(
) -> Result<()> {
let mut dev = device.clone();
dev.vm_path = get_scsi_device_name(sandbox, &device.id).await?;
update_spec_device_list(&dev, spec, devidx)
update_spec_device(
spec,
devidx,
&dev.container_path,
&dev.vm_path,
&dev.container_path,
)
}
#[instrument]
@@ -448,7 +629,79 @@ async fn virtio_nvdimm_device_handler(
return Err(anyhow!("Invalid path for nvdimm device"));
}
update_spec_device_list(device, spec, devidx)
update_spec_device(
spec,
devidx,
&device.container_path,
&device.vm_path,
&device.container_path,
)
}
// Split a VFIO option of the form "<hostbdf>=<path>" into its two halves.
//
// Returns None unless `opt` contains exactly one '='. Either half may be
// empty.
fn split_vfio_option(opt: &str) -> Option<(&str, &str)> {
    // Three pieces at most are needed to tell "exactly one '='" apart
    // from "none" and "more than one".
    let mut parts = opt.splitn(3, '=');

    match (parts.next(), parts.next(), parts.next()) {
        (Some(hostbdf), Some(path), None) => Some((hostbdf, path)),
        _ => None,
    }
}
// device.options should have one entry for each PCI device in the VFIO group
// Each option should have the form "DDDD:BB:DD.F=<pcipath>"
// DDDD:BB:DD.F is the device's PCI address in the host
// <pcipath> is a PCI path to the device in the guest (see pci.rs)
async fn vfio_device_handler(
device: &Device,
spec: &mut Spec,
sandbox: &Arc<Mutex<Sandbox>>,
devidx: &DevIndex,
) -> Result<()> {
let vfio_in_guest = device.field_type != DRIVER_VFIO_GK_TYPE;
let mut group = None;
for opt in device.options.iter() {
let (_, pcipath) =
split_vfio_option(opt).ok_or_else(|| anyhow!("Malformed VFIO option {:?}", opt))?;
let pcipath = pci::Path::from_str(pcipath)?;
let guestdev = wait_for_pci_device(sandbox, &pcipath).await?;
if vfio_in_guest {
pci_driver_override(SYSFS_BUS_PCI_PATH, guestdev, "vfio-pci")?;
let devgroup = pci_iommu_group(SYSFS_BUS_PCI_PATH, guestdev)?;
if devgroup.is_none() {
// Devices must have an IOMMU group to be usable via VFIO
return Err(anyhow!("{} has no IOMMU group", guestdev));
}
if group.is_some() && group != devgroup {
// If PCI devices associated with the same VFIO device
// (and therefore group) in the host don't end up in
// the same group in the guest, something has gone
// horribly wrong
return Err(anyhow!(
"{} is not in guest IOMMU group {}",
guestdev,
group.unwrap()
));
}
group = devgroup;
}
}
if vfio_in_guest {
// If there are any devices at all, logic above ensures that group is not None
let group = group.unwrap();
let vmpath = get_vfio_device_name(sandbox, group).await?;
update_spec_device(spec, devidx, &device.container_path, &vmpath, &vmpath)?;
}
Ok(())
}
impl DevIndex {
@@ -520,6 +773,9 @@ async fn add_device(
DRIVER_MMIO_BLK_TYPE => virtiommio_blk_device_handler(device, spec, sandbox, devidx).await,
DRIVER_NVDIMM_TYPE => virtio_nvdimm_device_handler(device, spec, sandbox, devidx).await,
DRIVER_SCSI_TYPE => virtio_scsi_device_handler(device, spec, sandbox, devidx).await,
DRIVER_VFIO_GK_TYPE | DRIVER_VFIO_TYPE => {
vfio_device_handler(device, spec, sandbox, devidx).await
}
_ => Err(anyhow!("Unknown device type {}", device.field_type)),
}
}
@@ -584,28 +840,28 @@ mod tests {
}
#[test]
fn test_update_spec_device_list() {
fn test_update_spec_device() {
let (major, minor) = (7, 2);
let mut device = Device::default();
let mut spec = Spec::default();
// container_path empty
let container_path = "";
let vm_path = "";
let devidx = DevIndex::new(&spec);
let res = update_spec_device_list(&device, &mut spec, &devidx);
let res = update_spec_device(&mut spec, &devidx, container_path, vm_path, container_path);
assert!(res.is_err());
device.container_path = "/dev/null".to_string();
// linux is empty
let container_path = "/dev/null";
let devidx = DevIndex::new(&spec);
let res = update_spec_device_list(&device, &mut spec, &devidx);
let res = update_spec_device(&mut spec, &devidx, container_path, vm_path, container_path);
assert!(res.is_err());
spec.linux = Some(Linux::default());
// linux.devices is empty
let devidx = DevIndex::new(&spec);
let res = update_spec_device_list(&device, &mut spec, &devidx);
let res = update_spec_device(&mut spec, &devidx, container_path, vm_path, container_path);
assert!(res.is_err());
spec.linux.as_mut().unwrap().devices = vec![oci::LinuxDevice {
@@ -617,26 +873,32 @@ mod tests {
// vm_path empty
let devidx = DevIndex::new(&spec);
let res = update_spec_device_list(&device, &mut spec, &devidx);
let res = update_spec_device(&mut spec, &devidx, container_path, vm_path, container_path);
assert!(res.is_err());
device.vm_path = "/dev/null".to_string();
let vm_path = "/dev/null";
// guest and host path are not the same
let devidx = DevIndex::new(&spec);
let res = update_spec_device_list(&device, &mut spec, &devidx);
assert!(res.is_err(), "device={:?} spec={:?}", device, spec);
let res = update_spec_device(&mut spec, &devidx, container_path, vm_path, container_path);
assert!(
res.is_err(),
"container_path={:?} vm_path={:?} spec={:?}",
container_path,
vm_path,
spec
);
spec.linux.as_mut().unwrap().devices[0].path = device.container_path.clone();
spec.linux.as_mut().unwrap().devices[0].path = container_path.to_string();
// spec.linux.resources is empty
let devidx = DevIndex::new(&spec);
let res = update_spec_device_list(&device, &mut spec, &devidx);
let res = update_spec_device(&mut spec, &devidx, container_path, vm_path, container_path);
assert!(res.is_ok());
// update both devices and cgroup lists
spec.linux.as_mut().unwrap().devices = vec![oci::LinuxDevice {
path: device.container_path.clone(),
path: container_path.to_string(),
major,
minor,
..oci::LinuxDevice::default()
@@ -652,12 +914,12 @@ mod tests {
});
let devidx = DevIndex::new(&spec);
let res = update_spec_device_list(&device, &mut spec, &devidx);
let res = update_spec_device(&mut spec, &devidx, container_path, vm_path, container_path);
assert!(res.is_ok());
}
#[test]
fn test_update_spec_device_list_guest_host_conflict() {
fn test_update_spec_device_guest_host_conflict() {
let null_rdev = fs::metadata("/dev/null").unwrap().rdev();
let zero_rdev = fs::metadata("/dev/zero").unwrap().rdev();
let full_rdev = fs::metadata("/dev/full").unwrap().rdev();
@@ -708,20 +970,14 @@ mod tests {
};
let devidx = DevIndex::new(&spec);
let dev_a = Device {
container_path: "/dev/a".to_string(),
vm_path: "/dev/zero".to_string(),
..Device::default()
};
let container_path_a = "/dev/a";
let vm_path_a = "/dev/zero";
let guest_major_a = stat::major(zero_rdev) as i64;
let guest_minor_a = stat::minor(zero_rdev) as i64;
let dev_b = Device {
container_path: "/dev/b".to_string(),
vm_path: "/dev/full".to_string(),
..Device::default()
};
let container_path_b = "/dev/b";
let vm_path_b = "/dev/full";
let guest_major_b = stat::major(full_rdev) as i64;
let guest_minor_b = stat::minor(full_rdev) as i64;
@@ -738,7 +994,13 @@ mod tests {
assert_eq!(Some(host_major_b), specresources.devices[1].major);
assert_eq!(Some(host_minor_b), specresources.devices[1].minor);
let res = update_spec_device_list(&dev_a, &mut spec, &devidx);
let res = update_spec_device(
&mut spec,
&devidx,
container_path_a,
vm_path_a,
container_path_a,
);
assert!(res.is_ok());
let specdevices = &spec.linux.as_ref().unwrap().devices;
@@ -753,7 +1015,13 @@ mod tests {
assert_eq!(Some(host_major_b), specresources.devices[1].major);
assert_eq!(Some(host_minor_b), specresources.devices[1].minor);
let res = update_spec_device_list(&dev_b, &mut spec, &devidx);
let res = update_spec_device(
&mut spec,
&devidx,
container_path_b,
vm_path_b,
container_path_b,
);
assert!(res.is_ok());
let specdevices = &spec.linux.as_ref().unwrap().devices;
@@ -770,7 +1038,7 @@ mod tests {
}
#[test]
fn test_update_spec_device_list_char_block_conflict() {
fn test_update_spec_device_char_block_conflict() {
let null_rdev = fs::metadata("/dev/null").unwrap().rdev();
let guest_major = stat::major(null_rdev) as i64;
@@ -819,11 +1087,8 @@ mod tests {
};
let devidx = DevIndex::new(&spec);
let dev = Device {
container_path: "/dev/char".to_string(),
vm_path: "/dev/null".to_string(),
..Device::default()
};
let container_path = "/dev/char";
let vm_path = "/dev/null";
let specresources = spec.linux.as_ref().unwrap().resources.as_ref().unwrap();
assert_eq!(Some(host_major), specresources.devices[0].major);
@@ -831,7 +1096,7 @@ mod tests {
assert_eq!(Some(host_major), specresources.devices[1].major);
assert_eq!(Some(host_minor), specresources.devices[1].minor);
let res = update_spec_device_list(&dev, &mut spec, &devidx);
let res = update_spec_device(&mut spec, &devidx, container_path, vm_path, container_path);
assert!(res.is_ok());
// Only the char device, not the block device should be updated
@@ -842,6 +1107,43 @@ mod tests {
assert_eq!(Some(host_minor), specresources.devices[1].minor);
}
// Checks that update_spec_device() both rewrites the device numbers to
// those of the guest device and renames the spec entry to the requested
// final path.
#[test]
fn test_update_spec_device_final_path() {
    // The three paths involved: where the device lives in the original
    // spec, the guest device it maps to, and the path it should end up at.
    let host_path = "/dev/host";
    let vm_path = "/dev/null";
    let final_path = "/dev/final";

    // Arbitrary host numbers, expected to be replaced by the update.
    let (host_major, host_minor): (i64, i64) = (99, 99);

    // Expected numbers after the update: those of the guest's /dev/null.
    let guest_rdev = fs::metadata("/dev/null").unwrap().rdev();
    let guest_major = stat::major(guest_rdev) as i64;
    let guest_minor = stat::minor(guest_rdev) as i64;

    // Build the spec from the inside out: device -> linux -> spec.
    let host_device = oci::LinuxDevice {
        path: host_path.to_string(),
        r#type: "c".to_string(),
        major: host_major,
        minor: host_minor,
        ..oci::LinuxDevice::default()
    };
    let linux = Linux {
        devices: vec![host_device],
        ..Linux::default()
    };
    let mut spec = Spec {
        linux: Some(linux),
        ..Spec::default()
    };

    let devidx = DevIndex::new(&spec);

    let res = update_spec_device(&mut spec, &devidx, host_path, vm_path, final_path);
    assert!(res.is_ok());

    let updated = &spec.linux.as_ref().unwrap().devices[0];
    assert_eq!(guest_major, updated.major);
    assert_eq!(guest_minor, updated.minor);
    assert_eq!(final_path, updated.path);
}
#[test]
fn test_pcipath_to_sysfs() {
let testdir = tempdir().expect("failed to create tmpdir");
@@ -1068,4 +1370,112 @@ mod tests {
assert!(!matcher_b.is_match(&uev_a));
assert!(!matcher_a.is_match(&uev_b));
}
// Two VFIO matchers built for different IOMMU groups must each accept only
// the uevent whose devpath names their own group.
#[tokio::test]
async fn test_vfio_matcher() {
    let group_a = IommuGroup(1);
    let group_b = IommuGroup(22);

    // "add" event for group A's VFIO device
    let mut event_a = crate::uevent::Uevent::default();
    event_a.action = crate::linux_abi::U_EVENT_ACTION_ADD.to_string();
    event_a.devname = format!("vfio/{}", group_a);
    event_a.devpath = format!("/devices/virtual/vfio/{}", group_a);

    // Same event, but with the devpath pointing at group B
    let mut event_b = event_a.clone();
    event_b.devpath = format!("/devices/virtual/vfio/{}", group_b);

    // The groups are handed to the matchers only after they have been
    // formatted into the events above.
    let matcher_a = VfioMatcher::new(group_a);
    let matcher_b = VfioMatcher::new(group_b);

    assert!(matcher_a.is_match(&event_a));
    assert!(matcher_b.is_match(&event_b));
    assert!(!matcher_b.is_match(&event_a));
    assert!(!matcher_a.is_match(&event_b));
}
// split_vfio_option() should split on exactly one '=' and reject anything
// with zero or more than one.
#[test]
fn test_split_vfio_option() {
    // (input, expected result)
    let cases: [(&str, Option<(&str, &str)>); 3] = [
        ("0000:01:00.0=02/01", Some(("0000:01:00.0", "02/01"))),
        ("0000:01:00.0=02/01=rubbish", None),
        ("0000:01:00.0", None),
    ];

    for (input, expected) in cases.iter() {
        assert_eq!(split_vfio_option(input), *expected);
    }
}
// Exercises pci_driver_override() against a mock /sys/bus/pci tree built in
// a temporary directory. Three scenarios are run in sequence on the same
// device, each building on the files left behind by the previous one, so
// the order of the steps below matters.
#[test]
fn test_pci_driver_override() {
let testdir = tempdir().expect("failed to create tmpdir");
let syspci = testdir.path(); // Path to mock /sys/bus/pci
let dev0 = pci::Address::new(0, 0, pci::SlotFn::new(0, 0).unwrap());
// Mock sysfs entries for the device and for a driver named "drv_a"
let dev0path = syspci.join("devices").join(dev0.to_string());
let dev0drv = dev0path.join("driver");
let dev0override = dev0path.join("driver_override");
let drvapath = syspci.join("drivers").join("drv_a");
let drvaunbind = drvapath.join("unbind");
let probepath = syspci.join("drivers_probe");
// Start mocking dev0 as being unbound
fs::create_dir_all(&dev0path).unwrap();
pci_driver_override(syspci, dev0, "drv_a").unwrap();
// Expect the driver name in driver_override and the device address in
// drivers_probe
assert_eq!(fs::read_to_string(&dev0override).unwrap(), "drv_a");
assert_eq!(fs::read_to_string(&probepath).unwrap(), dev0.to_string());
// Now mock dev0 already being attached to drv_a
fs::create_dir_all(&drvapath).unwrap();
std::os::unix::fs::symlink(&drvapath, dev0drv).unwrap();
// Remove the probe file so that a new probe request would be detectable
std::fs::remove_file(&probepath).unwrap();
pci_driver_override(syspci, dev0, "drv_a").unwrap(); // no-op
assert_eq!(fs::read_to_string(&dev0override).unwrap(), "drv_a");
// No probe file means no re-probe was requested
assert!(!probepath.exists());
// Now try binding to a different driver
pci_driver_override(syspci, dev0, "drv_b").unwrap();
// Expect the override to name the new driver, a probe request for the
// device, and the device address written to the old driver's unbind file
assert_eq!(fs::read_to_string(&dev0override).unwrap(), "drv_b");
assert_eq!(fs::read_to_string(&probepath).unwrap(), dev0.to_string());
assert_eq!(fs::read_to_string(&drvaunbind).unwrap(), dev0.to_string());
}
// Exercises pci_iommu_group() against a mock /sys tree: a device without a
// group, a device whose iommu_group symlink names group 12, and a device
// whose iommu_group entry is a directory rather than a symlink.
#[test]
fn test_pci_iommu_group() {
    let testdir = tempdir().expect("failed to create tmpdir"); // mock /sys
    let syspci = testdir.path().join("bus").join("pci");
    let devices_dir = syspci.join("devices");

    // Yields the address 0000:<bus>:00.0 together with its mock sysfs
    // directory (which is not created here).
    let mock_device = |bus: u8| {
        let addr = pci::Address::new(0, bus, pci::SlotFn::new(0, 0).unwrap());
        let sysfs_dir = devices_dir.join(addr.to_string());
        (addr, sysfs_dir)
    };

    // Device with no group at all
    let (ungrouped, ungrouped_dir) = mock_device(0);
    fs::create_dir_all(&ungrouped_dir).unwrap();
    assert!(pci_iommu_group(&syspci, ungrouped).unwrap().is_none());

    // Device in group 12
    let (grouped, grouped_dir) = mock_device(1);
    fs::create_dir_all(&grouped_dir).unwrap();
    std::os::unix::fs::symlink(
        "../../../kernel/iommu_groups/12",
        grouped_dir.join("iommu_group"),
    )
    .unwrap();
    assert_eq!(
        pci_iommu_group(&syspci, grouped).unwrap(),
        Some(IommuGroup(12))
    );

    // Device with a bogus group (dir instead of symlink)
    let (bogus, bogus_dir) = mock_device(2);
    fs::create_dir_all(bogus_dir.join("iommu_group")).unwrap();
    assert!(pci_iommu_group(&syspci, bogus).is_err());
}
}

View File

@@ -9,7 +9,6 @@
use std::fs;
pub const SYSFS_DIR: &str = "/sys";
pub const SYSFS_PCI_BUS_RESCAN_FILE: &str = "/sys/bus/pci/rescan";
#[cfg(any(
target_arch = "powerpc64",
target_arch = "s390x",
@@ -84,6 +83,8 @@ pub const SYSFS_MEMORY_ONLINE_PATH: &str = "/sys/devices/system/memory";
pub const SYSFS_SCSI_HOST_PATH: &str = "/sys/class/scsi_host";
pub const SYSFS_BUS_PCI_PATH: &str = "/sys/bus/pci";
pub const SYSFS_CGROUPPATH: &str = "/sys/fs/cgroup";
pub const SYSFS_ONLINE_FILE: &str = "online";
@@ -95,6 +96,7 @@ pub const SYSTEM_DEV_PATH: &str = "/dev";
// Linux UEvent related consts.
pub const U_EVENT_ACTION: &str = "ACTION";
pub const U_EVENT_ACTION_ADD: &str = "add";
pub const U_EVENT_ACTION_REMOVE: &str = "remove";
pub const U_EVENT_DEV_PATH: &str = "DEVPATH";
pub const U_EVENT_SUB_SYSTEM: &str = "SUBSYSTEM";
pub const U_EVENT_SEQ_NUM: &str = "SEQNUM";

View File

@@ -77,11 +77,11 @@ mod rpc;
mod tracer;
const NAME: &str = "kata-agent";
const KERNEL_CMDLINE_FILE: &str = "/proc/cmdline";
lazy_static! {
static ref AGENT_CONFIG: Arc<RwLock<AgentConfig>> =
Arc::new(RwLock::new(config::AgentConfig::new()));
static ref AGENT_CONFIG: Arc<RwLock<AgentConfig>> = Arc::new(RwLock::new(
AgentConfig::from_cmdline("/proc/cmdline").unwrap()
));
}
#[instrument]
@@ -134,15 +134,11 @@ async fn real_main() -> std::result::Result<(), Box<dyn std::error::Error>> {
console::initialize();
lazy_static::initialize(&AGENT_CONFIG);
// support vsock log
let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC)?;
let (shutdown_tx, shutdown_rx) = channel(true);
let agent_config = AGENT_CONFIG.clone();
let init_mode = unistd::getpid() == Pid::from_raw(1);
if init_mode {
// dup a new file descriptor for this temporary logger writer,
@@ -163,20 +159,15 @@ async fn real_main() -> std::result::Result<(), Box<dyn std::error::Error>> {
e
})?;
let mut config = agent_config.write().await;
config.parse_cmdline(KERNEL_CMDLINE_FILE)?;
lazy_static::initialize(&AGENT_CONFIG);
init_agent_as_init(&logger, config.unified_cgroup_hierarchy)?;
init_agent_as_init(&logger, AGENT_CONFIG.read().await.unified_cgroup_hierarchy)?;
drop(logger_async_guard);
} else {
// once parsed cmdline and set the config, release the write lock
// as soon as possible in case other thread would get read lock on
// it.
let mut config = agent_config.write().await;
config.parse_cmdline(KERNEL_CMDLINE_FILE)?;
lazy_static::initialize(&AGENT_CONFIG);
}
let config = agent_config.read().await;
let config = AGENT_CONFIG.read().await;
let log_vport = config.log_vport as u32;
let log_handle = tokio::spawn(create_logger_task(rfd, log_vport, shutdown_rx.clone()));
@@ -205,16 +196,16 @@ async fn real_main() -> std::result::Result<(), Box<dyn std::error::Error>> {
ttrpc_log_guard = Ok(slog_stdlog::init().map_err(|e| e)?);
}
if config.tracing != tracer::TraceType::Disabled {
let _ = tracer::setup_tracing(NAME, &logger, &config)?;
if config.tracing {
tracer::setup_tracing(NAME, &logger)?;
}
let root = span!(tracing::Level::TRACE, "root-span", work_units = 2);
let root_span = span!(tracing::Level::TRACE, "root-span");
// XXX: Start the root trace transaction.
//
// XXX: Note that *ALL* spans needs to start after this point!!
let _enter = root.enter();
let span_guard = root_span.enter();
// Start the sandbox and wait for its ttRPC server to end
start_sandbox(&logger, &config, init_mode, &mut tasks, shutdown_rx.clone()).await?;
@@ -238,19 +229,29 @@ async fn real_main() -> std::result::Result<(), Box<dyn std::error::Error>> {
// Wait for all threads to finish
let results = join_all(tasks).await;
for result in results {
if let Err(e) = result {
return Err(anyhow!(e).into());
}
}
// force flushing spans
drop(span_guard);
drop(root_span);
if config.tracing != tracer::TraceType::Disabled {
if config.tracing {
tracer::end_tracing();
}
eprintln!("{} shutdown complete", NAME);
Ok(())
let mut wait_errors: Vec<tokio::task::JoinError> = vec![];
for result in results {
if let Err(e) = result {
eprintln!("wait task error: {:#?}", e);
wait_errors.push(e);
}
}
if wait_errors.is_empty() {
Ok(())
} else {
Err(anyhow!("wait all tasks failed: {:#?}", wait_errors).into())
}
}
fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {

View File

@@ -23,50 +23,50 @@ macro_rules! sl {
lazy_static! {
static ref AGENT_SCRAPE_COUNT: IntCounter =
prometheus::register_int_counter!(format!("{}_{}",NAMESPACE_KATA_AGENT,"scrape_count").as_ref(), "Metrics scrape count").unwrap();
prometheus::register_int_counter!(format!("{}_{}",NAMESPACE_KATA_AGENT,"scrape_count"), "Metrics scrape count").unwrap();
static ref AGENT_THREADS: Gauge =
prometheus::register_gauge!(format!("{}_{}",NAMESPACE_KATA_AGENT,"threads").as_ref(), "Agent process threads").unwrap();
prometheus::register_gauge!(format!("{}_{}",NAMESPACE_KATA_AGENT,"threads"), "Agent process threads").unwrap();
static ref AGENT_TOTAL_TIME: Gauge =
prometheus::register_gauge!(format!("{}_{}",NAMESPACE_KATA_AGENT,"total_time").as_ref(), "Agent process total time").unwrap();
prometheus::register_gauge!(format!("{}_{}",NAMESPACE_KATA_AGENT,"total_time"), "Agent process total time").unwrap();
static ref AGENT_TOTAL_VM: Gauge =
prometheus::register_gauge!(format!("{}_{}",NAMESPACE_KATA_AGENT,"total_vm").as_ref(), "Agent process total VM size").unwrap();
prometheus::register_gauge!(format!("{}_{}",NAMESPACE_KATA_AGENT,"total_vm"), "Agent process total VM size").unwrap();
static ref AGENT_TOTAL_RSS: Gauge =
prometheus::register_gauge!(format!("{}_{}",NAMESPACE_KATA_AGENT,"total_rss").as_ref(), "Agent process total RSS size").unwrap();
prometheus::register_gauge!(format!("{}_{}",NAMESPACE_KATA_AGENT,"total_rss"), "Agent process total RSS size").unwrap();
static ref AGENT_PROC_STATUS: GaugeVec =
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_AGENT,"proc_status").as_ref(), "Agent process status.", &["item"]).unwrap();
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_AGENT,"proc_status"), "Agent process status.", &["item"]).unwrap();
static ref AGENT_IO_STAT: GaugeVec =
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_AGENT,"io_stat").as_ref(), "Agent process IO statistics.", &["item"]).unwrap();
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_AGENT,"io_stat"), "Agent process IO statistics.", &["item"]).unwrap();
static ref AGENT_PROC_STAT: GaugeVec =
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_AGENT,"proc_stat").as_ref(), "Agent process statistics.", &["item"]).unwrap();
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_AGENT,"proc_stat"), "Agent process statistics.", &["item"]).unwrap();
// guest os metrics
static ref GUEST_LOAD: GaugeVec =
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"load").as_ref() , "Guest system load.", &["item"]).unwrap();
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"load") , "Guest system load.", &["item"]).unwrap();
static ref GUEST_TASKS: GaugeVec =
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"tasks").as_ref() , "Guest system load.", &["item"]).unwrap();
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"tasks") , "Guest system load.", &["item"]).unwrap();
static ref GUEST_CPU_TIME: GaugeVec =
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"cpu_time").as_ref() , "Guest CPU statistics.", &["cpu","item"]).unwrap();
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"cpu_time") , "Guest CPU statistics.", &["cpu","item"]).unwrap();
static ref GUEST_VM_STAT: GaugeVec =
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"vm_stat").as_ref() , "Guest virtual memory statistics.", &["item"]).unwrap();
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"vm_stat") , "Guest virtual memory statistics.", &["item"]).unwrap();
static ref GUEST_NETDEV_STAT: GaugeVec =
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"netdev_stat").as_ref() , "Guest net devices statistics.", &["interface","item"]).unwrap();
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"netdev_stat") , "Guest net devices statistics.", &["interface","item"]).unwrap();
static ref GUEST_DISKSTAT: GaugeVec =
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"diskstat").as_ref() , "Disks statistics in system.", &["disk","item"]).unwrap();
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"diskstat") , "Disks statistics in system.", &["disk","item"]).unwrap();
static ref GUEST_MEMINFO: GaugeVec =
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"meminfo").as_ref() , "Statistics about memory usage in the system.", &["item"]).unwrap();
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"meminfo") , "Statistics about memory usage in the system.", &["item"]).unwrap();
}
#[instrument]
@@ -348,17 +348,17 @@ fn set_gauge_vec_cpu_time(gv: &prometheus::GaugeVec, cpu: &str, cpu_time: &procf
gv.with_label_values(&[cpu, "idle"])
.set(cpu_time.idle as f64);
gv.with_label_values(&[cpu, "iowait"])
.set(cpu_time.iowait.unwrap_or(0.0) as f64);
.set(cpu_time.iowait.unwrap_or(0) as f64);
gv.with_label_values(&[cpu, "irq"])
.set(cpu_time.irq.unwrap_or(0.0) as f64);
.set(cpu_time.irq.unwrap_or(0) as f64);
gv.with_label_values(&[cpu, "softirq"])
.set(cpu_time.softirq.unwrap_or(0.0) as f64);
.set(cpu_time.softirq.unwrap_or(0) as f64);
gv.with_label_values(&[cpu, "steal"])
.set(cpu_time.steal.unwrap_or(0.0) as f64);
.set(cpu_time.steal.unwrap_or(0) as f64);
gv.with_label_values(&[cpu, "guest"])
.set(cpu_time.guest.unwrap_or(0.0) as f64);
.set(cpu_time.guest.unwrap_or(0) as f64);
gv.with_label_values(&[cpu, "guest_nice"])
.set(cpu_time.guest_nice.unwrap_or(0.0) as f64);
.set(cpu_time.guest_nice.unwrap_or(0) as f64);
}
#[instrument]
@@ -470,7 +470,7 @@ fn set_gauge_vec_proc_status(gv: &prometheus::GaugeVec, status: &procfs::process
gv.with_label_values(&["vmswap"])
.set(status.vmswap.unwrap_or(0) as f64);
gv.with_label_values(&["hugetlbpages"])
.set(status.hugetblpages.unwrap_or(0) as f64);
.set(status.hugetlbpages.unwrap_or(0) as f64);
gv.with_label_values(&["voluntary_ctxt_switches"])
.set(status.voluntary_ctxt_switches.unwrap_or(0) as f64);
gv.with_label_values(&["nonvoluntary_ctxt_switches"])

View File

@@ -4,28 +4,27 @@
//
use std::collections::HashMap;
use std::ffi::CString;
use std::fs;
use std::fs::File;
use std::io;
use std::io::{BufRead, BufReader};
use std::iter;
use std::os::unix::fs::{MetadataExt, PermissionsExt};
use std::path::Path;
use std::ptr::null;
use std::str::FromStr;
use std::sync::Arc;
use tokio::sync::Mutex;
use libc::{c_void, mount};
use nix::mount::{self, MsFlags};
use nix::mount::MsFlags;
use nix::unistd::Gid;
use regex::Regex;
use crate::device::{
get_scsi_device_name, get_virtio_blk_pci_device_name, online_device, wait_for_pmem_device,
DRIVER_9P_TYPE, DRIVER_BLK_CCW_TYPE, DRIVER_BLK_TYPE, DRIVER_EPHEMERAL_TYPE, DRIVER_LOCAL_TYPE,
DRIVER_MMIO_BLK_TYPE, DRIVER_NVDIMM_TYPE, DRIVER_SCSI_TYPE, DRIVER_VIRTIOFS_TYPE,
DRIVER_WATCHABLE_BIND_TYPE,
};
use crate::linux_abi::*;
use crate::pci;
@@ -37,17 +36,6 @@ use anyhow::{anyhow, Context, Result};
use slog::Logger;
use tracing::instrument;
pub const DRIVER_9P_TYPE: &str = "9p";
pub const DRIVER_VIRTIOFS_TYPE: &str = "virtio-fs";
pub const DRIVER_BLK_TYPE: &str = "blk";
pub const DRIVER_BLK_CCW_TYPE: &str = "blk-ccw";
pub const DRIVER_MMIO_BLK_TYPE: &str = "mmioblk";
pub const DRIVER_SCSI_TYPE: &str = "scsi";
pub const DRIVER_NVDIMM_TYPE: &str = "nvdimm";
pub const DRIVER_EPHEMERAL_TYPE: &str = "ephemeral";
pub const DRIVER_LOCAL_TYPE: &str = "local";
pub const DRIVER_WATCHABLE_BIND_TYPE: &str = "watchable-bind";
pub const TYPE_ROOTFS: &str = "rootfs";
pub const MOUNT_GUEST_TAG: &str = "kataShared";
@@ -149,96 +137,53 @@ pub const STORAGE_HANDLER_LIST: &[&str] = &[
DRIVER_WATCHABLE_BIND_TYPE,
];
#[derive(Debug, Clone)]
pub struct BareMount<'a> {
source: &'a str,
destination: &'a str,
fs_type: &'a str,
#[instrument]
pub fn baremount(
source: &str,
destination: &str,
fs_type: &str,
flags: MsFlags,
options: &'a str,
logger: Logger,
}
options: &str,
logger: &Logger,
) -> Result<()> {
let logger = logger.new(o!("subsystem" => "baremount"));
// mount mounts a source in to a destination. This will do some bookkeeping:
// * evaluate all symlinks
// * ensure the source exists
impl<'a> BareMount<'a> {
#[instrument]
pub fn new(
s: &'a str,
d: &'a str,
fs_type: &'a str,
flags: MsFlags,
options: &'a str,
logger: &Logger,
) -> Self {
BareMount {
source: s,
destination: d,
fs_type,
flags,
options,
logger: logger.new(o!("subsystem" => "baremount")),
}
if source.is_empty() {
return Err(anyhow!("need mount source"));
}
#[instrument]
pub fn mount(&self) -> Result<()> {
let source;
let dest;
let fs_type;
let mut options = null();
let cstr_options: CString;
let cstr_source: CString;
let cstr_dest: CString;
let cstr_fs_type: CString;
if self.source.is_empty() {
return Err(anyhow!("need mount source"));
}
if self.destination.is_empty() {
return Err(anyhow!("need mount destination"));
}
cstr_source = CString::new(self.source)?;
source = cstr_source.as_ptr();
cstr_dest = CString::new(self.destination)?;
dest = cstr_dest.as_ptr();
if self.fs_type.is_empty() {
return Err(anyhow!("need mount FS type"));
}
cstr_fs_type = CString::new(self.fs_type)?;
fs_type = cstr_fs_type.as_ptr();
if !self.options.is_empty() {
cstr_options = CString::new(self.options)?;
options = cstr_options.as_ptr() as *const c_void;
}
info!(
self.logger,
"mount source={:?}, dest={:?}, fs_type={:?}, options={:?}",
self.source,
self.destination,
self.fs_type,
self.options
);
let rc = unsafe { mount(source, dest, fs_type, self.flags.bits(), options) };
if rc < 0 {
return Err(anyhow!(
"failed to mount {:?} to {:?}, with error: {}",
self.source,
self.destination,
io::Error::last_os_error()
));
}
Ok(())
if destination.is_empty() {
return Err(anyhow!("need mount destination"));
}
if fs_type.is_empty() {
return Err(anyhow!("need mount FS type"));
}
info!(
logger,
"mount source={:?}, dest={:?}, fs_type={:?}, options={:?}",
source,
destination,
fs_type,
options
);
nix::mount::mount(
Some(source),
destination,
Some(fs_type),
flags,
Some(options),
)
.map_err(|e| {
anyhow!(
"failed to mount {:?} to {:?}, with error: {}",
source,
destination,
e
)
})
}
#[instrument]
@@ -460,14 +405,18 @@ async fn bind_watcher_storage_handler(
logger: &Logger,
storage: &Storage,
sandbox: Arc<Mutex<Sandbox>>,
cid: Option<String>,
) -> Result<()> {
let mut locked = sandbox.lock().await;
let container_id = locked.id.clone();
locked
.bind_watcher
.add_container(container_id, iter::once(storage.clone()), logger)
.await
if let Some(cid) = cid {
locked
.bind_watcher
.add_container(cid, iter::once(storage.clone()), logger)
.await
} else {
Ok(())
}
}
// mount_storage performs the mount described by the storage structure.
@@ -486,17 +435,14 @@ fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
return Ok(());
}
match storage.fstype.as_str() {
DRIVER_9P_TYPE | DRIVER_VIRTIOFS_TYPE => {
let dest_path = Path::new(storage.mount_point.as_str());
if !dest_path.exists() {
fs::create_dir_all(dest_path).context("Create mount destination failed")?;
}
}
_ => {
ensure_destination_exists(storage.mount_point.as_str(), storage.fstype.as_str())?;
}
let mount_path = Path::new(&storage.mount_point);
let src_path = Path::new(&storage.source);
if storage.fstype == "bind" && !src_path.is_dir() {
ensure_destination_file_exists(mount_path)
} else {
fs::create_dir_all(mount_path).map_err(anyhow::Error::from)
}
.context("Could not create mountpoint")?;
let options_vec = storage.options.to_vec();
let options_vec = options_vec.iter().map(String::as_str).collect();
@@ -509,16 +455,14 @@ fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
"mount-options" => options.as_str(),
);
let bare_mount = BareMount::new(
baremount(
storage.source.as_str(),
storage.mount_point.as_str(),
storage.fstype.as_str(),
flags,
options.as_str(),
&logger,
);
bare_mount.mount()
)
}
/// Looks for `mount_point` entry in the /proc/mounts.
@@ -578,6 +522,7 @@ pub async fn add_storages(
logger: Logger,
storages: Vec<Storage>,
sandbox: Arc<Mutex<Sandbox>>,
cid: Option<String>,
) -> Result<Vec<String>> {
let mut mount_list = Vec::new();
@@ -608,7 +553,8 @@ pub async fn add_storages(
}
DRIVER_NVDIMM_TYPE => nvdimm_storage_handler(&logger, &storage, sandbox.clone()).await,
DRIVER_WATCHABLE_BIND_TYPE => {
bind_watcher_storage_handler(&logger, &storage, sandbox.clone()).await?;
bind_watcher_storage_handler(&logger, &storage, sandbox.clone(), cid.clone())
.await?;
// Don't register watch mounts, they're handled separately by the watcher.
Ok(String::new())
}
@@ -637,11 +583,9 @@ fn mount_to_rootfs(logger: &Logger, m: &InitMount) -> Result<()> {
let (flags, options) = parse_mount_flags_and_options(options_vec);
let bare_mount = BareMount::new(m.src, m.dest, m.fstype, flags, options.as_str(), logger);
fs::create_dir_all(Path::new(m.dest)).context("could not create directory")?;
bare_mount.mount().or_else(|e| {
baremount(m.src, m.dest, m.fstype, flags, &options, logger).or_else(|e| {
if m.src != "dev" {
return Err(e);
}
@@ -816,32 +760,27 @@ pub fn cgroups_mount(logger: &Logger, unified_cgroup_hierarchy: bool) -> Result<
#[instrument]
pub fn remove_mounts(mounts: &[String]) -> Result<()> {
for m in mounts.iter() {
mount::umount(m.as_str()).context(format!("failed to umount {:?}", m))?;
nix::mount::umount(m.as_str()).context(format!("failed to umount {:?}", m))?;
}
Ok(())
}
// ensure_destination_exists will recursively create a given mountpoint. If directories
// are created, their permissions are initialized to mountPerm(0755)
#[instrument]
fn ensure_destination_exists(destination: &str, fs_type: &str) -> Result<()> {
let d = Path::new(destination);
if d.exists() {
fn ensure_destination_file_exists(path: &Path) -> Result<()> {
if path.is_file() {
return Ok(());
}
let dir = d
.parent()
.ok_or_else(|| anyhow!("mount destination {} doesn't exist", destination))?;
if !dir.exists() {
fs::create_dir_all(dir).context(format!("create dir all {:?}", dir))?;
} else if path.exists() {
return Err(anyhow!("{:?} exists but is not a regular file", path));
}
if fs_type != "bind" || d.is_dir() {
fs::create_dir_all(d).context(format!("create dir all {:?}", d))?;
} else {
fs::File::create(d).context(format!("create file {:?}", d))?;
}
// The only way parent() can return None is if the path is /,
// which always exists, so the test above will already have caught
// it, thus the unwrap() is safe
let dir = path.parent().unwrap();
fs::create_dir_all(dir).context(format!("create_dir_all {:?}", dir))?;
fs::File::create(path).context(format!("create empty file {:?}", path))?;
Ok(())
}
@@ -865,8 +804,6 @@ fn parse_options(option_list: Vec<String>) -> HashMap<String, String> {
mod tests {
use super::*;
use crate::{skip_if_not_root, skip_loop_if_not_root, skip_loop_if_root};
use libc::umount;
use std::fs::metadata;
use std::fs::File;
use std::fs::OpenOptions;
use std::io::Write;
@@ -1006,7 +943,7 @@ mod tests {
std::fs::create_dir_all(d).expect("failed to created directory");
}
let bare_mount = BareMount::new(
let result = baremount(
&src_filename,
&dest_filename,
d.fs_type,
@@ -1015,25 +952,13 @@ mod tests {
&logger,
);
let result = bare_mount.mount();
let msg = format!("{}: result: {:?}", msg, result);
if d.error_contains.is_empty() {
assert!(result.is_ok(), "{}", msg);
// Cleanup
unsafe {
let cstr_dest =
CString::new(dest_filename).expect("failed to convert dest to cstring");
let umount_dest = cstr_dest.as_ptr();
let ret = umount(umount_dest);
let msg = format!("{}: umount result: {:?}", msg, result);
assert!(ret == 0, "{}", msg);
};
nix::mount::umount(dest_filename.as_str()).unwrap();
continue;
}
@@ -1103,7 +1028,7 @@ mod tests {
}
// Create an actual mount
let bare_mount = BareMount::new(
let result = baremount(
mnt_src_filename,
mnt_dest_filename,
"bind",
@@ -1111,8 +1036,6 @@ mod tests {
"",
&logger,
);
let result = bare_mount.mount();
assert!(result.is_ok(), "mount for test setup failed");
let tests = &[
@@ -1444,37 +1367,20 @@ mod tests {
}
#[test]
fn test_ensure_destination_exists() {
fn test_ensure_destination_file_exists() {
let dir = tempdir().expect("failed to create tmpdir");
let mut testfile = dir.into_path();
testfile.push("testfile");
let result = ensure_destination_exists(testfile.to_str().unwrap(), "bind");
let result = ensure_destination_file_exists(&testfile);
assert!(result.is_ok());
assert!(testfile.exists());
let result = ensure_destination_exists(testfile.to_str().unwrap(), "bind");
let result = ensure_destination_file_exists(&testfile);
assert!(result.is_ok());
let meta = metadata(testfile).unwrap();
assert!(meta.is_file());
let dir = tempdir().expect("failed to create tmpdir");
let mut testdir = dir.into_path();
testdir.push("testdir");
let result = ensure_destination_exists(testdir.to_str().unwrap(), "ext4");
assert!(result.is_ok());
assert!(testdir.exists());
let result = ensure_destination_exists(testdir.to_str().unwrap(), "ext4");
assert!(result.is_ok());
//let meta = metadata(testdir.to_str().unwrap()).unwrap();
let meta = metadata(testdir).unwrap();
assert!(meta.is_dir());
assert!(testfile.is_file());
}
}

View File

@@ -13,7 +13,7 @@ use std::fs::File;
use std::path::{Path, PathBuf};
use tracing::instrument;
use crate::mount::{BareMount, FLAGS};
use crate::mount::{baremount, FLAGS};
use slog::Logger;
const PERSISTENT_NS_DIR: &str = "/var/run/sandbox-ns";
@@ -129,8 +129,7 @@ impl Namespace {
}
};
let bare_mount = BareMount::new(source, destination, "none", flags, "", &logger);
bare_mount.mount().map_err(|e| {
baremount(source, destination, "none", flags, "", &logger).map_err(|e| {
anyhow!(
"Failed to mount {} to {} with err:{:?}",
source,

View File

@@ -6,6 +6,7 @@
use anyhow::{anyhow, Context, Result};
use futures::{future, StreamExt, TryStreamExt};
use ipnetwork::{IpNetwork, Ipv4Network, Ipv6Network};
use nix::errno::Errno;
use protobuf::RepeatedField;
use protocols::types::{ARPNeighbor, IPAddress, IPFamily, Interface, Route};
use rtnetlink::{new_connection, packet, IpVersion};
@@ -312,7 +313,6 @@ impl Handle {
for route in list {
let link = self.find_link(LinkFilter::Name(&route.device)).await?;
let is_v6 = is_ipv6(route.get_gateway()) || is_ipv6(route.get_dest());
const MAIN_TABLE: u8 = packet::constants::RT_TABLE_MAIN;
const UNICAST: u8 = packet::constants::RTN_UNICAST;
@@ -334,7 +334,7 @@ impl Handle {
// `rtnetlink` offers a separate request builders for different IP versions (IP v4 and v6).
// This if branch is a bit clumsy because it does almost the same.
if is_v6 {
if route.get_family() == IPFamily::v6 {
let dest_addr = if !route.dest.is_empty() {
Ipv6Network::from_str(&route.dest)?
} else {
@@ -364,14 +364,17 @@ impl Handle {
request = request.gateway(ip);
}
request.execute().await.with_context(|| {
format!(
"Failed to add IP v6 route (src: {}, dst: {}, gtw: {})",
route.get_source(),
route.get_dest(),
route.get_gateway()
)
})?;
if let Err(rtnetlink::Error::NetlinkError(message)) = request.execute().await {
if Errno::from_i32(message.code.abs()) != Errno::EEXIST {
return Err(anyhow!(
"Failed to add IP v6 route (src: {}, dst: {}, gtw: {},Err: {})",
route.get_source(),
route.get_dest(),
route.get_gateway(),
message
));
}
}
} else {
let dest_addr = if !route.dest.is_empty() {
Ipv4Network::from_str(&route.dest)?
@@ -402,7 +405,17 @@ impl Handle {
request = request.gateway(ip);
}
request.execute().await?;
if let Err(rtnetlink::Error::NetlinkError(message)) = request.execute().await {
if Errno::from_i32(message.code.abs()) != Errno::EEXIST {
return Err(anyhow!(
"Failed to add IP v4 route (src: {}, dst: {}, gtw: {},Err: {})",
route.get_source(),
route.get_dest(),
route.get_gateway(),
message
));
}
}
}
}
@@ -594,10 +607,6 @@ fn format_address(data: &[u8]) -> Result<String> {
}
}
fn is_ipv6(str: &str) -> bool {
Ipv6Addr::from_str(str).is_ok()
}
fn parse_mac_address(addr: &str) -> Result<[u8; 6]> {
let mut split = addr.splitn(6, ':');
@@ -932,16 +941,6 @@ mod tests {
assert_eq!(bytes, [0xAB, 0x0C, 0xDE, 0x12, 0x34, 0x56]);
}
#[test]
fn check_ipv6() {
assert!(is_ipv6("::1"));
assert!(is_ipv6("2001:0:3238:DFE1:63::FEFB"));
assert!(!is_ipv6(""));
assert!(!is_ipv6("127.0.0.1"));
assert!(!is_ipv6("10.10.10.10"));
}
fn clean_env_for_test_add_one_arp_neighbor(dummy_name: &str, ip: &str) {
// ip link delete dummy
Command::new("ip")

View File

@@ -9,51 +9,143 @@ use std::str::FromStr;
use anyhow::anyhow;
// The PCI spec reserves 5 bits for slot number (a.k.a. device
// number), giving slots 0..31
// The PCI spec reserves 5 bits (0..31) for slot number (a.k.a. device
// number)
const SLOT_BITS: u8 = 5;
const SLOT_MAX: u8 = (1 << SLOT_BITS) - 1;
// Represents a PCI function's slot number (a.k.a. device number),
// giving its location on a single bus
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Slot(u8);
// The PCI spec reserves 3 bits (0..7) for function number
const FUNCTION_BITS: u8 = 3;
const FUNCTION_MAX: u8 = (1 << FUNCTION_BITS) - 1;
impl Slot {
pub fn new<T: TryInto<u8> + fmt::Display + Copy>(v: T) -> anyhow::Result<Self> {
if let Ok(v8) = v.try_into() {
if v8 <= SLOT_MAX {
return Ok(Slot(v8));
// Represents a PCI function's slot (a.k.a. device) and function
// numbers, giving its location on a single logical bus
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct SlotFn(u8);
impl SlotFn {
pub fn new<T, U>(ss: T, f: U) -> anyhow::Result<Self>
where
T: TryInto<u8> + fmt::Display + Copy,
U: TryInto<u8> + fmt::Display + Copy,
{
let ss8 = match ss.try_into() {
Ok(ss8) if ss8 <= SLOT_MAX => ss8,
_ => {
return Err(anyhow!(
"PCI slot {} should be in range [0..{:#x}]",
ss,
SLOT_MAX
));
}
}
Err(anyhow!(
"PCI slot {} should be in range [0..{:#x}]",
v,
SLOT_MAX
))
};
let f8 = match f.try_into() {
Ok(f8) if f8 <= FUNCTION_MAX => f8,
_ => {
return Err(anyhow!(
"PCI function {} should be in range [0..{:#x}]",
f,
FUNCTION_MAX
));
}
};
Ok(SlotFn(ss8 << FUNCTION_BITS | f8))
}
// Returns the slot (a.k.a. device) number, i.e. the bits of the packed
// value above the low FUNCTION_BITS bits
pub fn slot(self) -> u8 {
self.0 >> FUNCTION_BITS
}
// Returns the function number, i.e. the packed value masked with
// FUNCTION_MAX (its low FUNCTION_BITS bits)
pub fn function(self) -> u8 {
self.0 & FUNCTION_MAX
}
}
impl FromStr for Slot {
impl FromStr for SlotFn {
type Err = anyhow::Error;
fn from_str(s: &str) -> anyhow::Result<Self> {
let v = isize::from_str_radix(s, 16)?;
Slot::new(v)
let mut tokens = s.split('.').fuse();
let slot = tokens.next();
let func = tokens.next();
if slot.is_none() || tokens.next().is_some() {
return Err(anyhow!(
"PCI slot/function {} should have the format SS.F",
s
));
}
let slot = isize::from_str_radix(slot.unwrap(), 16)?;
let func = match func {
Some(func) => isize::from_str_radix(func, 16)?,
None => 0,
};
SlotFn::new(slot, func)
}
}
impl fmt::Display for Slot {
impl fmt::Display for SlotFn {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
write!(f, "{:02x}", self.0)
write!(f, "{:02x}.{:01x}", self.slot(), self.function())
}
}
// A complete PCI address: a 16-bit domain, an 8-bit bus and the
// slot/function pair locating the function on that bus.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Address {
    domain: u16,
    bus: u8,
    slotfn: SlotFn,
}

impl Address {
    // Assemble an address from already-validated components. The field
    // types bound every value, so construction cannot fail.
    pub fn new(domain: u16, bus: u8, slotfn: SlotFn) -> Self {
        Self {
            domain,
            bus,
            slotfn,
        }
    }
}
impl FromStr for Address {
    type Err = anyhow::Error;

    // Parse a textual PCI address of the form DDDD:BB:SS.F, where every
    // numeric field is hexadecimal. Exactly three ':'-separated fields are
    // required; the last one is handed to SlotFn::from_str.
    fn from_str(s: &str) -> anyhow::Result<Self> {
        let mut fields = s.split(':').fuse();

        // Pull out the three expected fields plus one extra probe, so that
        // both "too few" and "too many" fields are rejected in one place.
        let (domain_txt, bus_txt, slotfn_txt) = match (
            fields.next(),
            fields.next(),
            fields.next(),
            fields.next(),
        ) {
            (Some(d), Some(b), Some(sf), None) => (d, b, sf),
            _ => {
                return Err(anyhow!(
                    "PCI address {} should have the format DDDD:BB:SS.F",
                    s
                ));
            }
        };

        let domain = u16::from_str_radix(domain_txt, 16)?;
        let bus = u8::from_str_radix(bus_txt, 16)?;
        let slotfn = SlotFn::from_str(slotfn_txt)?;

        Ok(Address::new(domain, bus, slotfn))
    }
}
impl fmt::Display for Address {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
write!(f, "{:04x}:{:02x}:{}", self.domain, self.bus, self.slotfn)
}
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Path(Vec<Slot>);
pub struct Path(Vec<SlotFn>);
impl Path {
pub fn new(slots: Vec<Slot>) -> anyhow::Result<Self> {
pub fn new(slots: Vec<SlotFn>) -> anyhow::Result<Self> {
if slots.is_empty() {
return Err(anyhow!("PCI path must have at least one element"));
}
@@ -63,7 +155,7 @@ impl Path {
// Let Path be treated as a slice of Slots
impl Deref for Path {
type Target = [Slot];
type Target = [SlotFn];
fn deref(&self) -> &Self::Target {
&self.0
@@ -85,83 +177,170 @@ impl FromStr for Path {
type Err = anyhow::Error;
fn from_str(s: &str) -> anyhow::Result<Self> {
let rslots: anyhow::Result<Vec<Slot>> = s.split('/').map(Slot::from_str).collect();
let rslots: anyhow::Result<Vec<SlotFn>> = s.split('/').map(SlotFn::from_str).collect();
Path::new(rslots?)
}
}
#[cfg(test)]
mod tests {
use crate::pci::{Path, Slot};
use super::*;
use std::str::FromStr;
#[test]
fn test_slot() {
fn test_slotfn() {
// Valid slots
let slot = Slot::new(0x00).unwrap();
assert_eq!(format!("{}", slot), "00");
let sf = SlotFn::new(0x00, 0x0).unwrap();
assert_eq!(format!("{}", sf), "00.0");
let slot = Slot::from_str("00").unwrap();
assert_eq!(format!("{}", slot), "00");
let sf = SlotFn::from_str("00.0").unwrap();
assert_eq!(format!("{}", sf), "00.0");
let slot = Slot::new(31).unwrap();
let slot2 = Slot::from_str("1f").unwrap();
assert_eq!(slot, slot2);
let sf = SlotFn::from_str("00").unwrap();
assert_eq!(format!("{}", sf), "00.0");
let sf = SlotFn::new(31, 7).unwrap();
let sf2 = SlotFn::from_str("1f.7").unwrap();
assert_eq!(sf, sf2);
// Bad slots
let slot = Slot::new(-1);
assert!(slot.is_err());
let sf = SlotFn::new(-1, 0);
assert!(sf.is_err());
let slot = Slot::new(32);
assert!(slot.is_err());
let sf = SlotFn::new(32, 0);
assert!(sf.is_err());
let slot = Slot::from_str("20");
assert!(slot.is_err());
let sf = SlotFn::from_str("20.0");
assert!(sf.is_err());
let slot = Slot::from_str("xy");
assert!(slot.is_err());
let sf = SlotFn::from_str("20");
assert!(sf.is_err());
let slot = Slot::from_str("00/");
assert!(slot.is_err());
let sf = SlotFn::from_str("xy.0");
assert!(sf.is_err());
let slot = Slot::from_str("");
assert!(slot.is_err());
let sf = SlotFn::from_str("xy");
assert!(sf.is_err());
// Bad functions
let sf = SlotFn::new(0, -1);
assert!(sf.is_err());
let sf = SlotFn::new(0, 8);
assert!(sf.is_err());
let sf = SlotFn::from_str("00.8");
assert!(sf.is_err());
let sf = SlotFn::from_str("00.x");
assert!(sf.is_err());
// Bad formats
let sf = SlotFn::from_str("");
assert!(sf.is_err());
let sf = SlotFn::from_str("00.0.0");
assert!(sf.is_err());
let sf = SlotFn::from_str("00.0/");
assert!(sf.is_err());
let sf = SlotFn::from_str("00/");
assert!(sf.is_err());
}
#[test]
fn test_address() {
    // Valid addresses: each must print as the expected text, and that same
    // text must parse back to an equal Address.
    let valid = [
        (
            Address::new(0, 0, SlotFn::new(0, 0).unwrap()),
            "0000:00:00.0",
        ),
        (
            Address::new(0xffff, 0xff, SlotFn::new(0x1f, 7).unwrap()),
            "ffff:ff:1f.7",
        ),
    ];
    for (addr, text) in valid.iter() {
        assert_eq!(format!("{}", addr), *text);
        let parsed = Address::from_str(text).unwrap();
        assert_eq!(*addr, parsed);
    }

    // Bad addresses: out-of-range fields, non-hex digits, wrong field count
    let invalid = [
        "10000:00:00.0",
        "0000:100:00.0",
        "0000:00:20.0",
        "0000:00:00.8",
        "xyz",
        "xyxy:xy:xy.z",
        "0000:00:00.0:00",
    ];
    for text in invalid.iter() {
        assert!(Address::from_str(text).is_err());
    }
}
#[test]
fn test_path() {
let slot3 = Slot::new(0x03).unwrap();
let slot4 = Slot::new(0x04).unwrap();
let slot5 = Slot::new(0x05).unwrap();
let sf3_0 = SlotFn::new(0x03, 0).unwrap();
let sf4_0 = SlotFn::new(0x04, 0).unwrap();
let sf5_0 = SlotFn::new(0x05, 0).unwrap();
let sfa_5 = SlotFn::new(0x0a, 5).unwrap();
let sfb_6 = SlotFn::new(0x0b, 6).unwrap();
let sfc_7 = SlotFn::new(0x0c, 7).unwrap();
// Valid paths
let pcipath = Path::new(vec![slot3]).unwrap();
assert_eq!(format!("{}", pcipath), "03");
let pcipath = Path::new(vec![sf3_0]).unwrap();
assert_eq!(format!("{}", pcipath), "03.0");
let pcipath2 = Path::from_str("03.0").unwrap();
assert_eq!(pcipath, pcipath2);
let pcipath2 = Path::from_str("03").unwrap();
assert_eq!(pcipath, pcipath2);
assert_eq!(pcipath.len(), 1);
assert_eq!(pcipath[0], slot3);
assert_eq!(pcipath[0], sf3_0);
let pcipath = Path::new(vec![slot3, slot4]).unwrap();
assert_eq!(format!("{}", pcipath), "03/04");
let pcipath = Path::new(vec![sf3_0, sf4_0]).unwrap();
assert_eq!(format!("{}", pcipath), "03.0/04.0");
let pcipath2 = Path::from_str("03.0/04.0").unwrap();
assert_eq!(pcipath, pcipath2);
let pcipath2 = Path::from_str("03/04").unwrap();
assert_eq!(pcipath, pcipath2);
assert_eq!(pcipath.len(), 2);
assert_eq!(pcipath[0], slot3);
assert_eq!(pcipath[1], slot4);
assert_eq!(pcipath[0], sf3_0);
assert_eq!(pcipath[1], sf4_0);
let pcipath = Path::new(vec![slot3, slot4, slot5]).unwrap();
assert_eq!(format!("{}", pcipath), "03/04/05");
let pcipath = Path::new(vec![sf3_0, sf4_0, sf5_0]).unwrap();
assert_eq!(format!("{}", pcipath), "03.0/04.0/05.0");
let pcipath2 = Path::from_str("03.0/04.0/05.0").unwrap();
assert_eq!(pcipath, pcipath2);
let pcipath2 = Path::from_str("03/04/05").unwrap();
assert_eq!(pcipath, pcipath2);
assert_eq!(pcipath.len(), 3);
assert_eq!(pcipath[0], slot3);
assert_eq!(pcipath[1], slot4);
assert_eq!(pcipath[2], slot5);
assert_eq!(pcipath[0], sf3_0);
assert_eq!(pcipath[1], sf4_0);
assert_eq!(pcipath[2], sf5_0);
let pcipath = Path::new(vec![sfa_5, sfb_6, sfc_7]).unwrap();
assert_eq!(format!("{}", pcipath), "0a.5/0b.6/0c.7");
let pcipath2 = Path::from_str("0a.5/0b.6/0c.7").unwrap();
assert_eq!(pcipath, pcipath2);
assert_eq!(pcipath.len(), 3);
assert_eq!(pcipath[0], sfa_5);
assert_eq!(pcipath[1], sfb_6);
assert_eq!(pcipath[2], sfc_7);
// Bad paths
assert!(Path::new(vec!()).is_err());
assert!(Path::from_str("20").is_err());
assert!(Path::from_str("00.8").is_err());
assert!(Path::from_str("//").is_err());
assert!(Path::from_str("xyz").is_err());
}

View File

@@ -3,7 +3,6 @@
// SPDX-License-Identifier: Apache-2.0
//
use crate::pci;
use async_trait::async_trait;
use rustjail::{pipestream::PipeStream, process::StreamType};
use tokio::io::{AsyncReadExt, AsyncWriteExt, ReadHalf};
@@ -21,7 +20,7 @@ use ttrpc::{
use anyhow::{anyhow, Context, Result};
use oci::{LinuxNamespace, Root, Spec};
use protobuf::{RepeatedField, SingularPtrField};
use protobuf::{Message, RepeatedField, SingularPtrField};
use protocols::agent::{
AddSwapRequest, AgentDetails, CopyFileRequest, GuestDetailsResponse, Interfaces, Metrics,
OOMEvent, ReadStreamResponse, Routes, StatsContainerResponse, WaitProcessResponse,
@@ -44,12 +43,13 @@ use nix::sys::stat;
use nix::unistd::{self, Pid};
use rustjail::process::ProcessOperations;
use crate::device::{add_devices, pcipath_to_sysfs, rescan_pci_bus, update_device_cgroup};
use crate::device::{add_devices, get_virtio_blk_pci_device_name, update_device_cgroup};
use crate::linux_abi::*;
use crate::metrics::get_metrics;
use crate::mount::{add_storages, remove_mounts, BareMount, STORAGE_HANDLER_LIST};
use crate::mount::{add_storages, baremount, remove_mounts, STORAGE_HANDLER_LIST};
use crate::namespace::{NSTYPEIPC, NSTYPEPID, NSTYPEUTS};
use crate::network::setup_guest_dns;
use crate::pci;
use crate::random;
use crate::sandbox::Sandbox;
use crate::version::{AGENT_VERSION, API_VERSION};
@@ -86,6 +86,21 @@ macro_rules! sl {
};
}
// Guard for RPC handlers: rejects the request named by `$req` when the agent
// configuration does not allow its endpoint.
//
// `$req` must be an identifier bound to a request value exposing
// `descriptor().name()`. The expansion uses `.await` and an early `return`,
// so it can only be invoked inside an async function whose return type
// accepts `Err(ttrpc_error(..))`.
macro_rules! is_allowed {
($req:ident) => {
// Ask AGENT_CONFIG (accessed through `.read().await`) whether the
// endpoint, identified by the request's descriptor name, is allowed.
if !AGENT_CONFIG
.read()
.await
.is_allowed_endpoint($req.descriptor().name())
{
// Not allowed: leave the enclosing handler immediately with an
// UNIMPLEMENTED ttrpc error naming the blocked request.
return Err(ttrpc_error(
ttrpc::Code::UNIMPLEMENTED,
format!("{} is blocked", $req.descriptor().name()),
));
}
};
}
#[derive(Clone, Debug)]
pub struct AgentService {
sandbox: Arc<Mutex<Sandbox>>,
@@ -133,10 +148,10 @@ impl AgentService {
};
info!(sl!(), "receive createcontainer, spec: {:?}", &oci);
// re-scan PCI bus
// looking for hidden devices
rescan_pci_bus().context("Could not rescan PCI bus")?;
info!(
sl!(),
"receive createcontainer, storages: {:?}", &req.storages
);
// Some devices need some extra processing (the ones invoked with
// --device for instance), and that's what this call is doing. It
@@ -152,7 +167,13 @@ impl AgentService {
// After all those storages have been processed, no matter the order
// here, the agent will rely on rustjail (using the oci.Mounts
// list) to bind mount all of them inside the container.
let m = add_storages(sl!(), req.storages.to_vec(), self.sandbox.clone()).await?;
let m = add_storages(
sl!(),
req.storages.to_vec(),
self.sandbox.clone(),
Some(req.container_id.clone()),
)
.await?;
{
sandbox = self.sandbox.clone();
s = sandbox.lock().await;
@@ -422,7 +443,7 @@ impl AgentService {
.get_container(&cid)
.ok_or_else(|| anyhow!("Invalid container id"))?;
let mut p = match ctr.processes.get_mut(&pid) {
let p = match ctr.processes.get_mut(&pid) {
Some(p) => p,
None => {
// Lost race, pick up exit code from channel
@@ -433,7 +454,7 @@ impl AgentService {
// need to close all fd
// ignore errors for some fd might be closed by stream
let _ = cleanup_process(&mut p);
p.cleanup_process_stream();
resp.status = p.exit_code;
// broadcast exit code to all parallel watchers
@@ -535,6 +556,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::CreateContainerRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "create_container", req);
is_allowed!(req);
match self.do_create_container(req).await {
Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
Ok(_) => Ok(Empty::new()),
@@ -547,6 +569,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::StartContainerRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "start_container", req);
is_allowed!(req);
match self.do_start_container(req).await {
Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
Ok(_) => Ok(Empty::new()),
@@ -559,6 +582,8 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::RemoveContainerRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "remove_container", req);
is_allowed!(req);
match self.do_remove_container(req).await {
Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
Ok(_) => Ok(Empty::new()),
@@ -571,6 +596,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::ExecProcessRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "exec_process", req);
is_allowed!(req);
match self.do_exec_process(req).await {
Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
Ok(_) => Ok(Empty::new()),
@@ -583,6 +609,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::SignalProcessRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "signal_process", req);
is_allowed!(req);
match self.do_signal_process(req).await {
Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
Ok(_) => Ok(Empty::new()),
@@ -595,6 +622,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::WaitProcessRequest,
) -> ttrpc::Result<WaitProcessResponse> {
trace_rpc_call!(ctx, "wait_process", req);
is_allowed!(req);
self.do_wait_process(req)
.await
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))
@@ -606,6 +634,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::UpdateContainerRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "update_container", req);
is_allowed!(req);
let cid = req.container_id.clone();
let res = req.resources;
@@ -641,6 +670,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::StatsContainerRequest,
) -> ttrpc::Result<StatsContainerResponse> {
trace_rpc_call!(ctx, "stats_container", req);
is_allowed!(req);
let cid = req.container_id;
let s = Arc::clone(&self.sandbox);
let mut sandbox = s.lock().await;
@@ -662,6 +692,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::PauseContainerRequest,
) -> ttrpc::Result<protocols::empty::Empty> {
trace_rpc_call!(ctx, "pause_container", req);
is_allowed!(req);
let cid = req.get_container_id();
let s = Arc::clone(&self.sandbox);
let mut sandbox = s.lock().await;
@@ -685,6 +716,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::ResumeContainerRequest,
) -> ttrpc::Result<protocols::empty::Empty> {
trace_rpc_call!(ctx, "resume_container", req);
is_allowed!(req);
let cid = req.get_container_id();
let s = Arc::clone(&self.sandbox);
let mut sandbox = s.lock().await;
@@ -707,6 +739,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
_ctx: &TtrpcContext,
req: protocols::agent::WriteStreamRequest,
) -> ttrpc::Result<WriteStreamResponse> {
is_allowed!(req);
self.do_write_stream(req)
.await
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))
@@ -717,6 +750,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
_ctx: &TtrpcContext,
req: protocols::agent::ReadStreamRequest,
) -> ttrpc::Result<ReadStreamResponse> {
is_allowed!(req);
self.do_read_stream(req, true)
.await
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))
@@ -727,6 +761,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
_ctx: &TtrpcContext,
req: protocols::agent::ReadStreamRequest,
) -> ttrpc::Result<ReadStreamResponse> {
is_allowed!(req);
self.do_read_stream(req, false)
.await
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))
@@ -738,6 +773,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::CloseStdinRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "close_stdin", req);
is_allowed!(req);
let cid = req.container_id.clone();
let eid = req.exec_id;
@@ -751,19 +787,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
)
})?;
if p.term_master.is_some() {
p.close_stream(StreamType::TermMaster);
let _ = unistd::close(p.term_master.unwrap());
p.term_master = None;
}
if p.parent_stdin.is_some() {
p.close_stream(StreamType::ParentStdin);
let _ = unistd::close(p.parent_stdin.unwrap());
p.parent_stdin = None;
}
p.notify_term_close();
p.close_stdin();
Ok(Empty::new())
}
@@ -774,6 +798,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::TtyWinResizeRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "tty_win_resize", req);
is_allowed!(req);
let cid = req.container_id.clone();
let eid = req.exec_id.clone();
@@ -814,6 +839,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::UpdateInterfaceRequest,
) -> ttrpc::Result<Interface> {
trace_rpc_call!(ctx, "update_interface", req);
is_allowed!(req);
let interface = req.interface.into_option().ok_or_else(|| {
ttrpc_error(
@@ -841,6 +867,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::UpdateRoutesRequest,
) -> ttrpc::Result<Routes> {
trace_rpc_call!(ctx, "update_routes", req);
is_allowed!(req);
let new_routes = req
.routes
@@ -881,6 +908,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::ListInterfacesRequest,
) -> ttrpc::Result<Interfaces> {
trace_rpc_call!(ctx, "list_interfaces", req);
is_allowed!(req);
let list = self
.sandbox
@@ -908,6 +936,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::ListRoutesRequest,
) -> ttrpc::Result<Routes> {
trace_rpc_call!(ctx, "list_routes", req);
is_allowed!(req);
let list = self
.sandbox
@@ -930,14 +959,16 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::StartTracingRequest,
) -> ttrpc::Result<Empty> {
info!(sl!(), "start_tracing {:?}", req);
is_allowed!(req);
Ok(Empty::new())
}
async fn stop_tracing(
&self,
_ctx: &TtrpcContext,
_req: protocols::agent::StopTracingRequest,
req: protocols::agent::StopTracingRequest,
) -> ttrpc::Result<Empty> {
is_allowed!(req);
Ok(Empty::new())
}
@@ -947,6 +978,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::CreateSandboxRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "create_sandbox", req);
is_allowed!(req);
{
let sandbox = self.sandbox.clone();
@@ -981,7 +1013,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
}
match add_storages(sl!(), req.storages.to_vec(), self.sandbox.clone()).await {
match add_storages(sl!(), req.storages.to_vec(), self.sandbox.clone(), None).await {
Ok(m) => {
let sandbox = self.sandbox.clone();
let mut s = sandbox.lock().await;
@@ -1012,6 +1044,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::DestroySandboxRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "destroy_sandbox", req);
is_allowed!(req);
let s = Arc::clone(&self.sandbox);
let mut sandbox = s.lock().await;
@@ -1033,6 +1066,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::AddARPNeighborsRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "add_arp_neighbors", req);
is_allowed!(req);
let neighs = req
.neighbors
@@ -1066,6 +1100,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
ctx: &TtrpcContext,
req: protocols::agent::OnlineCPUMemRequest,
) -> ttrpc::Result<Empty> {
is_allowed!(req);
let s = Arc::clone(&self.sandbox);
let sandbox = s.lock().await;
trace_rpc_call!(ctx, "online_cpu_mem", req);
@@ -1083,6 +1118,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::ReseedRandomDevRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "reseed_random_dev", req);
is_allowed!(req);
random::reseed_rng(req.data.as_slice())
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
@@ -1096,6 +1132,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::GuestDetailsRequest,
) -> ttrpc::Result<GuestDetailsResponse> {
trace_rpc_call!(ctx, "get_guest_details", req);
is_allowed!(req);
info!(sl!(), "get guest details!");
let mut resp = GuestDetailsResponse::new();
@@ -1124,6 +1161,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::MemHotplugByProbeRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "mem_hotplug_by_probe", req);
is_allowed!(req);
do_mem_hotplug_by_probe(&req.memHotplugProbeAddr)
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
@@ -1137,6 +1175,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::SetGuestDateTimeRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "set_guest_date_time", req);
is_allowed!(req);
do_set_guest_date_time(req.Sec, req.Usec)
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
@@ -1150,6 +1189,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::CopyFileRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "copy_file", req);
is_allowed!(req);
do_copy_file(&req).map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
@@ -1162,6 +1202,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::GetMetricsRequest,
) -> ttrpc::Result<Metrics> {
trace_rpc_call!(ctx, "get_metrics", req);
is_allowed!(req);
match get_metrics(&req) {
Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
@@ -1176,8 +1217,9 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
async fn get_oom_event(
&self,
_ctx: &TtrpcContext,
_req: protocols::agent::GetOOMEventRequest,
req: protocols::agent::GetOOMEventRequest,
) -> ttrpc::Result<OOMEvent> {
is_allowed!(req);
let sandbox = self.sandbox.clone();
let s = sandbox.lock().await;
let event_rx = &s.event_rx.clone();
@@ -1203,8 +1245,11 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::AddSwapRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "add_swap", req);
is_allowed!(req);
do_add_swap(&req).map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
do_add_swap(&self.sandbox, &req)
.await
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
Ok(Empty::new())
}
@@ -1288,11 +1333,19 @@ fn get_memory_info(block_size: bool, hotplug: bool) -> Result<(u64, bool)> {
Ok((size, plug))
}
// Reports whether this build was compiled with the "seccomp" cargo feature.
// The answer is fixed at compile time.
pub fn have_seccomp() -> bool {
    cfg!(feature = "seccomp")
}
fn get_agent_details() -> AgentDetails {
let mut detail = AgentDetails::new();
detail.set_version(AGENT_VERSION.to_string());
detail.set_supports_seccomp(false);
detail.set_supports_seccomp(have_seccomp());
detail.init_daemon = unistd::getpid() == Pid::from_raw(1);
detail.device_handlers = RepeatedField::new();
@@ -1557,43 +1610,13 @@ fn do_copy_file(req: &CopyFileRequest) -> Result<()> {
Ok(())
}
// Scan the directory `path` for an entry whose file name begins with
// `lookup`.
//
// Returns the full path of the first such entry (in directory iteration
// order) together with its file name. Entries whose names are not valid
// UTF-8 are skipped. Fails if the directory cannot be read, if reading an
// entry fails, or if no entry matches.
pub fn path_name_lookup<P: std::clone::Clone + AsRef<Path> + std::fmt::Debug>(
    path: P,
    lookup: &str,
) -> Result<(PathBuf, String)> {
    for dirent in fs::read_dir(path.clone())? {
        let candidate = dirent?.path();

        // Keep the name only when it is UTF-8 and carries the prefix
        let matched = candidate
            .file_name()
            .and_then(|os_name| os_name.to_str())
            .filter(|name| name.starts_with(lookup))
            .map(|name| name.to_string());

        if let Some(name) = matched {
            return Ok((candidate, name));
        }
    }

    Err(anyhow!("cannot get {} dir in {:?}", lookup, path))
}
fn do_add_swap(req: &AddSwapRequest) -> Result<()> {
// re-scan PCI bus
// looking for hidden devices
rescan_pci_bus().context("Could not rescan PCI bus")?;
async fn do_add_swap(sandbox: &Arc<Mutex<Sandbox>>, req: &AddSwapRequest) -> Result<()> {
let mut slots = Vec::new();
for slot in &req.PCIPath {
slots.push(pci::Slot::new(*slot as u8)?);
slots.push(pci::SlotFn::new(*slot, 0)?);
}
let pcipath = pci::Path::new(slots)?;
let root_bus_sysfs = format!("{}{}", SYSFS_DIR, create_pci_root_bus_path());
let sysfs_rel_path = format!(
"{}{}",
root_bus_sysfs,
pcipath_to_sysfs(&root_bus_sysfs, &pcipath)?
);
let (mut virtio_path, _) = path_name_lookup(sysfs_rel_path, "virtio")?;
virtio_path.push("block");
let (_, dev_name) = path_name_lookup(virtio_path, "vd")?;
let dev_name = format!("/dev/{}", dev_name);
let dev_name = get_virtio_blk_pci_device_name(sandbox, &pcipath).await?;
let c_str = CString::new(dev_name)?;
let ret = unsafe { libc::swapon(c_str.as_ptr() as *const c_char, 0) };
@@ -1624,25 +1647,19 @@ fn setup_bundle(cid: &str, spec: &mut Spec) -> Result<PathBuf> {
let rootfs_path = bundle_path.join("rootfs");
fs::create_dir_all(&rootfs_path)?;
BareMount::new(
baremount(
&spec_root.path,
rootfs_path.to_str().unwrap(),
"bind",
MsFlags::MS_BIND,
"",
&sl!(),
)
.mount()?;
)?;
spec.root = Some(Root {
path: rootfs_path.to_str().unwrap().to_owned(),
readonly: spec_root.readonly,
});
info!(
sl!(),
"{:?}",
spec.process.as_ref().unwrap().console_size.as_ref()
);
let _ = spec.save(config_path.to_str().unwrap());
let olddir = unistd::getcwd().context("cannot getcwd")?;
@@ -1651,37 +1668,6 @@ fn setup_bundle(cid: &str, spec: &mut Spec) -> Result<PathBuf> {
Ok(olddir)
}
// Release every parent-side stdio / terminal descriptor still held by `p`.
//
// Each descriptor is taken out of the process (leaving `None` behind) before
// it is closed, so no descriptor can be closed twice by a later call, even
// when a close fails. A failing close no longer aborts the cleanup: all
// remaining descriptors are still closed and the terminal-close notification
// is always sent. The first close error encountered, if any, is returned
// once everything has been attempted.
fn cleanup_process(p: &mut Process) -> Result<()> {
    let mut first_err = None;

    if let Some(fd) = p.parent_stdin.take() {
        p.close_stream(StreamType::ParentStdin);
        if let Err(e) = unistd::close(fd) {
            first_err.get_or_insert(e);
        }
    }

    if let Some(fd) = p.parent_stdout.take() {
        p.close_stream(StreamType::ParentStdout);
        if let Err(e) = unistd::close(fd) {
            first_err.get_or_insert(e);
        }
    }

    if let Some(fd) = p.parent_stderr.take() {
        p.close_stream(StreamType::ParentStderr);
        if let Err(e) = unistd::close(fd) {
            first_err.get_or_insert(e);
        }
    }

    if let Some(fd) = p.term_master.take() {
        p.close_stream(StreamType::TermMaster);
        if let Err(e) = unistd::close(fd) {
            first_err.get_or_insert(e);
        }
    }

    // Sent unconditionally so a close failure above cannot suppress it
    p.notify_term_close();

    match first_err {
        Some(e) => Err(e.into()),
        None => Ok(()),
    }
}
fn load_kernel_module(module: &protocols::agent::KernelModule) -> Result<()> {
if module.name.is_empty() {
return Err(anyhow!("Kernel module name is empty"));
@@ -1734,6 +1720,7 @@ mod tests {
fd: -1,
mh: MessageHeader::default(),
metadata: std::collections::HashMap::new(),
timeout_nano: 0,
}
}

View File

@@ -449,7 +449,7 @@ fn online_memory(logger: &Logger) -> Result<()> {
#[cfg(test)]
mod tests {
use super::Sandbox;
use crate::{mount::BareMount, skip_if_not_root};
use crate::{mount::baremount, skip_if_not_root};
use anyhow::Error;
use nix::mount::MsFlags;
use oci::{Linux, Root, Spec};
@@ -461,11 +461,13 @@ mod tests {
use tempfile::Builder;
fn bind_mount(src: &str, dst: &str, logger: &Logger) -> Result<(), Error> {
let baremount = BareMount::new(src, dst, "bind", MsFlags::MS_BIND, "", logger);
baremount.mount()
baremount(src, dst, "bind", MsFlags::MS_BIND, "", logger)
}
use serial_test::serial;
#[tokio::test]
#[serial]
async fn set_sandbox_storage() {
let logger = slog::Logger::root(slog::Discard, o!());
let mut s = Sandbox::new(&logger).unwrap();
@@ -500,6 +502,7 @@ mod tests {
}
#[tokio::test]
#[serial]
async fn remove_sandbox_storage() {
skip_if_not_root!();
@@ -556,6 +559,7 @@ mod tests {
}
#[tokio::test]
#[serial]
async fn unset_and_remove_sandbox_storage() {
skip_if_not_root!();
@@ -607,6 +611,7 @@ mod tests {
}
#[tokio::test]
#[serial]
async fn unset_sandbox_storage() {
let logger = slog::Logger::root(slog::Discard, o!());
let mut s = Sandbox::new(&logger).unwrap();
@@ -690,6 +695,7 @@ mod tests {
}
#[tokio::test]
#[serial]
async fn get_container_entry_exist() {
skip_if_not_root!();
let logger = slog::Logger::root(slog::Discard, o!());
@@ -703,6 +709,7 @@ mod tests {
}
#[tokio::test]
#[serial]
async fn get_container_no_entry() {
let logger = slog::Logger::root(slog::Discard, o!());
let mut s = Sandbox::new(&logger).unwrap();
@@ -712,6 +719,7 @@ mod tests {
}
#[tokio::test]
#[serial]
async fn add_and_get_container() {
skip_if_not_root!();
let logger = slog::Logger::root(slog::Discard, o!());
@@ -723,6 +731,7 @@ mod tests {
}
#[tokio::test]
#[serial]
async fn update_shared_pidns() {
skip_if_not_root!();
let logger = slog::Logger::root(slog::Discard, o!());
@@ -741,6 +750,7 @@ mod tests {
}
#[tokio::test]
#[serial]
async fn add_guest_hooks() {
let logger = slog::Logger::root(slog::Discard, o!());
let mut s = Sandbox::new(&logger).unwrap();
@@ -764,6 +774,7 @@ mod tests {
}
#[tokio::test]
#[serial]
async fn test_sandbox_set_destroy() {
let logger = slog::Logger::root(slog::Discard, o!());
let mut s = Sandbox::new(&logger).unwrap();

View File

@@ -3,60 +3,17 @@
// SPDX-License-Identifier: Apache-2.0
//
use crate::config::AgentConfig;
use anyhow::Result;
use opentelemetry::sdk::propagation::TraceContextPropagator;
use opentelemetry::{global, sdk::trace::Config, trace::TracerProvider};
use slog::{info, o, Logger};
use std::collections::HashMap;
use std::error::Error;
use std::fmt;
use std::str::FromStr;
use tracing_opentelemetry::OpenTelemetryLayer;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::Registry;
use ttrpc::r#async::TtrpcContext;
// The tracing modes that can be selected by name.
#[derive(Debug, PartialEq)]
pub enum TraceType {
    Disabled,
    Isolated,
}

// Error produced when a string does not name a known trace type.
#[derive(Debug)]
pub struct TraceTypeError {
    // Human-readable description; this is exactly what Display prints
    details: String,
}

impl TraceTypeError {
    // Wrap a message in an error value.
    fn new(msg: &str) -> Self {
        Self {
            details: msg.to_owned(),
        }
    }
}

impl Error for TraceTypeError {}

impl fmt::Display for TraceTypeError {
    // Print the stored message verbatim.
    fn fmt(&self, out: &mut fmt::Formatter) -> fmt::Result {
        out.write_str(&self.details)
    }
}

impl FromStr for TraceType {
    type Err = TraceTypeError;

    // Accepts exactly "isolated" or "disabled" (case-sensitive, no
    // trimming); anything else is an "invalid trace type" error.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let parsed = match s {
            "disabled" => TraceType::Disabled,
            "isolated" => TraceType::Isolated,
            _ => return Err(TraceTypeError::new("invalid trace type")),
        };

        Ok(parsed)
    }
}
pub fn setup_tracing(name: &'static str, logger: &Logger, _agent_cfg: &AgentConfig) -> Result<()> {
pub fn setup_tracing(name: &'static str, logger: &Logger) -> Result<()> {
let logger = logger.new(o!("subsystem" => "vsock-tracer"));
let exporter = vsock_exporter::Exporter::builder()

View File

@@ -97,10 +97,18 @@ impl Uevent {
})
}
// Handle a "remove" uevent: forget whatever was recorded for this event's
// device path in the sandbox's uevent map.
#[instrument]
async fn process_remove(&self, logger: &Logger, sandbox: &Arc<Mutex<Sandbox>>) {
    // The lock guard is a temporary, released as soon as the entry is gone
    sandbox.lock().await.uevent_map.remove(&self.devpath);
}
// Dispatch a uevent on its action: "add" and "remove" events go to their
// dedicated handlers, every other action is only logged at debug level.
#[instrument]
async fn process(&self, logger: &Logger, sandbox: &Arc<Mutex<Sandbox>>) {
    if self.action == U_EVENT_ACTION_ADD {
        self.process_add(logger, sandbox).await;
    } else if self.action == U_EVENT_ACTION_REMOVE {
        self.process_remove(logger, sandbox).await;
    } else {
        debug!(*logger, "ignoring event"; "uevent" => format!("{:?}", self));
    }
}
@@ -111,10 +119,13 @@ pub async fn wait_for_uevent(
sandbox: &Arc<Mutex<Sandbox>>,
matcher: impl UeventMatcher,
) -> Result<Uevent> {
let logprefix = format!("Waiting for {:?}", &matcher);
info!(sl!(), "{}", logprefix);
let mut sb = sandbox.lock().await;
for uev in sb.uevent_map.values() {
if matcher.is_match(uev) {
info!(sl!(), "Device {:?} found in device map", uev);
info!(sl!(), "{}: found {:?} in uevent map", logprefix, &uev);
return Ok(uev.clone());
}
}
@@ -129,7 +140,8 @@ pub async fn wait_for_uevent(
sb.uevent_watchers.push(Some((Box::new(matcher), tx)));
drop(sb); // unlock
info!(sl!(), "Waiting on channel for uevent notification\n");
info!(sl!(), "{}: waiting on channel", logprefix);
let hotplug_timeout = AGENT_CONFIG.read().await.hotplug_timeout;
let uev = match tokio::time::timeout(hotplug_timeout, rx).await {
@@ -146,6 +158,7 @@ pub async fn wait_for_uevent(
}
};
info!(sl!(), "{}: found {:?} on channel", logprefix, &uev);
Ok(uev)
}

View File

@@ -3,7 +3,7 @@
// SPDX-License-Identifier: Apache-2.0
//
#![allow(clippy::unknown_clippy_lints)]
#![allow(unknown_lints)]
use std::collections::HashMap;
use std::path::{Path, PathBuf};
@@ -20,7 +20,7 @@ use tokio::sync::Mutex;
use tokio::task;
use tokio::time::{self, Duration};
use crate::mount::BareMount;
use crate::mount::baremount;
use crate::protocols::agent as protos;
/// The maximum number of file system entries agent will watch for each mount.
@@ -49,7 +49,7 @@ struct Storage {
/// the source becomes too large, either in number of files (>16) or total size (>1MB).
watch: bool,
/// The list of files to watch from the source mount point and updated in the target one.
/// The list of files, directories, symlinks to watch from the source mount point and updated in the target one.
watched_files: HashMap<PathBuf, SystemTime>,
}
@@ -79,6 +79,20 @@ impl Drop for Storage {
}
}
// Copy `from` to `to`, preserving symlinks.
//
// A regular source is copied with fs::copy. A symlink source is not
// followed: a new symlink with the same link target is created at `to`,
// after removing whatever entry already exists there.
async fn copy(from: impl AsRef<Path>, to: impl AsRef<Path>) -> Result<()> {
    let src_is_symlink = fs::symlink_metadata(&from)
        .await?
        .file_type()
        .is_symlink();

    if !src_is_symlink {
        fs::copy(from, to).await?;
        return Ok(());
    }

    // symlink_metadata does not follow links, so a dangling symlink at the
    // destination is detected (and replaced) too
    let dest_present = fs::symlink_metadata(&to).await.is_ok();
    if dest_present {
        fs::remove_file(&to).await?;
    }

    let link_target = fs::read_link(&from).await?;
    fs::symlink(link_target, &to).await?;

    Ok(())
}
impl Storage {
async fn new(storage: protos::Storage) -> Result<Storage> {
let entry = Storage {
@@ -93,6 +107,17 @@ impl Storage {
async fn update_target(&self, logger: &Logger, source_path: impl AsRef<Path>) -> Result<()> {
let source_file_path = source_path.as_ref();
// if we are creating a directory: just create it, nothing more to do
if source_file_path.symlink_metadata()?.file_type().is_dir() {
let dest_file_path = self.make_target_path(&source_file_path)?;
fs::create_dir_all(&dest_file_path)
.await
.with_context(|| format!("Unable to mkdir all for {}", dest_file_path.display()))?;
return Ok(());
}
// Assume we are dealing with either a file or a symlink now:
let dest_file_path = if self.source_mount_point.is_file() {
// Simple file to file copy
// Assume target mount is a file path
@@ -110,19 +135,13 @@ impl Storage {
dest_file_path
};
debug!(
logger,
"Copy from {} to {}",
source_file_path.display(),
dest_file_path.display()
);
fs::copy(&source_file_path, &dest_file_path)
copy(&source_file_path, &dest_file_path)
.await
.with_context(|| {
format!(
"Copy from {} to {} failed",
source_file_path.display(),
dest_file_path.display()
dest_file_path.display(),
)
})?;
@@ -135,7 +154,7 @@ impl Storage {
let mut remove_list = Vec::new();
let mut updated_files: Vec<PathBuf> = Vec::new();
// Remove deleted files for tracking list
// Remove deleted files for tracking list.
self.watched_files.retain(|st, _| {
if st.exists() {
true
@@ -147,10 +166,19 @@ impl Storage {
// Delete from target
for path in remove_list {
// File has been deleted, remove it from target mount
let target = self.make_target_path(path)?;
debug!(logger, "Removing file from mount: {}", target.display());
let _ = fs::remove_file(target).await;
// The target may be a directory or a file. If it is a directory that is removed,
// we'll remove all files under that directory as well. Because of this, there's a
// chance the target (a subdirectory or file under a prior removed target) was already
// removed. Make sure we check if the target exists before checking the metadata, and
// don't return an error if the remove fails
if target.exists() && target.symlink_metadata()?.file_type().is_dir() {
debug!(logger, "Removing a directory: {}", target.display());
let _ = fs::remove_dir_all(target).await;
} else {
debug!(logger, "Removing a file: {}", target.display());
let _ = fs::remove_file(target).await;
}
}
// Scan new & changed files
@@ -182,25 +210,18 @@ impl Storage {
let mut size: u64 = 0;
debug!(logger, "Scanning path: {}", path.display());
if path.is_file() {
let metadata = path
.metadata()
.with_context(|| format!("Failed to query metadata for: {}", path.display()))?;
let metadata = path
.symlink_metadata()
.with_context(|| format!("Failed to query metadata for: {}", path.display()))?;
let modified = metadata
.modified()
.with_context(|| format!("Failed to get modified date for: {}", path.display()))?;
let modified = metadata
.modified()
.with_context(|| format!("Failed to get modified date for: {}", path.display()))?;
// Treat files and symlinks the same:
if path.is_file() || metadata.file_type().is_symlink() {
size += metadata.len();
ensure!(
self.watched_files.len() <= MAX_ENTRIES_PER_STORAGE,
WatcherError::MountTooManyFiles {
count: self.watched_files.len(),
mnt: self.source_mount_point.display().to_string()
}
);
// Insert will return old entry if any
if let Some(old_st) = self.watched_files.insert(path.to_path_buf(), modified) {
if modified > old_st {
@@ -211,7 +232,25 @@ impl Storage {
debug!(logger, "New entry: {}", path.display());
update_list.push(PathBuf::from(&path))
}
ensure!(
self.watched_files.len() <= MAX_ENTRIES_PER_STORAGE,
WatcherError::MountTooManyFiles {
count: self.watched_files.len(),
mnt: self.source_mount_point.display().to_string()
}
);
} else {
// Handling regular directories - check to see if this directory is already being tracked, and
// track if not:
if self
.watched_files
.insert(path.to_path_buf(), modified)
.is_none()
{
update_list.push(path.to_path_buf());
}
// Scan dir recursively
let mut entries = fs::read_dir(path)
.await
@@ -269,6 +308,19 @@ impl SandboxStorages {
let entry = Storage::new(storage)
.await
.with_context(|| "Failed to add storage")?;
// If the storage source is a directory, let's create the target mount point:
if entry.source_mount_point.as_path().is_dir() {
fs::create_dir_all(&entry.target_mount_point)
.await
.with_context(|| {
format!(
"Unable to mkdir all for {}",
entry.target_mount_point.display()
)
})?;
}
self.0.push(entry);
}
@@ -314,16 +366,14 @@ impl SandboxStorages {
}
}
match BareMount::new(
match baremount(
entry.source_mount_point.to_str().unwrap(),
entry.target_mount_point.to_str().unwrap(),
"bind",
MsFlags::MS_BIND,
"bind",
logger,
)
.mount()
{
) {
Ok(_) => {
entry.watch = false;
info!(logger, "watchable mount replaced with bind mount")
@@ -427,15 +477,14 @@ impl BindWatcher {
async fn mount(&self, logger: &Logger) -> Result<()> {
fs::create_dir_all(WATCH_MOUNT_POINT_PATH).await?;
BareMount::new(
baremount(
"tmpfs",
WATCH_MOUNT_POINT_PATH,
"tmpfs",
MsFlags::empty(),
"",
logger,
)
.mount()?;
)?;
Ok(())
}
@@ -475,6 +524,85 @@ mod tests {
Ok((storage, src_path))
}
#[tokio::test]
async fn test_empty_sourcedir_check() {
//skip_if_not_root!();
let dir = tempfile::tempdir().expect("failed to create tempdir");
let logger = slog::Logger::root(slog::Discard, o!());
let src_path = dir.path().join("src");
let dest_path = dir.path().join("dest");
let src_filename = src_path.to_str().expect("failed to create src filename");
let dest_filename = dest_path.to_str().expect("failed to create dest filename");
std::fs::create_dir_all(src_filename).expect("failed to create path");
let storage = protos::Storage {
source: src_filename.to_string(),
mount_point: dest_filename.to_string(),
..Default::default()
};
let mut entries = SandboxStorages {
..Default::default()
};
entries
.add(std::iter::once(storage), &logger)
.await
.unwrap();
assert!(entries.check(&logger).await.is_ok());
assert_eq!(entries.0.len(), 1);
assert_eq!(std::fs::read_dir(src_path).unwrap().count(), 0);
assert_eq!(std::fs::read_dir(dest_path).unwrap().count(), 0);
assert_eq!(std::fs::read_dir(dir.path()).unwrap().count(), 2);
}
#[tokio::test]
async fn test_single_file_check() {
//skip_if_not_root!();
let dir = tempfile::tempdir().expect("failed to create tempdir");
let logger = slog::Logger::root(slog::Discard, o!());
let src_file_path = dir.path().join("src.txt");
let dest_file_path = dir.path().join("dest.txt");
let src_filename = src_file_path
.to_str()
.expect("failed to create src filename");
let dest_filename = dest_file_path
.to_str()
.expect("failed to create dest filename");
let storage = protos::Storage {
source: src_filename.to_string(),
mount_point: dest_filename.to_string(),
..Default::default()
};
//create file
fs::write(src_file_path, "original").unwrap();
let mut entries = SandboxStorages::default();
entries
.add(std::iter::once(storage), &logger)
.await
.unwrap();
assert!(entries.check(&logger).await.is_ok());
assert_eq!(entries.0.len(), 1);
// there should only be 2 files
assert_eq!(std::fs::read_dir(dir.path()).unwrap().count(), 2);
assert_eq!(fs::read_to_string(dest_file_path).unwrap(), "original");
}
#[tokio::test]
async fn test_watch_entries() {
skip_if_not_root!();
@@ -523,7 +651,7 @@ mod tests {
.unwrap();
// setup storage3: many files, but still watchable
for i in 1..MAX_ENTRIES_PER_STORAGE + 1 {
for i in 1..MAX_ENTRIES_PER_STORAGE {
fs::write(src3_path.join(format!("{}.txt", i)), "original").unwrap();
}
@@ -533,6 +661,9 @@ mod tests {
..Default::default()
};
// delay 20 ms between writes to files in order to ensure filesystem timestamps are unique
thread::sleep(Duration::from_millis(20));
entries
.add(std::iter::once(storage0), &logger)
.await
@@ -585,7 +716,7 @@ mod tests {
std::fs::read_dir(entries.0[3].target_mount_point.as_path())
.unwrap()
.count(),
MAX_ENTRIES_PER_STORAGE
MAX_ENTRIES_PER_STORAGE - 1
);
// Add two files to storage 0, verify it is updated without needing to run check:
@@ -603,6 +734,9 @@ mod tests {
"updated"
);
// delay 20 ms between writes to files in order to ensure filesystem timestamps are unique
thread::sleep(Duration::from_millis(20));
//
// Prepare for second check: update mount sources
//
@@ -655,7 +789,7 @@ mod tests {
std::fs::read_dir(entries.0[3].target_mount_point.as_path())
.unwrap()
.count(),
MAX_ENTRIES_PER_STORAGE + 1
MAX_ENTRIES_PER_STORAGE
);
// verify that we can remove files as well, but that it isn't observed until check is run
@@ -733,15 +867,20 @@ mod tests {
fs::remove_file(source_dir.path().join("big.txt")).unwrap();
fs::remove_file(source_dir.path().join("too-big.txt")).unwrap();
// Up to 16 files should be okay:
for i in 1..MAX_ENTRIES_PER_STORAGE + 1 {
assert_eq!(entry.scan(&logger).await.unwrap(), 0);
// Up to 15 files should be okay (can watch 15 files + 1 directory)
for i in 1..MAX_ENTRIES_PER_STORAGE {
fs::write(source_dir.path().join(format!("{}.txt", i)), "original").unwrap();
}
assert_eq!(entry.scan(&logger).await.unwrap(), MAX_ENTRIES_PER_STORAGE);
assert_eq!(
entry.scan(&logger).await.unwrap(),
MAX_ENTRIES_PER_STORAGE - 1
);
// 17 files is too many:
fs::write(source_dir.path().join("17.txt"), "updated").unwrap();
// 16 files will be too many:
fs::write(source_dir.path().join("16.txt"), "updated").unwrap();
thread::sleep(Duration::from_secs(1));
// Expect to receive a MountTooManyFiles error
@@ -754,6 +893,180 @@ mod tests {
}
}
#[tokio::test]
async fn test_copy() {
// prepare tmp src/destination
let source_dir = tempfile::tempdir().unwrap();
let dest_dir = tempfile::tempdir().unwrap();
// verify copy of a regular file
let src_file = source_dir.path().join("file.txt");
let dst_file = dest_dir.path().join("file.txt");
fs::write(&src_file, "foo").unwrap();
copy(&src_file, &dst_file).await.unwrap();
// verify destination:
assert!(!fs::symlink_metadata(dst_file)
.unwrap()
.file_type()
.is_symlink());
// verify copy of a symlink
let src_symlink_file = source_dir.path().join("symlink_file.txt");
let dst_symlink_file = dest_dir.path().join("symlink_file.txt");
tokio::fs::symlink(&src_file, &src_symlink_file)
.await
.unwrap();
copy(src_symlink_file, &dst_symlink_file).await.unwrap();
// verify destination:
assert!(fs::symlink_metadata(&dst_symlink_file)
.unwrap()
.file_type()
.is_symlink());
assert_eq!(fs::read_link(&dst_symlink_file).unwrap(), src_file);
assert_eq!(fs::read_to_string(&dst_symlink_file).unwrap(), "foo");
}
#[tokio::test]
async fn watch_directory_verify_dir_removal() {
let source_dir = tempfile::tempdir().unwrap();
let dest_dir = tempfile::tempdir().unwrap();
let mut entry = Storage::new(protos::Storage {
source: source_dir.path().display().to_string(),
mount_point: dest_dir.path().display().to_string(),
..Default::default()
})
.await
.unwrap();
let logger = slog::Logger::root(slog::Discard, o!());
// create a path we'll remove later
fs::create_dir_all(source_dir.path().join("tmp")).unwrap();
fs::write(&source_dir.path().join("tmp/test-file"), "foo").unwrap();
assert_eq!(entry.scan(&logger).await.unwrap(), 3); // root, ./tmp, test-file
// Verify expected directory, file:
assert_eq!(
std::fs::read_dir(dest_dir.path().join("tmp"))
.unwrap()
.count(),
1
);
assert_eq!(std::fs::read_dir(&dest_dir).unwrap().count(), 1);
// Now, remove directory, and verify that the directory (and its file) are removed:
fs::remove_dir_all(source_dir.path().join("tmp")).unwrap();
thread::sleep(Duration::from_secs(1));
assert_eq!(entry.scan(&logger).await.unwrap(), 0);
assert_eq!(std::fs::read_dir(&dest_dir).unwrap().count(), 0);
assert_eq!(entry.scan(&logger).await.unwrap(), 0);
}
#[tokio::test]
async fn watch_directory_with_symlinks() {
// Prepare source directory:
// ..2021_10_29_03_10_48.161654083/file.txt
// ..data -> ..2021_10_29_03_10_48.161654083
// file.txt -> ..data/file.txt
let source_dir = tempfile::tempdir().unwrap();
let actual_dir = source_dir.path().join("..2021_10_29_03_10_48.161654083");
let actual_file = actual_dir.join("file.txt");
let sym_dir = source_dir.path().join("..data");
let sym_file = source_dir.path().join("file.txt");
let relative_to_dir = PathBuf::from("..2021_10_29_03_10_48.161654083");
// create backing file/path
fs::create_dir_all(&actual_dir).unwrap();
fs::write(&actual_file, "two").unwrap();
// create indirection symlink directory that points to the directory that holds the actual file:
tokio::fs::symlink(&relative_to_dir, &sym_dir)
.await
.unwrap();
// create presented data file symlink:
tokio::fs::symlink(PathBuf::from("..data/file.txt"), sym_file)
.await
.unwrap();
let dest_dir = tempfile::tempdir().unwrap();
// delay 20 ms between writes to files in order to ensure filesystem timestamps are unique
thread::sleep(Duration::from_millis(20));
let mut entry = Storage::new(protos::Storage {
source: source_dir.path().display().to_string(),
mount_point: dest_dir.path().display().to_string(),
..Default::default()
})
.await
.unwrap();
let logger = slog::Logger::root(slog::Discard, o!());
assert_eq!(entry.scan(&logger).await.unwrap(), 5);
// Should copy no files since nothing is changed since last check
assert_eq!(entry.scan(&logger).await.unwrap(), 0);
// Now update the backing file and verify that the change is picked up:
fs::write(actual_file, "updated").unwrap();
// delay 20 ms between writes to files in order to ensure filesystem timestamps are unique
thread::sleep(Duration::from_millis(20));
assert_eq!(entry.scan(&logger).await.unwrap(), 1);
assert_eq!(
fs::read_to_string(dest_dir.path().join("file.txt")).unwrap(),
"updated"
);
// Verify that resulting file.txt is a symlink:
assert!(
tokio::fs::symlink_metadata(dest_dir.path().join("file.txt"))
.await
.unwrap()
.file_type()
.is_symlink()
);
// Verify that the ..data directory is a symlink:
assert!(tokio::fs::symlink_metadata(&dest_dir.path().join("..data"))
.await
.unwrap()
.file_type()
.is_symlink());
// Should copy no new files after copy happened
assert_eq!(entry.scan(&logger).await.unwrap(), 0);
// Now, simulate configmap update.
// - create a new actual dir/file,
// - update the symlink directory to point to this one
// - remove old dir/file
let new_actual_dir = source_dir.path().join("..2021_10_31");
let new_actual_file = new_actual_dir.join("file.txt");
fs::create_dir_all(&new_actual_dir).unwrap();
fs::write(&new_actual_file, "new configmap").unwrap();
tokio::fs::remove_file(&sym_dir).await.unwrap();
tokio::fs::symlink(PathBuf::from("..2021_10_31"), &sym_dir)
.await
.unwrap();
tokio::fs::remove_dir_all(&actual_dir).await.unwrap();
assert_eq!(entry.scan(&logger).await.unwrap(), 3); // file, file-dir, symlink
assert_eq!(
fs::read_to_string(dest_dir.path().join("file.txt")).unwrap(),
"new configmap"
);
}
#[tokio::test]
async fn watch_directory() {
// Prepare source directory:
@@ -764,6 +1077,13 @@ mod tests {
fs::create_dir_all(source_dir.path().join("A/B")).unwrap();
fs::write(source_dir.path().join("A/B/1.txt"), "two").unwrap();
// A/C is an empty directory
let empty_dir = "A/C";
fs::create_dir_all(source_dir.path().join(empty_dir)).unwrap();
// delay 20 ms between writes to files in order to ensure filesystem timestamps are unique
thread::sleep(Duration::from_millis(20));
let dest_dir = tempfile::tempdir().unwrap();
let mut entry = Storage::new(protos::Storage {
@@ -776,13 +1096,14 @@ mod tests {
let logger = slog::Logger::root(slog::Discard, o!());
assert_eq!(entry.scan(&logger).await.unwrap(), 2);
assert_eq!(entry.scan(&logger).await.unwrap(), 6);
// check empty directory
assert!(dest_dir.path().join(empty_dir).exists());
// Should copy no files since nothing is changed since last check
assert_eq!(entry.scan(&logger).await.unwrap(), 0);
// Should copy 1 file
thread::sleep(Duration::from_secs(1));
fs::write(source_dir.path().join("A/B/1.txt"), "updated").unwrap();
assert_eq!(entry.scan(&logger).await.unwrap(), 1);
assert_eq!(
@@ -790,12 +1111,21 @@ mod tests {
"updated"
);
// delay 20 ms between writes to files in order to ensure filesystem timestamps are unique
thread::sleep(Duration::from_millis(20));
// Should copy no new files after copy happened
assert_eq!(entry.scan(&logger).await.unwrap(), 0);
// Update another file
fs::write(source_dir.path().join("1.txt"), "updated").unwrap();
assert_eq!(entry.scan(&logger).await.unwrap(), 1);
// create another empty directory A/C/D
let empty_dir = "A/C/D";
fs::create_dir_all(source_dir.path().join(empty_dir)).unwrap();
assert_eq!(entry.scan(&logger).await.unwrap(), 1);
assert!(dest_dir.path().join(empty_dir).exists());
}
#[tokio::test]
@@ -820,7 +1150,9 @@ mod tests {
assert_eq!(entry.scan(&logger).await.unwrap(), 1);
thread::sleep(Duration::from_secs(1));
// delay 20 ms between writes to files in order to ensure filesystem timestamps are unique
thread::sleep(Duration::from_millis(20));
fs::write(&source_file, "two").unwrap();
assert_eq!(entry.scan(&logger).await.unwrap(), 1);
assert_eq!(fs::read_to_string(&dest_file).unwrap(), "two");
@@ -846,8 +1178,9 @@ mod tests {
let logger = slog::Logger::root(slog::Discard, o!());
assert_eq!(entry.scan(&logger).await.unwrap(), 1);
assert_eq!(entry.watched_files.len(), 1);
// expect the root directory and the file:
assert_eq!(entry.scan(&logger).await.unwrap(), 2);
assert_eq!(entry.watched_files.len(), 2);
assert!(target_file.exists());
assert!(entry.watched_files.contains_key(&source_file));
@@ -857,7 +1190,7 @@ mod tests {
assert_eq!(entry.scan(&logger).await.unwrap(), 0);
assert_eq!(entry.watched_files.len(), 0);
assert_eq!(entry.watched_files.len(), 1);
assert!(!target_file.exists());
}
@@ -890,7 +1223,10 @@ mod tests {
);
}
use serial_test::serial;
#[tokio::test]
#[serial]
async fn create_tmpfs() {
skip_if_not_root!();
@@ -900,11 +1236,14 @@ mod tests {
watcher.mount(&logger).await.unwrap();
assert!(is_mounted(WATCH_MOUNT_POINT_PATH).unwrap());
thread::sleep(Duration::from_millis(20));
watcher.cleanup();
assert!(!is_mounted(WATCH_MOUNT_POINT_PATH).unwrap());
}
#[tokio::test]
#[serial]
async fn spawn_thread() {
skip_if_not_root!();
@@ -934,6 +1273,7 @@ mod tests {
}
#[tokio::test]
#[serial]
async fn verify_container_cleanup_watching() {
skip_if_not_root!();

View File

@@ -15,6 +15,6 @@ serde = { version = "1.0.126", features = ["derive"] }
tokio-vsock = "0.3.1"
bincode = "1.3.3"
byteorder = "1.4.3"
slog = { version = "2.5.2", features = ["dynamic-keys", "max_level_trace", "release_max_level_info"] }
slog = { version = "2.5.2", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug"] }
async-trait = "0.1.50"
tokio = "1.2.0"

View File

@@ -12,7 +12,7 @@
// payload, which allows the forwarder to know how many bytes it must read to
// consume the trace span. The payload is a serialised version of the trace span.
#![allow(clippy::unknown_clippy_lints)]
#![allow(unknown_lints)]
use async_trait::async_trait;
use byteorder::{ByteOrder, NetworkEndian};

View File

@@ -5,16 +5,10 @@ coverage.txt
coverage.html
.git-commit
.git-commit.tmp
/cli/config/configuration-acrn.toml
/cli/config/configuration-clh.toml
/cli/config/configuration-fc.toml
/cli/config/configuration-qemu.toml
/cli/config/configuration-clh.toml
/cli/config-generated.go
/cli/containerd-shim-kata-v2/config-generated.go
/cli/coverage.html
/config/*.toml
config-generated.go
/containerd-shim-kata-v2
/containerd-shim-v2/monitor_address
/pkg/containerd-shim-v2/monitor_address
/data/kata-collect-data.sh
/kata-monitor
/kata-netmon
@@ -23,7 +17,4 @@ coverage.html
/virtcontainers/hack/virtc/virtc
/virtcontainers/hook/mock/hook
/virtcontainers/profile.cov
/virtcontainers/shim/mock/cc-shim/cc-shim
/virtcontainers/shim/mock/kata-shim/kata-shim
/virtcontainers/shim/mock/shim
/virtcontainers/utils/supportfiles

View File

@@ -51,12 +51,13 @@ PROJECT_DIR = $(PROJECT_TAG)
IMAGENAME = $(PROJECT_TAG).img
TARGET = $(BIN_PREFIX)-runtime
TARGET_OUTPUT = $(CURDIR)/$(TARGET)
RUNTIME_OUTPUT = $(CURDIR)/$(TARGET)
RUNTIME_DIR = $(CLI_DIR)/$(TARGET)
BINLIST += $(TARGET)
NETMON_DIR = netmon
NETMON_DIR = $(CLI_DIR)/netmon
NETMON_TARGET = $(PROJECT_TYPE)-netmon
NETMON_TARGET_OUTPUT = $(CURDIR)/$(NETMON_TARGET)
NETMON_RUNTIME_OUTPUT = $(CURDIR)/$(NETMON_TARGET)
BINLIBEXECLIST += $(NETMON_TARGET)
DESTDIR ?= /
@@ -189,6 +190,7 @@ DEFVALIDVHOSTUSERSTOREPATHS := [\"$(DEFVHOSTUSERSTOREPATH)\"]
DEFFILEMEMBACKEND := ""
DEFVALIDFILEMEMBACKENDS := [\"$(DEFFILEMEMBACKEND)\"]
DEFMSIZE9P := 8192
DEFVFIOMODE := guest-kernel
# Default cgroup model
DEFSANDBOXCGROUPONLY ?= false
@@ -200,7 +202,7 @@ FEATURE_SELINUX ?= check
SED = sed
CLI_DIR = cli
CLI_DIR = cmd
SHIMV2 = containerd-shim-kata-v2
SHIMV2_OUTPUT = $(CURDIR)/$(SHIMV2)
SHIMV2_DIR = $(CLI_DIR)/$(SHIMV2)
@@ -225,7 +227,7 @@ ifneq (,$(QEMUCMD))
KNOWN_HYPERVISORS += $(HYPERVISOR_QEMU)
CONFIG_FILE_QEMU = configuration-qemu.toml
CONFIG_QEMU = $(CLI_DIR)/config/$(CONFIG_FILE_QEMU)
CONFIG_QEMU = config/$(CONFIG_FILE_QEMU)
CONFIG_QEMU_IN = $(CONFIG_QEMU).in
CONFIG_PATH_QEMU = $(abspath $(CONFDIR)/$(CONFIG_FILE_QEMU))
@@ -248,7 +250,7 @@ ifneq (,$(CLHCMD))
KNOWN_HYPERVISORS += $(HYPERVISOR_CLH)
CONFIG_FILE_CLH = configuration-clh.toml
CONFIG_CLH = $(CLI_DIR)/config/$(CONFIG_FILE_CLH)
CONFIG_CLH = config/$(CONFIG_FILE_CLH)
CONFIG_CLH_IN = $(CONFIG_CLH).in
CONFIG_PATH_CLH = $(abspath $(CONFDIR)/$(CONFIG_FILE_CLH))
@@ -271,7 +273,7 @@ ifneq (,$(FCCMD))
KNOWN_HYPERVISORS += $(HYPERVISOR_FC)
CONFIG_FILE_FC = configuration-fc.toml
CONFIG_FC = $(CLI_DIR)/config/$(CONFIG_FILE_FC)
CONFIG_FC = config/$(CONFIG_FILE_FC)
CONFIG_FC_IN = $(CONFIG_FC).in
CONFIG_PATH_FC = $(abspath $(CONFDIR)/$(CONFIG_FILE_FC))
@@ -294,7 +296,7 @@ ifneq (,$(ACRNCMD))
KNOWN_HYPERVISORS += $(HYPERVISOR_ACRN)
CONFIG_FILE_ACRN = configuration-acrn.toml
CONFIG_ACRN = $(CLI_DIR)/config/$(CONFIG_FILE_ACRN)
CONFIG_ACRN = config/$(CONFIG_FILE_ACRN)
CONFIG_ACRN_IN = $(CONFIG_ACRN).in
CONFIG_PATH_ACRN = $(abspath $(CONFDIR)/$(CONFIG_FILE_ACRN))
@@ -458,6 +460,7 @@ USER_VARS += DEFENTROPYSOURCE
USER_VARS += DEFVALIDENTROPYSOURCES
USER_VARS += DEFSANDBOXCGROUPONLY
USER_VARS += DEFBINDMOUNTS
USER_VARS += DEFVFIOMODE
USER_VARS += FEATURE_SELINUX
USER_VARS += BUILDFLAGS
@@ -522,15 +525,15 @@ containerd-shim-v2: $(SHIMV2_OUTPUT)
monitor: $(MONITOR_OUTPUT)
netmon: $(NETMON_TARGET_OUTPUT)
netmon: $(NETMON_RUNTIME_OUTPUT)
$(NETMON_TARGET_OUTPUT): $(SOURCES) VERSION
$(NETMON_RUNTIME_OUTPUT): $(SOURCES) VERSION
$(QUIET_BUILD)(cd $(NETMON_DIR) && go build $(BUILDFLAGS) -o $@ -ldflags "-X main.version=$(VERSION)" $(KATA_LDFLAGS))
runtime: $(TARGET_OUTPUT) $(CONFIGS)
runtime: $(RUNTIME_OUTPUT) $(CONFIGS)
.DEFAULT: default
build: default
build: all
#Install an executable file
# params:
@@ -558,16 +561,12 @@ define MAKE_KERNEL_VIRTIOFS_NAME
$(if $(findstring uncompressed,$1),vmlinux-virtiofs.container,vmlinuz-virtiofs.container)
endef
GENERATED_CONFIG = $(abspath $(CLI_DIR)/config-generated.go)
GENERATED_FILES += $(GENERATED_CONFIG)
GENERATED_FILES += pkg/katautils/config-settings.go
$(TARGET_OUTPUT): $(SOURCES) $(GENERATED_FILES) $(MAKEFILE_LIST) | show-summary
$(QUIET_BUILD)(cd $(CLI_DIR) && go build $(KATA_LDFLAGS) $(BUILDFLAGS) -o $@ .)
$(RUNTIME_OUTPUT): $(SOURCES) $(GENERATED_FILES) $(MAKEFILE_LIST) | show-summary
$(QUIET_BUILD)(cd $(RUNTIME_DIR) && go build $(KATA_LDFLAGS) $(BUILDFLAGS) -o $@ .)
$(SHIMV2_OUTPUT): $(SOURCES) $(GENERATED_FILES) $(MAKEFILE_LIST)
$(QUIET_BUILD)(cd $(SHIMV2_DIR)/ && ln -fs $(GENERATED_CONFIG))
$(QUIET_BUILD)(cd $(SHIMV2_DIR)/ && go build $(KATA_LDFLAGS) $(BUILDFLAGS) -o $@ .)
$(MONITOR_OUTPUT): $(SOURCES) $(GENERATED_FILES) $(MAKEFILE_LIST) .git-commit
@@ -576,10 +575,11 @@ $(MONITOR_OUTPUT): $(SOURCES) $(GENERATED_FILES) $(MAKEFILE_LIST) .git-commit
.PHONY: \
check \
check-go-static \
coverage \
default \
install \
lint \
pre-commit \
show-header \
show-summary \
show-variables \
@@ -598,8 +598,6 @@ $(GENERATED_FILES): %: %.in $(MAKEFILE_LIST) VERSION .git-commit
generate-config: $(CONFIGS)
check: check-go-static
test: install-hook go-test
install-hook:
@@ -610,17 +608,37 @@ ifeq ($(shell id -u), 0)
endif
go-test: $(GENERATED_FILES)
go clean -testcache
go test -v -mod=vendor ./...
check-go-static:
$(QUIET_CHECK)../../ci/go-no-os-exit.sh ./cli
$(QUIET_CHECK)../../ci/go-no-os-exit.sh ./virtcontainers
# fast-test: run the tests of each package one at a time and stop at the first
# package that fails.
# The loop leaves with `exit 1` on failure: a bare `break` would end the loop
# with status 0, so make would report success even though a test failed.
fast-test: $(GENERATED_FILES)
	go clean -testcache
	for s in $$(go list ./...); do if ! go test -failfast -v -mod=vendor -p 1 $$s; then exit 1; fi; done
GOLANGCI_LINT_FILE := ../../../tests/.ci/.golangci.yml
GOLANGCI_LINT_NAME = golangci-lint
GOLANGCI_LINT_CMD := $(shell command -v $(GOLANGCI_LINT_NAME) 2>/dev/null)
# lint: run $(GOLANGCI_LINT_NAME) with the configuration file $(GOLANGCI_LINT_FILE).
# Fails with an explanatory message when either the command or the
# configuration file is missing.
# The expansions are quoted so that an empty or whitespace-containing value is
# always handed to `[` as exactly one argument.
lint: all
	if [ -z "$(GOLANGCI_LINT_CMD)" ] ; \
	then \
		echo "ERROR: command $(GOLANGCI_LINT_NAME) not found. Please install it first." >&2; exit 1; \
	fi
	if [ -f "$(GOLANGCI_LINT_FILE)" ] ; \
	then \
		echo "running $(GOLANGCI_LINT_NAME)..."; \
		$(GOLANGCI_LINT_NAME) run -c "$(GOLANGCI_LINT_FILE)" ; \
	else \
		echo "ERROR: file $(GOLANGCI_LINT_FILE) not found. You should clone https://github.com/kata-containers/tests to run $(GOLANGCI_LINT_NAME) locally." >&2; exit 1; \
	fi;
pre-commit: lint fast-test
coverage:
go test -v -mod=vendor -covermode=atomic -coverprofile=coverage.txt ./...
go tool cover -html=coverage.txt -o coverage.html
install: default install-runtime install-containerd-shim-v2 install-monitor install-netmon
install: all install-runtime install-containerd-shim-v2 install-monitor install-netmon
install-bin: $(BINLIST)
$(QUIET_INST)$(foreach f,$(BINLIST),$(call INSTALL_EXEC,$f,$(BINDIR)))
@@ -663,7 +681,6 @@ clean:
$(NETMON_TARGET) \
$(MONITOR) \
$(SHIMV2) \
$(SHIMV2_DIR)/$(notdir $(GENERATED_CONFIG)) \
$(TARGET) \
.git-commit .git-commit.tmp
@@ -678,6 +695,9 @@ show-usage: show-header
@printf "\n"
@printf "\tbuild : standard build (build everything).\n"
@printf "\ttest : run tests.\n"
@printf "\tpre-commit : run $(GOLANGCI_LINT_NAME) and tests locally.\n"
@printf "\tlint : run $(GOLANGCI_LINT_NAME).\n"
@printf "\tfast-test : run tests with failfast option.\n"
@printf "\tcheck : run code checks.\n"
@printf "\tclean : remove built files.\n"
@printf "\tcontainerd-shim-v2 : only build containerd shim v2.\n"

View File

@@ -26,8 +26,7 @@ to work seamlessly with both Docker and Kubernetes respectively.
## License
The code is licensed under an Apache 2.0 license.
See [the license file](LICENSE) for further details.
See [the license file](https://github.com/kata-containers/kata-containers/blob/main/LICENSE) for further details.
## Platform support

View File

@@ -1,40 +0,0 @@
//
// Copyright (c) 2018-2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
// WARNING: This file is auto-generated - DO NOT EDIT!
//
// Note that some variables are "var" to allow them to be modified
// by the tests.
package main
// name is the name of the runtime
const name = "@RUNTIME_NAME@"
// name of the project
const project = "@PROJECT_NAME@"
// prefix used to denote non-standard CLI commands and options.
const projectPrefix = "@PROJECT_TYPE@"
// original URL for this project
const projectURL = "@PROJECT_URL@"
// Project URL's organisation name
const projectORG = "@PROJECT_ORG@"
const defaultRootDirectory = "@PKGRUNDIR@"
// commit is the git commit the runtime is compiled from.
var commit = "@COMMIT@"
// version is the runtime version.
var version = "@VERSION@"
// Default config file used by stateless systems.
var defaultRuntimeConfiguration = "@CONFIG_PATH@"
// Alternate config file that takes precedence over
// defaultRuntimeConfiguration.
var defaultSysConfRuntimeConfiguration = "@SYSCONFIG@"

View File

@@ -1,30 +0,0 @@
// Copyright (c) 2018 HyperHQ Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
package main
import (
"fmt"
"os"
"github.com/containerd/containerd/runtime/v2/shim"
containerdshim "github.com/kata-containers/kata-containers/src/runtime/containerd-shim-v2"
"github.com/kata-containers/kata-containers/src/runtime/pkg/types"
)
func shimConfig(config *shim.Config) {
config.NoReaper = true
config.NoSubreaper = true
}
func main() {
if len(os.Args) == 2 && os.Args[1] == "--version" {
fmt.Printf("%s containerd shim: id: %q, version: %s, commit: %v\n", project, types.DefaultKataRuntimeName, version, commit)
os.Exit(0)
}
shim.Run(types.DefaultKataRuntimeName, containerdshim.New, shimConfig)
}

View File

@@ -1,28 +0,0 @@
// Copyright (c) 2017 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
package main
import "os"
// atexitFuncs holds the handlers registered through atexit, in registration
// order.
var atexitFuncs []func()

// exitFunc is the function exit uses to terminate the process. It is a
// variable so that it can be replaced.
var exitFunc = os.Exit

// atexit registers a function f that will be run when exit is called. The
// handlers so registered will be called in the reverse order of their
// registration.
func atexit(f func()) {
	atexitFuncs = append(atexitFuncs, f)
}

// exit runs every registered atexit handler, most recently registered first,
// and then calls exitFunc with status.
func exit(status int) {
	// Walk the list back to front; the count is taken once, up front.
	for remaining := len(atexitFuncs); remaining > 0; remaining-- {
		atexitFuncs[remaining-1]()
	}

	exitFunc(status)
}

View File

@@ -1,42 +0,0 @@
// Copyright (c) 2017 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
package main
import (
"os"
"testing"
"github.com/stretchr/testify/assert"
)
var testFoo string
func testFunc() {
testFoo = "bar"
}
// TestExit checks that exit forwards its status to exitFunc and that a handler
// registered with atexit has run by the time exit returns.
func TestExit(t *testing.T) {
	check := assert.New(t)

	// Record the status instead of terminating the test process, and put the
	// real os.Exit back when the test finishes.
	var recordedStatus int
	exitFunc = func(code int) {
		recordedStatus = code
	}
	defer func() { exitFunc = os.Exit }()

	// No atexit handlers registered yet.
	exit(1)
	check.Equal(recordedStatus, 1)

	// With one handler registered, exit must run it before exiting.
	atexit(testFunc)
	exit(0)
	check.Equal(testFoo, "bar")
	check.Equal(recordedStatus, 0)
}

View File

@@ -0,0 +1,32 @@
// Copyright (c) 2018 HyperHQ Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
package main
import (
"fmt"
"os"
shimapi "github.com/containerd/containerd/runtime/v2/shim"
shim "github.com/kata-containers/kata-containers/src/runtime/pkg/containerd-shim-v2"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
"github.com/kata-containers/kata-containers/src/runtime/pkg/types"
)
// shimConfig is the configuration callback handed to shimapi.Run; it switches
// on both the NoReaper and NoSubreaper options of the shim configuration.
func shimConfig(config *shimapi.Config) {
	config.NoReaper, config.NoSubreaper = true, true
}
// main is the entry point of the containerd shim binary.
//
// When invoked with exactly one argument, "--version", it prints the project
// name, runtime id, version and commit and exits with status 0. In every other
// case it hands control to shimapi.Run.
func main() {
	if args := os.Args; len(args) == 2 && args[1] == "--version" {
		fmt.Printf("%s containerd shim: id: %q, version: %s, commit: %v\n", katautils.PROJECT, types.DefaultKataRuntimeName, katautils.VERSION, katautils.COMMIT)
		os.Exit(0)
	}

	shimapi.Run(types.DefaultKataRuntimeName, shim.New, shimConfig)
}

View File

@@ -7,6 +7,7 @@ package main
import (
"flag"
"fmt"
"net/http"
"os"
goruntime "runtime"
@@ -25,7 +26,7 @@ var logLevel = flag.String("log-level", "info", "Log level of logrus(trace/debug
var (
appName = "kata-monitor"
// version is the kata monitor version.
version = "0.1.0"
version = "0.2.0"
GitCommit = "unknown-commit"
)
@@ -54,6 +55,15 @@ func printVersion(ver versionInfo) {
}
}
// endpoint describes one HTTP route exposed by kata-monitor: the handler that
// serves it, the path it is registered under, and a short description that is
// printed on the index page.
type endpoint struct {
// handler serves the requests arriving on path.
handler http.HandlerFunc
// path is the pattern the handler is registered under on the mux.
path string
// desc is the one-line, human-readable description of the endpoint.
desc string
}
// global variable endpoints contains all available endpoints
var endpoints []endpoint
func main() {
ver := versionInfo{
AppName: appName,
@@ -97,19 +107,62 @@ func main() {
panic(err)
}
// setup handlers, now only metrics is supported
// setup handlers, currently only metrics are supported
m := http.NewServeMux()
m.Handle("/metrics", http.HandlerFunc(km.ProcessMetricsRequest))
m.Handle("/sandboxes", http.HandlerFunc(km.ListSandboxes))
m.Handle("/agent-url", http.HandlerFunc(km.GetAgentURL))
endpoints = []endpoint{
{
path: "/metrics",
desc: "Get metrics from sandboxes.",
handler: km.ProcessMetricsRequest,
},
{
path: "/sandboxes",
desc: "List all Kata Containers sandboxes.",
handler: km.ListSandboxes,
},
{
path: "/agent-url",
desc: "Get sandbox agent URL.",
handler: km.GetAgentURL,
},
{
path: "/debug/vars",
desc: "Golang pprof `/debug/vars` endpoint for kata runtime shim process.",
handler: km.ExpvarHandler,
},
{
path: "/debug/pprof/",
desc: "Golang pprof `/debug/pprof/` endpoint for kata runtime shim process.",
handler: km.PprofIndex,
},
{
path: "/debug/pprof/cmdline",
desc: "Golang pprof `/debug/pprof/cmdline` endpoint for kata runtime shim process.",
handler: km.PprofCmdline,
},
{
path: "/debug/pprof/profile",
desc: "Golang pprof `/debug/pprof/profile` endpoint for kata runtime shim process.",
handler: km.PprofProfile,
},
{
path: "/debug/pprof/symbol",
desc: "Golang pprof `/debug/pprof/symbol` endpoint for kata runtime shim process.",
handler: km.PprofSymbol,
},
{
path: "/debug/pprof/trace",
desc: "Golang pprof `/debug/pprof/trace` endpoint for kata runtime shim process.",
handler: km.PprofTrace,
},
}
// for debug shim process
m.Handle("/debug/vars", http.HandlerFunc(km.ExpvarHandler))
m.Handle("/debug/pprof/", http.HandlerFunc(km.PprofIndex))
m.Handle("/debug/pprof/cmdline", http.HandlerFunc(km.PprofCmdline))
m.Handle("/debug/pprof/profile", http.HandlerFunc(km.PprofProfile))
m.Handle("/debug/pprof/symbol", http.HandlerFunc(km.PprofSymbol))
m.Handle("/debug/pprof/trace", http.HandlerFunc(km.PprofTrace))
for _, endpoint := range endpoints {
m.Handle(endpoint.path, endpoint.handler)
}
// root index page to show all endpoints in kata-monitor
m.Handle("/", http.HandlerFunc(indexPage))
// listening on the server
svr := &http.Server{
@@ -119,6 +172,23 @@ func main() {
logrus.Fatal(svr.ListenAndServe())
}
// indexPage serves the root index of kata-monitor: a plain-text listing of
// every entry in endpoints, one per line, with the paths left-aligned and
// padded so that the descriptions line up in a single column.
//
// The handler is registered on the "/" pattern, which http.ServeMux uses as
// a catch-all for every path no other pattern matches. Requests for anything
// other than "/" itself are therefore answered with 404 instead of being
// served the index with a misleading 200 status.
func indexPage(w http.ResponseWriter, r *http.Request) {
	if r.URL.Path != "/" {
		http.NotFound(w, r)
		return
	}

	// Column width: the longest endpoint path plus three spaces of padding.
	width := 0
	for _, e := range endpoints {
		if len(e.path) > width {
			width = len(e.path)
		}
	}
	width += 3

	// A failed write means the client is gone; there is nothing useful left
	// to do, so stop instead of formatting lines nobody will receive.
	if _, err := fmt.Fprint(w, "Available HTTP endpoints:\n"); err != nil {
		return
	}
	for _, e := range endpoints {
		// %-*s takes the field width from the argument list, so no
		// intermediate format string has to be built.
		if _, err := fmt.Fprintf(w, "%-*s: %s\n", width, e.path, e.desc); err != nil {
			return
		}
	}
}
// initLog setup logger
func initLog() {
kataMonitorLog := logrus.WithFields(logrus.Fields{

View File

@@ -25,7 +25,6 @@ import (
"strings"
"syscall"
"github.com/containerd/cgroups"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/oci"
@@ -62,9 +61,9 @@ type vmContainerCapableDetails struct {
const (
moduleParamDir = "parameters"
successMessageCapable = "System is capable of running " + project
successMessageCreate = "System can currently create " + project
failMessage = "System is not capable of running " + project
successMessageCapable = "System is capable of running " + katautils.PROJECT
successMessageCreate = "System can currently create " + katautils.PROJECT
failMessage = "System is not capable of running " + katautils.PROJECT
kernelPropertyCorrect = "Kernel property value correct"
// these refer to fields in the procCPUINFO file
@@ -229,7 +228,7 @@ func checkKernelModules(modules map[string]kernelModule, handler kernelParamHand
}
if !haveKernelModule(module) {
kataLog.WithFields(fields).Error("kernel property not found")
kataLog.WithFields(fields).Errorf("kernel property %s not found", module)
if details.required {
count++
}
@@ -292,11 +291,9 @@ func genericHostIsVMContainerCapable(details vmContainerCapableDetails) error {
errorCount := uint32(0)
count := checkCPUAttribs(cpuinfo, details.requiredCPUAttribs)
errorCount += count
count = checkCPUFlags(cpuFlags, details.requiredCPUFlags)
errorCount += count
count, err = checkKernelModules(details.requiredKernelModules, archKernelParamHandler)
@@ -316,7 +313,7 @@ func genericHostIsVMContainerCapable(details vmContainerCapableDetails) error {
var kataCheckCLICommand = cli.Command{
Name: "check",
Aliases: []string{"kata-check"},
Usage: "tests if system can run " + project,
Usage: "tests if system can run " + katautils.PROJECT,
Flags: []cli.Flag{
cli.BoolFlag{
Name: "check-version-only",
@@ -375,14 +372,14 @@ EXAMPLES:
$ %s check --only-list-releases --include-all-releases
`,
project,
katautils.PROJECT,
noNetworkEnvVar,
name,
name,
name,
name,
name,
name,
katautils.NAME,
katautils.NAME,
katautils.NAME,
katautils.NAME,
katautils.NAME,
katautils.NAME,
),
Action: func(context *cli.Context) error {
@@ -401,7 +398,7 @@ EXAMPLES:
if os.Geteuid() == 0 {
kataLog.Warn("Not running network checks as super user")
} else {
err := HandleReleaseVersions(cmd, version, context.Bool("include-all-releases"))
err := HandleReleaseVersions(cmd, katautils.VERSION, context.Bool("include-all-releases"))
if err != nil {
return err
}
@@ -417,11 +414,6 @@ EXAMPLES:
return errors.New("check: cannot determine runtime config")
}
// check if cgroup can work use the same logic for creating containers
if _, err := vc.V1Constraints(); err != nil && err == cgroups.ErrMountPointNotExist && !runtimeConfig.SandboxCgroupOnly {
return fmt.Errorf("Cgroup v2 requires the following configuration: `sandbox_cgroup_only=true`.")
}
err := setCPUtype(runtimeConfig.HypervisorType)
if err != nil {
return err

View File

@@ -161,6 +161,16 @@ func setCPUtype(hypervisorType vc.HypervisorType) error {
required: false,
},
}
case "mock":
archRequiredCPUFlags = map[string]string{
cpuFlagVMX: "Virtualization support",
cpuFlagLM: "64Bit CPU",
cpuFlagSSE4_1: "SSE4.1",
}
archRequiredCPUAttribs = map[string]string{
archGenuineIntel: "Intel Architecture CPU",
}
default:
return fmt.Errorf("setCPUtype: Unknown hypervisor type %s", hypervisorType)
}
@@ -292,6 +302,8 @@ func archHostCanCreateVMContainer(hypervisorType vc.HypervisorType) error {
return kvmIsUsable()
case "acrn":
return acrnIsUsable()
case "mock":
return nil
default:
return fmt.Errorf("archHostCanCreateVMContainer: Unknown hypervisor type %s", hypervisorType)
}

View File

@@ -317,11 +317,12 @@ func TestCheckHostIsVMContainerCapable(t *testing.T) {
}
}
setupCheckHostIsVMContainerCapable(assert, cpuInfoFile, cpuData, moduleData)
// remove the modules to force a failure
err = os.RemoveAll(sysModuleDir)
// to check if host is capable for Kata Containers, must setup CPU info first.
_, config, err := makeRuntimeConfig(dir)
assert.NoError(err)
setCPUtype(config.HypervisorType)
setupCheckHostIsVMContainerCapable(assert, cpuInfoFile, cpuData, moduleData)
details := vmContainerCapableDetails{
cpuInfoFile: cpuInfoFile,
@@ -332,6 +333,12 @@ func TestCheckHostIsVMContainerCapable(t *testing.T) {
err = hostIsVMContainerCapable(details)
assert.Nil(err)
// remove the modules to force a failure
err = os.RemoveAll(sysModuleDir)
assert.NoError(err)
err = hostIsVMContainerCapable(details)
assert.Error(err)
}
func TestArchKernelParamHandler(t *testing.T) {

View File

@@ -28,9 +28,9 @@ func setupCheckHostIsVMContainerCapable(assert *assert.Assertions, cpuInfoFile s
func TestCCCheckCLIFunction(t *testing.T) {
var cpuData []testCPUData
moduleData := []testModuleData{
{filepath.Join(sysModuleDir, "kvm"), true, ""},
{filepath.Join(sysModuleDir, "vhost"), true, ""},
{filepath.Join(sysModuleDir, "vhost_net"), true, ""},
{filepath.Join(sysModuleDir, "kvm"), "", true},
{filepath.Join(sysModuleDir, "vhost"), "", true},
{filepath.Join(sysModuleDir, "vhost_net"), "", true},
}
genericCheckCLIFunction(t, cpuData, moduleData)

View File

@@ -10,7 +10,7 @@ vendor_id : IBM/S390
# processors : 4
bogomips per cpu: 20325.00
max thread id : 0
features : esan3 zarch stfle msa ldisp eimm dfp edat etf3eh highgprs te vx sie
features : esan3 zarch stfle msa ldisp eimm dfp edat etf3eh highgprs te vx sie
cache0 : level=1 type=Data scope=Private size=128K line_size=256 associativity=8
cache1 : level=1 type=Instruction scope=Private size=96K line_size=256 associativity=6
cache2 : level=2 type=Data scope=Private size=2048K line_size=256 associativity=8

View File

@@ -39,7 +39,8 @@ func testSetCPUTypeGeneric(t *testing.T) {
_, config, err := makeRuntimeConfig(tmpdir)
assert.NoError(err)
setCPUtype(config.HypervisorType)
err = setCPUtype(config.HypervisorType)
assert.NoError(err)
assert.Equal(archRequiredCPUFlags, savedArchRequiredCPUFlags)
assert.Equal(archRequiredCPUAttribs, savedArchRequiredCPUAttribs)

View File

@@ -47,8 +47,8 @@ func TestCCCheckCLIFunction(t *testing.T) {
}
moduleData := []testModuleData{
{filepath.Join(sysModuleDir, "kvm"), false, "Y"},
{filepath.Join(sysModuleDir, "kvm_hv"), false, "Y"},
{filepath.Join(sysModuleDir, "kvm"), "", true},
{filepath.Join(sysModuleDir, "kvm_hv"), "", true},
}
genericCheckCLIFunction(t, cpuData, moduleData)
@@ -58,51 +58,51 @@ func TestArchKernelParamHandler(t *testing.T) {
assert := assert.New(t)
type testData struct {
onVMM bool
expectIgnore bool
fields logrus.Fields
msg string
onVMM bool
expectIgnore bool
}
data := []testData{
{true, false, logrus.Fields{}, ""},
{false, false, logrus.Fields{}, ""},
{logrus.Fields{}, "", true, false},
{logrus.Fields{}, "", false, false},
{
false,
false,
logrus.Fields{
// wrong type
"parameter": 123,
},
"foo",
false,
false,
},
{
false,
false,
logrus.Fields{
"parameter": "unrestricted_guest",
},
"",
false,
false,
},
{
true,
true,
logrus.Fields{
"parameter": "unrestricted_guest",
},
"",
true,
true,
},
{
false,
true,
logrus.Fields{
"parameter": "nested",
},
"",
false,
true,
},
}

View File

@@ -47,7 +47,7 @@ func TestCCCheckCLIFunction(t *testing.T) {
}
moduleData := []testModuleData{
{filepath.Join(sysModuleDir, "kvm"), false, "Y"},
{filepath.Join(sysModuleDir, "kvm"), "", true},
}
genericCheckCLIFunction(t, cpuData, moduleData)
@@ -57,51 +57,51 @@ func TestArchKernelParamHandler(t *testing.T) {
assert := assert.New(t)
type testData struct {
onVMM bool
expectIgnore bool
fields logrus.Fields
msg string
onVMM bool
expectIgnore bool
}
data := []testData{
{true, false, logrus.Fields{}, ""},
{false, false, logrus.Fields{}, ""},
{logrus.Fields{}, "", true, false},
{logrus.Fields{}, "", false, false},
{
false,
false,
logrus.Fields{
// wrong type
"parameter": 123,
},
"foo",
false,
false,
},
{
false,
false,
logrus.Fields{
"parameter": "unrestricted_guest",
},
"",
false,
false,
},
{
true,
true,
logrus.Fields{
"parameter": "unrestricted_guest",
},
"",
true,
true,
},
{
false,
true,
logrus.Fields{
"parameter": "nested",
},
"",
false,
true,
},
}

View File

@@ -17,8 +17,10 @@ import (
"strings"
"testing"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils"
ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
"github.com/urfave/cli"
@@ -247,6 +249,13 @@ func genericCheckCLIFunction(t *testing.T, cpuData []testCPUData, moduleData []t
flagSet := &flag.FlagSet{}
ctx := createCLIContext(flagSet)
ctx.App.Name = "foo"
if katatestutils.IsInGitHubActions() {
// only set to mock if on GitHub
t.Logf("running tests under GitHub actions")
config.HypervisorType = vc.MockHypervisor
}
ctx.App.Metadata["runtimeConfig"] = config
// create buffer to save logger output

View File

@@ -13,21 +13,23 @@ import (
"strings"
"github.com/BurntSushi/toml"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/prometheus/procfs"
"github.com/urfave/cli"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
"github.com/kata-containers/kata-containers/src/runtime/pkg/utils"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
exp "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/experimental"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/oci"
vcUtils "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/prometheus/procfs"
"github.com/urfave/cli"
)
// Semantic version for the output of the command.
//
// XXX: Increment for every change to the output format
// (meaning any change to the EnvInfo type).
const formatVersion = "1.0.25"
const formatVersion = "1.0.26"
// MetaInfo stores information on the format of the output itself
type MetaInfo struct {
@@ -106,6 +108,7 @@ type HypervisorInfo struct {
EntropySource string
SharedFS string
VirtioFSDaemon string
SocketPath string
Msize9p uint32
MemorySlots uint32
PCIeRootPort uint32
@@ -115,10 +118,8 @@ type HypervisorInfo struct {
// AgentInfo stores agent details
type AgentInfo struct {
TraceMode string
TraceType string
Debug bool
Trace bool
Debug bool
Trace bool
}
// DistroInfo stores host operating system distribution details.
@@ -129,13 +130,14 @@ type DistroInfo struct {
// HostInfo stores host details
type HostInfo struct {
Kernel string
Architecture string
Distro DistroInfo
CPU CPUInfo
Memory MemoryInfo
VMContainerCapable bool
SupportVSocks bool
AvailableGuestProtections []string
Kernel string
Architecture string
Distro DistroInfo
CPU CPUInfo
Memory MemoryInfo
VMContainerCapable bool
SupportVSocks bool
}
// NetmonInfo stores netmon details
@@ -155,11 +157,11 @@ type EnvInfo struct {
Meta MetaInfo
Image ImageInfo
Initrd InitrdInfo
Agent AgentInfo
Hypervisor HypervisorInfo
Netmon NetmonInfo
Runtime RuntimeInfo
Netmon NetmonInfo
Host HostInfo
Agent AgentInfo
}
func getMetaInfo() MetaInfo {
@@ -169,8 +171,8 @@ func getMetaInfo() MetaInfo {
}
func getRuntimeInfo(configFile string, config oci.RuntimeConfig) RuntimeInfo {
runtimeVersionInfo := constructVersionInfo(version)
runtimeVersionInfo.Commit = commit
runtimeVersionInfo := constructVersionInfo(katautils.VERSION)
runtimeVersionInfo.Commit = katautils.COMMIT
runtimeVersion := RuntimeVersionInfo{
Version: runtimeVersionInfo,
@@ -240,14 +242,17 @@ func getHostInfo() (HostInfo, error) {
memoryInfo := getMemoryInfo()
availableGuestProtection := vc.AvailableGuestProtections()
host := HostInfo{
Kernel: hostKernelVersion,
Architecture: arch,
Distro: hostDistro,
CPU: hostCPU,
Memory: memoryInfo,
VMContainerCapable: hostVMContainerCapable,
SupportVSocks: supportVSocks,
Kernel: hostKernelVersion,
Architecture: arch,
Distro: hostDistro,
CPU: hostCPU,
Memory: memoryInfo,
AvailableGuestProtections: availableGuestProtection,
VMContainerCapable: hostVMContainerCapable,
SupportVSocks: supportVSocks,
}
return host, nil
@@ -301,13 +306,11 @@ func getAgentInfo(config oci.RuntimeConfig) (AgentInfo, error) {
agentConfig := config.AgentConfig
agent.Debug = agentConfig.Debug
agent.Trace = agentConfig.Trace
agent.TraceMode = agentConfig.TraceMode
agent.TraceType = agentConfig.TraceType
return agent, nil
}
func getHypervisorInfo(config oci.RuntimeConfig) HypervisorInfo {
func getHypervisorInfo(config oci.RuntimeConfig) (HypervisorInfo, error) {
hypervisorPath := config.HypervisorConfig.HypervisorPath
version, err := getCommandVersion(hypervisorPath)
@@ -315,6 +318,19 @@ func getHypervisorInfo(config oci.RuntimeConfig) HypervisorInfo {
version = unknown
}
hypervisorType := config.HypervisorType
socketPath := unknown
// It is only reliable to make this call as root since a
// non-privileged user may not have access to /dev/vhost-vsock.
if os.Geteuid() == 0 {
socketPath, err = vc.GetHypervisorSocketTemplate(hypervisorType, &config.HypervisorConfig)
if err != nil {
return HypervisorInfo{}, err
}
}
return HypervisorInfo{
Debug: config.HypervisorConfig.Debug,
MachineType: config.HypervisorConfig.HypervisorMachineType,
@@ -329,7 +345,8 @@ func getHypervisorInfo(config oci.RuntimeConfig) HypervisorInfo {
HotplugVFIOOnRootBus: config.HypervisorConfig.HotplugVFIOOnRootBus,
PCIeRootPort: config.HypervisorConfig.PCIeRootPort,
}
SocketPath: socketPath,
}, nil
}
func getEnvInfo(configFile string, config oci.RuntimeConfig) (env EnvInfo, err error) {
@@ -354,7 +371,10 @@ func getEnvInfo(configFile string, config oci.RuntimeConfig) (env EnvInfo, err e
return EnvInfo{}, err
}
hypervisor := getHypervisorInfo(config)
hypervisor, err := getHypervisorInfo(config)
if err != nil {
return EnvInfo{}, err
}
image := ImageInfo{
Path: config.HypervisorConfig.ImagePath,

View File

@@ -184,10 +184,6 @@ func getExpectedAgentDetails(config oci.RuntimeConfig) (AgentInfo, error) {
return AgentInfo{
Debug: agentConfig.Debug,
Trace: agentConfig.Trace,
// No trace mode/type set by default
TraceMode: "",
TraceType: "",
}, nil
}
@@ -281,7 +277,7 @@ VERSION_ID="%s"
}
func getExpectedHypervisor(config oci.RuntimeConfig) HypervisorInfo {
return HypervisorInfo{
info := HypervisorInfo{
Version: testHypervisorVersion,
Path: config.HypervisorConfig.HypervisorPath,
MachineType: config.HypervisorConfig.HypervisorMachineType,
@@ -296,6 +292,16 @@ func getExpectedHypervisor(config oci.RuntimeConfig) HypervisorInfo {
HotplugVFIOOnRootBus: config.HypervisorConfig.HotplugVFIOOnRootBus,
PCIeRootPort: config.HypervisorConfig.PCIeRootPort,
}
if os.Geteuid() == 0 {
// This assumes the test hypervisor is a non-hybrid-vsock
// one (such as QEMU).
info.SocketPath = ""
} else {
info.SocketPath = unknown
}
return info
}
func getExpectedImage(config oci.RuntimeConfig) ImageInfo {
@@ -314,8 +320,8 @@ func getExpectedKernel(config oci.RuntimeConfig) KernelInfo {
func getExpectedRuntimeDetails(config oci.RuntimeConfig, configFile string) RuntimeInfo {
runtimePath, _ := os.Executable()
runtimeVersionInfo := constructVersionInfo(version)
runtimeVersionInfo.Commit = commit
runtimeVersionInfo := constructVersionInfo(katautils.VERSION)
runtimeVersionInfo.Commit = katautils.COMMIT
return RuntimeInfo{
Version: RuntimeVersionInfo{
Version: runtimeVersionInfo,
@@ -677,14 +683,10 @@ func TestEnvGetAgentInfo(t *testing.T) {
assert.True(t, agent.Debug)
agentConfig.Trace = true
agentConfig.TraceMode = "traceMode"
agentConfig.TraceType = "traceType"
config.AgentConfig = agentConfig
agent, err = getAgentInfo(config)
assert.NoError(t, err)
assert.True(t, agent.Trace)
assert.Equal(t, agent.TraceMode, "traceMode")
assert.Equal(t, agent.TraceType, "traceType")
}
func testEnvShowTOMLSettings(t *testing.T, tmpdir string, tmpfile *os.File) error {
@@ -1015,12 +1017,58 @@ func TestGetHypervisorInfo(t *testing.T) {
_, config, err := makeRuntimeConfig(tmpdir)
assert.NoError(err)
info := getHypervisorInfo(config)
info, err := getHypervisorInfo(config)
assert.NoError(err)
assert.Equal(info.Version, testHypervisorVersion)
err = os.Remove(config.HypervisorConfig.HypervisorPath)
assert.NoError(err)
info = getHypervisorInfo(config)
info, err = getHypervisorInfo(config)
assert.NoError(err)
assert.Equal(info.Version, unknown)
}
// TestGetHypervisorInfoSocket verifies the SocketPath field returned by
// getHypervisorInfo for each hypervisor type in the table below.
//
// Expectations:
//   - not running as root: SocketPath is the "unknown" placeholder;
//   - running as root, hypervisor flagged as hybrid vsock: SocketPath is a
//     non-empty path starting with "/";
//   - running as root, any other hypervisor: SocketPath is empty.
func TestGetHypervisorInfoSocket(t *testing.T) {
	assert := assert.New(t)

	tmpdir, err := ioutil.TempDir("", "")
	assert.NoError(err)
	defer os.RemoveAll(tmpdir)

	_, config, err := makeRuntimeConfig(tmpdir)
	assert.NoError(err)

	type TestHypervisorDetails struct {
		hType       vc.HypervisorType
		hybridVsock bool
	}

	cases := []TestHypervisorDetails{
		{hType: vc.AcrnHypervisor, hybridVsock: false},
		{hType: vc.ClhHypervisor, hybridVsock: true},
		{hType: vc.FirecrackerHypervisor, hybridVsock: true},
		{hType: vc.MockHypervisor, hybridVsock: false},
		{hType: vc.QemuHypervisor, hybridVsock: false},
	}

	// The effective UID decides which expectation applies to every case.
	asRoot := os.Geteuid() == 0

	for idx, tc := range cases {
		msg := fmt.Sprintf("hypervisor[%d]: %+v", idx, tc)

		config.HypervisorType = tc.hType

		info, err := getHypervisorInfo(config)
		assert.NoError(err, msg)

		switch {
		case !asRoot:
			assert.Equal(info.SocketPath, unknown, msg)
		case tc.hybridVsock:
			assert.NotEmpty(info.SocketPath, msg)
			assert.True(strings.HasPrefix(info.SocketPath, "/"), msg)
		default:
			assert.Equal(info.SocketPath, "", msg)
		}
	}
}

Some files were not shown because too many files have changed in this diff Show More