Compare commits

...

277 Commits

Author SHA1 Message Date
Fabiano Fidêncio
119edcc443 Merge pull request #2837 from fidencio/2.3.0-alpha2-branch-bump
# Kata Containers 2.3.0-alpha2
2021-10-14 09:52:37 +02:00
Fabiano Fidêncio
8873ddab9e release: Kata Containers 2.3.0-alpha2
- kata-monitor: add index page
- clh: Refine the usage of guest console and kernel parameters with Cloud Hypervisor
- agent: exec should inherit container process capabilities
- GitHubActions: fix invalid format of require-pr-porting-labels.yaml
- agent: flush root span before process finish
- Extend PCI submodules to represent non-zero functions and addresses
- packaging/kernel: Add CONFIG_PCI_MMCONFIG to x86 guest kernel configuration
- runtime: don't start shim management server in tests
- qemu: use GitLab repos instead of qemu.org
- runtime: optimize code for managing temp users for rootless mode
- Agent configuration file and API restriction
- Delete file virtcontainers-setup.sh
- vendor: Update containerd to v1.5.7
- runtime: Optimize func noNeedForOutput and add test cases
- runtime: Fix !x86 static checks
- #2676: fixing centos gpg key url for ppc64le
- Pass the host route IP family to the guest
- cmd: get return value for setCPUtype
- packaging: Configure QEMU with --enable-pie
- clh: Enable guest userland output
- cmd: Fix mismatched types in testModuleData
- runtime: update .gitignore to ignore monitor_address file
- runtime: fix the make check-go-static command error
- virtcontainers: clean up useless code
- Remove forced PCI rescans from agent
- kernel: Enable SGX in experimental kernel.
- runtime: fix nil reference in cleanup rootless user
- qemu: prepare to upgrade qemu version to 6.1.0 for arm
- kata-monitor (minor) improvements
- virtcontainers: Fix incorrect scripts path
- runtime: clear virtcontainers cgroup duplicated function
- Kata monitor: cache improvements
- virtiofs: fix error report in TestVirtiofsdStart when go test running

176dee6f agent: exec should inherit container process capabilities
7b2bfd4e virtcontainers: clh: Use 'quiet' as the default kernel parameter
3e24e46c virtcontainers: clh: Turn-off serial and virtio-console by default
2d7b65e8 agent: flush root span before process finish
5c77cc2c runtime: don't start shim management server in tests
72044180 agent/device: Return PCI address from wait_for_pci_device()
e50b05d9 agent/pci: Add type to represent PCI addresses
8528157b agent/pci:  Extend Slot type to represent PCI function as well
bf8f582c runtime: optimize code for managing temp users for rootless mode
a9c2a4ba GitHubActions: fix invalid format of require-pr-porting-labels.yaml
c4236cb2 packaging/kernel: Add CONFIG_PCI_MMCONFIG to x86 guest kernel configuration
08360c98 agent: Add an agent configutation file example
8a4e69d2 agent: rpc: Return UNIMPLEMENTED for not allowed endpoints
0ea2e3af agent: config: Allow for building the configuration from a file
63539dc9 agent: config: Add allowed endpoints
a953fea3 agent: config: Simplify configuration creation
b888edc2 agent: config: Implement Default
7eac2ec7 protection: add confidential compute frame for arm
8acfc154 check: fix typecheck failure in qemu_arm64_test.go
5b02d54e virtcontainers: fix lint failure on ppc64le
ff9728f0 virtcontainers: nolint guestProtection
5c138c8f runtime: Fix field alignment on s390x
191d0016 vendor: Update containerd to v1.5.7
f7f6bd01 kata-monitor: add index page
a44cde7e agent: netlink: Use the grpc IP family field when updating the route
71ce6cfe runtime: Pass the route IP family to the agent
99450bd1 agent: protos: Add a Family field to the Route payload
f85fe702 runtime: vendor: Bump the netlink package dependency
e439cec7 cmd: fix field alignment on ppc64le
e5159ea7 cmd: get return value for setCPUtype
2ce8d426 clh: Suppress hypervisor output to make guest output visible
cd1064b1 packaging: Configure QEMU with --enable-pie
762922a5 runtime: delete func ConstraintsToVCPUs
4f485430 runtime: delete virtcontainers-setup.sh
80f6b977 osbuilder: fixing centos gpg key url for ppc64le
bb99bfb4 runtime: fix the make check-go-static command error
870771d7 runtime: update .gitignore to ignore monitor_address file
18bff584 runtime: Optimize func noNeedForOutput and add test cases
e5fe53f0 runtime: fix nil reference in cleanup rootless user
2304a596 runtime: set the sandbox storage path static
315295e0 runtime: rename GetSanboxesStoragePath() --> GetSandboxesStoragePath()
13e65f2e cmd: Fix mismatched types in testModuleData
da42cbc0 actions: Build experimental kernel on kata-deploy push action
dffc5092 kernel: Enable SGX in experimental kernel.
ff6a677d kernel-build: Enable multiple config types.
90046964 experimental-kernel: bump 5.13.10
1fbb7304 build: kata-deploy kernel experimental
907459c1 agent/device: Don't force PCI rescans
75f426dd agent: Simplify do_add_swap()
aad1a873 runtime/device: Give the agent information about VFIO devices
ebd7b618 runtime: Don't repeat GetDeviceByID between appendDevices() and append*()
ad45c52f runtime/device: Record guest PCI path for VFIO devices
5c2af3e3 runtime/device: Refactor hotplugVFIODevice() to have common exit path
8bc71105 agent/device: Add device type for VFIO devices
f7a27075 agent: Move driver type constants into device.rs
5b1eb08b agent/uevent: Improve logging of wait_for_uevent()
cf36fd87 runtime: Fix some leftover go fmt errors
6d94957a kernel: reduce alignment size of memory hotplug to 128M
48090f62 qemu: disable plug on arm64 when pie is added
57e3712d virtiofs: fix error report in TestVirtiofsdStart when go test running
8b0bc1f4 kata-monitor: bump version to 0.2.0
bfb556d5 kata-monitor: refresh kata sandbox list on fs events
0e854f3b kata-monitor: improve detection of kata workloads
80463b44 qemu: use GitLab repos instead of qemu.org
3b0c4bf9 runtime: clear virtcontainers cgroup duplicated function
afad910d kata-monitor: add getSandboxFS()
e38686f7 runtime: add GetSandboxesStoragePath()
245a12bb kata-monitor: improve sandbox caching
fc067d61 kata-monitor: warn when unable to retrive the lower level runtime
53ec4df9 kata-monitor: minor fixes
47516988 virtcontainers: Fix incorrect scripts path
814cea96 virtcontainers: clean up useless code

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-10-14 06:56:30 +02:00
Bin Liu
8be85fda4f Merge pull request #2775 from fgiudici/kata-monitor_issue2292
kata-monitor: add index page
2021-10-14 09:12:57 +08:00
GabyCT
5c7e1b457c Merge pull request #2821 from likebreath/1011/clh_console
clh: Refine the usage of guest console and kernel parameters with Cloud Hypervisor
2021-10-13 13:36:32 -05:00
Eric Ernst
6cc4d6b54e Merge pull request #2829 from bergwolf/capability
agent: exec should inherit container process capabilities
2021-10-13 09:02:03 -07:00
Peng Tao
176dee6f37 agent: exec should inherit container process capabilities
Otherwise rustjail would not set its capabilities and it ends up getting
all capabilities.

Fixes: #2828
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-10-13 17:24:52 +08:00
Bo Chen
7b2bfd4eca virtcontainers: clh: Use 'quiet' as the default kernel parameter
The 'quiet' kernel parameter can avoid guest kernel logs while booting,
which can reduce boot time.

Fix: #2820

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-10-11 22:06:27 -07:00
Bo Chen
3e24e46c70 virtcontainers: clh: Turn-off serial and virtio-console by default
We will need to have console output from the guest only for debugging
purposes. As a result, we can turn-off both the serial and
virtio-console devices by default for better boot time.

Fixes: #2820

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-10-11 22:06:23 -07:00
GabyCT
88affdb7b7 Merge pull request #2799 from liubin/fix/github-action-format
GitHubActions: fix invalid format of require-pr-porting-labels.yaml
2021-10-11 09:36:04 -05:00
Bin Liu
b7cd4ca2b8 Merge pull request #2813 from liubin/fix/2812-flush-root-span
agent: flush root span before process finish
2021-10-11 18:46:09 +08:00
bin
2d7b65e8eb agent: flush root span before process finish
Variables in rust will be dropped at the end of the function.

In function real_main the trace will be shut down by `tracer::end_tracing()`,
but at this time the root span is in an active state, so this root span
will not be sent to the trace collector.

This can be fixed by dropping the root span manually.

Fixes: #2812

Signed-off-by: bin <bin@hyper.sh>
2021-10-11 17:14:37 +08:00
Marcel Apfelbaum
06f4ab10b4 Merge pull request #2764 from dgibson/more-pci
Extend PCI submodules to represent non-zero functions and addresses
2021-10-10 15:57:54 +03:00
Marcel Apfelbaum
9796babd92 Merge pull request #2311 from dgibson/mmconfig
packaging/kernel: Add CONFIG_PCI_MMCONFIG to x86 guest kernel configuration
2021-10-10 15:11:33 +03:00
Bin Liu
0300e91cd0 Merge pull request #2808 from liubin/fix/2805-fix-test-leak-of-monitor-socket
runtime: don't start shim management server in tests
2021-10-08 19:42:09 +08:00
bin
5c77cc2c49 runtime: don't start shim management server in tests
Shim management server is running in a go routine, in test mode
this will cause the directory where the listen socket
file(/run/vc/sbs/777-77-77777777/shim-monitor.sock) in leak
after the tests finished.

Fixes: #2805

Signed-off-by: bin <bin@hyper.sh>
2021-10-08 18:41:53 +08:00
Jakob Naucke
4152c45e4c Merge pull request #2706 from yuanzhe-liu0/qemu_link
qemu: use GitLab repos instead of qemu.org
2021-10-08 12:03:55 +02:00
David Gibson
72044180e4 agent/device: Return PCI address from wait_for_pci_device()
wait_for_pci_device() waits for the PCI device at the given path to become
ready, but it doesn't currently give you any meaningful handle on that
device.

Change the signature, so that it returns the PCI address of the device.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-10-08 16:52:49 +11:00
David Gibson
e50b05d93c agent/pci: Add type to represent PCI addresses
Add a new pci::Address type which represents a guest PCI address in
DDDD:BB:SS.F form.

fixes #2745

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-10-08 16:52:49 +11:00
David Gibson
8528157b9b agent/pci: Extend Slot type to represent PCI function as well
pci::Slot represents a PCI slot.  However, in all cases where we use it, we
actually care about addressing a specific PCI function.  So, at the moment
we can only refer to function 0 in each slot.

Replace pci::Slot with pci::SlotFn to represent both the slot and function.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-10-08 16:52:49 +11:00
Fupan Li
988eb95621 Merge pull request #2760 from liubin/fix/2759-optimize-code-for-managing-temp-users
runtime: optimize code for managing temp users for rootless mode
2021-10-08 13:49:14 +08:00
bin
bf8f582c1d runtime: optimize code for managing temp users for rootless mode
This commit does two chagnes:

- move code for managing temp users to rootless.go.
- use common function in qemu.go when shutdown the VM.

Fixes: #2759

Signed-off-by: bin <bin@hyper.sh>
2021-10-08 11:04:21 +08:00
Eric Ernst
011c58d626 Merge pull request #2517 from sameo/topic/agent-config
Agent configuration file and API restriction
2021-10-07 08:54:51 -07:00
Bin Liu
10ec4b133c Merge pull request #2742 from liubin/fix/2741-delete-file-code
Delete file virtcontainers-setup.sh
2021-10-07 11:54:47 +08:00
bin
a9c2a4ba8e GitHubActions: fix invalid format of require-pr-porting-labels.yaml
The yaml file has an indent issue from line 15.

And the branches filter should be under pull_request_target but
not the pull_request trigger.

Also actions/checkout@v2 does not need the token parameter.

Fixes: #2798

Signed-off-by: bin <bin@hyper.sh>
2021-10-07 10:23:44 +08:00
David Gibson
c4236cb2d1 packaging/kernel: Add CONFIG_PCI_MMCONFIG to x86 guest kernel configuration
The guest kernel configuration suggested for Kata, and which is used by the
CI didn't include CONFIG_PCI_MMCONFIG.  That's kind of weird, MMCONFIG is
the modern normal way of handling configuration cycles.

In addition, due to a complex set of interactions through the ACPI code,
disabling MMCONFIG means that SHPC hotplug doesn't work: the driver is
included in the guest kernel, but will fail to probe on PCI to PCI bridges,
meaning it won't actually be activated.

Enable MMCONFIG so that we suggest and testa more typical guest kernel
configuration.

fixes #2288

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-10-07 13:21:48 +11:00
Samuel Ortiz
08360c981d agent: Add an agent configutation file example
With all endpoints allowed.

Signed-off-by: Samuel Ortiz <s.ortiz@apple.com>
2021-10-07 04:04:52 +02:00
Samuel Ortiz
8a4e69d237 agent: rpc: Return UNIMPLEMENTED for not allowed endpoints
From the endpoints string described through the configuration file, we
build a hash set of allowed enpoints. If a configuration files does not
include an endpoints section, we assume all endpoints are not allowed.
If there is no configuration file, then all endpoints are allowed.

Then for every ttrpc request, we check if the name of the endpoint is
part of the hashset. If it is not, then we return ttrcp::UNIMPLEMENTED.

Fixes: #1837

Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
2021-10-07 04:04:32 +02:00
Samuel Ortiz
0ea2e3af07 agent: config: Allow for building the configuration from a file
When the kernel command line includes a agent.config_file=<path> entry,
then we will try to override the default confiuguration values with the
ones we parse from a TOML file at <path>.

As the configuration file overrides the default values, we need to go
through a simplified builder that convert a set of Option<> fields into
the actual AgentConfig structure.

Fixes: #1837

Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
2021-10-07 00:37:40 +02:00
Samuel Ortiz
63539dc9fd agent: config: Add allowed endpoints
They will define the list of endpoints that an agent supports.
They're empty and non actionable for now.

Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
2021-10-07 00:37:40 +02:00
Samuel Ortiz
a953fea324 agent: config: Simplify configuration creation
We dont need a constructor and derive directly from the command line
parsing.

Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
2021-10-07 00:37:40 +02:00
Samuel Ortiz
b888edc2fc agent: config: Implement Default
A single constructor setting default value is a typical pattern for a
Default implementation.

Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
2021-10-07 00:37:40 +02:00
Fabiano Fidêncio
4cde619c68 Merge pull request #2797 from fidencio/wip/upgrade-vendored-containerd
vendor: Update containerd to v1.5.7
2021-10-06 21:05:44 +02:00
Chelsea Mafrica
6e3fcce2a2 Merge pull request #2748 from liubin/fix/2747-add-test
runtime: Optimize func noNeedForOutput and add test cases
2021-10-06 11:24:57 -07:00
Fabiano Fidêncio
04cdf5b1f0 Merge pull request #2774 from Jakob-Naucke/fix-s390x-alignment
runtime: Fix !x86 static checks
2021-10-06 19:57:00 +02:00
Jianyong Wu
7eac2ec786 protection: add confidential compute frame for arm
Even CCA, which is the confidential compute archtecture, has not been
ready, add a empty implementation to avoid static check error.

Fixes: #2789
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
Suggested-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-10-06 15:53:36 +02:00
Jianyong Wu
8acfc154de check: fix typecheck failure in qemu_arm64_test.go
fix typecheck failure in qemu_arm64_test.go

Fixes: #2789
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2021-10-06 15:53:35 +02:00
Amulya Meka
5b02d54e23 virtcontainers: fix lint failure on ppc64le
Add nolint for arch specific code to exclude
from lint check.

Fixes: #2773

Signed-off-by: Amulya Meka <amulmek1@in.ibm.com>
2021-10-06 15:53:35 +02:00
Jakob Naucke
ff9728f032 virtcontainers: nolint guestProtection
Exclude from lint checking for it is ultimately only used in
architecture-specific code.

Fixes: #2273
Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-10-06 15:53:35 +02:00
Jakob Naucke
5c138c8f12 runtime: Fix field alignment on s390x
Follow-up of #2237 for s390x -- field alignment isn't always minimal

Fixes: #2773
Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-10-06 15:53:35 +02:00
Fabiano Fidêncio
191d001610 vendor: Update containerd to v1.5.7
Bump containerd to v1.5.7 in order to bring in a fix for CVE-2021-41103,
"insufficiently restricted permissions ons plugins directories
(https://github.com/advisories/GHSA-c2h3-6mxw-7mvq)".

dependabot found a potential security vulnerability and raised a PR to
fix it.  However, dependabot does not properly follows nor understands
the needed of our CIs (mainly related to formatting the PR and whatnot),
thus I'm re-raising it.

Fixes: #2796
Supersedes: #2787

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-10-06 10:40:43 +02:00
Chelsea Mafrica
1f6a551570 Merge pull request #2755 from paleozogt/centos-ppc64le-gpg
#2676: fixing centos gpg key url for ppc64le
2021-10-05 09:37:58 -07:00
Eric Ernst
2bc7561561 Merge pull request #2769 from sameo/topic/agent-route
Pass the host route IP family to the guest
2021-10-05 07:20:33 -07:00
Chelsea Mafrica
db7d3b91bd Merge pull request #2780 from Amulyam24/checks
cmd: get return value for setCPUtype
2021-10-04 22:19:59 -07:00
Bin Liu
f7f6bd0142 kata-monitor: add index page
Add an index page to the kata-monitor endpoint.

Porting of https://github.com/liubin/kata-containers/commit/a45aa0696d55

Fixes: #2292
Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-10-04 18:13:56 +02:00
Archana Shinde
5fd963530d Merge pull request #2756 from gkurz/fix-enable-pie
packaging: Configure QEMU with --enable-pie
2021-10-04 03:50:00 -07:00
Samuel Ortiz
a44cde7e8d agent: netlink: Use the grpc IP family field when updating the route
Not all routes have either a gateway or a destination IP.
Interface routes, where the source, destination and gateway are undefined,
will default to IP v4 with the current is_ipv6() check even when they
are v6 routes.

We use the provided gRPC Route.Family field instead. This field is built
from the host netlink messages, and is a reliable way of finding out
a route's IP family.

Fixes: #2768

Signed-off-by: Samuel Ortiz <s.ortiz@apple.com>
2021-10-01 14:39:46 +02:00
Samuel Ortiz
71ce6cfe9e runtime: Pass the route IP family to the agent
When updating the guest routing table, we should forward the IP family
information up to the guest.

Signed-off-by: Samuel Ortiz <s.ortiz@apple.com>
2021-10-01 14:35:17 +02:00
Samuel Ortiz
99450bd1f7 agent: protos: Add a Family field to the Route payload
Our check for the IP family is working as long as we have either a
gateway or a destination IP. Some routes are missing both.
The RT netlink messages provide the IP family information for each
route, so we can carry that piece of information up to the guest. That
will allow for a more reliable route IP family determination.

Signed-off-by: Samuel Ortiz <s.ortiz@apple.com>
2021-10-01 14:35:17 +02:00
Samuel Ortiz
f85fe70231 runtime: vendor: Bump the netlink package dependency
We need to be able to get the IP family from the netlink route meesages,
and the Route.Family field only got recently added to the netlink
package.

The update generates static check warnings about the call for
nethandler.Delete() being deprecated in favor of a Close() call instead.
So we include the s/Delete()/Close()/ change as part of this PR.

Signed-off-by: Samuel Ortiz <s.ortiz@apple.com>
2021-10-01 14:35:01 +02:00
Amulya Meka
e439cec7c5 cmd: fix field alignment on ppc64le
Optimising structure field alignment.

Fixes: #2779

Signed-off-by: Amulya Meka <amulmek1@in.ibm.com>
2021-10-01 11:45:27 +00:00
Amulya Meka
e5159ea755 cmd: get return value for setCPUtype
Accept and assert the return value in testSetCPUTypeGeneric.

Fixes: #2779

Signed-off-by: Amulya Meka <amulmek1@in.ibm.com>
2021-10-01 11:44:14 +00:00
Bo Chen
fd5c858390 Merge pull request #2751 from jodh-intel/clh-fix-guest-output
clh: Enable guest userland output
2021-09-30 09:05:30 -07:00
James O. D. Hunt
2ce8d4263c clh: Suppress hypervisor output to make guest output visible
Reduce the cloud-hypervisor log level from `Debug` to `Info` when hypervisor
debug is enabled. This is required since `Debug` level:

- Is overkill for debugging hypervisor failures.
- Effectively hides the output from the guest kernel and userland: CLH
  generates so much output that the output from the guest gets "lost in
  the noise" (experiments show that for each full CLH debug message, at most
  1 _byte_ of guest output is displayed).

Fixes: #2726.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-09-30 14:22:09 +01:00
Greg Kurz
cd1064b16f packaging: Configure QEMU with --enable-pie
We explicitely set the Postion Independant Executlable (PIE) options
in the extra CFLAGS and LDFLAGS that are passed to the QEMU configure
script for all archs. This means that these options are used pretty
much everywhere, including when building the sample plugins under the
test directory. These cannot be linked with -pie and break the build,
as experienced recently on ARM (see PR #2732).

This only broke on ARM because other archs are configured with
--disable-tcg : this disables plugins which are built by default
otherwise.

The --enable-pie option is all that is needed. The QEMU build system
knows which binaries should be created as PIE, e.g. the important
bits like QEMU and virtiofsd, and which ones should not, e.g. the
sample plugins that aren't used in production.

Rely on --enable-pie only, for all archs. This allows to drop the
workaround that was put in place in PR #2732.

Fixes: #2757
Signed-off-by: Greg Kurz <groug@kaod.org>
2021-09-30 11:17:41 +02:00
Jakob Naucke
8739a73dd3 Merge pull request #2736 from Amulyam24/kata-check-test
cmd: Fix mismatched types in testModuleData
2021-09-30 10:20:19 +02:00
bin
762922a521 runtime: delete func ConstraintsToVCPUs
ConstraintsToVCPUs is not used any more.

Fixes: #2741

Signed-off-by: bin <bin@hyper.sh>
2021-09-30 14:44:41 +08:00
bin
4f4854308a runtime: delete virtcontainers-setup.sh
This file is not used anymore.

Fixes: #2741

Signed-off-by: bin <bin@hyper.sh>
2021-09-30 14:44:30 +08:00
Chelsea Mafrica
96c033ba6c Merge pull request #2763 from liubin/fix/2762-update-gitignore
runtime: update .gitignore to ignore monitor_address file
2021-09-29 09:45:57 -07:00
Carlos Venegas
7183de47df Merge pull request #2766 from YchauWang/wyc-runtime-cmd
runtime: fix the make check-go-static command error
2021-09-29 10:53:02 -05:00
Aaron Simmons
80f6b97710 osbuilder: fixing centos gpg key url for ppc64le
The centos ppc64le gpg key at mirror.centos.org doesn't exist (link rot?).
Replacing it with url from CentOS/sig-core-AltArch on github.

Fixes: #2676

Signed-off-by: Aaron Simmons <paleozogt@gmail.com>
2021-09-29 09:20:51 -06:00
Bin Liu
4ac7199282 Merge pull request #2494 from rapiz1/clean-up-code
virtcontainers: clean up useless code
2021-09-29 22:56:13 +08:00
wangyongchao.bj
bb99bfb45d runtime: fix the make check-go-static command error
modify the make script of the check-go-static, changing the `./cli` path to `./cmd/kata-runtime`

Fixes: #2765

Signed-off-by: wangyongchao.bj <wangyongchao.bj@inspur.com>
2021-09-29 15:37:25 +08:00
David Gibson
b57613f53e Merge pull request #1682 from dgibson/rescan
Remove forced PCI rescans from agent
2021-09-29 13:03:55 +10:00
bin
870771d76d runtime: update .gitignore to ignore monitor_address file
Run tests sometimes generate pkg/containerd-shim-v2/monitor_address,
and `git status` will treat it as a new file.

Package containerd-shim-v2 has moved to pkg/containerd-shim-v2,
the monitor_address in .gitignore should be updated too.

Fixes: #2762

Signed-off-by: bin <bin@hyper.sh>
2021-09-29 09:24:14 +08:00
Chelsea Mafrica
20f4c252b8 Merge pull request #2519 from jcvenegas/kernel-experimental-5.13.10
kernel: Enable SGX in experimental kernel.
2021-09-28 11:00:46 -07:00
Fupan Li
823818cfbc Merge pull request #2744 from fengwang666/nil-bug
runtime: fix nil reference in cleanup rootless user
2021-09-28 22:43:24 +08:00
Fabiano Fidêncio
f9ecaaa6be Merge pull request #2732 from jongwu/plugin
qemu: prepare to upgrade qemu version to 6.1.0 for arm
2021-09-28 12:12:48 +02:00
bin
18bff58487 runtime: Optimize func noNeedForOutput and add test cases
Optimize func noNeedForOutput and add test cases for this func.

Fixes: #2747

Signed-off-by: bin <bin@hyper.sh>
2021-09-28 16:58:44 +08:00
Feng Wang
e5fe53f0a9 runtime: fix nil reference in cleanup rootless user
It seems the client (crio) can send multiple requests to stop the Kata VM,
resulting a nil reference if the uid has already been cleaned up by a different thread.

Fixes #2743

Signed-off-by: Feng Wang <feng.wang@databricks.com>
2021-09-27 21:28:47 -07:00
Chelsea Mafrica
0b087a873d Merge pull request #2739 from fgiudici/kata-monitor_improvements3
kata-monitor (minor) improvements
2021-09-27 15:45:21 -07:00
Francesco Giudici
2304a59601 runtime: set the sandbox storage path static
Since we now have "unix://" kind of socket returned by the
SocketAddress() function, there is no more need to build the sandbox
storage path dynamically to keep OS compatibility.

Fixes: #2738
Suggested-by: Christophe de Dinechin <dinechin@redhat.com>
Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-27 15:57:34 +02:00
Francesco Giudici
315295e0ef runtime: rename GetSanboxesStoragePath() --> GetSandboxesStoragePath()
Add the missing 'd'.

Fixes: #2738
Suggested-by: Jakob Naucke <jakob.naucke@ibm.com>
Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-27 15:56:14 +02:00
Bin Liu
3217b03b17 Merge pull request #2522 from Bevisy/main-2515
virtcontainers: Fix incorrect scripts path
2021-09-27 21:14:40 +08:00
Bin Liu
39df808f6a Merge pull request #2695 from YchauWang/wyc-vc-cgroup
runtime: clear virtcontainers cgroup duplicated function
2021-09-27 21:12:39 +08:00
Amulya Meka
13e65f2ee8 cmd: Fix mismatched types in testModuleData
Rectify the values of testModuleData with the correct
types in TestCCCheckCLiFunction in kata-check_(!x86)_test.go

Fixes: #2735

Signed-off-by: Amulya Meka <amulmek1@in.ibm.com>
2021-09-27 07:17:07 +00:00
Peng Tao
05995632c3 Merge pull request #2566 from fgiudici/kata-monitor_improvements
Kata monitor: cache improvements
2021-09-27 12:29:13 +08:00
Carlos Venegas
da42cbc0a7 actions: Build experimental kernel on kata-deploy push action
Build experimental kernel on kata-deploy push action.

Signed-off-by: Carlos Venegas <jose.carlos.venegas.munoz@intel.com>
2021-09-27 03:39:50 +00:00
Carlos Venegas
dffc50928a kernel: Enable SGX in experimental kernel.
Enable Intel SGX support in experimental kernel.

Fixes: #2518

Signed-off-by: Carlos Venegas <jose.carlos.venegas.munoz@intel.com>
2021-09-27 03:39:50 +00:00
Carlos Venegas
ff6a677d16 kernel-build: Enable multiple config types.
Optional build types are common for early adoption.
Lets add a flag to build and optional config.

e.g.
kernel-build.sh -b experimental

In the future instead of add more flags just add a new build type.

Signed-off-by: Carlos Venegas <jose.carlos.venegas.munoz@intel.com>
2021-09-27 03:39:50 +00:00
Carlos Venegas
90046964ef experimental-kernel: bump 5.13.10
Upgrade Linux kernel to latest stable release.

Signed-off-by: Carlos Venegas <jose.carlos.venegas.munoz@intel.com>
2021-09-27 03:39:50 +00:00
Carlos Venegas
1fbb73041b build: kata-deploy kernel experimental
Allow build experimental kernel from kata-deploy.

Signed-off-by: Carlos Venegas <jose.carlos.venegas.munoz@intel.com>
2021-09-27 02:56:59 +00:00
David Gibson
907459c1c1 agent/device: Don't force PCI rescans
The agent initiates a PCI rescan from two places.  One is triggered
for each virtio-blk PCI device, and one is triggered unconditionally
when we start a new container.

The PCI bus rescan code was added long time ago in Clear Containers due to
lack of ACPI support in QEMU 2.9 + q35.  Since Kata routinely plugs devices
under a PCIe-to-PCI bridge, that left SHPC as the only available hotplug
mechanism.

However, while Kata was using SHPC on the qemu side, it wasn't actually
using it on the guest side.  Due to a quirk of our guest kernel
configuration, the SHPC driver never bound to the bridge, and *no* hotplug
was working at all.  To work around that, Kata was forcing the rescan
manually, which would discover the new device.  That was very fragile (we
were arguably relying on a kernel bug).  Even if we were using SHPC
propertly, it includes a mandatory 5s delay during plug operations
(designed for physical cards and human operators), which makes it
unsuitable quick start up.

Worse, the forced PCI rescans could race with either SHPC or PCIe native
hotplug sequences, causing several problems.  In some cases this could put
the device into an entirely broken state where it wouldn't respond to
config space accesses at all.

Since pull request #2323 was merged, we have instead used ACPI hotplug
which is both fast, and more solid in terms of semantics and races.  So,
the forced PCI rescans are no longer necessary.  Remove them all.

fixes #683

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
David Gibson
75f426dd1e agent: Simplify do_add_swap()
do_add_swap() has some mildly complex code to translate the PCI path of
a virtio-blk device (where the swap will reside) into a /dev path. However,
the device module already has get_virtio_blk_pci_device_name() which does
exactly that.  The existing code has some further advantages: it uses
more precise matching of the sysfs paths, and if necessary it will wait for
the device to be added to the guest.

While we're there, remove an unnecessary 'as u8' from the PCI path
construction: pci::Path::new() already accepts anything which implements
TryInfo<u8>, which u32 certainly does.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
David Gibson
aad1a8734f runtime/device: Give the agent information about VFIO devices
We send information about several kinds of devices to the agent so
that it can apply specific handling.  We don't currently do this with
VFIO devices.  However we need to do that so that the agent can
properly wait for VFIO devices to be ready (previously it did that
using a PCI rescan which may not be reliable and has some very bad
side effects).

This patch collates and sends the relevant information.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
David Gibson
ebd7b61884 runtime: Don't repeat GetDeviceByID between appendDevices() and append*()
Both appendBlockDevice and appendVhostUserBlkDevice start by using
GetDeviceByID to lookup the api.Device object corresponding to their
ContainerDevice object.  However their common caller, appendDevices() has
already done this.

This changes it so the looked up api.Device is passed to the individual
append*Device() functions.  This slightly reduces duplicated work, but more
importantly it makes it clearer that append*Device() don't need to check
for a nil result from GetDeviceByID, since the caller has already done
that.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
David Gibson
ad45c52fbe runtime/device: Record guest PCI path for VFIO devices
For several device types which correspond to a PCI device in the guest
we record the device's PCI path in the guest.  We don't currently do
that for VFIO devices, but we're going to need to for better handling
of SR-IOV devices.

To accomplish this, we have to determine the guest PCI path from the
information the VMM gives us:

For qemu, we query the slot of the device and its bridge from QMP.

For cloud-hypervisor, the device add interface gives us a guest PCI
address.  In fact this represents a design error in the clh API -
there's no way it can really know the guest PCI address in general.
It works in this case, because clh doesn't use PCI bridges, so the
device will always be on the root bus.  Based on that, the PCI path is
simply the device's slot number.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
David Gibson
5c2af3e308 runtime/device: Refactor hotplugVFIODevice() to have common exit path
hotplugVFIODevice() has several different paths depending if we're
plugging into a root port or a PCIE<->PCI bridge and if we're using a
regular or mediated VFIO device.

We're going to want some common code on the successful exit path here,
so refactor the function to allow that without duplication.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
David Gibson
8bc71105f4 agent/device: Add device type for VFIO devices
Currently, VFIO devices attached to a Kata container aren't described to
the agent at all.  We essentially just hope they're ready by the time
we've entered the container proper, which is usually the case because of
the PCI rescan - but that causes other problems.

This adds a new device type to the agent representing VFIO devices.  The
agent will use its existing uevent watching mechanisms to wait for the
associated guest PCI device to appear before proceeding.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
David Gibson
f7a2707505 agent: Move driver type constants into device.rs
Currently the constants giving the names for each device/driver type in
the protocol are in mount.rs, and used in device.rs.  Since these constants
are inherently related to, well, devices, it makes more sense to put them
in device.rs and use them from mount.rs.

This will become even more so with planned extensions which will add some
device types that will not be used in mount.rs at all.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
David Gibson
5b1eb08bde agent/uevent: Improve logging of wait_for_uevent()
These messages will help when debugging matchers not matching properly.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
David Gibson
cf36fd87ad runtime: Fix some leftover go fmt errors
A few "go fmt" errors appear to have crept it.  Clean them up with
"go fmt ./..." in the src/runtime directory.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
Jianyong Wu
6d94957a14 kernel: reduce alignment size of memory hotplug to 128M
After 5.11-rc4, memory hotplug alignment size is reduced to 128M for 4K
page.
It works better for memory hotplug and nvdimm plug in kata on arm.
without this patch, memory hotplug will fail for the current memory
hotplug alignment is 1G but the nvdimm size align with 128M in kata.
After port it here, we can avoid a fix in qemu side.

Note: if you change the page size to other size than 4K, memory hotplug
will has no effect.

Fixes: #2707
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2021-09-26 15:33:33 +08:00
Jianyong Wu
48090f624a qemu: disable plug on arm64 when pie is added
For qemu 6.1.0 build on arm64, compile error occurs when "-pie" is added
 to ldflag.
tests/plugins/empty.c won't be linked as a sysmbol is missing.
I consider there maybe a bug.
Before figure it out, we should disable plugins for qemu 6.1.0 on arm64.

Fixes: #2707
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2021-09-26 15:33:33 +08:00
Fabiano Fidêncio
c811dd7484 Merge pull request #2720 from Kvasscn/kata_dev_virtiofsd_ctx
virtiofs: fix error report in TestVirtiofsdStart when go test running
2021-09-25 12:17:00 +02:00
Fabiano Fidêncio
ed705482a2 Merge pull request #2730 from fidencio/wip/release-fix-using-vendored-sources
workflows: Fix the config file path for using vendored sources
2021-09-24 23:37:10 +02:00
Fabiano Fidêncio
a525991c2c workflows: Fix the config file path for using vendored sources
There's a typo in the file that should receive the output of `cargo
vendor`.  We should use forward the output to `.cargo/config` instead of
`.cargo/vendor`.

This was introduced by 21c8511630.

Fixes: #2729

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-09-24 20:26:27 +02:00
Fabiano Fidêncio
9ad44750e8 Merge pull request #2727 from fidencio/wip/fix-wrong-tags-attribution
workflows: Fix tag attribution
2021-09-24 19:28:33 +02:00
Fabiano Fidêncio
39dcbaa672 workflows: Fix tag attribution
While releasing kata-containers 2.3.0-alpha1 we've hit some issues as
the tags attribution is done incorrectly.  We want an array of tags to
iterate over, but the currently code is just lost is the parenthesis.

This issue was introduced in a156288c1f.

Fixes: #2725

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-09-24 17:58:50 +02:00
Fabiano Fidêncio
832d57c960 Merge pull request #2722 from fidencio/2.3.0-alpha1-branch-bump
# Kata Containers 2.3.0-alpha1
2021-09-24 15:03:06 +02:00
Fabiano Fidêncio
04139ba686 release: Kata Containers 2.3.0-alpha1
- virtiofs: Create shared directory with 0700 mode, not 0750
- watcher: ensure we create target mount point for storage
- packaging: fix qemu build on ppc64le
- runtime: tracing: Use root context to stop tracing
- Replace SHPC with ACPI PCI hotplug for Kata guests
- kata-deploy: Also provide "stable" & "latest" tags
- runtime: tracing: Fix logger passed in newContainer
- virtcontainers: update VC SandboxConfig API add SandboxBindMounts field
- sandbox: Allow the device to be accessed,such as /dev/null and /dev/u…
- qemu: add v5.1.0 dir under tag_patches
- threat-model: Add missing threat-model document
- docs: documentation for running non-root VMM
- workflows,release: Upload the vendored cargo code
- runtime: run the QEMU VMM process with a non-root user
- runtime: update .gitignore file cleare the vc shim config
- runtime: fix empty cgroup path validation error
- ci: Call agent shutdown test only in the correspondent CI_JOB
- runtime: Remove outdated TestStoreContainer
- runtime: refactor commandline code directory
- virtcontainers: update VC HypervisorConfig API add three lost fields
- virtcontainers: add unit tests for container.go
- runtime: clh: Enable hugepages support
- agent: Simplify mount point creation
- versions: Allow newer Rust versions
- runtime/qemu: Move from query-cpus to query-cpus-fast
- Update Kata to use qemu-6.1
- Host cgroups improvements and simplifications
- Add doc for guest swap
- versions: Upgrade to Cloud Hypervisor v18.0
- runtime: Fix README link
- qemu: remove default config for arm64.
- sandbox: Add device permissions such as /dev/null to cgroup
- virtcontainers: fc: parse vcpuID correctly
- kata-tarball: Build and test fixes
- test: enable running tests under root user
- osbuilder: Change to "=" operator to make script more portable
- makefile: Fix error exit status code
- osbuilder: fix inconsistent calculation of fs size
- virtcontainers: Remove NewStoreFeature
- snap: Test variable instead of executing "branch"
- license: drop redundent license files
- Fix swap fail insert fail issue

272771dc watcher: ensure we create target mount point for storage
439e5ac3 packaging: fix qemu build on ppc64le
8bbcb06a qemu: Disable SHPC hotplug
cc4983ee runtime: Remove unused qemuArchBase.appendBridges definition
e248de46 vendor: Update govmm
0ca8c272 qemu: add v5.1.0 dir under tag_patches
3bdcfaa6 kata-deploy: Add more info about the stable tag
41c590fa kata-deploy: Improve README
debf3c9f kata-deploy: Remove qemu-virtiofs runtime class
43a72d76 release: update the kata-deploy yaml files accordingly
ea9b2f9c kata-deploy: Add "stable" info to the README
e5411056 kata-deploy: Update the README
9acf4e5d kata-deploy: Add `stable` yaml files
a86babe0 kata-deploy: Point to the `latest` release
a156288c workflows: Add "stable" & "latest" tags to kata-deploy
305afc8b docs: documentation for running non-root VMM
1fe080fd threat-model: Add missing threat-model document
21c85116 workflows,release: Upload the vendored cargo code
9a6d56f1 runtime: fix empty cgroup path validation error
90e63887 ci: Call agent shutdown test only in the correspondent CI_JOB
48fb1d92 virtiofs: Create shared directory with 0700 mode, not 0750
077b77c1 runtime: tracing: Fix logger passed in newContainer
39cd05e0 runtime: tracing: Use root context to stop tracing
1cfe5930 runtime: Run QEMU using a non-root user/group
fd983738 runtime: update .gitignore file cleare the vc shim config
067c44d0 runtime: fix UT build failure
9353cd77 runtime: Remove outdated TestStoreContainer
9a311a2b docs: fix invalid kernel dax doc url
e7c42fbc runtime: unify generated config
4f7cc186 runtime: refactor commandline code directory
9d3cd984 agent/mount: Remove unused ensure_destination_exists()
64aa5623 agent: Correct mount point creation
08d7aebc agent/mount: Split out regular file case from ensure_destination_exists()
9fa3beff agent: Remove unnecessary BareMount structure
49282854 agent: Simplify BareMount::mount by using nix::mount::mount
d00decc9 runtime: clh: Enable hugepages support
64bb803f runtime/qemu: Move from query-cpus to query-cpus-fast
25ac3524 versions: Allow newer Rust versions
851d5f86 tests: Correct heading in static checks test
4b7e4a4c runtime: Vendoring update
8d9d6e6a docs: Host cgroups documentation update
9bed2ade virtcontainers: Convert to the new cgroups package API
b42ed393 virtcontainers: cgroups: Add a containerd API based cgroups package
f17752b0 virtcontainers: container: Do not create and manage container host cgroups
dc7e9bce virtcontainers: sandbox: Host cgroups partitioning
f811026c virtcontainers: Unconditionally create the sandbox cgroup manager
a6066404 virtcontainers: update VC HypervisorConfig API add three lost fields
bb18cd47 virtcontainers: update VC SandboxConfig API add SandboxBindMounts field
58e77a3c sandbox: Allow the device to be accessed,such as /dev/null and /dev/urandom
d67a414b src/runtime/README.md: Fix URL of Licence
13b8bb0c runtime: Fix README link
25670d30 packaging/qemu: Update qemu-exerimental version to v6.1.0
041a513f versions: Update qemu to v6.1.0
62baa48e virtcontainers: fc: parse vcpuID correctly
81de2d47 packaging: Correct error message in apply_patches.sh
f785ff0b virtcontainers: clh: Revert the workaround incorrect default values
0e0e59dc virtcontainers: clh: Re-generate the client code
f0b53314 versions: Upgrade to Cloud Hypervisor v18.0
11652136 actions: test make kata-tarball
626d659f actions: kata-deploy on PRs and use makefile
78d99f51 kata-deploy: Make verbose single builds
59486b85 kata-deploy: Add tarball suffix to makefile targets
96e1246b makefile: Include kata-deploy targets
74d645cd how-to: Add how-to-setup-swap-devices-in-guest-kernel.md
d865c809 virtcontainers: add unit tests for container.go
71f915c6 sandbox: Add device permissions such as /dev/null to cgroup
2174fee4 docs: Add swap annotations introduction
2abc450a test: enable running tests under root user
924a68d0 osbuilder: Change to "=" operator to make script more portable
1fff9be7 qemu: remove default config for arm64.
e2a9e78c virtcontainers: Remove NewStoreFeature
bfcee911 osbuilder: fix inconsistent calculation of fs size
4996f9b7 snap: Test variable instead of executing "branch"
256c3b27 license: drop redundent license files
bcc9fa3b hotplugAddBlockDevice: Use ExecuteBlockdevAddWithDriverCache with swap
bd85da04 vendor: Update vendor/github.com/kata-containers/govmm
d422789f makefile: Fix error exit status code

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-09-24 12:32:26 +02:00
zhanghj
57e3712dbd virtiofs: fix error report in TestVirtiofsdStart when go test running
Initialize ctx with context.Background() instead of nil value.

Fixes: #2718

Signed-off-by: zhanghj <zhanghj.lc@inspur.com>
2021-09-24 16:06:06 +08:00
Fabiano Fidêncio
279f8e9d03 Merge pull request #2590 from c3d/issue/2589-virtiofsd-perms
virtiofs: Create shared directory with 0700 mode, not 0750
2021-09-24 09:16:40 +02:00
Eric Ernst
fa44e5c1e5 Merge pull request #2703 from egernst/watcher-fixup
watcher: ensure we create target mount point for storage
2021-09-23 21:59:08 -07:00
Chelsea Mafrica
e987632deb Merge pull request #2693 from Amulyam24/qemu-build
packaging: fix qemu build on ppc64le
2021-09-23 10:31:34 -07:00
Julio Montes
1766c93b08 Merge pull request #2662 from cmaf/tracing-stop-rootctx
runtime: tracing: Use root context to stop tracing
2021-09-23 11:50:35 -05:00
Eric Ernst
272771dcf9 watcher: ensure we create target mount point for storage
We would only create the target when updating files. We need to make
sure that we create the target if the source is a directory. Without
this, we'll fail to start a container that utilizes an empty configmap,
for example.

Add unit tests for this.

Fixes: #2638

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-09-23 08:29:28 -07:00
Julio Montes
5d2a82fbf9 Merge pull request #2323 from dgibson/acpi-pcihp
Replace SHPC with ACPI PCI hotplug for Kata guests
2021-09-23 09:55:31 -05:00
Francesco Giudici
8b0bc1f45e kata-monitor: bump version to 0.2.0
We now support any container engine CRI compliant. Let's bump the
kata-monitor version to 0.2.0.

Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-23 14:32:09 +02:00
Francesco Giudici
bfb556d56a kata-monitor: refresh kata sandbox list on fs events
This commit stops the container engine polling in favor of
the kata sandbox storage path monitoring.
The pod cache list is now refreshed based on fs events and synced with
the container engine only when needed.

Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-23 14:32:09 +02:00
Francesco Giudici
0e854f3b80 kata-monitor: improve detection of kata workloads
When the container engine is different than containerd or CRI-O we
lack proper detection of kata workloads and consider all the pods as
kata ones.
Instead of querying the container engine for the lower level runtime
used in each pod, check if a directory matching the pod exists in
the virtualcontainers sandboxes storage path.
This provides a container engine independent way to check for kata pods.

Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-23 14:32:09 +02:00
Yuanzhe Liu
80463b445a qemu: use GitLab repos instead of qemu.org
arm using qemu 5.1.0, thus is affected by the wired submodules
link.

Fixes: #2705
Signed-off-by: Yuanzhe Liu <yuanzheliu09@gmail.com>
2021-09-23 12:07:44 +00:00
Samuel Ortiz
3276f3b5b6 Merge pull request #2453 from fidencio/wip/kata-deploy-use-stable-and-latest-tags
kata-deploy: Also provide "stable" & "latest" tags
2021-09-23 13:54:01 +02:00
Fabiano Fidêncio
0ececc630f Merge pull request #2666 from cmaf/tracing-newContainer-logger
runtime: tracing: Fix logger passed in newContainer
2021-09-23 13:07:19 +02:00
Fabiano Fidêncio
e33c26ba18 Merge pull request #2622 from YchauWang/wyc-vc-api
virtcontainers: update VC SandboxConfig API add SandboxBindMounts field
2021-09-23 13:05:33 +02:00
Fabiano Fidêncio
47170e302a Merge pull request #2616 from Bevisy/main-2615
sandbox: Allow the device to be accessed,such as /dev/null and /dev/u…
2021-09-23 13:04:18 +02:00
Amulya Meka
439e5ac3b0 packaging: fix qemu build on ppc64le
Since the qemu upgrade to v6.1.0, the build fails
with a linking issue. Adding --disable-tcg to fix
it.

Fixes: #2710

Signed-off-by: Amulya Meka <amulmek1@in.ibm.com>
2021-09-23 06:27:15 +00:00
David Gibson
8bbcb06af5 qemu: Disable SHPC hotplug
Under certain circumstances[0] Kata will attempt to use SHPC hotplug
for PCI devices on the guest.  In fact we explicitly enable SHPC on
our PCI to PCI bridges, regardless of the qemu default.

SHPC was designed a long, long time ago for physical hotplugging and
works very poorly for a virtual environment. In particular it has a
mandatory 5s delay to allow a (real, human) operator to back out the
operation if they press a button by mistake. This alone makes it
unusable for a fast start up application like Kata.

Worse, the agent forces a PCI rescan during startup.  That will race
with the SHPC hotplug operation causing the device to go into a bad
state where config space can't be accessed from the guest at all.

The only reason we've sort of gotten away with this is that our
default guest kernel configuration triggers what's arguably a kernel
bug effectively disabling SHPC.  That makes the agent rescan the only
reason we see the new device.

Now that we require a qemu >=6.1, which includes ACPI PCI hotplug on
the q35 machine, we can explicitly disable SHPC in all cases.  It's
nothing but trouble.

fixes #2174

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-23 10:27:26 +10:00
David Gibson
cc4983eeac runtime: Remove unused qemuArchBase.appendBridges definition
qemuArchBase.appendBridges is never actually used, because the bare
qemuArchBase type is itself never used (outside of unit tests).  Instead
*all* the subclasses of qemuArchBase override appendBridges() to call
the very similar, but not identical genericAppendBridges.  So, we can
remove the qemuArchBase.appendBridges implementation.

Furthermore, all those subclasses override appendBridges() in exactly
the same way, and so we can remove *those* definitions and replace the
base class qemuArchBase appendBridges() with that version, calling
genericAppendBridges().

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-23 10:15:08 +10:00
David Gibson
e248de4616 vendor: Update govmm
Update to commit 1b60b536f3, in particular to get extensions to
allow IO and memory window reservations to be set on PCI bridges.

https://github.com/kata-containers/govmm/pull/201

Git log:

de039da govmm/qemu: Let IO/memory reservations be specified for bridge devices

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-23 10:14:29 +10:00
Chelsea Mafrica
b6ff23d21b Merge pull request #2697 from jongwu/patch_qemu
qemu: add v5.1.0 dir under tag_patches
2021-09-22 09:08:05 -07:00
Jianyong Wu
0ca8c27241 qemu: add v5.1.0 dir under tag_patches
A related dir is needed when apply qemu patch using script. As qemu 5.1
is used for arm, a dir of "v5.1.0" is needed under tag_patches.

Fixes: #2696
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2021-09-22 18:07:24 +08:00
Archana Shinde
771a934fc5 Merge pull request #2341 from amshinde/add-threat-model
threat-model: Add missing threat-model document
2021-09-22 01:17:05 -07:00
wangyongchao.bj
3b0c4bf9a0 runtime: clear virtcontainers cgroup duplicated function
There are `DeviceToDeviceCgroup` and `deviceToDeviceCgroup` two functions,
 creating a `specs.LinuxDeviceCgroup` object. We clear the new function `deviceToDeviceCgroup`.

Fixes: #2694

Signed-off-by: wangyongchao.bj <wangyongchao.bj@inspur.com>
2021-09-22 15:13:34 +08:00
Fabiano Fidêncio
3bdcfaa658 kata-deploy: Add more info about the stable tag
Let's make it as clear as possible for the user that if they go for a
tagged version of kata-deploy, eg, 2.2.1, they'll have the kata runtime
2.2.1 deployed on their cluster.

Suggested-by: Eric Adams <eric.adams@intel.com>
Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-09-21 23:13:45 +02:00
Fabiano Fidêncio
41c590fa0a kata-deploy: Improve README
Let's add more instructions in the README in order to make clear to the
reader what they can do to check whether kata-deploy is ready, or
whether they have to wait till proceeding with the next instruction.

Suggested-by: Eric Adams <eric.adams@intel.com>
Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-09-21 23:13:45 +02:00
Fabiano Fidêncio
debf3c9fe9 kata-deploy: Remove qemu-virtiofs runtime class
There's only one QEMU runtime class deployed as part of kata-deploy, and
that includes virtiofs support (which is the default for quite some time
already).  Knowing this, let's just remove the `qemu-virtiofs` runtime
class definition.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-09-21 22:48:04 +02:00
Fabiano Fidêncio
43a72d76e2 release: update the kata-deploy yaml files accordingly
Let's teach our `update-repository-version.sh` script to properly update
the kata-deploy tags on both kata-deploy and kata-cleanup yaml files.

The 3 scenarios that we're dealing with, based on which branch we're
targetting, are:
```
 1) [main] ------> [main]        NO-OP
   "alpha0"       "alpha1"

                   +----------------+----------------+
                   |      from      |       to       |
  -----------------+----------------+----------------+
  kata-deploy      | "latest"       | "latest"       |
  -----------------+----------------+----------------+
  kata-deploy-base | "stable        | "stable"       |
  -----------------+----------------+----------------+

 2) [main] ------> [stable] Update kata-deploy and
   "alpha2"         "rc0"   get rid of kata-deploy-base

                   +----------------+----------------+
                   |      from      |       to       |
  -----------------+----------------+----------------+
  kata-deploy      | "latest"       | "rc0"          |
  -----------------+----------------+----------------+
  kata-deploy-base | "stable"       | REMOVED        |
  -----------------+----------------+----------------+

 3) [stable] ------> [stable]    Update kata-deploy
    "x.y.z"         "x.y.(z+1)"

                   +----------------+----------------+
                   |      from      |       to       |
  -----------------+----------------+----------------+
  kata-deploy      | "x.y.z"        | "x.y.(z+1)"    |
  -----------------+----------------+----------------+
  kata-deploy-base | NON-EXISTENT   | NON-EXISTENT   |
  -----------------+----------------+----------------+
```

And we can easily cover those 3 cases only with the information about
the "${target_branch}" and the "${new_version}", where:
* case 1) if "${target_branch}" is "main" *and* "${new_version}"
  contains "alpha", do nothing
* case 2) if "${target_branch}" is "main" *and* "${new_version}"
  contains "rc":
  * change the kata-deploy & kata-cleanup tags from "latest" to
    "${new_version}".
  * delete the kata-deploy-stable & kata-cleanup-stable files.
* case 3) if the "${target_branch}" contains "stable":
  * change the kata-deploy & kata-cleanup tags from "${current_version}"
    to "${new_version}".

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-09-21 22:48:04 +02:00
Fabiano Fidêncio
ea9b2f9c92 kata-deploy: Add "stable" info to the README
Similar to the instructions we have for the "latest" images, let's also
add instructions about the "stable" images.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-09-21 22:48:04 +02:00
Fabiano Fidêncio
e541105680 kata-deploy: Update the README
Let's just point to our repo URLs rather than assume users using
kata-deploy will have our repo cloned.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-09-21 22:48:04 +02:00
Fabiano Fidêncio
9acf4e5d32 kata-deploy: Add stable yaml files
This is **not** the nicest patch of my career, and I know it adds code
duplication.  However, I've decided to take this approach in order to
have easier / better instructions for users who're consuming
kata-deploy.

Having both stable & latest yaml on `main` will let us point to just one
place, without having to update the instructions.

I know, would be better to have those generated from a .in file,
wouldn't it?  For sure, but then we'd lose the ability to just point to
those files from kata-deploy pages (either on dockerhub or quay.io).

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-09-21 22:48:04 +02:00
Fabiano Fidêncio
a86babe0d0 kata-deploy: Point to the latest release
Instead of point to a specific release number, let's point to the
`latest` tag on the main branch.

There's still some work needed in order to point to the `stable` tag on
the stable-x.y branches, as this is something that should be done
automagically as part of the release process.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-09-21 22:48:04 +02:00
Fabiano Fidêncio
a156288c1f workflows: Add "stable" & "latest" tags to kata-deploy
When releasing a tarball, let's *also* add the "stable" & "latest" tags
to the kata-deploy image.

The "stable" tag refers to any official release, while the "latest" tag
refers to any pre-release / release candidate.

Fixes: #2302

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-09-21 22:48:04 +02:00
Fabiano Fidêncio
32c3fb71f2 Merge pull request #2546 from fengwang666/rootless-qemu-doc
docs: documentation for running non-root VMM
2021-09-21 22:45:33 +02:00
Fabiano Fidêncio
08e55a279a Merge pull request #2573 from fidencio/wip/upload-cargo-vendored-tarball
workflows,release: Upload the vendored cargo code
2021-09-21 21:45:59 +02:00
Fabiano Fidêncio
2bee8bc6bd Merge pull request #2432 from fengwang666/qemu-rootless
runtime: run the QEMU VMM process with a non-root user
2021-09-21 21:37:02 +02:00
Feng Wang
305afc8b70 docs: documentation for running non-root VMM
Documentation for running non-root QEMU VMM in Kata runtime

Fixes: #2545

Signed-off-by: Feng Wang <feng.wang@databricks.com>
2021-09-21 11:20:37 -07:00
Archana Shinde
1fe080fd24 threat-model: Add missing threat-model document
This was added in the 1.x repo and is missing in the 2.x repo.
Copying over the document from 1.x.
This is a starting point and focuses on the devices / interfaces
with the virtual machine, and ultimately to the container itself.

We then discuss how these devices/interfaces vary by VMM/hypervisor.

The threat model drawing is created via gdocs, located here:
https://docs.google.com/drawings/d/1dPi9DG9bcCUXlayxrR2OUa1miEZXewtW7YCt4r_VDmA/edit?usp=sharing

For Kata 2.x, the block named as `kata-runtime` has been changed to
`kata-shim`.

Fixes: #2340

Signed-off-by: Eric Ernst <eric.ernst@intel.com>
Signed-off-by: Archana Shinde <archana.m.shinde@intel.com>
2021-09-21 20:20:39 +05:30
Samuel Ortiz
3a4aca4d67 Merge pull request #2671 from YchauWang/wyc-runtime-config
runtime: update .gitignore file cleare the vc shim config
2021-09-21 15:15:09 +02:00
Fabiano Fidêncio
21c8511630 workflows,release: Upload the vendored cargo code
As part of the release, let's also upload a tarball with the vendored
cargo code.  By doing this we allow distros, which usually don't have
access to the internet while performing the builds, to just add the
vendored code as a second source, making the life of the downstream
maintainers slightly easier*.

Fixes: #1203

*: The current workflow requires the downstream maintainer to download
the tarball, unpack it, run `cargo vendor`, create the tarball, etc.
Although this doesn't look like a ridiculous amount of work, it's better
if we can have it in an automated fashion.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-09-21 09:28:16 +02:00
Fabiano Fidêncio
9ea78ac386 Merge pull request #2675 from fengwang666/cgroup-bug-fix
runtime: fix empty cgroup path validation error
2021-09-21 08:48:22 +02:00
Feng Wang
9a6d56f1ab runtime: fix empty cgroup path validation error
An empty cgroup path shouldn't fail cgroup creation

Fixes #2674

Signed-off-by: Feng Wang <feng.wang@databricks.com>
2021-09-20 13:48:09 -07:00
GabyCT
c4bafc4e68 Merge pull request #2684 from GabyCT/topic/remoteagenttest
ci: Call agent shutdown test only in the correspondent CI_JOB
2021-09-20 14:13:36 -05:00
Gabriela Cervantes
90e6388726 ci: Call agent shutdown test only in the correspondent CI_JOB
The agent shutdown test should only run on the CI JOB of CRI_CONTAINERD_K8S_MINIMAL
which is the only one where testing tracing is being enabled, however, this
test is being triggered in multiple CI jobs where it should not run. This PR
fixes that issue.

Fixes #2683

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2021-09-20 17:04:48 +00:00
Julio Montes
45d40179c2 Merge pull request #2655 from Jakob-Naucke/no-store-container
runtime: Remove outdated TestStoreContainer
2021-09-20 11:22:50 -05:00
Christophe de Dinechin
48fb1d9203 virtiofs: Create shared directory with 0700 mode, not 0750
A discussion on the Linux kernel mailing list [1] exposed that virtiofsd makes a
core assumption that the file systems being shared are not accessible by any
non-privileged user. We currently create the `shared` directory in the sandbox
with the default `0750` permissions, which gives read and directory traversal
access to the group. There is no real good reason for a non-root user to access
the shared directory, and this is potentially dangerous.

Fixes: #2589

[1]: https://lore.kernel.org/linux-fsdevel/YTI+k29AoeGdX13Q@redhat.com/

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2021-09-20 10:47:18 +02:00
Francesco Giudici
afad910d0e kata-monitor: add getSandboxFS()
Retrieve the absolute sandbox storage path. We will soon need this to
monitor the creation/deletion of new kata sandboxes.

Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-20 10:37:55 +02:00
Francesco Giudici
e38686f74d runtime: add GetSandboxesStoragePath()
The storage path we use to collect the sandbox files is defined in the
virtcontainers/persist/fs package.
We create the runtime socket in that storage path, by hardcoding the
full path in the SocketAddress() function in the runtime package.
This commit splits the hardcoded path by the socket address path so that
the runtime package will be able to provide the storage path to all the
components that may need it.

Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-20 10:37:55 +02:00
Francesco Giudici
245a12bbb7 kata-monitor: improve sandbox caching
In order to retrieve the list of sandboxes, we poll the container engine
every 15 seconds via the CRI. Once we have the list we have to inspect
each pod to find out the kata ones.
This commit extend the sandbox cache to keep track of all the pods,
marking the kata ones, so that during the next polling only the new
sandboxes should be inspected to figure out which ones are using the
kata runtime.

Fixes: #2563
Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-20 10:37:55 +02:00
Francesco Giudici
fc067d61d4 kata-monitor: warn when unable to retrive the lower level runtime
this is an unexpected event (likely a change in how containerd/cri-o
record the lower level runtime in the pod) and should be more visible:
raise the log level to "warning".

Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-20 10:37:54 +02:00
Francesco Giudici
53ec4df953 kata-monitor: minor fixes
fix comment and use literals

Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-20 10:37:54 +02:00
Chelsea Mafrica
077b77c178 runtime: tracing: Fix logger passed in newContainer
Change logger in Trace call in newContainer from sandbox.Logger() to
nil. Passing nil will cause an error to be logged by kataTraceLogger
instead of the sandbox logger, which will avoid having the log message
report it as part of the sandbox subsystem when it is part of the
container subsystem.

The kataTraceLogger will not log it as related to the container
subsystem, but since the container logger has not been created at this
point, and we already use the kataTraceLogger in other instances where a
subsystem's logger has not been created yet, this PR makes the call
consistent with other code.

Fixes #2665

Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2021-09-17 11:41:04 -07:00
Chelsea Mafrica
39cd05e0bb runtime: tracing: Use root context to stop tracing
Call StopTracing with s.rootCtx, which is the root context for tracing,
instead of s.ctx, which is parent to a subset of trace spans.

Fixes #2661

Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2021-09-17 11:39:13 -07:00
Feng Wang
1cfe59304d runtime: Run QEMU using a non-root user/group
A random generated user/group is used to start QEMU VMM process.
The /dev/kvm group owner is also added to the QEMU process to grant it access.

Fixes #2444

Signed-off-by: Feng Wang <feng.wang@databricks.com>
2021-09-17 11:28:44 -07:00
wangyongchao.bj
fd98373850 runtime: update .gitignore file cleare the vc shim config
update .gitignore file, remove the follow configurations:
/virtcontainers/shim/mock/cc-shim/cc-shim
/virtcontainers/shim/mock/kata-shim/kata-shim
/virtcontainers/shim/mock/shim

Fixes: #2670

Signed-off-by: wangyongchao.bj <wangyongchao.bj@inspur.com>
2021-09-17 15:25:28 +08:00
Hui Zhu
fff82b4ef5 Merge pull request #2628 from bergwolf/runtime-reorg
runtime: refactor commandline code directory
2021-09-17 10:37:22 +08:00
Chelsea Mafrica
6159ef3499 Merge pull request #2626 from YchauWang/wyc-vc-api02
virtcontainers: update VC HypervisorConfig API add three lost fields
2021-09-16 16:46:27 -07:00
Peng Tao
067c44d0b6 runtime: fix UT build failure
storeContainer has been removed.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-09-16 19:42:02 +08:00
Jakob Naucke
9353cd77fd runtime: Remove outdated TestStoreContainer
Due to #2332 being merged after running tests for #2604, and the latter
being merged now, a test for the now removed `storeContainer` was added.
Remove it.

Fixes: #2652
Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-09-16 12:26:37 +02:00
Peng Tao
9a311a2b58 docs: fix invalid kernel dax doc url
And use a released version instead of the master branch so that it no
longer gets invalidated.

Depends-on: github.com/kata-containers/kata-containers#2645
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-09-16 17:19:18 +08:00
Peng Tao
e7c42fbc76 runtime: unify generated config
We don't need to maintain two generated config.go and even have
duplicates between them.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-09-16 17:19:18 +08:00
Peng Tao
4f7cc18622 runtime: refactor commandline code directory
Move all command line code to `cmd` and move containerd-shim-v2 to pkg.

Fixes: #2627
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-09-16 17:19:18 +08:00
Samuel Ortiz
7bf96d2457 Merge pull request #2604 from Amulyam24/container_tests
virtcontainers: add unit tests for container.go
2021-09-16 11:02:16 +02:00
Samuel Ortiz
9ed024e0bf Merge pull request #2649 from likebreath/0916/clh_hugepages
runtime: clh: Enable hugepages support
2021-09-16 10:57:34 +02:00
David Gibson
b46adbc527 Merge pull request #2428 from dgibson/simplify-mount-storage
agent: Simplify mount point creation
2021-09-16 14:43:29 +10:00
David Gibson
9d3cd9841f agent/mount: Remove unused ensure_destination_exists()
The only remaining callers of ensure_destination_exists() are in its own
unit tests.  So, just remove it.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-16 12:24:47 +10:00
David Gibson
64aa562355 agent: Correct mount point creation
mount_storage() first makes sure the mount point for the storage volume
exists.  It uses fs::create_dir_all() in the case of 9p or virtiofs volumes
otherwise ensure_destination_exists().  But.. ensure_destination_exists()
boils down to an fs::create_dir_all() in most cases anyway.  The only case
it doesn't is for a bind fstype, where it creates a file instead of a
directory.  But, that's not correct anyway because we need to create either
a file or a directory depending on the source of the bind mount, which
ensure_destination_exists() doesn't know.

The 9p/virtiofs paths also check if the mountpoint exists before calling
fs::create_dir_all(), which is unnecessary (fs::create_dir_all already
handles that case).

mount_storage() does have the information to know what we need to create,
so have it explicitly call ensure_destination_file_exists() for the bind
mount to a non-directory case, and fs::create_dir_all() in all other cases.

fixes #2390

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-16 12:24:47 +10:00
David Gibson
08d7aebc28 agent/mount: Split out regular file case from ensure_destination_exists()
ensure_destination_exists() can create either a directory or a regular file
depending on the arguments.  This patch extracts the regular file specific
option into its own helper: ensure_destination_file_exists().  This:
 - Avoids doing some steps in the directory case (they're already handled
   by create_dir_all())
 - Enables some further future cleanups

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-16 12:24:47 +10:00
David Gibson
9fa3beff4f agent: Remove unnecessary BareMount structure
struct Baremount contains the information necessary to make a new mount.
As a datastructure, however, it's pointless, since every user just
constructs it, immediately calls the BareMount::mount() method then
discards the structure.

Simplify the code by making this a direct function call baremount().

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-16 12:24:47 +10:00
David Gibson
49282854f1 agent: Simplify BareMount::mount by using nix::mount::mount
BareMount::mount does some complicated marshalling and uses unsafe code to
call into the mount(2) system call.  However, we're already using the nix
crate which provides a more Rust-like wrapper for mount(2).  We're even
already using nix::mount::umount and nix::mount::MsFlags from the same
module.

In the same way, we can replace the direct usage of libc::umount() with
nix::mount::umount() in one of the tests.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-16 12:24:47 +10:00
David Gibson
bac849ecba Merge pull request #2634 from dgibson/newer-rust
versions: Allow newer Rust versions
2021-09-16 12:23:37 +10:00
Bo Chen
d00decc97d runtime: clh: Enable hugepages support
This patch adds the configuration option that allows to use hugepages
with Cloud Hypervisor guests.

Fixes: #2648

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-09-15 10:43:57 -07:00
GabyCT
2a26c2397d Merge pull request #2645 from dgibson/query-cpus
runtime/qemu: Move from query-cpus to query-cpus-fast
2021-09-15 10:35:03 -05:00
David Gibson
64bb803fcf runtime/qemu: Move from query-cpus to query-cpus-fast
We recently updated to using qemu-6.1 (from qemu 5.2).  Unfortunately one
breaking change in qemu 6.0 wasn't caught by the CI.

The query-cpus QMP command has been removed, replaced by query-cpus-fast
(which has been available since qemu 2.12).  govmm already had support for
query-cpus-fast, we just weren't using it, so the change is quite easy.

fixes #2643

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-15 16:41:26 +10:00
David Gibson
e7deee948a Merge pull request #2502 from dgibson/qemu-6.1
Update Kata to use qemu-6.1
2021-09-15 11:06:14 +10:00
David Gibson
25ac3524c9 versions: Allow newer Rust versions
Rust 1.47.0 which is the latest we note as tested in versions.yaml is now
getting fairly old - many current distros have newer versions (e.g.
Rust 1.54.0 in Fedora 34).  Bring this more up to date.

Note that this is only updating the 'newest-version', not the minimum
required version.

The new version changes the name of the 'clippy::unknown_clipp_lints'
option to simply 'unknown_lints' so we need to change that as well to avoid
warnings.

fixes #2633

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-15 08:58:28 +10:00
Eric Ernst
e4cb6cbfbb Merge pull request #2332 from sameo/topic/host-cgroups
Host cgroups improvements and simplifications
2021-09-14 09:09:10 -07:00
David Gibson
851d5f8613 tests: Correct heading in static checks test
The github static checks action has a section heading called "Building
rust".  It doesn't actually build rust, though, just installs it with
rustup.  Correct the misleading message.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-14 20:17:27 +10:00
Tim Zhang
842c76cb40 Merge pull request #2359 from teawater/swap_doc
Add doc for guest swap
2021-09-14 15:54:10 +08:00
Samuel Ortiz
4b7e4a4c70 runtime: Vendoring update
Due to the libcontainer dependencies removal.

Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
2021-09-14 07:09:34 +02:00
Samuel Ortiz
8d9d6e6af0 docs: Host cgroups documentation update
Update according to the new sandbox/overhead cgroup split.

Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
2021-09-14 07:09:34 +02:00
Samuel Ortiz
9bed2ade0f virtcontainers: Convert to the new cgroups package API
The new API is based on containerd's cgroups package.
With that conversion we can simpligy the virtcontainers sandbox code and
also uniformize our cgroups external API dependency. We now only depend
on containerd/cgroups for everything cgroups related.

Depends-on: github.com/kata-containers/tests#3805
Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-09-14 07:09:34 +02:00
Samuel Ortiz
b42ed39349 virtcontainers: cgroups: Add a containerd API based cgroups package
Eventually, we will convert the virtcontainers and the whole Kata
runtime code base to only rely on that package.

This will make Kata only depends on the simpler containerd cgroups API.

Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
2021-09-14 07:09:34 +02:00
Samuel Ortiz
f17752b0dc virtcontainers: container: Do not create and manage container host cgroups
The only process we are adding there is the container host one, and
there is no such thing anymore.

Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
2021-09-14 07:09:33 +02:00
Samuel Ortiz
dc7e9bce73 virtcontainers: sandbox: Host cgroups partitioning
This is a simplification of the host cgroup handling by partitioning the
host cgroups into 2: A sandbox cgroup and an overhead cgroup.

The sandbox cgroup is always created and initialized. The overhead
cgroup is only available when sandbox_cgroup_only is unset, and is
unconstrained on all controllers. The goal of having an overhead cgroup
is to be more flexible on how we manage a pod overhead. Having such
cgroup will allow for setting a fixed overhead per pod, for a subset of
controllers, while at the same time not having the pod being accounted
for those resources.

When sandbox_cgroup_only is not set, we move all non vCPU threads
to the overhead cgroup and let them run unconstrained. When it is set,
all pod related processes and threads will run in the sandbox cgroup.

Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
2021-09-14 07:09:29 +02:00
Samuel Ortiz
f811026c77 virtcontainers: Unconditionally create the sandbox cgroup manager
Regardless of the sandbox_cgroup_only setting, we create the sandbox
cgroup manager and set the sandbox cgroup path at the same time.

Without doing this, the hypervisor constraint routine is mostly a NOP as
the sandbox state cgroup path is not initialized.

Fixes #2184

Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
2021-09-14 07:05:57 +02:00
wangyongchao.bj
a6066404f7 virtcontainers: update VC HypervisorConfig API add three lost fields
Sync the virtcontainers api.md document, add `ConfidentialGuest` `EntropySourceList` `GuestSwap` three
 fields to the HypervisorConfig API.

Fixes #2625

Signed-off-by: wangyongchao.bj <wangyongchao.bj@inspur.com>
2021-09-14 10:42:54 +08:00
wangyongchao.bj
bb18cd475c virtcontainers: update VC SandboxConfig API add SandboxBindMounts field
sync the virtcontainers api.md document, add SandboxBindMounts field to the SandboxConfig API.
And update the order of the SandboxConfig API fields.

Fixes #2621

Signed-off-by: wangyongchao.bj <wangyongchao.bj@inspur.com>
2021-09-14 09:56:47 +08:00
Eric Ernst
967db0cbcc Merge pull request #2544 from likebreath/0831/upgrade_clh_v18.0
versions: Upgrade to Cloud Hypervisor v18.0
2021-09-13 11:27:45 -07:00
Fabiano Fidêncio
9381f23ccf Merge pull request #2613 from sameo/topic/runtime-readme
runtime: Fix README link
2021-09-13 17:44:56 +02:00
Binbin Zhang
58e77a3c13 sandbox: Allow the device to be accessed,such as /dev/null and /dev/urandom
If the device has no permission, such as /dev/null, /dev/urandom,
it needs to be added into cgroup.

Fixes: #2615

Signed-off-by: Binbin Zhang <binbin36520@gmail.com>
2021-09-13 20:47:16 +08:00
Samuel Ortiz
057eb80ac9 Merge pull request #2596 from jongwu/qemu_mak
qemu: remove default config for arm64.
2021-09-13 11:23:35 +02:00
Samuel Ortiz
75ef8c243a Merge pull request #2603 from Bevisy/main-2539
sandbox: Add device permissions such as /dev/null to cgroup
2021-09-13 11:04:51 +02:00
Samuel Ortiz
62a1a6f827 Merge pull request #2593 from nubificus/fix_fc_vcpu_thread
virtcontainers: fc: parse vcpuID correctly
2021-09-13 09:23:53 +02:00
Hui Zhu
d67a414b2b src/runtime/README.md: Fix URL of Licence
Fix URL of Licence of src/runtime/README.md.

Fixes: #2326

Signed-off-by: Hui Zhu <teawater@gmail.com>
2021-09-13 09:11:42 +08:00
Samuel Ortiz
13b8bb0c74 runtime: Fix README link
The LICENSE file lives in the project's root.

Fixes #2612

Signed-off-by: Samuel Ortiz <s.ortiz@apple.com>
2021-09-11 09:44:40 +02:00
David Gibson
25670d3058 packaging/qemu: Update qemu-exerimental version to v6.1.0
This brings it back into line with the normal qemu version.  We refer to
v6.1.0 by full SHA in versions.yaml, rather than the tag, so that
apply_patches.sh sees it as different and applies the virtiofs DAX patches
which is what the experimental version is actually about having.

The virtiofs DAX patches themselves are updated to the version from
https://gitlab.com/virtio-fs/qemu, virtio-fs-dev branch as of commit
3620cb0a.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-11 16:43:26 +10:00
David Gibson
041a513f80 versions: Update qemu to v6.1.0
We need qemu-6.1 for ACPI PCI hotplug support for the q35 machine.  At the
moment qemu will use SHPC hotplug under the PCIe to PCI bridge on q35.
SHPC is too slow to use for our purposes (it requires a 5s delay).

Update the qemu version to v6.1.0.  This leaves the experimental version
*older* than the normal version, but we'll fix that up later.

We also need to tweak the snapcraft.yaml, since the location for configs
has changed in the new qemu version.

fixes #1691

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-11 16:43:22 +10:00
Anastassios Nanos
62baa48ef5 virtcontainers: fc: parse vcpuID correctly
In getThreadIDs(), the cpuID variable is derived from a string that
already contains a whitespace. As a result, strings.SplitAfter returns
the cpuID with a leading space. This makes any go variant of string to int
fail (strconv.ParseInt() in our case). This patch makes sure that the
leading space character is removed so the string passed to
strconv.ParseInt() is "CPUID" and not " CPUID".

This has been caused by a change in the naming scheme of vcpu threads
for Firecracker after v0.19.1.

Fixes: #2592

Signed-off-by: Anastassios Nanos <ananos@nubificus.co.uk>
2021-09-10 09:39:56 +00:00
David Gibson
81de2d476b packaging: Correct error message in apply_patches.sh
If the script doesn't find a patches directory it expects, it gives an
error saying to create a dummy 'no_patches' file if you really don't want
any patches applied for that version.

But actual practice in the tree is to call the dummy file 'no_patches.txt'
rather than simply 'no_patches'.  Correct the message to match existing
practice.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-10 11:19:10 +10:00
Bo Chen
f785ff0bf2 virtcontainers: clh: Revert the workaround incorrect default values
Given the fix to the bugs of the openapi spec file is included in the
Cloud Hypervisor v18.0 [1], this patch reverts the workaround we carried
in the CLH driver.

This reverts commit 932ee41b3f.

[1] https://github.com/cloud-hypervisor/cloud-hypervisor/pull/3029

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-09-09 14:52:53 -07:00
Bo Chen
0e0e59dc5f virtcontainers: clh: Re-generate the client code
This patch re-generates the client code for Cloud Hypervisor v18.0.
Note: The client code of cloud-hypervisor's (CLH) OpenAPI is
automatically generated by openapi-generator [1-2].

[1] https://github.com/OpenAPITools/openapi-generator
[2] https://github.com/kata-containers/kata-containers/blob/main/src/runtime/virtcontainers/pkg/cloud-hypervisor/README.md

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-09-09 14:51:55 -07:00
Bo Chen
f0b5331430 versions: Upgrade to Cloud Hypervisor v18.0
Highlights from the Cloud Hypervisor release v18.0: 1) Experimental User
Device (vfio-user) support; 2) Migration support for vhost-user devices;
3) VHDX disk image support; 4) Device pass through on MSHV hypervisor;
5) AArch64 for support virtio-mem; 6) Live migration on MSHV hypervisor;
7) AArch64 CPU topology support; 8) Power button support on AArch64; 9)
Various bug fixes on PTY, TTY, signal handling, and live-migration on
AArch64.

Details can be found: https://github.com/cloud-hypervisor/cloud-hypervisor/releases/tag/v18.0

Fixes: #2543

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-09-09 14:44:04 -07:00
Carlos Venegas
230eae3ff3 Merge pull request #2417 from jcvenegas/docker-build-fixes
kata-tarball: Build and test fixes
2021-09-09 14:14:26 -05:00
Carlos Venegas
116521367e actions: test make kata-tarball
make kata-tarball is the main way to
build a kata in a single host. Lets
test it to make sure it works on every PR.

Fixes: #2416

Signed-off-by: Carlos Venegas <jose.carlos.venegas.munoz@intel.com>
2021-09-09 14:32:08 +00:00
Carlos Venegas
626d659fd9 actions: kata-deploy on PRs and use makefile
- Run kata-deploy tarball generation action on every PR.

- Use kata-deploy makefile targets.

Signed-off-by: Carlos Venegas <jose.carlos.venegas.munoz@intel.com>
2021-09-09 14:32:08 +00:00
Carlos Venegas
78d99f5129 kata-deploy: Make verbose single builds
If a binary tarball for a single component is done,
the logs will be shown in stdout.

e.g.

make kernel-tarball

To build all a the same time still store logs in files.

make kata-tarball

Signed-off-by: Carlos Venegas <jose.carlos.venegas.munoz@intel.com>
2021-09-09 14:32:08 +00:00
Carlos Venegas
59486b855a kata-deploy: Add tarball suffix to makefile targets
Now that local-build kata-deploy makefile is inlucded in toplevel
makefile, lets use the suffix `-tarball` to avoid name collitions
and identify the tarball releted targets.

Signed-off-by: Carlos Venegas <jose.carlos.venegas.munoz@intel.com>
2021-09-09 14:32:08 +00:00
Carlos Venegas
96e1246bce makefile: Include kata-deploy targets
Use kata-deploy targets from toplevel.
This will help if want to build and
reinstall just one single kata component.

Signed-off-by: Carlos Venegas <jose.carlos.venegas.munoz@intel.com>
2021-09-09 14:32:08 +00:00
Fupan Li
a2db68e347 Merge pull request #2565 from liubin/fix/2446-run-tests-by-root
test: enable running tests under root user
2021-09-09 17:45:35 +08:00
Hui Zhu
74d645cd21 how-to: Add how-to-setup-swap-devices-in-guest-kernel.md
Add how-to-setup-swap-devices-in-guest-kernel.md to how-to to introduce
how to setup swap device in guest kernel.

Fixes: #2326

Signed-off-by: Hui Zhu <teawater@antfin.com>
2021-09-09 17:24:24 +08:00
Fabiano Fidêncio
f28e6e506f Merge pull request #2585 from Bevisy/main-2584
osbuilder: Change to "=" operator to make script more portable
2021-09-09 10:53:47 +02:00
Amulyam24
d865c80986 virtcontainers: add unit tests for container.go
Fixes: #268

Signed-off-by: Amulyam24 <amulmek1@in.ibm.com>
2021-09-09 13:09:38 +05:30
Binbin Zhang
71f915c63f sandbox: Add device permissions such as /dev/null to cgroup
adds the default devices for unix such as /dev/null, /dev/urandom to
the container's resource cgroup spec

Fixes: #2539

Signed-off-by: Binbin Zhang <binbin36520@gmail.com>
2021-09-09 15:33:24 +08:00
Hui Zhu
2174fee48d docs: Add swap annotations introduction
Add swap annotations introduction of
`io.katacontainers.config.hypervisor.enable_guest_swap`,
`io.katacontainers.container.resource.swappiness"` and
`io.katacontainers.container.resource.swap_in_bytes"` to
how-to-set-sandbox-config-kata.md.

Fixes: #2326

Signed-off-by: Hui Zhu <teawater@antfin.com>
2021-09-09 15:28:51 +08:00
bin
2abc450a4d test: enable running tests under root user
Add tests that run under root user to test special cases.

Fixes: #2446

Signed-off-by: bin <bin@hyper.sh>
2021-09-09 14:21:34 +08:00
Binbin Zhang
924a68d08d osbuilder: Change to "=" operator to make script more portable
zsh doesn't support "==" as equal comparison operator, so
replace "==" with "=" to make the script more portable

Fixes: #2584

Signed-off-by: Binbin Zhang <binbin36520@gmail.com>
2021-09-08 16:57:34 +08:00
Jianyong Wu
1fff9be707 qemu: remove default config for arm64.
The current default config in qemu for arm64 doesn't suit for qemu
version 5.1+, so remove them here.

Fixes: #2595
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2021-09-08 16:25:22 +08:00
Julio Montes
9bbaa66f39 Merge pull request #2480 from Bevisy/main
makefile: Fix error exit status code
2021-09-06 07:28:15 -05:00
Bin Liu
3c9170ea0d Merge pull request #2561 from rapiz1/patch-4
osbuilder: fix inconsistent calculation of fs size
2021-09-04 15:14:07 +08:00
Bin Liu
103fdd3f6c Merge pull request #2564 from Bevisy/main-2296
virtcontainers: Remove NewStoreFeature
2021-09-03 10:41:21 +08:00
James O. D. Hunt
c64867ad34 Merge pull request #2559 from dgibson/snapbranch
snap: Test variable instead of executing "branch"
2021-09-02 15:42:28 +01:00
James O. D. Hunt
f3a1bf3b45 Merge pull request #2552 from bergwolf/license
license: drop redundent license files
2021-09-02 14:31:18 +01:00
Binbin Zhang
e2a9e78c9e virtcontainers: Remove NewStoreFeature
remove NewStoreFeature

Fixes: #2296

Signed-off-by: Binbin Zhang <binbin36520@gmail.com>
2021-09-02 21:28:36 +08:00
Yujia Qiao
bfcee91164 osbuilder: fix inconsistent calculation of fs size
This patch fixes inconsistent calculations of the rootfs size.
For `du` and `df`, `-B 1MB` is different from `-BM`. The
former is the power of 1000, and the latter is the power of
1024. So comparing them doesn't make sense. The bug may result
in a larger image than needed.

Fixes: #2560

Signed-off-by: Yujia Qiao <rapiz3142@gmail.com>
2021-09-02 16:00:29 +08:00
David Gibson
4996f9b7da snap: Test variable instead of executing "branch"
In snapcraft.yaml we have a case statement on $(branch) - that is on the
output of executing a command "branch".  From the selections it appears
that what it actually wants is to simply select on the contents of the
$branch variable, which should be ${branch} instead.

fixes #2558

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-02 14:38:15 +10:00
Julio Montes
716ab32acf Merge pull request #2551 from teawater/fix_swap
Fix swap fail insert fail issue
2021-09-01 08:25:27 -05:00
Fabiano Fidêncio
fee6f49d38 Merge pull request #2540 from fidencio/2.3.0-alpha0-branch-bump
# Kata Containers 2.3.0-alpha0
2021-09-01 09:13:52 +02:00
Peng Tao
256c3b2747 license: drop redundent license files
There is no need to keep multiple copies of the license file in
different directory. We can just use the top level one for the project.

Fixes: #2553
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-09-01 15:10:04 +08:00
Hui Zhu
bcc9fa3b35 hotplugAddBlockDevice: Use ExecuteBlockdevAddWithDriverCache with swap
Use ExecuteBlockdevAddWithDriverCache with swap in
hotplugAddBlockDevice to handle swap file cannot work OK with
ExecuteBlockdevAddWithCache issue.

Fixes: #2548

Signed-off-by: Hui Zhu <teawater@antfin.com>
2021-09-01 14:13:11 +08:00
Hui Zhu
bd85da0461 vendor: Update vendor/github.com/kata-containers/govmm
Update vendor/github.com/kata-containers/govmm for
ExecuteBlockdevAddWithDriverCache.

Fixes: #2548

Signed-off-by: Hui Zhu <teawater@antfin.com>
2021-09-01 13:59:19 +08:00
Fabiano Fidêncio
18c95b9ab1 release: Kata Containers 2.3.0-alpha0
- tracing: Change runtime tracing tags to vars
- shimv2: add logging to shimv2 api calls
- drop qemu-lite support
- runtime: delete types or const that no longer needed
- runtime: Optimize the way slice created
- virtcontainers: simplify tests
- virtcontainers: clh: Upgrade to the openapi-generator v5.2.1
- build_image: Fix error soft link about initrd.img
- ci: Temporarily skip agent shutdown test on s390x
- Fix version parsing for firecracker version 0.25 and over
- Osbuilder fixes
- docs: update the GoDoc url from runtime project to kata-containers/sr…
- docs: update `how-to` README file for Firecracker config
- ci/openshift-ci: Pull centos from registry.centos.org
- docs: update containerd CRI plugin url

2250360b docs: remove mentioning of qemu-lite
a9de761d runtime: drop qemu-lite support
8ae3edbc runtime: fix default hypervisor path
0c7789fa runtime: Add container field to logs
72e3538e shimv2: add information to method comment
8dadca9c shimv2: add logging to shimv2 api calls
a99fcc3a virtcontainers: simplify tests
39ffd8ee runtime: delete types or const that no longer needed
ff37f5c7 runtime: Optimize the way slice created
8f0f949a tracing: Move dynamically added attributes to Trace()
932ee41b virtcontainers: clh: Workaround incorrect default values
bff38e4f virtcontainers: clh: Fix the unit test
d967d3cb virtcontainers: clh: Use constructors to ensure proper default value
87de26bd tracing: Modify Trace() to accept multiple tag maps
8058e972 tracing: Change runtime tracing tags to vars
a6a2e525 virtcontainers: clh: Migrate to use the updated client APIs
9de1129b osbuilder: Fix rootfs-builder when running in VMs
65a1e131 osbuilder: Allow running the tool several times
a4214738 osbuilder: Fix Makefile
b8717f35 ci: Temporarily skip agent shutdown test on s390x
938981be build_image: Fix error soft link about initrd.img
2304f935 docs: update the GoDoc url from kata 1.x to 2.x
2a614577 docs: update `how-to` README file for Firecracker config
486baba7 docs: update containerd CRI plugin url
46eb07e1 virtcontainers: clh: Re-generate the client code
80fba4d6 virtcontainers: clh: Upgrade to the openapi-generator v5.2.1
8594f80c ci/openshift-ci: Pull centos from registry.centos.org
87bbae1b fc: fix version parsing for fc >= 0.25

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-08-31 18:43:09 +02:00
Peng Tao
c0daa4ebff Merge pull request #2513 from cmaf/tracing-tracingtags-consistency
tracing: Change runtime tracing tags to vars
2021-08-31 10:25:10 +08:00
Fabiano Fidêncio
67d1f4fd14 Merge pull request #2528 from snir911/main_debuggabillity_sq
shimv2: add logging to shimv2 api calls
2021-08-30 15:50:55 +02:00
Samuel Ortiz
87152fffcb Merge pull request #2530 from bergwolf/qemu-lite
drop qemu-lite support
2021-08-30 14:12:08 +02:00
Peng Tao
2250360b56 docs: remove mentioning of qemu-lite
vm-templating should just work with upstream qemu v4.1.0 or above.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-08-30 16:58:12 +08:00
Peng Tao
a9de761d71 runtime: drop qemu-lite support
As the project is not maintained and we have not been testing against it
for a long time.

Fixes: #2529
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-08-30 16:58:12 +08:00
Peng Tao
8ae3edbc18 runtime: fix default hypervisor path
Should not be qemu-lite.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-08-30 16:09:02 +08:00
Snir Sheriber
0c7789fad6 runtime: Add container field to logs
and unified field naming

Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2021-08-30 10:09:05 +03:00
Snir Sheriber
72e3538e36 shimv2: add information to method comment
add a comment to explicitly mentioned method is a binary call

Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2021-08-30 10:09:05 +03:00
Snir Sheriber
8dadca9cd1 shimv2: add logging to shimv2 api calls
and also fetch and log container id from the request

Fixes: #2527
Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2021-08-30 10:09:05 +03:00
Bo Chen
b564dd47b6 Merge pull request #2526 from Bevisy/main-2285
runtime: delete types or const that no longer needed
2021-08-29 15:35:03 -07:00
Bin Liu
a89cc0bb5c Merge pull request #2524 from Bevisy/main-2264
runtime: Optimize the way slice created
2021-08-29 16:00:08 +08:00
Eric Ernst
8771d8c375 Merge pull request #2514 from rapiz1/improve-util-test
virtcontainers: simplify tests
2021-08-28 06:41:15 -07:00
Yujia Qiao
a99fcc3af1 virtcontainers: simplify tests
Simplify tests in utils_test.go by table-driven tests.

Fixes: #2281

Signed-off-by: Yujia Qiao <rapiz3142@gmail.com>
2021-08-28 12:35:25 +08:00
Binbin Zhang
39ffd8ee84 runtime: delete types or const that no longer needed
type: ProcessListOptions; ProcessList
const: SocketTypeVSOCK

Fixes: #2285

Signed-off-by: Binbin Zhang <binbin36520@gmail.com>
2021-08-28 04:09:25 +00:00
Binbin Zhang
ff37f5c798 runtime: Optimize the way slice created
Initialize and assign a value, reducing one append operation

Fixes: #2264

Signed-off-by: Binbin Zhang <binbin36520@gmail.com>
2021-08-28 04:15:59 +08:00
Carlos Venegas
fb583780f6 Merge pull request #2488 from likebreath/0823/clh_openapi_generator
virtcontainers: clh: Upgrade to the openapi-generator v5.2.1
2021-08-27 14:28:09 -05:00
Binbin Zhang
4751698829 virtcontainers: Fix incorrect scripts path
modify to the correct relative path

Fixes: #2515

Signed-off-by: Binbin Zhang <binbin36520@gmail.com>
2021-08-27 19:16:53 +00:00
Chelsea Mafrica
8f0f949abf tracing: Move dynamically added attributes to Trace()
Where possible, move attributes added with AddTag() to Trace() call to
reduce the amount of code used for tracing.

Fixes #2512

Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2021-08-27 08:26:40 -07:00
Bo Chen
932ee41b3f virtcontainers: clh: Workaround incorrect default values
Two default values defined in the 'cloud-hypervisor.yaml' have typo, and this
patch manually overwrites them with the correct value as a workaround
before the corresponding fix is landed to Cloud Hypervisor upstream.

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-08-26 22:53:31 -07:00
Bo Chen
bff38e4f4d virtcontainers: clh: Fix the unit test
This patch fixes the unit tests over clh.go with the updated client code.

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-08-26 22:53:17 -07:00
Bo Chen
d967d3cb37 virtcontainers: clh: Use constructors to ensure proper default value
With the updated openapi-generator, the client code now handles optional
attributes correctly, and ensures to assign the right default
values. This patch enables to use those constructors to make sure the
proper default values being used.

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-08-26 22:53:13 -07:00
Chelsea Mafrica
87de26bda3 tracing: Modify Trace() to accept multiple tag maps
The general Trace() function accepts one map as a set of tags. Modify it
to accept multiple sets of tags so that additional ones can be added at
Trace() and not as a subsequent call.

Additionally, we should not iterate over the maps unless tracing tracing
is enabled.

Fixes #2512

Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2021-08-26 15:55:32 -07:00
Chelsea Mafrica
8058e97212 tracing: Change runtime tracing tags to vars
Tracing tags are stored inconsistently throughout the runtime. Change
all instances of tracing tags to variables.

Fixes #2512

Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2021-08-26 15:55:32 -07:00
Bo Chen
a6a2e525de virtcontainers: clh: Migrate to use the updated client APIs
The client code (and APIs) for Cloud Hypervisor has been changed
dramatically due to the upgrade to `openapi-generator` v5.2.1. This
patch migrate the Cloud Hypervisor driver in the kata-runtime to use
those updated APIs.

The main change from the client code is that it now uses "pointer" type
to represent "optional" attributes from the input openapi specification
file.

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-08-26 14:04:18 -07:00
GabyCT
d490704133 Merge pull request #2504 from Bevisy/main-2503
build_image: Fix error soft link about initrd.img
2021-08-26 13:26:52 -05:00
GabyCT
a0dd840ef6 Merge pull request #2508 from Jakob-Naucke/s390x-skip-agent-shutdown-test
ci: Temporarily skip agent shutdown test on s390x
2021-08-26 13:25:48 -05:00
Chelsea Mafrica
0be91280f2 Merge pull request #2466 from Bl1tz23/main
Fix version parsing for firecracker version 0.25 and over
2021-08-26 08:51:18 -07:00
Marcel Apfelbaum
f62ea5dade Merge pull request #2511 from marcel-apf/osbuilder-fixes
Osbuilder fixes
2021-08-26 11:32:48 +03:00
Marcel Apfelbaum
9de1129bf7 osbuilder: Fix rootfs-builder when running in VMs
The script runs apt sync at some point which scans all possible fds
in order to close them. The operation is incredibly slow on VMs
and may lead to build timeouts.

Fix it by limiting the container runtime fds to a sane limit.

Fixes: #2510

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
2021-08-25 19:58:21 +00:00
Marcel Apfelbaum
65a1e13195 osbuilder: Allow running the tool several times
Once the ${ROOTFS_DIR} is created, the tool can't run the second
time since the directory is populated and the debootstrap tool
will fail.

Fix by deleting the contents of ${ROOTFS_DIR} if the directory exists.
Note that running make clean will also allow the re-run, it
is only an optimization for some cases the build fails in the middle.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
2021-08-25 19:49:14 +00:00
Marcel Apfelbaum
a4214738b9 osbuilder: Fix Makefile
Let the DISTRO variable to be set from outside,
allowing "sudo -E DISTRO=<ANY> make clean" to delete the correct files.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
2021-08-25 19:45:53 +00:00
GabyCT
a989238348 Merge pull request #2501 from YchauWang/wyc-godoc
docs: update the GoDoc url from runtime project to kata-containers/sr…
2021-08-25 10:49:26 -05:00
Jakob Naucke
b8717f35d7 ci: Temporarily skip agent shutdown test on s390x
see https://github.com/kata-containers/tests/issues/3878 for tracking

Fixes: #2507
Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-08-25 16:34:38 +02:00
Binbin Zhang
938981be1d build_image: Fix error soft link about initrd.img
fix error soft link about initrd.img

Fixes #2503

Signed-off-by: Binbin Zhang <binbin36520@gmail.com>
2021-08-25 16:00:55 +08:00
wangyongchao.bj
2304f935b4 docs: update the GoDoc url from kata 1.x to 2.x
the katatestutils GoDoc url stilled using the kata 1.x branch url. This PR fixed the
url from kata-containers/runtime/pkg/katatestutils to
kata-containers/kata-containers/src/runtime/pkg/katatestutils

Fixes: #2500

Signed-off-by: wangyongchao.bj <wangyongchao.bj@inspur.com>
2021-08-25 11:21:36 +08:00
Chelsea Mafrica
c5fea9ff70 Merge pull request #2493 from YchauWang/wyc-how-to-02
docs: update `how-to` README file for Firecracker config
2021-08-24 10:12:19 -07:00
Wainer Moschetta
f3539d2fb7 Merge pull request #2486 from wainersm/registry_centos
ci/openshift-ci: Pull centos from registry.centos.org
2021-08-24 11:41:57 -03:00
Samuel Ortiz
e26a140448 Merge pull request #2491 from YchauWang/wyc-how-to
docs: update containerd CRI plugin url
2021-08-24 11:37:18 +02:00
Yujia Qiao
814cea9601 virtcontainers: clean up useless code
Fixes: #2275

Signed-off-by: Yujia Qiao <rapiz3142@gmail.com>
2021-08-24 16:04:34 +08:00
wangyongchao.bj
2a614577fb docs: update how-to README file for Firecracker config
Remove the `Kata Containers with Firecracker` additional configuration steps.
From kata 2.x,  the config of `firecracker` is same to `qemu` and `cloud-hypervisor`.

Fixes: #2492

Signed-off-by: wangyongchao.bj <wangyongchao.bj@inspur.com>
2021-08-24 15:16:46 +08:00
wangyongchao.bj
486baba7fd docs: update containerd CRI plugin url
update cri plugin source path to containerd pkg in the
 how-to-use-k8s-with-cri-containerd-and-kata.md file. The cri project was moved to containerd project pkg directory.

Fixes: #2490

Signed-off-by: wangyongchao.bj <wangyongchao.bj@inspur.com>
2021-08-24 11:11:06 +08:00
Bo Chen
46eb07e14f virtcontainers: clh: Re-generate the client code
This patch re-generates the client code for Cloud Hypervisor with the
updated `openapi-generator` v5.2.1.

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-08-23 16:00:32 -07:00
Bo Chen
80fba4d637 virtcontainers: clh: Upgrade to the openapi-generator v5.2.1
To improve the quality and correctness of the auto-generated code, this
patch upgrade the `openapi-generator` to its latest stable release
v5.2.1.

Fixes: #2487

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-08-23 15:59:41 -07:00
Wainer dos Santos Moschetta
8594f80c0a ci/openshift-ci: Pull centos from registry.centos.org
In order to avoid hit the pull requests limit of docker.io, this changed the
openshift-ci/images/Dockerfile.buildroot dockerfile to pull the centos image
from registry.centos.org.

Fixes #1636

Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-08-23 15:21:10 -03:00
Bl1tz23
87bbae1bd7 fc: fix version parsing for fc >= 0.25
Allows to use firecracker version >=0.25.

Fixes: #2471

Signed-off-by: Bl1tz23 <alex3angle@gmail.com>
2021-08-23 15:09:59 +03:00
Binbin Zhang
d422789fac makefile: Fix error exit status code
Generate `config-generated.go` file under src/runtime/cli/containerd-shim-kata-v2 before excuting test or coverage.

Fixes #2479

Signed-off-by: Binbin Zhang <binbin36520@gmail.com>
2021-08-23 11:31:33 +08:00
657 changed files with 24679 additions and 52033 deletions

View File

@@ -1,6 +1,6 @@
name: kata-deploy-build
name: kata deploy build
on: push
on: [push, pull_request]
jobs:
build-asset:
@@ -9,6 +9,7 @@ jobs:
matrix:
asset:
- kernel
- kernel-experimental
- shim-v2
- qemu
- cloud-hypervisor
@@ -24,7 +25,7 @@ jobs:
- name: Build ${{ matrix.asset }}
run: |
./tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh --build="${KATA_ASSET}"
make "${KATA_ASSET}-tarball"
build_dir=$(readlink -f build)
# store-artifact does not work with symlink
sudo cp -r --preserve=all "${build_dir}" "kata-build"
@@ -47,12 +48,21 @@ jobs:
uses: actions/download-artifact@v2
with:
name: kata-artifacts
path: kata-artifacts
path: build
- name: merge-artifacts
run: |
./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-artifacts
make merge-builds
- name: store-artifacts
uses: actions/upload-artifact@v2
with:
name: kata-static-tarball
path: kata-static.tar.xz
make-kata-tarball:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: make kata-tarball
run: |
make kata-tarball
sudo make install-tarball

View File

@@ -100,10 +100,14 @@ jobs:
run: |
# tag the container image we created and push to DockerHub
tag=$(echo $GITHUB_REF | cut -d/ -f3-)
docker tag katadocker/kata-deploy-ci:${{steps.build-and-push-kata-deploy-ci.outputs.PKG_SHA}} katadocker/kata-deploy:${tag}
docker tag quay.io/kata-containers/kata-deploy-ci:${{steps.build-and-push-kata-deploy-ci.outputs.PKG_SHA}} quay.io/kata-containers/kata-deploy:${tag}
docker push katadocker/kata-deploy:${tag}
docker push quay.io/kata-containers/kata-deploy:${tag}
tags=($tag)
tags+=($([[ "$tag" =~ "alpha"|"rc" ]] && echo "latest" || echo "stable"))
for tag in ${tags[@]}; do \
docker tag katadocker/kata-deploy-ci:${{steps.build-and-push-kata-deploy-ci.outputs.PKG_SHA}} katadocker/kata-deploy:${tag} && \
docker tag quay.io/kata-containers/kata-deploy-ci:${{steps.build-and-push-kata-deploy-ci.outputs.PKG_SHA}} quay.io/kata-containers/kata-deploy:${tag} && \
docker push katadocker/kata-deploy:${tag} && \
docker push quay.io/kata-containers/kata-deploy:${tag}; \
done
upload-static-tarball:
needs: kata-deploy
@@ -127,3 +131,21 @@ jobs:
pushd $GITHUB_WORKSPACE
echo "uploading asset '${tarball}' for tag: ${tag}"
GITHUB_TOKEN=${{ secrets.GIT_UPLOAD_TOKEN }} hub release edit -m "" -a "${tarball}" "${tag}"
popd
upload-cargo-vendored-tarball:
needs: upload-static-tarball
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: generate-and-upload-tarball
run: |
pushd $GITHUB_WORKSPACE/src/agent
cargo vendor >> .cargo/config
popd
tag=$(echo $GITHUB_REF | cut -d/ -f3-)
tarball="kata-containers-$tag-vendor.tar.gz"
pushd $GITHUB_WORKSPACE
tar -cvzf "${tarball}" src/agent/.cargo/config src/agent/vendor
GITHUB_TOKEN=${{ secrets.GIT_UPLOAD_TOKEN }} hub release edit -m "" -a "${tarball}" "${tag}"
popd

View File

@@ -12,8 +12,7 @@ on:
- reopened
- labeled
- unlabeled
pull_request:
branches:
branches:
- main
jobs:
@@ -32,8 +31,6 @@ jobs:
- name: Checkout code to allow hub to communicate with the project
uses: actions/checkout@v2
with:
token: ${{ secrets.KATA_GITHUB_ACTIONS_TOKEN }}
- name: Install porting checker script
run: |

View File

@@ -60,7 +60,7 @@ jobs:
cd ${GOPATH}/src/github.com/${{ github.repository }} && ./ci/setup.sh
env:
GOPATH: ${{ runner.workspace }}/kata-containers
- name: Building rust
- name: Installing rust
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
run: |
cd ${GOPATH}/src/github.com/${{ github.repository }} && ./ci/install_rust.sh
@@ -84,3 +84,7 @@ jobs:
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
run: |
cd ${GOPATH}/src/github.com/${{ github.repository }} && make test
- name: Run Unit Tests As Root User
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
run: |
cd ${GOPATH}/src/github.com/${{ github.repository }} && sudo -E PATH="$PATH" make test

View File

@@ -18,6 +18,7 @@ TOOLS += agent-ctl
STANDARD_TARGETS = build check clean install test vendor
include utils.mk
include ./tools/packaging/kata-deploy/local-build/Makefile
all: build
@@ -33,10 +34,4 @@ generate-protocols:
static-checks: build
bash ci/static-checks.sh
binary-tarball:
make -f ./tools/packaging/kata-deploy/local-build/Makefile
install-binary-tarball:
make -f ./tools/packaging/kata-deploy/local-build/Makefile install
.PHONY: all default static-checks binary-tarball install-binary-tarball

View File

@@ -1 +1 @@
2.2.0-rc0
2.3.0-alpha2

View File

@@ -4,6 +4,6 @@
#
# This is the build root image for Kata Containers on OpenShift CI.
#
FROM centos:8
FROM registry.centos.org/centos:8
RUN yum -y update && yum -y install git sudo wget

View File

@@ -8,10 +8,14 @@
set -e
cidir=$(dirname "$0")
source "${cidir}/lib.sh"
export CI_JOB="${CI_JOB:-}"
clone_tests_repo
pushd ${tests_repo_dir}
.ci/run.sh
tracing/test-agent-shutdown.sh
# temporary fix, see https://github.com/kata-containers/tests/issues/3878
if [ "$(uname -m)" != "s390x" ] && [ "$CI_JOB" == "CRI_CONTAINERD_K8S_MINIMAL" ]; then
tracing/test-agent-shutdown.sh
fi
popd

View File

@@ -1,201 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@@ -40,6 +40,7 @@ Documents that help to understand and contribute to Kata Containers.
* [Kata Containers Architecture](design/architecture.md): Architectural overview of Kata Containers
* [Kata Containers E2E Flow](design/end-to-end-flow.md): The entire end-to-end flow of Kata Containers
* [Kata Containers design](./design/README.md): More Kata Containers design documents
* [Kata Containers threat model](./threat-model/threat-model.md): Kata Containers threat model
### How to Contribute

View File

@@ -14,7 +14,7 @@ through the [CRI-O\*](https://github.com/kubernetes-incubator/cri-o) and
Kata Containers creates a QEMU\*/KVM virtual machine for pod that `kubelet` (Kubernetes) creates respectively.
The [`containerd-shim-kata-v2` (shown as `shimv2` from this point onwards)](../../src/runtime/containerd-shim-v2)
The [`containerd-shim-kata-v2` (shown as `shimv2` from this point onwards)](../../src/runtime/cmd/containerd-shim-kata-v2/)
is the Kata Containers entrypoint, which
implements the [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2) for Kata.
@@ -259,7 +259,7 @@ With `RuntimeClass`, users can define Kata Containers as a `RuntimeClass` and th
## DAX
Kata Containers utilizes the Linux kernel DAX [(Direct Access filesystem)](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/filesystems/dax.txt)
Kata Containers utilizes the Linux kernel DAX [(Direct Access filesystem)](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/filesystems/dax.rst?h=v5.14)
feature to efficiently map some host-side files into the guest VM space.
In particular, Kata Containers uses the QEMU NVDIMM feature to provide a
memory-mapped virtual device that can be used to DAX map the virtual machine's

View File

@@ -12,187 +12,244 @@ The OCI [runtime specification][linux-config] provides guidance on where the con
> [`cgroupsPath`][cgroupspath]: (string, OPTIONAL) path to the cgroups. It can be used to either control the cgroups
> hierarchy for containers or to run a new process in an existing container
cgroups are hierarchical, and this can be seen with the following pod example:
Cgroups are hierarchical, and this can be seen with the following pod example:
- Pod 1: `cgroupsPath=/kubepods/pod1`
- Container 1:
`cgroupsPath=/kubepods/pod1/container1`
- Container 2:
`cgroupsPath=/kubepods/pod1/container2`
- Container 1: `cgroupsPath=/kubepods/pod1/container1`
- Container 2: `cgroupsPath=/kubepods/pod1/container2`
- Pod 2: `cgroupsPath=/kubepods/pod2`
- Container 1:
`cgroupsPath=/kubepods/pod2/container2`
- Container 2:
`cgroupsPath=/kubepods/pod2/container2`
- Container 1: `cgroupsPath=/kubepods/pod2/container2`
- Container 2: `cgroupsPath=/kubepods/pod2/container2`
Depending on the upper-level orchestrator, the cgroup under which the pod is placed is
managed by the orchestrator. In the case of Kubernetes, the pod-cgroup is created by Kubelet,
while the container cgroups are to be handled by the runtime. Kubelet will size the pod-cgroup
based on the container resource requirements.
Depending on the upper-level orchestration layers, the cgroup under which the pod is placed is
managed by the orchestrator or not. In the case of Kubernetes, the pod cgroup is created by Kubelet,
while the container cgroups are to be handled by the runtime.
Kubelet will size the pod cgroup based on the container resource requirements, to which it may add
a configured set of [pod resource overheads](https://kubernetes.io/docs/concepts/scheduling-eviction/pod-overhead/).
Kata Containers introduces a non-negligible overhead for running a sandbox (pod). Based on this, two scenarios are possible:
1) The upper-layer orchestrator takes the overhead of running a sandbox into account when sizing the pod-cgroup, or
2) Kata Containers do not fully constrain the VMM and associated processes, instead placing a subset of them outside of the pod-cgroup.
Kata Containers introduces a non-negligible resource overhead for running a sandbox (pod). Typically, the Kata shim,
through its underlying VMM invocation, will create many additional threads compared to process based container runtimes:
the para-virtualized I/O back-ends, the VMM instance or even the Kata shim process, all of those host processes consume
memory and CPU time not directly tied to the container workload, and introduces a sandbox resource overhead.
In order for a Kata workload to run without significant performance degradation, its sandbox overhead must be
provisioned accordingly. Two scenarios are possible:
Kata Containers provides two options for how cgroups are handled on the host. Selection of these options is done through
the `SandboxCgroupOnly` flag within the Kata Containers [configuration](../../src/runtime/README.md#configuration)
file.
1) The upper-layer orchestrator takes the overhead of running a sandbox into account when sizing the pod cgroup.
For example, Kubernetes [`PodOverhead`](https://kubernetes.io/docs/concepts/scheduling-eviction/pod-overhead/)
feature lets the orchestrator add a configured sandbox overhead to the sum of all its containers resources. In
that case, the pod sandbox is properly sized and all Kata created processes will run under the pod cgroup
defined constraints and limits.
2) The upper-layer orchestrator does **not** take the sandbox overhead into account and the pod cgroup is not
sized to properly run all Kata created processes. With that scenario, attaching all the Kata processes to the sandbox
cgroup may lead to non-negligible workload performance degradations. As a consequence, Kata Containers will move
all processes but the vCPU threads into a dedicated overhead cgroup under `/kata_overhead`. The Kata runtime will
not apply any constraints or limits to that cgroup, it is up to the infrastructure owner to optionally set it up.
## `SandboxCgroupOnly` enabled
Those 2 scenarios are not dynamically detected by the Kata Containers runtime implementation, and thus the
infrastructure owner must configure the runtime according to how the upper-layer orchestrator creates and sizes the
pod cgroup. That configuration selection is done through the `sandbox_cgroup_only` flag within the Kata Containers
[configuration](../../src/runtime/README.md#configuration) file.
With `SandboxCgroupOnly` enabled, it is expected that the parent cgroup is sized to take the overhead of running
a sandbox into account. This is ideal, as all the applicable Kata Containers components can be placed within the
given cgroup-path.
## `sandbox_cgroup_only = true`
In the context of Kubernetes, Kubelet will size the pod-cgroup to take the overhead of running a Kata-based sandbox
into account. This will be feasible in the 1.16 Kubernetes release through the `PodOverhead` feature.
Setting `sandbox_cgroup_only` to `true` from the Kata Containers configuration file means that the pod cgroup is
properly sized and takes the pod overhead into account. This is ideal, as all the applicable Kata Containers processes
can simply be placed within the given cgroup path.
In the context of Kubernetes, Kubelet can size the pod cgroup to take the overhead of running a Kata-based sandbox
into account. This has been supported since the 1.16 Kubernetes release, through the
[`PodOverhead`](https://kubernetes.io/docs/concepts/scheduling-eviction/pod-overhead/) feature.
```
+----------------------------------------------------------+
| +---------------------------------------------------+ |
| | +---------------------------------------------+ | |
| | | +--------------------------------------+ | | |
| | | | kata-shimv2, VMM and threads: | | | |
| | | | (VMM, IO-threads, vCPU threads, etc)| | | |
| | | | | | | |
| | | | kata_<sandbox-id> | | | |
| | | +--------------------------------------+ | | |
| | | | | |
| | |Pod 1 | | |
| | +---------------------------------------------+ | |
| | | |
| | +---------------------------------------------+ | |
| | | +--------------------------------------+ | | |
| | | | kata-shimv2, VMM and threads: | | | |
| | | | (VMM, IO-threads, vCPU threads, etc)| | | |
| | | | | | | |
| | | | kata_<sandbox-id> | | | |
| | | +--------------------------------------+ | | |
| | |Pod 2 | | |
| | +---------------------------------------------+ | |
| |kubepods | |
| +---------------------------------------------------+ |
| |
|Node |
+----------------------------------------------------------+
┌─────────────────────────────────────────┐
│ │
┌──────────────────────────────────┐ │
│ │
│ ┌─────────────────────────────┐ │ │
│ │ │ │ │
│ │ ┌─────────────────────┐
│ │ │ vCPU threads
│ │ │ I/O threads │ │ │ │
│ │ │ │ VMM
│ │ │ Kata Shim
│ │ │ │ │ │ │
│ │ │ /kata_<sandbox_id>
│ │ │ └─────────────────────┘ │ │
│ │Pod 1 │ │ │
│ └─────────────────────────────┘ │ │
│ │
│ │ ┌─────────────────────────────┐
│ │ │ │
│ │ ┌─────────────────────┐ │ │ │
│ │ │ vCPU threads
│ │ │ │ I/O threads │ │ │
│ │ │ VMM
│ │ │ │ Kata Shim │ │
│ │ │
│ │ │ │ /kata_<sandbox_id> │
│ │ │ └─────────────────────┘ │ │ │
│ │ │Pod 2 │ │ │
│ │ └─────────────────────────────┘ │ │
│ │ │ │
│ │/kubepods │ │
│ └──────────────────────────────────┘ │
│ │
│ Node │
└─────────────────────────────────────────┘
```
### What does Kata do in this configuration?
1. Given a `PodSandbox` container creation, let:
### Implementation details
```
podCgroup=Parent(container.CgroupsPath)
KataSandboxCgroup=<podCgroup>/kata_<PodSandboxID>
```
When `sandbox_cgroup_only` is enabled, the Kata shim will create a per pod
sub-cgroup under the pod's dedicated cgroup. For example, in the Kubernetes context,
it will create a `/kata_<PodSandboxID>` under the `/kubepods` cgroup hierarchy.
On a typical cgroup v1 hierarchy mounted under `/sys/fs/cgroup/`, the memory cgroup
subsystem for a pod with sandbox ID `12345678` would live under
`/sys/fs/cgroup/memory/kubepods/kata_12345678`.
2. Create the cgroup, `KataSandboxCgroup`
In most cases, the `/kata_<PodSandboxID>` created cgroup is unrestricted and inherits and shares all
constraints and limits from the parent cgroup (`/kubepods` in the Kubernetes case). The exception is
for the `cpuset` and `devices` cgroup subsystems, which are managed by the Kata shim.
3. Join the `KataSandboxCgroup`
After creating the `/kata_<PodSandboxID>` cgroup, the Kata Containers shim will move itself to it, **before** starting
the virtual machine. As a consequence all processes subsequently created by the Kata Containers shim (the VMM itself, and
all vCPU and I/O related threads) will be created in the `/kata_<PodSandboxID>` cgroup.
Any process created by the runtime will be created in `KataSandboxCgroup`.
The runtime will limit the cgroup in the host only if the sandbox doesn't have a
container type annotation, but the caller is free to set the proper limits for the `podCgroup`.
### Why create a kata-cgroup under the parent cgroup?
In the example above the pod cgroups are `/kubepods/pod1` and `/kubepods/pod2`.
Kata creates the unrestricted sandbox cgroup under the pod cgroup.
And why not directly adding the per sandbox shim directly to the pod cgroup (e.g.
`/kubepods` in the Kubernetes context)?
### Why create a Kata-cgroup under the parent cgroup?
The Kata Containers shim implementation creates a per-sandbox cgroup
(`/kata_<PodSandboxID>`) to support the `Docker` use case. Although `Docker` does not
have a notion of pods, Kata Containers still creates a sandbox to support the pod-less,
single container use case that `Docker` implements. Since `Docker` does create any
cgroup hierarchy to place a container into, it would be very complex for Kata to map
a particular container to its sandbox without placing it under a `/kata_<containerID>>`
sub-cgroup first.
`Docker` does not have a notion of pods, and will not create a cgroup directory
to place a particular container in (i.e., all containers would be in a path like
`/docker/container-id`. To simplify the implementation and continue to support `Docker`,
Kata Containers creates the sandbox-cgroup, in the case of Kubernetes, or a container cgroup, in the case
of docker.
### Advantages
### Improvements
Keeping all Kata Containers processes under a properly sized pod cgroup is ideal
and makes for a simpler Kata Containers implementation. It also helps with gathering
accurate statistics and preventing Kata workloads from being noisy neighbors.
- Get statistics about pod resources
#### Pod resources statistics
If the Kata caller wants to know the resource usage on the host it can get
statistics from the pod cgroup. All cgroups stats in the hierarchy will include
the Kata overhead. This gives the possibility of gathering usage-statics at the
pod level and the container level.
- Better host resource isolation
#### Better host resource isolation
Because the Kata runtime will place all the Kata processes in the pod cgroup,
the resource limits that the caller applies to the pod cgroup will affect all
processes that belong to the Kata sandbox in the host. This will improve the
isolation in the host preventing Kata to become a noisy neighbor.
## `SandboxCgroupOnly` disabled (default, legacy)
## `sandbox_cgroup_only = false` (Default setting)
If the cgroup provided to Kata is not sized appropriately, Kata components will
consume resources that the actual container workloads expect to see and use.
This can cause instability and performance degradations.
To avoid that situation, Kata Containers creates an unconstrained overhead
cgroup and moves all non workload related processes (Anything but the virtual CPU
threads) to it. The name of this overhead cgroup is `/kata_overhead` and a per
sandbox sub cgroup will be created under it for each sandbox Kata Containers creates.
Kata Containers does not add any constraints or limitations on the overhead cgroup. It is up to the infrastructure
owner to either:
- Provision nodes with a pre-sized `/kata_overhead` cgroup. Kata Containers will
load that existing cgroup and move all non workload related processes to it.
- Let Kata Containers create the `/kata_overhead` cgroup, leave it
unconstrained or resize it a-posteriori.
If the cgroup provided to Kata is not sized appropriately, instability will be
introduced when fully constraining Kata components, and the user-workload will
see a subset of resources that were requested. Based on this, the default
handling for Kata Containers is to not fully constrain the VMM and Kata
components on the host.
```
+----------------------------------------------------------+
| +---------------------------------------------------+ |
| | +---------------------------------------------+ | |
| | | +--------------------------------------+ | | |
| | | |Container 1 |-|Container 2 | | | |
| | | | |-| | | | |
| | | | Shim+container1 |-| Shim+container2 | | | |
| | | +--------------------------------------+ | | |
| | | | | |
| | |Pod 1 | | |
| | +---------------------------------------------+ | |
| | | |
| | +---------------------------------------------+ | |
| | | +--------------------------------------+ | | |
| | | |Container 1 |-|Container 2 | | | |
| | | | |-| | | | |
| | | | Shim+container1 |-| Shim+container2 | | | |
| | | +--------------------------------------+ | | |
| | | | | |
| | |Pod 2 | | |
| | +---------------------------------------------+ | |
| |kubepods | |
| +---------------------------------------------------+ |
| +---------------------------------------------------+ |
| | Hypervisor | |
| |Kata | |
| +---------------------------------------------------+ |
| |
|Node |
+----------------------------------------------------------+
┌────────────────────────────────────────────────────────────────────┐
│ ┌─────────────────────────────┐ ┌───────────────────────────┐ │
│ │ │ │
┌─────────────────────────┼────┼─────────────────────────┐ │ │
│ │ │ │ │
│ ┌─────────────────────┐ │ │ ┌─────────────────────┐ │ │ │
│ │ vCPU threads │ │ │ │ VMM │ │ │ │
│ │ │ │ │ │ │ │ I/O threads │ │ │ │
│ │ │ │ │ │ │ │ Kata Shim │ │ │ │
│ │ │ │ │ │ │ │ │ │
│ │ /kata_<sandbox_id> │ │ │ │ /<sandbox_id> │ │ │ │
│ └─────────────────────┘ │ │ └─────────────────────┘ │ │ │
│ │ │ │ │
│ Pod 1 │ │ │ │ │
└─────────────────────────┼────┼─────────────────────────┘ │ │
│ │ │ │
│ │ │ │
┌─────────────────────────┼────┼─────────────────────────┐ │ │
│ │ │ │ │
│ ┌─────────────────────┐ │ │ ┌─────────────────────┐ │ │ │
│ │ vCPU threads │ │ │ │ VMM │ │ │ │
│ │ │ │ │ │ I/O threads │ │ │ │
│ │ │ │ │ │ Kata Shim │ │ │ │
│ │ │ │ │ │ │ │ │ │
│ │ /kata_<sandbox_id> │ │ │ │ /<sandbox_id> │ │ │ │
│ └─────────────────────┘ │ │ └─────────────────────┘ │ │ │
│ │ │ │ │ │
Pod 2 │ │ │
│ │ └─────────────────────────┼────┼─────────────────────────┘ │ │
│ │ │ │ │ │
│ │ /kubepods │ │ /kata_overhead │ │
│ └─────────────────────────────┘ └───────────────────────────┘ │
│ │
│ │
│ Node │
└────────────────────────────────────────────────────────────────────┘
```
### What does this method do?
### Implementation Details
1. Given a container creation let `containerCgroupHost=container.CgroupsPath`
1. Rename `containerCgroupHost` path to add `kata_`
1. Let `PodCgroupPath=PodSanboxContainerCgroup` where `PodSanboxContainerCgroup` is the cgroup of a container of type `PodSandbox`
1. Limit the `PodCgroupPath` with the sum of all the container limits in the Sandbox
1. Move only vCPU threads of hypervisor to `PodCgroupPath`
1. Per each container, move its `kata-shim` to its own `containerCgroupHost`
1. Move hypervisor and applicable threads to memory cgroup `/kata`
When `sandbox_cgroup_only` is disabled, the Kata Containers shim will create a per pod
sub-cgroup under the pods dedicated cgroup, and another one under the overhead cgroup.
For example, in the Kubernetes context, it will create a `/kata_<PodSandboxID>` under
the `/kubepods` cgroup hierarchy, and a `/<PodSandboxID>` under the `/kata_overhead` one.
_Note_: the Kata Containers runtime will not add all the hypervisor threads to
the cgroup path requested, only vCPUs. These threads are run unconstrained.
On a typical cgroup v1 hierarchy mounted under `/sys/fs/cgroup/`, for a pod which sandbox
ID is `12345678`, create with `sandbox_cgroup_only` disabled, the 2 memory subsystems
for the sandbox cgroup and the overhead cgroup would respectively live under
`/sys/fs/cgroup/memory/kubepods/kata_12345678` and `/sys/fs/cgroup/memory/kata_overhead/12345678`.
This mitigates the risk of the VMM and other threads receiving an out of memory scenario (`OOM`).
Unlike when `sandbox_cgroup_only` is enabled, the Kata Containers shim will move itself
to the overhead cgroup first, and then move the vCPU threads to the sandbox cgroup as
they're created. All Kata processes and threads will run under the overhead cgroup except for
the vCPU threads.
With `sandbox_cgroup_only` disabled, Kata Containers assumes the pod cgroup is only sized
to accommodate for the actual container workloads processes. For Kata, this maps
to the VMM created virtual CPU threads and so they are the only ones running under the pod
cgroup. This mitigates the risk of the VMM, the Kata shim and the I/O threads going through
a catastrophic out of memory scenario (`OOM`).
#### Impact
#### Pros and Cons
If resources are reserved at a system level to account for the overheads of
running sandbox containers, this configuration can be utilized with adequate
stability. In this scenario, non-negligible amounts of CPU and memory will be
utilized unaccounted for on the host.
Running all non vCPU threads under an unconstrained overhead cgroup could lead to workloads
potentially consuming a large amount of host resources.
On the other hand, running all non vCPU threads under a dedicated overhead cgroup can provide
accurate metrics on the actual Kata Container pod overhead, allowing for tuning the overhead
cgroup size and constraints accordingly.
[linux-config]: https://github.com/opencontainers/runtime-spec/blob/master/config-linux.md
[cgroupspath]: https://github.com/opencontainers/runtime-spec/blob/master/config-linux.md#cgroups-path
# Supported cgroups
Kata Containers supports cgroups `v1` and `v2`. In the following sections each cgroup is
described briefly and what changes are needed in Kata Containers to support it.
Kata Containers currently only supports cgroups `v1`.
In the following sections each cgroup is described briefly.
## Cgroups V1
@@ -244,7 +301,7 @@ diagram:
A process can join a cgroup by writing its process id (`pid`) to `cgroup.procs` file,
or join a cgroup partially by writing the task (thread) id (`tid`) to the `tasks` file.
Kata Containers supports `v1` by default and no change in the configuration file is needed.
Kata Containers only supports `v1`.
To know more about `cgroups v1`, see [cgroupsv1(7)][2].
## Cgroups V2
@@ -297,22 +354,13 @@ Same as `cgroups v1`, a process can join the cgroup by writing its process id (`
`cgroup.procs` file, or join a cgroup partially by writing the task (thread) id (`tid`) to
`cgroup.threads` file.
For backwards compatibility Kata Containers defaults to supporting cgroups v1 by default.
To change this to `v2`, set `sandbox_cgroup_only=true` in the `configuration.toml` file.
To know more about `cgroups v2`, see [cgroupsv2(7)][3].
Kata Containers does not support cgroups `v2` on the host.
### Distro Support
Many Linux distributions do not yet support `cgroups v2`, as it is quite a recent addition.
For more information about the status of this feature see [issue #2494][4].
# Summary
| cgroup option | default? | status | pros | cons | cgroups
|-|-|-|-|-|-|
| `SandboxCgroupOnly=false` | yes | legacy | Easiest to make Kata work | Unaccounted for memory and resource utilization | v1
| `SandboxCgroupOnly=true` | no | recommended | Complete tracking of Kata memory and CPU utilization. In Kubernetes, the Kubelet can fully constrain Kata via the pod cgroup | Requires upper layer orchestrator which sizes sandbox cgroup appropriately | v1, v2
[1]: http://man7.org/linux/man-pages/man5/tmpfs.5.html
[2]: http://man7.org/linux/man-pages/man7/cgroups.7.html#CGROUPS_VERSION_1

View File

@@ -17,10 +17,9 @@
- `firecracker`
- `ACRN`
While `qemu` and `cloud-hypervisor` work out of the box with installation of Kata,
some additional configuration is needed in case of `firecracker` and `ACRN`.
While `qemu` , `cloud-hypervisor` and `firecracker` work out of the box with installation of Kata,
some additional configuration is needed in case of `ACRN`.
Refer to the following guides for additional configuration steps:
- [Kata Containers with Firecracker](https://github.com/kata-containers/documentation/wiki/Initial-release-of-Kata-Containers-with-Firecracker-support)
- [Kata Containers with ACRN Hypervisor](how-to-use-kata-containers-with-acrn.md)
## Advanced Topics
@@ -35,3 +34,5 @@
- [How to set sandbox Kata Containers configurations with pod annotations](how-to-set-sandbox-config-kata.md)
- [How to monitor Kata Containers in K8s](how-to-set-prometheus-in-k8s.md)
- [How to use hotplug memory on arm64 in Kata Containers](how-to-hotplug-memory-arm64.md)
- [How to setup swap devices in guest kernel](how-to-setup-swap-devices-in-guest-kernel.md)
- [How to run rootless vmm](how-to-run-rootless-vmm.md)

View File

@@ -39,7 +39,7 @@ use `RuntimeClass` instead of the deprecated annotations.
### Containerd Runtime V2 API: Shim V2 API
The [`containerd-shim-kata-v2` (short as `shimv2` in this documentation)](../../src/runtime/containerd-shim-v2)
The [`containerd-shim-kata-v2` (short as `shimv2` in this documentation)](../../src/runtime/cmd/containerd-shim-kata-v2/)
implements the [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2) for Kata.
With `shimv2`, Kubernetes can launch Pod and OCI-compatible containers with one shim per Pod. Prior to `shimv2`, `2N+1`
shims (i.e. a `containerd-shim` and a `kata-shim` for each container and the Pod sandbox itself) and no standalone `kata-proxy`

View File

@@ -0,0 +1,33 @@
## Introduction
To improve security, Kata Container supports running the VMM process (currently only QEMU) as a non-`root` user.
This document describes how to enable the rootless VMM mode and its limitations.
## Pre-requisites
The permission and ownership of the `kvm` device node (`/dev/kvm`) need to be configured to:
```
$ crw-rw---- 1 root kvm
```
use the following commands:
```
$ sudo groupadd kvm -r
$ sudo chown root:kvm /dev/kvm
$ sudo chmod 660 /dev/kvm
```
## Configure rootless VMM
By default, the VMM process still runs as the root user. There are two ways to enable rootless VMM:
1. Set the `rootless` flag to `true` in the hypervisor section of `configuration.toml`.
2. Set the Kubernetes annotation `io.katacontainers.hypervisor.rootless` to `true`.
## Implementation details
When `rootless` flag is enabled, upon a request to create a Pod, Kata Containers runtime creates a random user and group (e.g. `kata-123`), and uses them to start the hypervisor process.
The `kvm` group is also given to the hypervisor process as a supplemental group to give the hypervisor process access to the `/dev/kvm` device.
Another necessary change is to move the hypervisor runtime files (e.g. `vhost-fs.sock`, `qmp.sock`) to a directory (under `/run/user/[uid]/`) where only the non-root hypervisor has access to.
## Limitations
1. Only the VMM process is running as a non-root user. Other processes such as Kata Container shimv2 and `virtiofsd` still run as the root user.
2. Currently, this feature is only supported in QEMU. Still need to bring it to Firecracker and Cloud Hypervisor (see https://github.com/kata-containers/kata-containers/issues/2567).
3. Certain features will not work when rootless VMM is enabled, including:
1. Passing devices to the guest (`virtio-blk`, `virtio-scsi`) will not work if the non-privileged user does not have permission to access it (leading to a permission denied error). A more permissive permission (e.g. 666) may overcome this issue. However, you need to be aware of the potential security implications of reducing the security on such devices.
2. `vfio` device will also not work because of permission denied error.

View File

@@ -91,6 +91,13 @@ There are several kinds of Kata configurations and they are listed below.
| `io.katacontainers.config.hypervisor.virtio_fs_cache` | string | the cache mode for virtio-fs, valid values are `always`, `auto` and `none` |
| `io.katacontainers.config.hypervisor.virtio_fs_daemon` | string | virtio-fs `vhost-user` daemon path |
| `io.katacontainers.config.hypervisor.virtio_fs_extra_args` | string | extra options passed to `virtiofs` daemon |
| `io.katacontainers.config.hypervisor.enable_guest_swap` | `boolean` | enable swap in the guest |
## Container Options
| Key | Value Type | Comments |
|-------| ----- | ----- |
| `io.katacontainers.container.resource.swappiness"` | `uint64` | specify the `Resources.Memory.Swappiness` |
| `io.katacontainers.container.resource.swap_in_bytes"` | `uint64` | specify the `Resources.Memory.Swap` |
# CRI-O Configuration
@@ -100,11 +107,12 @@ In case of CRI-O, all annotations specified in the pod spec are passed down to K
For containerd, annotations specified in the pod spec are passed down to Kata
starting with version `1.3.0` of containerd. Additionally, extra configuration is
needed for containerd, by providing a `pod_annotations` field in the containerd config
file. The `pod_annotations` field is a list of annotations that can be passed down to
Kata as OCI annotations. It supports golang match patterns. Since annotations supported
by Kata follow the pattern `io.katacontainers.*`, the following configuration would work
for passing annotations to Kata from containerd:
needed for containerd, by providing `pod_annotations` field and
`container_annotations` field in the containerd config
file. The `pod_annotations` field and `container_annotations` field are two lists of
annotations that can be passed down to Kata as OCI annotations. They support golang match
patterns. Since annotations supported by Kata follow the pattern `io.katacontainers.*`,
the following configuration would work for passing annotations to Kata from containerd:
```
$ cat /etc/containerd/config
@@ -113,6 +121,7 @@ $ cat /etc/containerd/config
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.kata]
runtime_type = "io.containerd.kata.v2"
pod_annotations = ["io.katacontainers.*"]
container_annotations = ["io.katacontainers.*"]
....
```

View File

@@ -0,0 +1,59 @@
# Setup swap device in guest kernel
## Introduction
Setup swap device in guest kernel can help to increase memory capacity, handle some memory issues and increase file access speed sometimes.
Kata Containers can insert a raw file to the guest as the swap device.
## Requisites
The swap config of the containers should be set by [annotations](how-to-set-sandbox-config-kata.md#container-options). So [extra configuration is needed for containerd](how-to-set-sandbox-config-kata.md#containerd-configuration).
Kata Containers just supports setup swap device in guest kernel with QEMU.
Install and setup Kata Containers as shown [here](../install/README.md).
Enable setup swap device in guest kernel as follows:
```
$ sudo sed -i -e 's/^#enable_guest_swap.*$/enable_guest_swap = true/g' /etc/kata-containers/configuration.toml
```
## Run a Kata Container utilizing swap device
Use following command to start a Kata Container with swappiness 60 and 1GB swap device (swap_in_bytes - memory_limit_in_bytes).
```
$ pod_yaml=pod.yaml
$ container_yaml=container.yaml
$ image="quay.io/prometheus/busybox:latest"
$ cat << EOF > "${pod_yaml}"
metadata:
name: busybox-sandbox1
EOF
$ cat << EOF > "${container_yaml}"
metadata:
name: busybox-test-swap
annotations:
io.katacontainers.container.resource.swappiness: "60"
io.katacontainers.container.resource.swap_in_bytes: "2147483648"
linux:
resources:
memory_limit_in_bytes: 1073741824
image:
image: "$image"
command:
- top
EOF
$ sudo crictl pull $image
$ podid=$(sudo crictl runp $pod_yaml)
$ cid=$(sudo crictl create $podid $container_yaml $pod_yaml)
$ sudo crictl start $cid
```
Kata Container setups swap device for this container only when `io.katacontainers.container.resource.swappiness` is set.
The following table shows the swap size how to decide if `io.katacontainers.container.resource.swappiness` is set.
|`io.katacontainers.container.resource.swap_in_bytes`|`memory_limit_in_bytes`|swap size|
|---|---|---|
|set|set| `io.katacontainers.container.resource.swap_in_bytes` - `memory_limit_in_bytes`|
|not set|set| `memory_limit_in_bytes`|
|not set|not set| `io.katacontainers.config.hypervisor.default_memory`|
|set|not set|cgroup doesn't support this usage|

View File

@@ -3,7 +3,7 @@
This document describes how to set up a single-machine Kubernetes (k8s) cluster.
The Kubernetes cluster will use the
[CRI containerd plugin](https://github.com/containerd/cri) and
[CRI containerd plugin](https://github.com/containerd/containerd/tree/main/pkg/cri) and
[Kata Containers](https://katacontainers.io) to launch untrusted workloads.
## Requirements

View File

@@ -47,7 +47,7 @@ and can be classified as potentially misunderstood behaviors rather than vulnera
VM templating can be enabled by changing your Kata Containers config file (`/usr/share/defaults/kata-containers/configuration.toml`,
overridden by `/etc/kata-containers/configuration.toml` if provided) such that:
- `qemu-lite` is specified in `hypervisor.qemu`->`path` section
- `qemu` version `v4.1.0` or above is specified in `hypervisor.qemu`->`path` section
- `enable_template = true`
- `initrd =` is set
- `image =` option is commented out or removed

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 150 KiB

View File

@@ -0,0 +1,137 @@
# Kata Containers threat model
This document discusses threat models associated with the Kata Containers project.
Kata was designed to provide additional isolation of container workloads, protecting
the host infrastructure from potentially malicious container users or workloads. Since
Kata Containers adds a level of isolation on top of traditional containers, the focus
is on the additional layer provided, not on traditional container security.
This document provides a brief background on containers and layered security, describes
the interface to Kata from CRI runtimes, a review of utilized virtual machine interfaces, and then
a review of threats.
## Kata security objective
Kata seeks to prevent an untrusted container workload or user of that container workload to gain
control of, obtain information from, or tamper with the host infrastructure.
In our scenario, an asset is anything on the host system, or elsewhere in the cluster
infrastructure. The attacker is assumed to be either a malicious user or the workload itself
running within the container. The goal of Kata is to prevent attacks which would allow
any access to the defined assets.
## Background on containers, layered security
Traditional containers leverage several key Linux kernel features to provide isolation and
a view that the container workload is the only entity running on the host. Key features include
`Namespaces`, `cgroups`, `capablities`, `SELinux` and `seccomp`. The canonical runtime for creating such
a container is `runc`. In the remainder of the document, the term `traditional-container` will be used
to describe a container workload created by runc.
Kata Containers provides a second layer of isolation on top of those provided by traditional-containers.
The hardware virtualization interface is the basis of this additional layer. Kata launches a lightweight
virtual machine, and uses the guests Linux kernel to create a container workload, or workloads in the case
of multi-container pods. In Kubernetes and in the Kata implementation, the sandbox is carried out at the
pod level. In Kata, this sandbox is created using a virtual machine.
## Interface to Kata Containers: CRI, v2-shim, OCI
A typical Kata Containers deployment uses Kubernetes with a CRI implementation.
On every node, Kubelet will interact with a CRI implementor, which will in turn interface with
an OCI based runtime, such as Kata Containers. Typical CRI implementors are `cri-o` and `containerd`.
The CRI API, as defined at the Kubernetes [CRI-API repo](https://github.com/kubernetes/cri-api/),
results in a few constructs being supported by the CRI implementation, and ultimately in the OCI
runtime creating the workloads.
In order to run a container inside of the Kata sandbox, several virtual machine devices and interfaces
are required. Kata translates sandbox and container definitions to underlying virtualization technologies provided
by a set of virtual machine monitors (VMMs) and hypervisors. These devices and their underlying
implementations are discussed in detail in the following section.
## Interface to the Kata sandbox/virtual machine
In case of Kata, today the devices which we need in the guest are:
- Storage: In the current design of Kata Containers, we are reliant on the CRI implementor to
assist in image handling and volume management on the host. As a result, we need to support a way of passing to the sandbox the container rootfs, volumes requested
by the workload, and any other volumes created to facilitate sharing of secrets and `configmaps` with the containers. Depending on how these are managed, a block based device or file-system
sharing is required. Kata Containers does this by way of `virtio-blk` and/or `virtio-fs`.
- Networking: A method for enabling network connectivity with the workload is required. Typically this will be done providing a `TAP` device
to the VMM, and this will be exposed to the guest as a `virtio-net` device. It is feasible to pass in a NIC device directly, in which case `VFIO` is leveraged
and the device itself will be exposed to the guest.
- Control: In order to interact with the guest agent and retrieve `STDIO` from containers, a medium of communication is required.
This is available via `virtio-vsock`.
- Devices: `VFIO` is utilized when devices are passed directly to the virtual machine and exposed to the container.
- Dynamic Resource Management: `ACPI` is utilized to allow for dynamic VM resource management (for example: CPU, memory, device hotplug). This is required when containers are resized,
or more generally when containers are added to a pod.
How these devices are utilized varies depending on the VMM utilized. We clarify the default settings provided when integrating Kata
with the QEMU, Firecracker and Cloud Hypervisor VMMs in the following sections.
### Devices
Each virtio device is implemented by a backend, which may execute within userspace on the host (vhost-user), the VMM itself, or within the host kernel (vhost). While it may provide enhanced performance,
vhost devices are often seen as higher risk since an exploit would be already running within the kernel space. While VMM and vhost-user are both in userspace on the host, `vhost-user` generally allows for the back-end process to require less system calls and capabilities compared to a full VMM.
#### `virtio-blk` and `virtio-scsi`
The backend for `virtio-blk` and `virtio-scsi` are based in the VMM itself (ring3 in the context of x86) by default for Cloud Hypervisor, Firecracker and QEMU.
While `vhost` based back-ends are available for QEMU, it is not recommended. `vhost-user` back-ends are being added for Cloud Hypervisor, they are not utilized in Kata today.
#### `virtio-fs`
`virtio-fs` is supported in Cloud Hypervisor and QEMU. `virtio-fs`'s interaction with the host filesystem is done through a vhost-user daemon, `virtiofsd`.
The `virtio-fs` client, running in the guest, will generate requests to access files. `virtiofsd` will receive requests, open the file, and request the VMM
to `mmap` it into the guest. When DAX is utilized, the guest will access the host's page cache, avoiding the need for copy and duplication. DAX is still an experimental feature,
and is not enabled by default.
From the `virtiofsd` [documentation](https://qemu-project.gitlab.io/qemu/tools/virtiofsd.html):
```This program must be run as the root user. Upon startup the program will switch into a new file system namespace with the shared directory tree as its root. This prevents “file system escapes” due to symlinks and other file system objects that might lead to files outside the shared directory. The program also sandboxes itself using seccomp(2) to prevent ptrace(2) and other vectors that could allow an attacker to compromise the system after gaining control of the virtiofsd process.```
DAX-less support for `virtio-fs` is available as of the 5.4 Linux kernel. QEMU VMM supports virtio-fs as of v4.2. Cloud Hypervisor
supports `virtio-fs`.
#### `virtio-net`
`virtio-net` has many options, depending on the VMM and Kata configurations.
##### QEMU networking
While QEMU has options for `vhost`, `virtio-net` and `vhost-user`, the `virtio-net` backend
for Kata defaults to `vhost-net` for performance reasons. The default configuration is being
reevaluated.
##### Firecracker networking
For Firecracker, the `virtio-net` backend is within Firecracker's VMM.
##### Cloud Hypervisor networking
For Cloud Hypervisor, the current backend default is within the VMM. `vhost-user-net` support
is being added (written in rust, Cloud Hypervisor specific).
#### virtio-vsock
##### QEMU vsock
In QEMU, vsock is backed by `vhost_vsock`, which runs within the kernel itself.
##### Firecracker and Cloud Hypervisor
In Firecracker and Cloud Hypervisor, vsock is backed by a unix-domain-socket in the hosts userspace.
#### VFIO
Utilizing VFIO, devices can be passed through to the virtual machine. We will assess this separately. Exposure to
host is limited to gaps in device pass-through handling. This is supported in QEMU and Cloud Hypervisor, but not
Firecracker.
#### ACPI
ACPI is necessary for hotplug of CPU, memory and devices. ACPI is available in QEMU and Cloud Hypervisor. Device, CPU and memory hotplug
are not available in Firecracker.
## Devices and threat model
![Threat model](threat-model-boundaries.svg "threat-model")

View File

@@ -67,7 +67,7 @@ To use large BARs devices (for example, Nvidia Tesla P100), you need Kata versio
The following configuration in the Kata `configuration.toml` file as shown below can work:
Hotplug for PCI devices by `shpchp` (Linux's SHPC PCI Hotplug driver):
Hotplug for PCI devices by `acpi_pcihp` (Linux's ACPI PCI Hotplug driver):
```
machine_type = "q35"
@@ -91,7 +91,6 @@ The following kernel config options need to be enabled:
```
# Support PCI/PCIe device hotplug (Required for large BARs device)
CONFIG_HOTPLUG_PCI_PCIE=y
CONFIG_HOTPLUG_PCI_SHPC=y
# Support for loading modules (Required for load Nvidia drivers)
CONFIG_MODULES=y

View File

@@ -299,13 +299,13 @@ parts:
| xargs ./configure
# Copy QEMU configurations (Kconfigs)
case "$(branch)" in
case "${branch}" in
"v5.1.0")
cp -a ${kata_dir}/tools/packaging/qemu/default-configs/* default-configs
;;
*)
cp -a ${kata_dir}/tools/packaging/qemu/default-configs/* default-configs/devices/
cp -a ${kata_dir}/tools/packaging/qemu/default-configs/* configs/devices/
;;
esac

19
src/agent/Cargo.lock generated
View File

@@ -544,6 +544,7 @@ dependencies = [
"rustjail",
"scan_fmt",
"scopeguard",
"serde",
"serde_json",
"slog",
"slog-scope",
@@ -552,6 +553,7 @@ dependencies = [
"thiserror",
"tokio",
"tokio-vsock",
"toml",
"tracing",
"tracing-opentelemetry",
"tracing-subscriber",
@@ -1323,18 +1325,18 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "serde"
version = "1.0.126"
version = "1.0.129"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec7505abeacaec74ae4778d9d9328fe5a5d04253220a85c4ee022239fc996d03"
checksum = "d1f72836d2aa753853178eda473a3b9d8e4eefdaf20523b919677e6de489f8f1"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.126"
version = "1.0.129"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "963a7dbc9895aeac7ac90e74f34a5d5261828f79df35cbed41e10189d3804d43"
checksum = "e57ae87ad533d9a56427558b516d0adac283614e347abf85b0dc0cbbf0a249f3"
dependencies = [
"proc-macro2 1.0.26",
"quote 1.0.9",
@@ -1618,6 +1620,15 @@ dependencies = [
"vsock",
]
[[package]]
name = "toml"
version = "0.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a31142970826733df8241ef35dc040ef98c679ab14d7c3e54d827099b3acecaa"
dependencies = [
"serde",
]
[[package]]
name = "tracing"
version = "0.1.26"

View File

@@ -58,6 +58,10 @@ tracing-opentelemetry = "0.13.0"
opentelemetry = { version = "0.14.0", features = ["rt-tokio-current-thread"]}
vsock-exporter = { path = "vsock-exporter" }
# Configuration
serde = { version = "1.0.129", features = ["derive"] }
toml = "0.5.8"
[dev-dependencies]
tempfile = "3.1.0"

View File

@@ -1,202 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@@ -127,7 +127,7 @@ vendor:
#TARGET test: run cargo tests
test:
@cargo test --all --target $(TRIPLE)
@cargo test --all --target $(TRIPLE) -- --nocapture
##TARGET check: run test
check: clippy format

View File

@@ -46,6 +46,7 @@ message Route {
string device = 3;
string source = 4;
uint32 scope = 5;
IPFamily family = 6;
}
message ARPNeighbor {

View File

@@ -833,6 +833,20 @@ impl BaseContainer for LinuxContainer {
}
let linux = spec.linux.as_ref().unwrap();
if p.oci.capabilities.is_none() {
// No capabilities, inherit from container process
let process = spec
.process
.as_ref()
.ok_or_else(|| anyhow!("no process config"))?;
p.oci.capabilities = Some(
process
.capabilities
.clone()
.ok_or_else(|| anyhow!("missing process capabilities"))?,
);
}
let (pfd_log, cfd_log) = unistd::pipe().context("failed to create pipe")?;
let _ = fcntl::fcntl(pfd_log, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC))

View File

@@ -0,0 +1,21 @@
# This is an agent configuration file example.
dev_mode = true
server_addr = 'vsock://8:2048'
[endpoints]
# All endpoints are allowed
allowed = [ "CreateContainer", "StartContainer", "RemoveContainer",
"ExecProcess", "SignalProcess", "WaitProcess",
"UpdateContainer", "StatsContainer", "PauseContainer", "ResumeContainer",
"WriteStdin", "ReadStdout", "ReadStderr", "CloseStdin", "TtyWinResize",
"UpdateInterface", "UpdateRoutes", "ListInterfaces", "ListRoutes", "AddARPNeighbors",
"StartTracing", "StopTracing", "GetMetrics",
"CreateSandbox", "DestroySandbox",
"OnlineCPUMem",
"ReseedRandomDev",
"GetGuestDetails",
"MemHotplugByProbe",
"SetGuestDateTime",
"CopyFile",
"GetOOMEvent",
"AddSwap"]

View File

@@ -4,8 +4,11 @@
//
use crate::tracer;
use anyhow::{bail, ensure, Context, Result};
use serde::Deserialize;
use std::collections::HashSet;
use std::env;
use std::fs;
use std::str::FromStr;
use std::time;
use tracing::instrument;
@@ -19,6 +22,7 @@ const DEBUG_CONSOLE_VPORT_OPTION: &str = "agent.debug_console_vport";
const LOG_VPORT_OPTION: &str = "agent.log_vport";
const CONTAINER_PIPE_SIZE_OPTION: &str = "agent.container_pipe_size";
const UNIFIED_CGROUP_HIERARCHY_OPTION: &str = "agent.unified_cgroup_hierarchy";
const CONFIG_FILE: &str = "agent.config_file";
const DEFAULT_LOG_LEVEL: slog::Level = slog::Level::Info;
const DEFAULT_HOTPLUG_TIMEOUT: time::Duration = time::Duration::from_secs(3);
@@ -47,6 +51,17 @@ const ERR_INVALID_CONTAINER_PIPE_SIZE_PARAM: &str = "unable to parse container p
const ERR_INVALID_CONTAINER_PIPE_SIZE_KEY: &str = "invalid container pipe size key name";
const ERR_INVALID_CONTAINER_PIPE_NEGATIVE: &str = "container pipe size should not be negative";
#[derive(Debug, Default, Deserialize)]
pub struct EndpointsConfig {
pub allowed: Vec<String>,
}
#[derive(Debug, Default)]
pub struct AgentEndpoints {
pub allowed: HashSet<String>,
pub all_allowed: bool,
}
#[derive(Debug)]
pub struct AgentConfig {
pub debug_console: bool,
@@ -59,6 +74,36 @@ pub struct AgentConfig {
pub server_addr: String,
pub unified_cgroup_hierarchy: bool,
pub tracing: tracer::TraceType,
pub endpoints: AgentEndpoints,
}
#[derive(Debug, Deserialize)]
pub struct AgentConfigBuilder {
pub debug_console: Option<bool>,
pub dev_mode: Option<bool>,
pub log_level: Option<String>,
pub hotplug_timeout: Option<time::Duration>,
pub debug_console_vport: Option<i32>,
pub log_vport: Option<i32>,
pub container_pipe_size: Option<i32>,
pub server_addr: Option<String>,
pub unified_cgroup_hierarchy: Option<bool>,
pub tracing: Option<tracer::TraceType>,
pub endpoints: Option<EndpointsConfig>,
}
macro_rules! config_override {
($builder:ident, $config:ident, $field:ident) => {
if let Some(v) = $builder.$field {
$config.$field = v;
}
};
($builder:ident, $config:ident, $field:ident, $func: ident) => {
if let Some(v) = $builder.$field {
$config.$field = $func(&v)?;
}
};
}
// parse_cmdline_param parse commandline parameters.
@@ -91,8 +136,8 @@ macro_rules! parse_cmdline_param {
};
}
impl AgentConfig {
pub fn new() -> AgentConfig {
impl Default for AgentConfig {
fn default() -> Self {
AgentConfig {
debug_console: false,
dev_mode: false,
@@ -104,33 +149,82 @@ impl AgentConfig {
server_addr: format!("{}:{}", VSOCK_ADDR, VSOCK_PORT),
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
endpoints: Default::default(),
}
}
}
impl FromStr for AgentConfig {
type Err = anyhow::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let agent_config_builder: AgentConfigBuilder =
toml::from_str(s).map_err(anyhow::Error::new)?;
let mut agent_config: AgentConfig = Default::default();
// Overwrite default values with the configuration files ones.
config_override!(agent_config_builder, agent_config, debug_console);
config_override!(agent_config_builder, agent_config, dev_mode);
config_override!(
agent_config_builder,
agent_config,
log_level,
logrus_to_slog_level
);
config_override!(agent_config_builder, agent_config, hotplug_timeout);
config_override!(agent_config_builder, agent_config, debug_console_vport);
config_override!(agent_config_builder, agent_config, log_vport);
config_override!(agent_config_builder, agent_config, container_pipe_size);
config_override!(agent_config_builder, agent_config, server_addr);
config_override!(agent_config_builder, agent_config, unified_cgroup_hierarchy);
config_override!(agent_config_builder, agent_config, tracing);
// Populate the allowed endpoints hash set, if we got any from the config file.
if let Some(endpoints) = agent_config_builder.endpoints {
for ep in endpoints.allowed {
agent_config.endpoints.allowed.insert(ep);
}
}
Ok(agent_config)
}
}
impl AgentConfig {
#[instrument]
pub fn parse_cmdline(&mut self, file: &str) -> Result<()> {
pub fn from_cmdline(file: &str) -> Result<AgentConfig> {
let mut config: AgentConfig = Default::default();
let cmdline = fs::read_to_string(file)?;
let params: Vec<&str> = cmdline.split_ascii_whitespace().collect();
for param in params.iter() {
// If we get a configuration file path from the command line, we
// generate our config from it.
// The agent will fail to start if the configuration file is not present,
// or if it can't be parsed properly.
if param.starts_with(format!("{}=", CONFIG_FILE).as_str()) {
let config_file = get_string_value(param)?;
return AgentConfig::from_config_file(&config_file);
}
// parse cmdline flags
parse_cmdline_param!(param, DEBUG_CONSOLE_FLAG, self.debug_console);
parse_cmdline_param!(param, DEV_MODE_FLAG, self.dev_mode);
parse_cmdline_param!(param, DEBUG_CONSOLE_FLAG, config.debug_console);
parse_cmdline_param!(param, DEV_MODE_FLAG, config.dev_mode);
// Support "bare" tracing option for backwards compatibility with
// Kata 1.x.
if param == &TRACE_MODE_OPTION {
self.tracing = tracer::TraceType::Isolated;
config.tracing = tracer::TraceType::Isolated;
continue;
}
parse_cmdline_param!(param, TRACE_MODE_OPTION, self.tracing, get_trace_type);
parse_cmdline_param!(param, TRACE_MODE_OPTION, config.tracing, get_trace_type);
// parse cmdline options
parse_cmdline_param!(param, LOG_LEVEL_OPTION, self.log_level, get_log_level);
parse_cmdline_param!(param, LOG_LEVEL_OPTION, config.log_level, get_log_level);
parse_cmdline_param!(
param,
SERVER_ADDR_OPTION,
self.server_addr,
config.server_addr,
get_string_value
);
@@ -138,7 +232,7 @@ impl AgentConfig {
parse_cmdline_param!(
param,
HOTPLUG_TIMOUT_OPTION,
self.hotplug_timeout,
config.hotplug_timeout,
get_hotplug_timeout,
|hotplug_timeout: time::Duration| hotplug_timeout.as_secs() > 0
);
@@ -147,14 +241,14 @@ impl AgentConfig {
parse_cmdline_param!(
param,
DEBUG_CONSOLE_VPORT_OPTION,
self.debug_console_vport,
config.debug_console_vport,
get_vsock_port,
|port| port > 0
);
parse_cmdline_param!(
param,
LOG_VPORT_OPTION,
self.log_vport,
config.log_vport,
get_vsock_port,
|port| port > 0
);
@@ -162,34 +256,47 @@ impl AgentConfig {
parse_cmdline_param!(
param,
CONTAINER_PIPE_SIZE_OPTION,
self.container_pipe_size,
config.container_pipe_size,
get_container_pipe_size
);
parse_cmdline_param!(
param,
UNIFIED_CGROUP_HIERARCHY_OPTION,
self.unified_cgroup_hierarchy,
config.unified_cgroup_hierarchy,
get_bool_value
);
}
if let Ok(addr) = env::var(SERVER_ADDR_ENV_VAR) {
self.server_addr = addr;
config.server_addr = addr;
}
if let Ok(addr) = env::var(LOG_LEVEL_ENV_VAR) {
if let Ok(level) = logrus_to_slog_level(&addr) {
self.log_level = level;
config.log_level = level;
}
}
if let Ok(value) = env::var(TRACE_TYPE_ENV_VAR) {
if let Ok(result) = value.parse::<tracer::TraceType>() {
self.tracing = result;
config.tracing = result;
}
}
Ok(())
// We did not get a configuration file: allow all endpoints.
config.endpoints.all_allowed = true;
Ok(config)
}
#[instrument]
pub fn from_config_file(file: &str) -> Result<AgentConfig> {
let config = fs::read_to_string(file)?;
AgentConfig::from_str(&config)
}
pub fn is_allowed_endpoint(&self, ep: &str) -> bool {
self.endpoints.all_allowed || self.endpoints.allowed.contains(ep)
}
}
@@ -371,7 +478,7 @@ mod tests {
#[test]
fn test_new() {
let config = AgentConfig::new();
let config: AgentConfig = Default::default();
assert!(!config.debug_console);
assert!(!config.dev_mode);
assert_eq!(config.log_level, DEFAULT_LOG_LEVEL);
@@ -379,7 +486,7 @@ mod tests {
}
#[test]
fn test_parse_cmdline() {
fn test_from_cmdline() {
const TEST_SERVER_ADDR: &str = "vsock://-1:1024";
#[derive(Debug)]
@@ -716,15 +823,6 @@ mod tests {
let dir = tempdir().expect("failed to create tmpdir");
// First, check a missing file is handled
let file_path = dir.path().join("enoent");
let filename = file_path.to_str().expect("failed to create filename");
let mut config = AgentConfig::new();
let result = config.parse_cmdline(&filename.to_owned());
assert!(result.is_err());
// Now, test various combinations of file contents and environment
// variables.
for (i, d) in tests.iter().enumerate() {
@@ -753,22 +851,7 @@ mod tests {
vars_to_unset.push(name);
}
let mut config = AgentConfig::new();
assert!(!config.debug_console, "{}", msg);
assert!(!config.dev_mode, "{}", msg);
assert!(!config.unified_cgroup_hierarchy, "{}", msg);
assert_eq!(
config.hotplug_timeout,
time::Duration::from_secs(3),
"{}",
msg
);
assert_eq!(config.container_pipe_size, 0, "{}", msg);
assert_eq!(config.server_addr, TEST_SERVER_ADDR, "{}", msg);
assert_eq!(config.tracing, tracer::TraceType::Disabled, "{}", msg);
let result = config.parse_cmdline(filename);
assert!(result.is_ok(), "{}", msg);
let config = AgentConfig::from_cmdline(filename).expect("Failed to parse command line");
assert_eq!(d.debug_console, config.debug_console, "{}", msg);
assert_eq!(d.dev_mode, config.dev_mode, "{}", msg);
@@ -1276,4 +1359,35 @@ Caused by:
assert_result!(d.result, result, msg);
}
}
#[test]
fn test_config_builder_from_string() {
let config = AgentConfig::from_str(
r#"
dev_mode = true
server_addr = 'vsock://8:2048'
[endpoints]
allowed = ["CreateContainer", "StartContainer"]
"#,
)
.unwrap();
// Verify that the all_allowed flag is false
assert!(!config.endpoints.all_allowed);
// Verify that the override worked
assert!(config.dev_mode);
assert_eq!(config.server_addr, "vsock://8:2048");
assert_eq!(
config.endpoints.allowed,
vec!["CreateContainer".to_string(), "StartContainer".to_string()]
.iter()
.cloned()
.collect()
);
// Verify that the default values are valid
assert_eq!(config.hotplug_timeout, DEFAULT_HOTPLUG_TIMEOUT);
}
}

View File

@@ -17,10 +17,6 @@ use tokio::sync::Mutex;
#[cfg(target_arch = "s390x")]
use crate::ccw;
use crate::linux_abi::*;
use crate::mount::{
DRIVER_BLK_CCW_TYPE, DRIVER_BLK_TYPE, DRIVER_MMIO_BLK_TYPE, DRIVER_NVDIMM_TYPE,
DRIVER_SCSI_TYPE,
};
use crate::pci;
use crate::sandbox::Sandbox;
use crate::uevent::{wait_for_uevent, Uevent, UeventMatcher};
@@ -38,6 +34,19 @@ macro_rules! sl {
const VM_ROOTFS: &str = "/";
pub const DRIVER_9P_TYPE: &str = "9p";
pub const DRIVER_VIRTIOFS_TYPE: &str = "virtio-fs";
pub const DRIVER_BLK_TYPE: &str = "blk";
pub const DRIVER_BLK_CCW_TYPE: &str = "blk-ccw";
pub const DRIVER_MMIO_BLK_TYPE: &str = "mmioblk";
pub const DRIVER_SCSI_TYPE: &str = "scsi";
pub const DRIVER_NVDIMM_TYPE: &str = "nvdimm";
pub const DRIVER_EPHEMERAL_TYPE: &str = "ephemeral";
pub const DRIVER_LOCAL_TYPE: &str = "local";
pub const DRIVER_WATCHABLE_BIND_TYPE: &str = "watchable-bind";
// VFIO device to be bound to a guest kernel driver
pub const DRIVER_VFIO_GK_TYPE: &str = "vfio-gk";
#[derive(Debug)]
struct DevIndexEntry {
idx: usize,
@@ -47,11 +56,6 @@ struct DevIndexEntry {
#[derive(Debug)]
struct DevIndex(HashMap<String, DevIndexEntry>);
#[instrument]
pub fn rescan_pci_bus() -> Result<()> {
online_device(SYSFS_PCI_BUS_RESCAN_FILE)
}
#[instrument]
pub fn online_device(path: &str) -> Result<()> {
fs::write(path, "1")?;
@@ -67,7 +71,7 @@ pub fn pcipath_to_sysfs(root_bus_sysfs: &str, pcipath: &pci::Path) -> Result<Str
let mut relpath = String::new();
for i in 0..pcipath.len() {
let bdf = format!("{}:{}.0", bus, pcipath[i]);
let bdf = format!("{}:{}", bus, pcipath[i]);
relpath = format!("{}/{}", relpath, bdf);
@@ -162,8 +166,6 @@ pub async fn get_virtio_blk_pci_device_name(
let sysfs_rel_path = pcipath_to_sysfs(&root_bus_sysfs, pcipath)?;
let matcher = VirtioBlkPciMatcher::new(&sysfs_rel_path);
rescan_pci_bus()?;
let uev = wait_for_uevent(sandbox, matcher).await?;
Ok(format!("{}/{}", SYSTEM_DEV_PATH, &uev.devname))
}
@@ -255,6 +257,45 @@ pub async fn wait_for_pmem_device(sandbox: &Arc<Mutex<Sandbox>>, devpath: &str)
Ok(())
}
#[derive(Debug)]
struct PciMatcher {
devpath: String,
}
impl PciMatcher {
fn new(relpath: &str) -> Result<PciMatcher> {
let root_bus = create_pci_root_bus_path();
Ok(PciMatcher {
devpath: format!("{}{}", root_bus, relpath),
})
}
}
impl UeventMatcher for PciMatcher {
fn is_match(&self, uev: &Uevent) -> bool {
uev.devpath == self.devpath
}
}
pub async fn wait_for_pci_device(
sandbox: &Arc<Mutex<Sandbox>>,
pcipath: &pci::Path,
) -> Result<pci::Address> {
let root_bus_sysfs = format!("{}{}", SYSFS_DIR, create_pci_root_bus_path());
let sysfs_rel_path = pcipath_to_sysfs(&root_bus_sysfs, pcipath)?;
let matcher = PciMatcher::new(&sysfs_rel_path)?;
let uev = wait_for_uevent(sandbox, matcher).await?;
let addr = uev
.devpath
.rsplit('/')
.next()
.ok_or_else(|| anyhow!("Bad device path {:?} in uevent", &uev.devpath))?;
let addr = pci::Address::from_str(addr)?;
Ok(addr)
}
/// Scan SCSI bus for the given SCSI address(SCSI-Id and LUN)
#[instrument]
fn scan_scsi_bus(scsi_addr: &str) -> Result<()> {
@@ -451,6 +492,37 @@ async fn virtio_nvdimm_device_handler(
update_spec_device_list(device, spec, devidx)
}
fn split_vfio_option(opt: &str) -> Option<(&str, &str)> {
let mut tokens = opt.split('=');
let hostbdf = tokens.next()?;
let path = tokens.next()?;
if tokens.next().is_some() {
None
} else {
Some((hostbdf, path))
}
}
// device.options should have one entry for each PCI device in the VFIO group
// Each option should have the form "DDDD:BB:DD.F=<pcipath>"
// DDDD:BB:DD.F is the device's PCI address in the host
// <pcipath> is a PCI path to the device in the guest (see pci.rs)
async fn vfio_gk_device_handler(
device: &Device,
_: &mut Spec,
sandbox: &Arc<Mutex<Sandbox>>,
_: &DevIndex,
) -> Result<()> {
for opt in device.options.iter() {
let (_, pcipath) =
split_vfio_option(opt).ok_or_else(|| anyhow!("Malformed VFIO option {:?}", opt))?;
let pcipath = pci::Path::from_str(pcipath)?;
wait_for_pci_device(sandbox, &pcipath).await?;
}
Ok(())
}
impl DevIndex {
fn new(spec: &Spec) -> DevIndex {
let mut map = HashMap::new();
@@ -520,6 +592,7 @@ async fn add_device(
DRIVER_MMIO_BLK_TYPE => virtiommio_blk_device_handler(device, spec, sandbox, devidx).await,
DRIVER_NVDIMM_TYPE => virtio_nvdimm_device_handler(device, spec, sandbox, devidx).await,
DRIVER_SCSI_TYPE => virtio_scsi_device_handler(device, spec, sandbox, devidx).await,
DRIVER_VFIO_GK_TYPE => vfio_gk_device_handler(device, spec, sandbox, devidx).await,
_ => Err(anyhow!("Unknown device type {}", device.field_type)),
}
}
@@ -1068,4 +1141,14 @@ mod tests {
assert!(!matcher_b.is_match(&uev_a));
assert!(!matcher_a.is_match(&uev_b));
}
#[test]
fn test_split_vfio_option() {
assert_eq!(
split_vfio_option("0000:01:00.0=02/01"),
Some(("0000:01:00.0", "02/01"))
);
assert_eq!(split_vfio_option("0000:01:00.0=02/01=rubbish"), None);
assert_eq!(split_vfio_option("0000:01:00.0"), None);
}
}

View File

@@ -9,7 +9,6 @@
use std::fs;
pub const SYSFS_DIR: &str = "/sys";
pub const SYSFS_PCI_BUS_RESCAN_FILE: &str = "/sys/bus/pci/rescan";
#[cfg(any(
target_arch = "powerpc64",
target_arch = "s390x",

View File

@@ -77,11 +77,11 @@ mod rpc;
mod tracer;
const NAME: &str = "kata-agent";
const KERNEL_CMDLINE_FILE: &str = "/proc/cmdline";
lazy_static! {
static ref AGENT_CONFIG: Arc<RwLock<AgentConfig>> =
Arc::new(RwLock::new(config::AgentConfig::new()));
static ref AGENT_CONFIG: Arc<RwLock<AgentConfig>> = Arc::new(RwLock::new(
AgentConfig::from_cmdline("/proc/cmdline").unwrap()
));
}
#[instrument]
@@ -134,15 +134,11 @@ async fn real_main() -> std::result::Result<(), Box<dyn std::error::Error>> {
console::initialize();
lazy_static::initialize(&AGENT_CONFIG);
// support vsock log
let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC)?;
let (shutdown_tx, shutdown_rx) = channel(true);
let agent_config = AGENT_CONFIG.clone();
let init_mode = unistd::getpid() == Pid::from_raw(1);
if init_mode {
// dup a new file descriptor for this temporary logger writer,
@@ -163,20 +159,15 @@ async fn real_main() -> std::result::Result<(), Box<dyn std::error::Error>> {
e
})?;
let mut config = agent_config.write().await;
config.parse_cmdline(KERNEL_CMDLINE_FILE)?;
lazy_static::initialize(&AGENT_CONFIG);
init_agent_as_init(&logger, config.unified_cgroup_hierarchy)?;
init_agent_as_init(&logger, AGENT_CONFIG.read().await.unified_cgroup_hierarchy)?;
drop(logger_async_guard);
} else {
// once parsed cmdline and set the config, release the write lock
// as soon as possible in case other thread would get read lock on
// it.
let mut config = agent_config.write().await;
config.parse_cmdline(KERNEL_CMDLINE_FILE)?;
lazy_static::initialize(&AGENT_CONFIG);
}
let config = agent_config.read().await;
let config = AGENT_CONFIG.read().await;
let log_vport = config.log_vport as u32;
let log_handle = tokio::spawn(create_logger_task(rfd, log_vport, shutdown_rx.clone()));
@@ -209,12 +200,12 @@ async fn real_main() -> std::result::Result<(), Box<dyn std::error::Error>> {
let _ = tracer::setup_tracing(NAME, &logger, &config)?;
}
let root = span!(tracing::Level::TRACE, "root-span", work_units = 2);
let root_span = span!(tracing::Level::TRACE, "root-span");
// XXX: Start the root trace transaction.
//
// XXX: Note that *ALL* spans needs to start after this point!!
let _enter = root.enter();
let span_guard = root_span.enter();
// Start the sandbox and wait for its ttRPC server to end
start_sandbox(&logger, &config, init_mode, &mut tasks, shutdown_rx.clone()).await?;
@@ -244,6 +235,10 @@ async fn real_main() -> std::result::Result<(), Box<dyn std::error::Error>> {
}
}
// force flushing spans
drop(span_guard);
drop(root_span);
if config.tracing != tracer::TraceType::Disabled {
tracer::end_tracing();
}

View File

@@ -4,28 +4,27 @@
//
use std::collections::HashMap;
use std::ffi::CString;
use std::fs;
use std::fs::File;
use std::io;
use std::io::{BufRead, BufReader};
use std::iter;
use std::os::unix::fs::{MetadataExt, PermissionsExt};
use std::path::Path;
use std::ptr::null;
use std::str::FromStr;
use std::sync::Arc;
use tokio::sync::Mutex;
use libc::{c_void, mount};
use nix::mount::{self, MsFlags};
use nix::mount::MsFlags;
use nix::unistd::Gid;
use regex::Regex;
use crate::device::{
get_scsi_device_name, get_virtio_blk_pci_device_name, online_device, wait_for_pmem_device,
DRIVER_9P_TYPE, DRIVER_BLK_CCW_TYPE, DRIVER_BLK_TYPE, DRIVER_EPHEMERAL_TYPE, DRIVER_LOCAL_TYPE,
DRIVER_MMIO_BLK_TYPE, DRIVER_NVDIMM_TYPE, DRIVER_SCSI_TYPE, DRIVER_VIRTIOFS_TYPE,
DRIVER_WATCHABLE_BIND_TYPE,
};
use crate::linux_abi::*;
use crate::pci;
@@ -37,17 +36,6 @@ use anyhow::{anyhow, Context, Result};
use slog::Logger;
use tracing::instrument;
pub const DRIVER_9P_TYPE: &str = "9p";
pub const DRIVER_VIRTIOFS_TYPE: &str = "virtio-fs";
pub const DRIVER_BLK_TYPE: &str = "blk";
pub const DRIVER_BLK_CCW_TYPE: &str = "blk-ccw";
pub const DRIVER_MMIO_BLK_TYPE: &str = "mmioblk";
pub const DRIVER_SCSI_TYPE: &str = "scsi";
pub const DRIVER_NVDIMM_TYPE: &str = "nvdimm";
pub const DRIVER_EPHEMERAL_TYPE: &str = "ephemeral";
pub const DRIVER_LOCAL_TYPE: &str = "local";
pub const DRIVER_WATCHABLE_BIND_TYPE: &str = "watchable-bind";
pub const TYPE_ROOTFS: &str = "rootfs";
pub const MOUNT_GUEST_TAG: &str = "kataShared";
@@ -149,96 +137,53 @@ pub const STORAGE_HANDLER_LIST: &[&str] = &[
DRIVER_WATCHABLE_BIND_TYPE,
];
#[derive(Debug, Clone)]
pub struct BareMount<'a> {
source: &'a str,
destination: &'a str,
fs_type: &'a str,
#[instrument]
pub fn baremount(
source: &str,
destination: &str,
fs_type: &str,
flags: MsFlags,
options: &'a str,
logger: Logger,
}
options: &str,
logger: &Logger,
) -> Result<()> {
let logger = logger.new(o!("subsystem" => "baremount"));
// mount mounts a source in to a destination. This will do some bookkeeping:
// * evaluate all symlinks
// * ensure the source exists
impl<'a> BareMount<'a> {
#[instrument]
pub fn new(
s: &'a str,
d: &'a str,
fs_type: &'a str,
flags: MsFlags,
options: &'a str,
logger: &Logger,
) -> Self {
BareMount {
source: s,
destination: d,
fs_type,
flags,
options,
logger: logger.new(o!("subsystem" => "baremount")),
}
if source.is_empty() {
return Err(anyhow!("need mount source"));
}
#[instrument]
pub fn mount(&self) -> Result<()> {
let source;
let dest;
let fs_type;
let mut options = null();
let cstr_options: CString;
let cstr_source: CString;
let cstr_dest: CString;
let cstr_fs_type: CString;
if self.source.is_empty() {
return Err(anyhow!("need mount source"));
}
if self.destination.is_empty() {
return Err(anyhow!("need mount destination"));
}
cstr_source = CString::new(self.source)?;
source = cstr_source.as_ptr();
cstr_dest = CString::new(self.destination)?;
dest = cstr_dest.as_ptr();
if self.fs_type.is_empty() {
return Err(anyhow!("need mount FS type"));
}
cstr_fs_type = CString::new(self.fs_type)?;
fs_type = cstr_fs_type.as_ptr();
if !self.options.is_empty() {
cstr_options = CString::new(self.options)?;
options = cstr_options.as_ptr() as *const c_void;
}
info!(
self.logger,
"mount source={:?}, dest={:?}, fs_type={:?}, options={:?}",
self.source,
self.destination,
self.fs_type,
self.options
);
let rc = unsafe { mount(source, dest, fs_type, self.flags.bits(), options) };
if rc < 0 {
return Err(anyhow!(
"failed to mount {:?} to {:?}, with error: {}",
self.source,
self.destination,
io::Error::last_os_error()
));
}
Ok(())
if destination.is_empty() {
return Err(anyhow!("need mount destination"));
}
if fs_type.is_empty() {
return Err(anyhow!("need mount FS type"));
}
info!(
logger,
"mount source={:?}, dest={:?}, fs_type={:?}, options={:?}",
source,
destination,
fs_type,
options
);
nix::mount::mount(
Some(source),
destination,
Some(fs_type),
flags,
Some(options),
)
.map_err(|e| {
anyhow!(
"failed to mount {:?} to {:?}, with error: {}",
source,
destination,
e
)
})
}
#[instrument]
@@ -486,17 +431,14 @@ fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
return Ok(());
}
match storage.fstype.as_str() {
DRIVER_9P_TYPE | DRIVER_VIRTIOFS_TYPE => {
let dest_path = Path::new(storage.mount_point.as_str());
if !dest_path.exists() {
fs::create_dir_all(dest_path).context("Create mount destination failed")?;
}
}
_ => {
ensure_destination_exists(storage.mount_point.as_str(), storage.fstype.as_str())?;
}
let mount_path = Path::new(&storage.mount_point);
let src_path = Path::new(&storage.source);
if storage.fstype == "bind" && !src_path.is_dir() {
ensure_destination_file_exists(mount_path)
} else {
fs::create_dir_all(mount_path).map_err(anyhow::Error::from)
}
.context("Could not create mountpoint")?;
let options_vec = storage.options.to_vec();
let options_vec = options_vec.iter().map(String::as_str).collect();
@@ -509,16 +451,14 @@ fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
"mount-options" => options.as_str(),
);
let bare_mount = BareMount::new(
baremount(
storage.source.as_str(),
storage.mount_point.as_str(),
storage.fstype.as_str(),
flags,
options.as_str(),
&logger,
);
bare_mount.mount()
)
}
/// Looks for `mount_point` entry in the /proc/mounts.
@@ -637,11 +577,9 @@ fn mount_to_rootfs(logger: &Logger, m: &InitMount) -> Result<()> {
let (flags, options) = parse_mount_flags_and_options(options_vec);
let bare_mount = BareMount::new(m.src, m.dest, m.fstype, flags, options.as_str(), logger);
fs::create_dir_all(Path::new(m.dest)).context("could not create directory")?;
bare_mount.mount().or_else(|e| {
baremount(m.src, m.dest, m.fstype, flags, &options, logger).or_else(|e| {
if m.src != "dev" {
return Err(e);
}
@@ -816,32 +754,27 @@ pub fn cgroups_mount(logger: &Logger, unified_cgroup_hierarchy: bool) -> Result<
#[instrument]
pub fn remove_mounts(mounts: &[String]) -> Result<()> {
for m in mounts.iter() {
mount::umount(m.as_str()).context(format!("failed to umount {:?}", m))?;
nix::mount::umount(m.as_str()).context(format!("failed to umount {:?}", m))?;
}
Ok(())
}
// ensure_destination_exists will recursively create a given mountpoint. If directories
// are created, their permissions are initialized to mountPerm(0755)
#[instrument]
fn ensure_destination_exists(destination: &str, fs_type: &str) -> Result<()> {
let d = Path::new(destination);
if d.exists() {
fn ensure_destination_file_exists(path: &Path) -> Result<()> {
if path.is_file() {
return Ok(());
}
let dir = d
.parent()
.ok_or_else(|| anyhow!("mount destination {} doesn't exist", destination))?;
if !dir.exists() {
fs::create_dir_all(dir).context(format!("create dir all {:?}", dir))?;
} else if path.exists() {
return Err(anyhow!("{:?} exists but is not a regular file", path));
}
if fs_type != "bind" || d.is_dir() {
fs::create_dir_all(d).context(format!("create dir all {:?}", d))?;
} else {
fs::File::create(d).context(format!("create file {:?}", d))?;
}
// The only way parent() can return None is if the path is /,
// which always exists, so the test above will already have caught
// it, thus the unwrap() is safe
let dir = path.parent().unwrap();
fs::create_dir_all(dir).context(format!("create_dir_all {:?}", dir))?;
fs::File::create(path).context(format!("create empty file {:?}", path))?;
Ok(())
}
@@ -865,8 +798,6 @@ fn parse_options(option_list: Vec<String>) -> HashMap<String, String> {
mod tests {
use super::*;
use crate::{skip_if_not_root, skip_loop_if_not_root, skip_loop_if_root};
use libc::umount;
use std::fs::metadata;
use std::fs::File;
use std::fs::OpenOptions;
use std::io::Write;
@@ -1006,7 +937,7 @@ mod tests {
std::fs::create_dir_all(d).expect("failed to created directory");
}
let bare_mount = BareMount::new(
let result = baremount(
&src_filename,
&dest_filename,
d.fs_type,
@@ -1015,25 +946,13 @@ mod tests {
&logger,
);
let result = bare_mount.mount();
let msg = format!("{}: result: {:?}", msg, result);
if d.error_contains.is_empty() {
assert!(result.is_ok(), "{}", msg);
// Cleanup
unsafe {
let cstr_dest =
CString::new(dest_filename).expect("failed to convert dest to cstring");
let umount_dest = cstr_dest.as_ptr();
let ret = umount(umount_dest);
let msg = format!("{}: umount result: {:?}", msg, result);
assert!(ret == 0, "{}", msg);
};
nix::mount::umount(dest_filename.as_str()).unwrap();
continue;
}
@@ -1103,7 +1022,7 @@ mod tests {
}
// Create an actual mount
let bare_mount = BareMount::new(
let result = baremount(
mnt_src_filename,
mnt_dest_filename,
"bind",
@@ -1111,8 +1030,6 @@ mod tests {
"",
&logger,
);
let result = bare_mount.mount();
assert!(result.is_ok(), "mount for test setup failed");
let tests = &[
@@ -1444,37 +1361,20 @@ mod tests {
}
#[test]
fn test_ensure_destination_exists() {
fn test_ensure_destination_file_exists() {
let dir = tempdir().expect("failed to create tmpdir");
let mut testfile = dir.into_path();
testfile.push("testfile");
let result = ensure_destination_exists(testfile.to_str().unwrap(), "bind");
let result = ensure_destination_file_exists(&testfile);
assert!(result.is_ok());
assert!(testfile.exists());
let result = ensure_destination_exists(testfile.to_str().unwrap(), "bind");
let result = ensure_destination_file_exists(&testfile);
assert!(result.is_ok());
let meta = metadata(testfile).unwrap();
assert!(meta.is_file());
let dir = tempdir().expect("failed to create tmpdir");
let mut testdir = dir.into_path();
testdir.push("testdir");
let result = ensure_destination_exists(testdir.to_str().unwrap(), "ext4");
assert!(result.is_ok());
assert!(testdir.exists());
let result = ensure_destination_exists(testdir.to_str().unwrap(), "ext4");
assert!(result.is_ok());
//let meta = metadata(testdir.to_str().unwrap()).unwrap();
let meta = metadata(testdir).unwrap();
assert!(meta.is_dir());
assert!(testfile.is_file());
}
}

View File

@@ -13,7 +13,7 @@ use std::fs::File;
use std::path::{Path, PathBuf};
use tracing::instrument;
use crate::mount::{BareMount, FLAGS};
use crate::mount::{baremount, FLAGS};
use slog::Logger;
const PERSISTENT_NS_DIR: &str = "/var/run/sandbox-ns";
@@ -129,8 +129,7 @@ impl Namespace {
}
};
let bare_mount = BareMount::new(source, destination, "none", flags, "", &logger);
bare_mount.mount().map_err(|e| {
baremount(source, destination, "none", flags, "", &logger).map_err(|e| {
anyhow!(
"Failed to mount {} to {} with err:{:?}",
source,

View File

@@ -312,7 +312,6 @@ impl Handle {
for route in list {
let link = self.find_link(LinkFilter::Name(&route.device)).await?;
let is_v6 = is_ipv6(route.get_gateway()) || is_ipv6(route.get_dest());
const MAIN_TABLE: u8 = packet::constants::RT_TABLE_MAIN;
const UNICAST: u8 = packet::constants::RTN_UNICAST;
@@ -334,7 +333,7 @@ impl Handle {
// `rtnetlink` offers a separate request builders for different IP versions (IP v4 and v6).
// This if branch is a bit clumsy because it does almost the same.
if is_v6 {
if route.get_family() == IPFamily::v6 {
let dest_addr = if !route.dest.is_empty() {
Ipv6Network::from_str(&route.dest)?
} else {
@@ -594,10 +593,6 @@ fn format_address(data: &[u8]) -> Result<String> {
}
}
fn is_ipv6(str: &str) -> bool {
Ipv6Addr::from_str(str).is_ok()
}
fn parse_mac_address(addr: &str) -> Result<[u8; 6]> {
let mut split = addr.splitn(6, ':');
@@ -932,16 +927,6 @@ mod tests {
assert_eq!(bytes, [0xAB, 0x0C, 0xDE, 0x12, 0x34, 0x56]);
}
#[test]
fn check_ipv6() {
assert!(is_ipv6("::1"));
assert!(is_ipv6("2001:0:3238:DFE1:63::FEFB"));
assert!(!is_ipv6(""));
assert!(!is_ipv6("127.0.0.1"));
assert!(!is_ipv6("10.10.10.10"));
}
fn clean_env_for_test_add_one_arp_neighbor(dummy_name: &str, ip: &str) {
// ip link delete dummy
Command::new("ip")

View File

@@ -9,51 +9,143 @@ use std::str::FromStr;
use anyhow::anyhow;
// The PCI spec reserves 5 bits for slot number (a.k.a. device
// number), giving slots 0..31
// The PCI spec reserves 5 bits (0..31) for slot number (a.k.a. device
// number)
const SLOT_BITS: u8 = 5;
const SLOT_MAX: u8 = (1 << SLOT_BITS) - 1;
// Represents a PCI function's slot number (a.k.a. device number),
// giving its location on a single bus
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Slot(u8);
// The PCI spec reserves 3 bits (0..7) for function number
const FUNCTION_BITS: u8 = 3;
const FUNCTION_MAX: u8 = (1 << FUNCTION_BITS) - 1;
impl Slot {
pub fn new<T: TryInto<u8> + fmt::Display + Copy>(v: T) -> anyhow::Result<Self> {
if let Ok(v8) = v.try_into() {
if v8 <= SLOT_MAX {
return Ok(Slot(v8));
// Represents a PCI function's slot (a.k.a. device) and function
// numbers, giving its location on a single logical bus
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct SlotFn(u8);
impl SlotFn {
pub fn new<T, U>(ss: T, f: U) -> anyhow::Result<Self>
where
T: TryInto<u8> + fmt::Display + Copy,
U: TryInto<u8> + fmt::Display + Copy,
{
let ss8 = match ss.try_into() {
Ok(ss8) if ss8 <= SLOT_MAX => ss8,
_ => {
return Err(anyhow!(
"PCI slot {} should be in range [0..{:#x}]",
ss,
SLOT_MAX
));
}
}
Err(anyhow!(
"PCI slot {} should be in range [0..{:#x}]",
v,
SLOT_MAX
))
};
let f8 = match f.try_into() {
Ok(f8) if f8 <= FUNCTION_MAX => f8,
_ => {
return Err(anyhow!(
"PCI function {} should be in range [0..{:#x}]",
f,
FUNCTION_MAX
));
}
};
Ok(SlotFn(ss8 << FUNCTION_BITS | f8))
}
pub fn slot(self) -> u8 {
self.0 >> FUNCTION_BITS
}
pub fn function(self) -> u8 {
self.0 & FUNCTION_MAX
}
}
impl FromStr for Slot {
impl FromStr for SlotFn {
type Err = anyhow::Error;
fn from_str(s: &str) -> anyhow::Result<Self> {
let v = isize::from_str_radix(s, 16)?;
Slot::new(v)
let mut tokens = s.split('.').fuse();
let slot = tokens.next();
let func = tokens.next();
if slot.is_none() || tokens.next().is_some() {
return Err(anyhow!(
"PCI slot/function {} should have the format SS.F",
s
));
}
let slot = isize::from_str_radix(slot.unwrap(), 16)?;
let func = match func {
Some(func) => isize::from_str_radix(func, 16)?,
None => 0,
};
SlotFn::new(slot, func)
}
}
impl fmt::Display for Slot {
impl fmt::Display for SlotFn {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
write!(f, "{:02x}", self.0)
write!(f, "{:02x}.{:01x}", self.slot(), self.function())
}
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Address {
domain: u16,
bus: u8,
slotfn: SlotFn,
}
impl Address {
pub fn new(domain: u16, bus: u8, slotfn: SlotFn) -> Self {
Address {
domain,
bus,
slotfn,
}
}
}
impl FromStr for Address {
type Err = anyhow::Error;
fn from_str(s: &str) -> anyhow::Result<Self> {
let mut tokens = s.split(':').fuse();
let domain = tokens.next();
let bus = tokens.next();
let slotfn = tokens.next();
if domain.is_none() || bus.is_none() || slotfn.is_none() || tokens.next().is_some() {
return Err(anyhow!(
"PCI address {} should have the format DDDD:BB:SS.F",
s
));
}
let domain = u16::from_str_radix(domain.unwrap(), 16)?;
let bus = u8::from_str_radix(bus.unwrap(), 16)?;
let slotfn = SlotFn::from_str(slotfn.unwrap())?;
Ok(Address::new(domain, bus, slotfn))
}
}
impl fmt::Display for Address {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
write!(f, "{:04x}:{:02x}:{}", self.domain, self.bus, self.slotfn)
}
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Path(Vec<Slot>);
pub struct Path(Vec<SlotFn>);
impl Path {
pub fn new(slots: Vec<Slot>) -> anyhow::Result<Self> {
pub fn new(slots: Vec<SlotFn>) -> anyhow::Result<Self> {
if slots.is_empty() {
return Err(anyhow!("PCI path must have at least one element"));
}
@@ -63,7 +155,7 @@ impl Path {
// Let Path be treated as a slice of Slots
impl Deref for Path {
type Target = [Slot];
type Target = [SlotFn];
fn deref(&self) -> &Self::Target {
&self.0
@@ -85,83 +177,170 @@ impl FromStr for Path {
type Err = anyhow::Error;
fn from_str(s: &str) -> anyhow::Result<Self> {
let rslots: anyhow::Result<Vec<Slot>> = s.split('/').map(Slot::from_str).collect();
let rslots: anyhow::Result<Vec<SlotFn>> = s.split('/').map(SlotFn::from_str).collect();
Path::new(rslots?)
}
}
#[cfg(test)]
mod tests {
use crate::pci::{Path, Slot};
use super::*;
use std::str::FromStr;
#[test]
fn test_slot() {
fn test_slotfn() {
// Valid slots
let slot = Slot::new(0x00).unwrap();
assert_eq!(format!("{}", slot), "00");
let sf = SlotFn::new(0x00, 0x0).unwrap();
assert_eq!(format!("{}", sf), "00.0");
let slot = Slot::from_str("00").unwrap();
assert_eq!(format!("{}", slot), "00");
let sf = SlotFn::from_str("00.0").unwrap();
assert_eq!(format!("{}", sf), "00.0");
let slot = Slot::new(31).unwrap();
let slot2 = Slot::from_str("1f").unwrap();
assert_eq!(slot, slot2);
let sf = SlotFn::from_str("00").unwrap();
assert_eq!(format!("{}", sf), "00.0");
let sf = SlotFn::new(31, 7).unwrap();
let sf2 = SlotFn::from_str("1f.7").unwrap();
assert_eq!(sf, sf2);
// Bad slots
let slot = Slot::new(-1);
assert!(slot.is_err());
let sf = SlotFn::new(-1, 0);
assert!(sf.is_err());
let slot = Slot::new(32);
assert!(slot.is_err());
let sf = SlotFn::new(32, 0);
assert!(sf.is_err());
let slot = Slot::from_str("20");
assert!(slot.is_err());
let sf = SlotFn::from_str("20.0");
assert!(sf.is_err());
let slot = Slot::from_str("xy");
assert!(slot.is_err());
let sf = SlotFn::from_str("20");
assert!(sf.is_err());
let slot = Slot::from_str("00/");
assert!(slot.is_err());
let sf = SlotFn::from_str("xy.0");
assert!(sf.is_err());
let slot = Slot::from_str("");
assert!(slot.is_err());
let sf = SlotFn::from_str("xy");
assert!(sf.is_err());
// Bad functions
let sf = SlotFn::new(0, -1);
assert!(sf.is_err());
let sf = SlotFn::new(0, 8);
assert!(sf.is_err());
let sf = SlotFn::from_str("00.8");
assert!(sf.is_err());
let sf = SlotFn::from_str("00.x");
assert!(sf.is_err());
// Bad formats
let sf = SlotFn::from_str("");
assert!(sf.is_err());
let sf = SlotFn::from_str("00.0.0");
assert!(sf.is_err());
let sf = SlotFn::from_str("00.0/");
assert!(sf.is_err());
let sf = SlotFn::from_str("00/");
assert!(sf.is_err());
}
#[test]
fn test_address() {
// Valid addresses
let sf0_0 = SlotFn::new(0, 0).unwrap();
let sf1f_7 = SlotFn::new(0x1f, 7).unwrap();
let addr = Address::new(0, 0, sf0_0);
assert_eq!(format!("{}", addr), "0000:00:00.0");
let addr2 = Address::from_str("0000:00:00.0").unwrap();
assert_eq!(addr, addr2);
let addr = Address::new(0xffff, 0xff, sf1f_7);
assert_eq!(format!("{}", addr), "ffff:ff:1f.7");
let addr2 = Address::from_str("ffff:ff:1f.7").unwrap();
assert_eq!(addr, addr2);
// Bad addresses
let addr = Address::from_str("10000:00:00.0");
assert!(addr.is_err());
let addr = Address::from_str("0000:100:00.0");
assert!(addr.is_err());
let addr = Address::from_str("0000:00:20.0");
assert!(addr.is_err());
let addr = Address::from_str("0000:00:00.8");
assert!(addr.is_err());
let addr = Address::from_str("xyz");
assert!(addr.is_err());
let addr = Address::from_str("xyxy:xy:xy.z");
assert!(addr.is_err());
let addr = Address::from_str("0000:00:00.0:00");
assert!(addr.is_err());
}
#[test]
fn test_path() {
let slot3 = Slot::new(0x03).unwrap();
let slot4 = Slot::new(0x04).unwrap();
let slot5 = Slot::new(0x05).unwrap();
let sf3_0 = SlotFn::new(0x03, 0).unwrap();
let sf4_0 = SlotFn::new(0x04, 0).unwrap();
let sf5_0 = SlotFn::new(0x05, 0).unwrap();
let sfa_5 = SlotFn::new(0x0a, 5).unwrap();
let sfb_6 = SlotFn::new(0x0b, 6).unwrap();
let sfc_7 = SlotFn::new(0x0c, 7).unwrap();
// Valid paths
let pcipath = Path::new(vec![slot3]).unwrap();
assert_eq!(format!("{}", pcipath), "03");
let pcipath = Path::new(vec![sf3_0]).unwrap();
assert_eq!(format!("{}", pcipath), "03.0");
let pcipath2 = Path::from_str("03.0").unwrap();
assert_eq!(pcipath, pcipath2);
let pcipath2 = Path::from_str("03").unwrap();
assert_eq!(pcipath, pcipath2);
assert_eq!(pcipath.len(), 1);
assert_eq!(pcipath[0], slot3);
assert_eq!(pcipath[0], sf3_0);
let pcipath = Path::new(vec![slot3, slot4]).unwrap();
assert_eq!(format!("{}", pcipath), "03/04");
let pcipath = Path::new(vec![sf3_0, sf4_0]).unwrap();
assert_eq!(format!("{}", pcipath), "03.0/04.0");
let pcipath2 = Path::from_str("03.0/04.0").unwrap();
assert_eq!(pcipath, pcipath2);
let pcipath2 = Path::from_str("03/04").unwrap();
assert_eq!(pcipath, pcipath2);
assert_eq!(pcipath.len(), 2);
assert_eq!(pcipath[0], slot3);
assert_eq!(pcipath[1], slot4);
assert_eq!(pcipath[0], sf3_0);
assert_eq!(pcipath[1], sf4_0);
let pcipath = Path::new(vec![slot3, slot4, slot5]).unwrap();
assert_eq!(format!("{}", pcipath), "03/04/05");
let pcipath = Path::new(vec![sf3_0, sf4_0, sf5_0]).unwrap();
assert_eq!(format!("{}", pcipath), "03.0/04.0/05.0");
let pcipath2 = Path::from_str("03.0/04.0/05.0").unwrap();
assert_eq!(pcipath, pcipath2);
let pcipath2 = Path::from_str("03/04/05").unwrap();
assert_eq!(pcipath, pcipath2);
assert_eq!(pcipath.len(), 3);
assert_eq!(pcipath[0], slot3);
assert_eq!(pcipath[1], slot4);
assert_eq!(pcipath[2], slot5);
assert_eq!(pcipath[0], sf3_0);
assert_eq!(pcipath[1], sf4_0);
assert_eq!(pcipath[2], sf5_0);
let pcipath = Path::new(vec![sfa_5, sfb_6, sfc_7]).unwrap();
assert_eq!(format!("{}", pcipath), "0a.5/0b.6/0c.7");
let pcipath2 = Path::from_str("0a.5/0b.6/0c.7").unwrap();
assert_eq!(pcipath, pcipath2);
assert_eq!(pcipath.len(), 3);
assert_eq!(pcipath[0], sfa_5);
assert_eq!(pcipath[1], sfb_6);
assert_eq!(pcipath[2], sfc_7);
// Bad paths
assert!(Path::new(vec!()).is_err());
assert!(Path::from_str("20").is_err());
assert!(Path::from_str("00.8").is_err());
assert!(Path::from_str("//").is_err());
assert!(Path::from_str("xyz").is_err());
}

View File

@@ -3,7 +3,6 @@
// SPDX-License-Identifier: Apache-2.0
//
use crate::pci;
use async_trait::async_trait;
use rustjail::{pipestream::PipeStream, process::StreamType};
use tokio::io::{AsyncReadExt, AsyncWriteExt, ReadHalf};
@@ -21,7 +20,7 @@ use ttrpc::{
use anyhow::{anyhow, Context, Result};
use oci::{LinuxNamespace, Root, Spec};
use protobuf::{RepeatedField, SingularPtrField};
use protobuf::{Message, RepeatedField, SingularPtrField};
use protocols::agent::{
AddSwapRequest, AgentDetails, CopyFileRequest, GuestDetailsResponse, Interfaces, Metrics,
OOMEvent, ReadStreamResponse, Routes, StatsContainerResponse, WaitProcessResponse,
@@ -44,12 +43,13 @@ use nix::sys::stat;
use nix::unistd::{self, Pid};
use rustjail::process::ProcessOperations;
use crate::device::{add_devices, pcipath_to_sysfs, rescan_pci_bus, update_device_cgroup};
use crate::device::{add_devices, get_virtio_blk_pci_device_name, update_device_cgroup};
use crate::linux_abi::*;
use crate::metrics::get_metrics;
use crate::mount::{add_storages, remove_mounts, BareMount, STORAGE_HANDLER_LIST};
use crate::mount::{add_storages, baremount, remove_mounts, STORAGE_HANDLER_LIST};
use crate::namespace::{NSTYPEIPC, NSTYPEPID, NSTYPEUTS};
use crate::network::setup_guest_dns;
use crate::pci;
use crate::random;
use crate::sandbox::Sandbox;
use crate::version::{AGENT_VERSION, API_VERSION};
@@ -86,6 +86,21 @@ macro_rules! sl {
};
}
macro_rules! is_allowed {
($req:ident) => {
if !AGENT_CONFIG
.read()
.await
.is_allowed_endpoint($req.descriptor().name())
{
return Err(ttrpc_error(
ttrpc::Code::UNIMPLEMENTED,
format!("{} is blocked", $req.descriptor().name()),
));
}
};
}
#[derive(Clone, Debug)]
pub struct AgentService {
sandbox: Arc<Mutex<Sandbox>>,
@@ -134,10 +149,6 @@ impl AgentService {
info!(sl!(), "receive createcontainer, spec: {:?}", &oci);
// re-scan PCI bus
// looking for hidden devices
rescan_pci_bus().context("Could not rescan PCI bus")?;
// Some devices need some extra processing (the ones invoked with
// --device for instance), and that's what this call is doing. It
// updates the devices listed in the OCI spec, so that they actually
@@ -535,6 +546,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::CreateContainerRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "create_container", req);
is_allowed!(req);
match self.do_create_container(req).await {
Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
Ok(_) => Ok(Empty::new()),
@@ -547,6 +559,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::StartContainerRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "start_container", req);
is_allowed!(req);
match self.do_start_container(req).await {
Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
Ok(_) => Ok(Empty::new()),
@@ -559,6 +572,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::RemoveContainerRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "remove_container", req);
is_allowed!(req);
match self.do_remove_container(req).await {
Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
Ok(_) => Ok(Empty::new()),
@@ -571,6 +585,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::ExecProcessRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "exec_process", req);
is_allowed!(req);
match self.do_exec_process(req).await {
Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
Ok(_) => Ok(Empty::new()),
@@ -583,6 +598,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::SignalProcessRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "signal_process", req);
is_allowed!(req);
match self.do_signal_process(req).await {
Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
Ok(_) => Ok(Empty::new()),
@@ -595,6 +611,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::WaitProcessRequest,
) -> ttrpc::Result<WaitProcessResponse> {
trace_rpc_call!(ctx, "wait_process", req);
is_allowed!(req);
self.do_wait_process(req)
.await
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))
@@ -606,6 +623,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::UpdateContainerRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "update_container", req);
is_allowed!(req);
let cid = req.container_id.clone();
let res = req.resources;
@@ -641,6 +659,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::StatsContainerRequest,
) -> ttrpc::Result<StatsContainerResponse> {
trace_rpc_call!(ctx, "stats_container", req);
is_allowed!(req);
let cid = req.container_id;
let s = Arc::clone(&self.sandbox);
let mut sandbox = s.lock().await;
@@ -662,6 +681,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::PauseContainerRequest,
) -> ttrpc::Result<protocols::empty::Empty> {
trace_rpc_call!(ctx, "pause_container", req);
is_allowed!(req);
let cid = req.get_container_id();
let s = Arc::clone(&self.sandbox);
let mut sandbox = s.lock().await;
@@ -685,6 +705,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::ResumeContainerRequest,
) -> ttrpc::Result<protocols::empty::Empty> {
trace_rpc_call!(ctx, "resume_container", req);
is_allowed!(req);
let cid = req.get_container_id();
let s = Arc::clone(&self.sandbox);
let mut sandbox = s.lock().await;
@@ -707,6 +728,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
_ctx: &TtrpcContext,
req: protocols::agent::WriteStreamRequest,
) -> ttrpc::Result<WriteStreamResponse> {
is_allowed!(req);
self.do_write_stream(req)
.await
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))
@@ -717,6 +739,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
_ctx: &TtrpcContext,
req: protocols::agent::ReadStreamRequest,
) -> ttrpc::Result<ReadStreamResponse> {
is_allowed!(req);
self.do_read_stream(req, true)
.await
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))
@@ -727,6 +750,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
_ctx: &TtrpcContext,
req: protocols::agent::ReadStreamRequest,
) -> ttrpc::Result<ReadStreamResponse> {
is_allowed!(req);
self.do_read_stream(req, false)
.await
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))
@@ -738,6 +762,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::CloseStdinRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "close_stdin", req);
is_allowed!(req);
let cid = req.container_id.clone();
let eid = req.exec_id;
@@ -774,6 +799,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::TtyWinResizeRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "tty_win_resize", req);
is_allowed!(req);
let cid = req.container_id.clone();
let eid = req.exec_id.clone();
@@ -814,6 +840,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::UpdateInterfaceRequest,
) -> ttrpc::Result<Interface> {
trace_rpc_call!(ctx, "update_interface", req);
is_allowed!(req);
let interface = req.interface.into_option().ok_or_else(|| {
ttrpc_error(
@@ -841,6 +868,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::UpdateRoutesRequest,
) -> ttrpc::Result<Routes> {
trace_rpc_call!(ctx, "update_routes", req);
is_allowed!(req);
let new_routes = req
.routes
@@ -881,6 +909,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::ListInterfacesRequest,
) -> ttrpc::Result<Interfaces> {
trace_rpc_call!(ctx, "list_interfaces", req);
is_allowed!(req);
let list = self
.sandbox
@@ -908,6 +937,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::ListRoutesRequest,
) -> ttrpc::Result<Routes> {
trace_rpc_call!(ctx, "list_routes", req);
is_allowed!(req);
let list = self
.sandbox
@@ -930,14 +960,16 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::StartTracingRequest,
) -> ttrpc::Result<Empty> {
info!(sl!(), "start_tracing {:?}", req);
is_allowed!(req);
Ok(Empty::new())
}
async fn stop_tracing(
&self,
_ctx: &TtrpcContext,
_req: protocols::agent::StopTracingRequest,
req: protocols::agent::StopTracingRequest,
) -> ttrpc::Result<Empty> {
is_allowed!(req);
Ok(Empty::new())
}
@@ -947,6 +979,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::CreateSandboxRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "create_sandbox", req);
is_allowed!(req);
{
let sandbox = self.sandbox.clone();
@@ -1012,6 +1045,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::DestroySandboxRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "destroy_sandbox", req);
is_allowed!(req);
let s = Arc::clone(&self.sandbox);
let mut sandbox = s.lock().await;
@@ -1033,6 +1067,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::AddARPNeighborsRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "add_arp_neighbors", req);
is_allowed!(req);
let neighs = req
.neighbors
@@ -1066,6 +1101,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
ctx: &TtrpcContext,
req: protocols::agent::OnlineCPUMemRequest,
) -> ttrpc::Result<Empty> {
is_allowed!(req);
let s = Arc::clone(&self.sandbox);
let sandbox = s.lock().await;
trace_rpc_call!(ctx, "online_cpu_mem", req);
@@ -1083,6 +1119,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::ReseedRandomDevRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "reseed_random_dev", req);
is_allowed!(req);
random::reseed_rng(req.data.as_slice())
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
@@ -1096,6 +1133,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::GuestDetailsRequest,
) -> ttrpc::Result<GuestDetailsResponse> {
trace_rpc_call!(ctx, "get_guest_details", req);
is_allowed!(req);
info!(sl!(), "get guest details!");
let mut resp = GuestDetailsResponse::new();
@@ -1124,6 +1162,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::MemHotplugByProbeRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "mem_hotplug_by_probe", req);
is_allowed!(req);
do_mem_hotplug_by_probe(&req.memHotplugProbeAddr)
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
@@ -1137,6 +1176,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::SetGuestDateTimeRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "set_guest_date_time", req);
is_allowed!(req);
do_set_guest_date_time(req.Sec, req.Usec)
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
@@ -1150,6 +1190,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::CopyFileRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "copy_file", req);
is_allowed!(req);
do_copy_file(&req).map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
@@ -1162,6 +1203,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::GetMetricsRequest,
) -> ttrpc::Result<Metrics> {
trace_rpc_call!(ctx, "get_metrics", req);
is_allowed!(req);
match get_metrics(&req) {
Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
@@ -1176,8 +1218,9 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
async fn get_oom_event(
&self,
_ctx: &TtrpcContext,
_req: protocols::agent::GetOOMEventRequest,
req: protocols::agent::GetOOMEventRequest,
) -> ttrpc::Result<OOMEvent> {
is_allowed!(req);
let sandbox = self.sandbox.clone();
let s = sandbox.lock().await;
let event_rx = &s.event_rx.clone();
@@ -1203,8 +1246,11 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::AddSwapRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "add_swap", req);
is_allowed!(req);
do_add_swap(&req).map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
do_add_swap(&self.sandbox, &req)
.await
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
Ok(Empty::new())
}
@@ -1557,43 +1603,13 @@ fn do_copy_file(req: &CopyFileRequest) -> Result<()> {
Ok(())
}
pub fn path_name_lookup<P: std::clone::Clone + AsRef<Path> + std::fmt::Debug>(
path: P,
lookup: &str,
) -> Result<(PathBuf, String)> {
for entry in fs::read_dir(path.clone())? {
let entry = entry?;
if let Some(name) = entry.path().file_name() {
if let Some(name) = name.to_str() {
if Some(0) == name.find(lookup) {
return Ok((entry.path(), name.to_string()));
}
}
}
}
Err(anyhow!("cannot get {} dir in {:?}", lookup, path))
}
fn do_add_swap(req: &AddSwapRequest) -> Result<()> {
// re-scan PCI bus
// looking for hidden devices
rescan_pci_bus().context("Could not rescan PCI bus")?;
async fn do_add_swap(sandbox: &Arc<Mutex<Sandbox>>, req: &AddSwapRequest) -> Result<()> {
let mut slots = Vec::new();
for slot in &req.PCIPath {
slots.push(pci::Slot::new(*slot as u8)?);
slots.push(pci::SlotFn::new(*slot, 0)?);
}
let pcipath = pci::Path::new(slots)?;
let root_bus_sysfs = format!("{}{}", SYSFS_DIR, create_pci_root_bus_path());
let sysfs_rel_path = format!(
"{}{}",
root_bus_sysfs,
pcipath_to_sysfs(&root_bus_sysfs, &pcipath)?
);
let (mut virtio_path, _) = path_name_lookup(sysfs_rel_path, "virtio")?;
virtio_path.push("block");
let (_, dev_name) = path_name_lookup(virtio_path, "vd")?;
let dev_name = format!("/dev/{}", dev_name);
let dev_name = get_virtio_blk_pci_device_name(sandbox, &pcipath).await?;
let c_str = CString::new(dev_name)?;
let ret = unsafe { libc::swapon(c_str.as_ptr() as *const c_char, 0) };
@@ -1624,15 +1640,14 @@ fn setup_bundle(cid: &str, spec: &mut Spec) -> Result<PathBuf> {
let rootfs_path = bundle_path.join("rootfs");
fs::create_dir_all(&rootfs_path)?;
BareMount::new(
baremount(
&spec_root.path,
rootfs_path.to_str().unwrap(),
"bind",
MsFlags::MS_BIND,
"",
&sl!(),
)
.mount()?;
)?;
spec.root = Some(Root {
path: rootfs_path.to_str().unwrap().to_owned(),
readonly: spec_root.readonly,

View File

@@ -449,7 +449,7 @@ fn online_memory(logger: &Logger) -> Result<()> {
#[cfg(test)]
mod tests {
use super::Sandbox;
use crate::{mount::BareMount, skip_if_not_root};
use crate::{mount::baremount, skip_if_not_root};
use anyhow::Error;
use nix::mount::MsFlags;
use oci::{Linux, Root, Spec};
@@ -461,8 +461,7 @@ mod tests {
use tempfile::Builder;
fn bind_mount(src: &str, dst: &str, logger: &Logger) -> Result<(), Error> {
let baremount = BareMount::new(src, dst, "bind", MsFlags::MS_BIND, "", logger);
baremount.mount()
baremount(src, dst, "bind", MsFlags::MS_BIND, "", logger)
}
#[tokio::test]

View File

@@ -7,6 +7,7 @@ use crate::config::AgentConfig;
use anyhow::Result;
use opentelemetry::sdk::propagation::TraceContextPropagator;
use opentelemetry::{global, sdk::trace::Config, trace::TracerProvider};
use serde::Deserialize;
use slog::{info, o, Logger};
use std::collections::HashMap;
use std::error::Error;
@@ -17,7 +18,7 @@ use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::Registry;
use ttrpc::r#async::TtrpcContext;
#[derive(Debug, PartialEq)]
#[derive(Debug, Deserialize, PartialEq)]
pub enum TraceType {
Disabled,
Isolated,

View File

@@ -111,10 +111,13 @@ pub async fn wait_for_uevent(
sandbox: &Arc<Mutex<Sandbox>>,
matcher: impl UeventMatcher,
) -> Result<Uevent> {
let logprefix = format!("Waiting for {:?}", &matcher);
info!(sl!(), "{}", logprefix);
let mut sb = sandbox.lock().await;
for uev in sb.uevent_map.values() {
if matcher.is_match(uev) {
info!(sl!(), "Device {:?} found in device map", uev);
info!(sl!(), "{}: found {:?} in uevent map", logprefix, &uev);
return Ok(uev.clone());
}
}
@@ -129,7 +132,8 @@ pub async fn wait_for_uevent(
sb.uevent_watchers.push(Some((Box::new(matcher), tx)));
drop(sb); // unlock
info!(sl!(), "Waiting on channel for uevent notification\n");
info!(sl!(), "{}: waiting on channel", logprefix);
let hotplug_timeout = AGENT_CONFIG.read().await.hotplug_timeout;
let uev = match tokio::time::timeout(hotplug_timeout, rx).await {
@@ -146,6 +150,7 @@ pub async fn wait_for_uevent(
}
};
info!(sl!(), "{}: found {:?} on channel", logprefix, &uev);
Ok(uev)
}

View File

@@ -3,7 +3,7 @@
// SPDX-License-Identifier: Apache-2.0
//
#![allow(clippy::unknown_clippy_lints)]
#![allow(unknown_lints)]
use std::collections::HashMap;
use std::path::{Path, PathBuf};
@@ -20,7 +20,7 @@ use tokio::sync::Mutex;
use tokio::task;
use tokio::time::{self, Duration};
use crate::mount::BareMount;
use crate::mount::baremount;
use crate::protocols::agent as protos;
/// The maximum number of file system entries agent will watch for each mount.
@@ -193,14 +193,6 @@ impl Storage {
size += metadata.len();
ensure!(
self.watched_files.len() <= MAX_ENTRIES_PER_STORAGE,
WatcherError::MountTooManyFiles {
count: self.watched_files.len(),
mnt: self.source_mount_point.display().to_string()
}
);
// Insert will return old entry if any
if let Some(old_st) = self.watched_files.insert(path.to_path_buf(), modified) {
if modified > old_st {
@@ -211,6 +203,14 @@ impl Storage {
debug!(logger, "New entry: {}", path.display());
update_list.push(PathBuf::from(&path))
}
ensure!(
self.watched_files.len() <= MAX_ENTRIES_PER_STORAGE,
WatcherError::MountTooManyFiles {
count: self.watched_files.len(),
mnt: self.source_mount_point.display().to_string()
}
);
} else {
// Scan dir recursively
let mut entries = fs::read_dir(path)
@@ -269,6 +269,19 @@ impl SandboxStorages {
let entry = Storage::new(storage)
.await
.with_context(|| "Failed to add storage")?;
// If the storage source is a directory, let's create the target mount point:
if entry.source_mount_point.as_path().is_dir() {
fs::create_dir_all(&entry.target_mount_point)
.await
.with_context(|| {
format!(
"Unable to mkdir all for {}",
entry.target_mount_point.display()
)
})?;
}
self.0.push(entry);
}
@@ -314,16 +327,14 @@ impl SandboxStorages {
}
}
match BareMount::new(
match baremount(
entry.source_mount_point.to_str().unwrap(),
entry.target_mount_point.to_str().unwrap(),
"bind",
MsFlags::MS_BIND,
"bind",
logger,
)
.mount()
{
) {
Ok(_) => {
entry.watch = false;
info!(logger, "watchable mount replaced with bind mount")
@@ -427,15 +438,14 @@ impl BindWatcher {
async fn mount(&self, logger: &Logger) -> Result<()> {
fs::create_dir_all(WATCH_MOUNT_POINT_PATH).await?;
BareMount::new(
baremount(
"tmpfs",
WATCH_MOUNT_POINT_PATH,
"tmpfs",
MsFlags::empty(),
"",
logger,
)
.mount()?;
)?;
Ok(())
}
@@ -475,6 +485,85 @@ mod tests {
Ok((storage, src_path))
}
#[tokio::test]
async fn test_empty_sourcedir_check() {
//skip_if_not_root!();
let dir = tempfile::tempdir().expect("failed to create tempdir");
let logger = slog::Logger::root(slog::Discard, o!());
let src_path = dir.path().join("src");
let dest_path = dir.path().join("dest");
let src_filename = src_path.to_str().expect("failed to create src filename");
let dest_filename = dest_path.to_str().expect("failed to create dest filename");
std::fs::create_dir_all(src_filename).expect("failed to create path");
let storage = protos::Storage {
source: src_filename.to_string(),
mount_point: dest_filename.to_string(),
..Default::default()
};
let mut entries = SandboxStorages {
..Default::default()
};
entries
.add(std::iter::once(storage), &logger)
.await
.unwrap();
assert!(entries.check(&logger).await.is_ok());
assert_eq!(entries.0.len(), 1);
assert_eq!(std::fs::read_dir(src_path).unwrap().count(), 0);
assert_eq!(std::fs::read_dir(dest_path).unwrap().count(), 0);
assert_eq!(std::fs::read_dir(dir.path()).unwrap().count(), 2);
}
#[tokio::test]
async fn test_single_file_check() {
//skip_if_not_root!();
let dir = tempfile::tempdir().expect("failed to create tempdir");
let logger = slog::Logger::root(slog::Discard, o!());
let src_file_path = dir.path().join("src.txt");
let dest_file_path = dir.path().join("dest.txt");
let src_filename = src_file_path
.to_str()
.expect("failed to create src filename");
let dest_filename = dest_file_path
.to_str()
.expect("failed to create dest filename");
let storage = protos::Storage {
source: src_filename.to_string(),
mount_point: dest_filename.to_string(),
..Default::default()
};
//create file
fs::write(src_file_path, "original").unwrap();
let mut entries = SandboxStorages::default();
entries
.add(std::iter::once(storage), &logger)
.await
.unwrap();
assert!(entries.check(&logger).await.is_ok());
assert_eq!(entries.0.len(), 1);
// there should only be 2 files
assert_eq!(std::fs::read_dir(dir.path()).unwrap().count(), 2);
assert_eq!(fs::read_to_string(dest_file_path).unwrap(), "original");
}
#[tokio::test]
async fn test_watch_entries() {
skip_if_not_root!();

View File

@@ -12,7 +12,7 @@
// payload, which allows the forwarder to know how many bytes it must read to
// consume the trace span. The payload is a serialised version of the trace span.
#![allow(clippy::unknown_clippy_lints)]
#![allow(unknown_lints)]
use async_trait::async_trait;
use byteorder::{ByteOrder, NetworkEndian};

View File

@@ -5,16 +5,10 @@ coverage.txt
coverage.html
.git-commit
.git-commit.tmp
/cli/config/configuration-acrn.toml
/cli/config/configuration-clh.toml
/cli/config/configuration-fc.toml
/cli/config/configuration-qemu.toml
/cli/config/configuration-clh.toml
/cli/config-generated.go
/cli/containerd-shim-kata-v2/config-generated.go
/cli/coverage.html
/config/*.toml
config-generated.go
/containerd-shim-kata-v2
/containerd-shim-v2/monitor_address
/pkg/containerd-shim-v2/monitor_address
/data/kata-collect-data.sh
/kata-monitor
/kata-netmon
@@ -23,7 +17,4 @@ coverage.html
/virtcontainers/hack/virtc/virtc
/virtcontainers/hook/mock/hook
/virtcontainers/profile.cov
/virtcontainers/shim/mock/cc-shim/cc-shim
/virtcontainers/shim/mock/kata-shim/kata-shim
/virtcontainers/shim/mock/shim
/virtcontainers/utils/supportfiles

View File

@@ -1,201 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@@ -51,12 +51,13 @@ PROJECT_DIR = $(PROJECT_TAG)
IMAGENAME = $(PROJECT_TAG).img
TARGET = $(BIN_PREFIX)-runtime
TARGET_OUTPUT = $(CURDIR)/$(TARGET)
RUNTIME_OUTPUT = $(CURDIR)/$(TARGET)
RUNTIME_DIR = $(CLI_DIR)/$(TARGET)
BINLIST += $(TARGET)
NETMON_DIR = netmon
NETMON_DIR = $(CLI_DIR)/netmon
NETMON_TARGET = $(PROJECT_TYPE)-netmon
NETMON_TARGET_OUTPUT = $(CURDIR)/$(NETMON_TARGET)
NETMON_RUNTIME_OUTPUT = $(CURDIR)/$(NETMON_TARGET)
BINLIBEXECLIST += $(NETMON_TARGET)
DESTDIR ?= /
@@ -200,7 +201,7 @@ FEATURE_SELINUX ?= check
SED = sed
CLI_DIR = cli
CLI_DIR = cmd
SHIMV2 = containerd-shim-kata-v2
SHIMV2_OUTPUT = $(CURDIR)/$(SHIMV2)
SHIMV2_DIR = $(CLI_DIR)/$(SHIMV2)
@@ -225,7 +226,7 @@ ifneq (,$(QEMUCMD))
KNOWN_HYPERVISORS += $(HYPERVISOR_QEMU)
CONFIG_FILE_QEMU = configuration-qemu.toml
CONFIG_QEMU = $(CLI_DIR)/config/$(CONFIG_FILE_QEMU)
CONFIG_QEMU = config/$(CONFIG_FILE_QEMU)
CONFIG_QEMU_IN = $(CONFIG_QEMU).in
CONFIG_PATH_QEMU = $(abspath $(CONFDIR)/$(CONFIG_FILE_QEMU))
@@ -248,7 +249,7 @@ ifneq (,$(CLHCMD))
KNOWN_HYPERVISORS += $(HYPERVISOR_CLH)
CONFIG_FILE_CLH = configuration-clh.toml
CONFIG_CLH = $(CLI_DIR)/config/$(CONFIG_FILE_CLH)
CONFIG_CLH = config/$(CONFIG_FILE_CLH)
CONFIG_CLH_IN = $(CONFIG_CLH).in
CONFIG_PATH_CLH = $(abspath $(CONFDIR)/$(CONFIG_FILE_CLH))
@@ -271,7 +272,7 @@ ifneq (,$(FCCMD))
KNOWN_HYPERVISORS += $(HYPERVISOR_FC)
CONFIG_FILE_FC = configuration-fc.toml
CONFIG_FC = $(CLI_DIR)/config/$(CONFIG_FILE_FC)
CONFIG_FC = config/$(CONFIG_FILE_FC)
CONFIG_FC_IN = $(CONFIG_FC).in
CONFIG_PATH_FC = $(abspath $(CONFDIR)/$(CONFIG_FILE_FC))
@@ -294,7 +295,7 @@ ifneq (,$(ACRNCMD))
KNOWN_HYPERVISORS += $(HYPERVISOR_ACRN)
CONFIG_FILE_ACRN = configuration-acrn.toml
CONFIG_ACRN = $(CLI_DIR)/config/$(CONFIG_FILE_ACRN)
CONFIG_ACRN = config/$(CONFIG_FILE_ACRN)
CONFIG_ACRN_IN = $(CONFIG_ACRN).in
CONFIG_PATH_ACRN = $(abspath $(CONFDIR)/$(CONFIG_FILE_ACRN))
@@ -522,12 +523,12 @@ containerd-shim-v2: $(SHIMV2_OUTPUT)
monitor: $(MONITOR_OUTPUT)
netmon: $(NETMON_TARGET_OUTPUT)
netmon: $(NETMON_RUNTIME_OUTPUT)
$(NETMON_TARGET_OUTPUT): $(SOURCES) VERSION
$(NETMON_RUNTIME_OUTPUT): $(SOURCES) VERSION
$(QUIET_BUILD)(cd $(NETMON_DIR) && go build $(BUILDFLAGS) -o $@ -ldflags "-X main.version=$(VERSION)" $(KATA_LDFLAGS))
runtime: $(TARGET_OUTPUT) $(CONFIGS)
runtime: $(RUNTIME_OUTPUT) $(CONFIGS)
.DEFAULT: default
build: default
@@ -558,16 +559,12 @@ define MAKE_KERNEL_VIRTIOFS_NAME
$(if $(findstring uncompressed,$1),vmlinux-virtiofs.container,vmlinuz-virtiofs.container)
endef
GENERATED_CONFIG = $(abspath $(CLI_DIR)/config-generated.go)
GENERATED_FILES += $(GENERATED_CONFIG)
GENERATED_FILES += pkg/katautils/config-settings.go
$(TARGET_OUTPUT): $(SOURCES) $(GENERATED_FILES) $(MAKEFILE_LIST) | show-summary
$(QUIET_BUILD)(cd $(CLI_DIR) && go build $(KATA_LDFLAGS) $(BUILDFLAGS) -o $@ .)
$(RUNTIME_OUTPUT): $(SOURCES) $(GENERATED_FILES) $(MAKEFILE_LIST) | show-summary
$(QUIET_BUILD)(cd $(RUNTIME_DIR) && go build $(KATA_LDFLAGS) $(BUILDFLAGS) -o $@ .)
$(SHIMV2_OUTPUT): $(SOURCES) $(GENERATED_FILES) $(MAKEFILE_LIST)
$(QUIET_BUILD)(cd $(SHIMV2_DIR)/ && ln -fs $(GENERATED_CONFIG))
$(QUIET_BUILD)(cd $(SHIMV2_DIR)/ && go build $(KATA_LDFLAGS) $(BUILDFLAGS) -o $@ .)
$(MONITOR_OUTPUT): $(SOURCES) $(GENERATED_FILES) $(MAKEFILE_LIST) .git-commit
@@ -610,10 +607,11 @@ ifeq ($(shell id -u), 0)
endif
go-test: $(GENERATED_FILES)
go clean -testcache
go test -v -mod=vendor ./...
check-go-static:
$(QUIET_CHECK)../../ci/go-no-os-exit.sh ./cli
$(QUIET_CHECK)../../ci/go-no-os-exit.sh ./cmd/kata-runtime
$(QUIET_CHECK)../../ci/go-no-os-exit.sh ./virtcontainers
coverage:
@@ -663,7 +661,6 @@ clean:
$(NETMON_TARGET) \
$(MONITOR) \
$(SHIMV2) \
$(SHIMV2_DIR)/$(notdir $(GENERATED_CONFIG)) \
$(TARGET) \
.git-commit .git-commit.tmp

View File

@@ -26,8 +26,7 @@ to work seamlessly with both Docker and Kubernetes respectively.
## License
The code is licensed under an Apache 2.0 license.
See [the license file](LICENSE) for further details.
See [the license file](https://github.com/kata-containers/kata-containers/blob/main/LICENSE) for further details.
## Platform support

View File

@@ -1,40 +0,0 @@
//
// Copyright (c) 2018-2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
// WARNING: This file is auto-generated - DO NOT EDIT!
//
// Note that some variables are "var" to allow them to be modified
// by the tests.
package main
// name is the name of the runtime
const name = "@RUNTIME_NAME@"
// name of the project
const project = "@PROJECT_NAME@"
// prefix used to denote non-standard CLI commands and options.
const projectPrefix = "@PROJECT_TYPE@"
// original URL for this project
const projectURL = "@PROJECT_URL@"
// Project URL's organisation name
const projectORG = "@PROJECT_ORG@"
const defaultRootDirectory = "@PKGRUNDIR@"
// commit is the git commit the runtime is compiled from.
var commit = "@COMMIT@"
// version is the runtime version.
var version = "@VERSION@"
// Default config file used by stateless systems.
var defaultRuntimeConfiguration = "@CONFIG_PATH@"
// Alternate config file that takes precedence over
// defaultRuntimeConfiguration.
var defaultSysConfRuntimeConfiguration = "@SYSCONFIG@"

View File

@@ -1,30 +0,0 @@
// Copyright (c) 2018 HyperHQ Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
package main
import (
"fmt"
"os"
"github.com/containerd/containerd/runtime/v2/shim"
containerdshim "github.com/kata-containers/kata-containers/src/runtime/containerd-shim-v2"
"github.com/kata-containers/kata-containers/src/runtime/pkg/types"
)
func shimConfig(config *shim.Config) {
config.NoReaper = true
config.NoSubreaper = true
}
func main() {
if len(os.Args) == 2 && os.Args[1] == "--version" {
fmt.Printf("%s containerd shim: id: %q, version: %s, commit: %v\n", project, types.DefaultKataRuntimeName, version, commit)
os.Exit(0)
}
shim.Run(types.DefaultKataRuntimeName, containerdshim.New, shimConfig)
}

View File

@@ -0,0 +1,32 @@
// Copyright (c) 2018 HyperHQ Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
package main
import (
"fmt"
"os"
shimapi "github.com/containerd/containerd/runtime/v2/shim"
shim "github.com/kata-containers/kata-containers/src/runtime/pkg/containerd-shim-v2"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
"github.com/kata-containers/kata-containers/src/runtime/pkg/types"
)
func shimConfig(config *shimapi.Config) {
config.NoReaper = true
config.NoSubreaper = true
}
func main() {
if len(os.Args) == 2 && os.Args[1] == "--version" {
fmt.Printf("%s containerd shim: id: %q, version: %s, commit: %v\n", katautils.PROJECT, types.DefaultKataRuntimeName, katautils.VERSION, katautils.COMMIT)
os.Exit(0)
}
shimapi.Run(types.DefaultKataRuntimeName, shim.New, shimConfig)
}

View File

@@ -7,6 +7,7 @@ package main
import (
"flag"
"fmt"
"net/http"
"os"
goruntime "runtime"
@@ -25,7 +26,7 @@ var logLevel = flag.String("log-level", "info", "Log level of logrus(trace/debug
var (
appName = "kata-monitor"
// version is the kata monitor version.
version = "0.1.0"
version = "0.2.0"
GitCommit = "unknown-commit"
)
@@ -54,6 +55,15 @@ func printVersion(ver versionInfo) {
}
}
type endpoint struct {
handler http.HandlerFunc
path string
desc string
}
// global variable endpoints contains all available endpoints
var endpoints []endpoint
func main() {
ver := versionInfo{
AppName: appName,
@@ -97,19 +107,62 @@ func main() {
panic(err)
}
// setup handlers, now only metrics is supported
// setup handlers, currently only metrics are supported
m := http.NewServeMux()
m.Handle("/metrics", http.HandlerFunc(km.ProcessMetricsRequest))
m.Handle("/sandboxes", http.HandlerFunc(km.ListSandboxes))
m.Handle("/agent-url", http.HandlerFunc(km.GetAgentURL))
endpoints = []endpoint{
{
path: "/metrics",
desc: "Get metrics from sandboxes.",
handler: km.ProcessMetricsRequest,
},
{
path: "/sandboxes",
desc: "List all Kata Containers sandboxes.",
handler: km.ListSandboxes,
},
{
path: "/agent-url",
desc: "Get sandbox agent URL.",
handler: km.GetAgentURL,
},
{
path: "/debug/vars",
desc: "Golang pprof `/debug/vars` endpoint for kata runtime shim process.",
handler: km.ExpvarHandler,
},
{
path: "/debug/pprof/",
desc: "Golang pprof `/debug/pprof/` endpoint for kata runtime shim process.",
handler: km.PprofIndex,
},
{
path: "/debug/pprof/cmdline",
desc: "Golang pprof `/debug/pprof/cmdline` endpoint for kata runtime shim process.",
handler: km.PprofCmdline,
},
{
path: "/debug/pprof/profile",
desc: "Golang pprof `/debug/pprof/profile` endpoint for kata runtime shim process.",
handler: km.PprofProfile,
},
{
path: "/debug/pprof/symbol",
desc: "Golang pprof `/debug/pprof/symbol` endpoint for kata runtime shim process.",
handler: km.PprofSymbol,
},
{
path: "/debug/pprof/trace",
desc: "Golang pprof `/debug/pprof/trace` endpoint for kata runtime shim process.",
handler: km.PprofTrace,
},
}
// for debug shim process
m.Handle("/debug/vars", http.HandlerFunc(km.ExpvarHandler))
m.Handle("/debug/pprof/", http.HandlerFunc(km.PprofIndex))
m.Handle("/debug/pprof/cmdline", http.HandlerFunc(km.PprofCmdline))
m.Handle("/debug/pprof/profile", http.HandlerFunc(km.PprofProfile))
m.Handle("/debug/pprof/symbol", http.HandlerFunc(km.PprofSymbol))
m.Handle("/debug/pprof/trace", http.HandlerFunc(km.PprofTrace))
for _, endpoint := range endpoints {
m.Handle(endpoint.path, endpoint.handler)
}
// root index page to show all endpoints in kata-monitor
m.Handle("/", http.HandlerFunc(indexPage))
// listening on the server
svr := &http.Server{
@@ -119,6 +172,23 @@ func main() {
logrus.Fatal(svr.ListenAndServe())
}
func indexPage(w http.ResponseWriter, r *http.Request) {
w.Write([]byte("Available HTTP endpoints:\n"))
spacing := 0
for _, endpoint := range endpoints {
if len(endpoint.path) > spacing {
spacing = len(endpoint.path)
}
}
spacing = spacing + 3
formattedString := fmt.Sprintf("%%-%ds: %%s\n", spacing)
for _, endpoint := range endpoints {
w.Write([]byte(fmt.Sprintf(formattedString, endpoint.path, endpoint.desc)))
}
}
// initLog setup logger
func initLog() {
kataMonitorLog := logrus.WithFields(logrus.Fields{

View File

@@ -25,7 +25,6 @@ import (
"strings"
"syscall"
"github.com/containerd/cgroups"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/oci"
@@ -62,9 +61,9 @@ type vmContainerCapableDetails struct {
const (
moduleParamDir = "parameters"
successMessageCapable = "System is capable of running " + project
successMessageCreate = "System can currently create " + project
failMessage = "System is not capable of running " + project
successMessageCapable = "System is capable of running " + katautils.PROJECT
successMessageCreate = "System can currently create " + katautils.PROJECT
failMessage = "System is not capable of running " + katautils.PROJECT
kernelPropertyCorrect = "Kernel property value correct"
// these refer to fields in the procCPUINFO file
@@ -229,7 +228,7 @@ func checkKernelModules(modules map[string]kernelModule, handler kernelParamHand
}
if !haveKernelModule(module) {
kataLog.WithFields(fields).Error("kernel property not found")
kataLog.WithFields(fields).Errorf("kernel property %s not found", module)
if details.required {
count++
}
@@ -292,11 +291,9 @@ func genericHostIsVMContainerCapable(details vmContainerCapableDetails) error {
errorCount := uint32(0)
count := checkCPUAttribs(cpuinfo, details.requiredCPUAttribs)
errorCount += count
count = checkCPUFlags(cpuFlags, details.requiredCPUFlags)
errorCount += count
count, err = checkKernelModules(details.requiredKernelModules, archKernelParamHandler)
@@ -316,7 +313,7 @@ func genericHostIsVMContainerCapable(details vmContainerCapableDetails) error {
var kataCheckCLICommand = cli.Command{
Name: "check",
Aliases: []string{"kata-check"},
Usage: "tests if system can run " + project,
Usage: "tests if system can run " + katautils.PROJECT,
Flags: []cli.Flag{
cli.BoolFlag{
Name: "check-version-only",
@@ -375,14 +372,14 @@ EXAMPLES:
$ %s check --only-list-releases --include-all-releases
`,
project,
katautils.PROJECT,
noNetworkEnvVar,
name,
name,
name,
name,
name,
name,
katautils.NAME,
katautils.NAME,
katautils.NAME,
katautils.NAME,
katautils.NAME,
katautils.NAME,
),
Action: func(context *cli.Context) error {
@@ -401,7 +398,7 @@ EXAMPLES:
if os.Geteuid() == 0 {
kataLog.Warn("Not running network checks as super user")
} else {
err := HandleReleaseVersions(cmd, version, context.Bool("include-all-releases"))
err := HandleReleaseVersions(cmd, katautils.VERSION, context.Bool("include-all-releases"))
if err != nil {
return err
}
@@ -417,11 +414,6 @@ EXAMPLES:
return errors.New("check: cannot determine runtime config")
}
// check if cgroup can work use the same logic for creating containers
if _, err := vc.V1Constraints(); err != nil && err == cgroups.ErrMountPointNotExist && !runtimeConfig.SandboxCgroupOnly {
return fmt.Errorf("Cgroup v2 requires the following configuration: `sandbox_cgroup_only=true`.")
}
err := setCPUtype(runtimeConfig.HypervisorType)
if err != nil {
return err

View File

@@ -161,6 +161,16 @@ func setCPUtype(hypervisorType vc.HypervisorType) error {
required: false,
},
}
case "mock":
archRequiredCPUFlags = map[string]string{
cpuFlagVMX: "Virtualization support",
cpuFlagLM: "64Bit CPU",
cpuFlagSSE4_1: "SSE4.1",
}
archRequiredCPUAttribs = map[string]string{
archGenuineIntel: "Intel Architecture CPU",
}
default:
return fmt.Errorf("setCPUtype: Unknown hypervisor type %s", hypervisorType)
}
@@ -292,6 +302,8 @@ func archHostCanCreateVMContainer(hypervisorType vc.HypervisorType) error {
return kvmIsUsable()
case "acrn":
return acrnIsUsable()
case "mock":
return nil
default:
return fmt.Errorf("archHostCanCreateVMContainer: Unknown hypervisor type %s", hypervisorType)
}

View File

@@ -317,11 +317,12 @@ func TestCheckHostIsVMContainerCapable(t *testing.T) {
}
}
setupCheckHostIsVMContainerCapable(assert, cpuInfoFile, cpuData, moduleData)
// remove the modules to force a failure
err = os.RemoveAll(sysModuleDir)
// to check if host is capable for Kata Containers, must setup CPU info first.
_, config, err := makeRuntimeConfig(dir)
assert.NoError(err)
setCPUtype(config.HypervisorType)
setupCheckHostIsVMContainerCapable(assert, cpuInfoFile, cpuData, moduleData)
details := vmContainerCapableDetails{
cpuInfoFile: cpuInfoFile,
@@ -332,6 +333,12 @@ func TestCheckHostIsVMContainerCapable(t *testing.T) {
err = hostIsVMContainerCapable(details)
assert.Nil(err)
// remove the modules to force a failure
err = os.RemoveAll(sysModuleDir)
assert.NoError(err)
err = hostIsVMContainerCapable(details)
assert.Error(err)
}
func TestArchKernelParamHandler(t *testing.T) {

View File

@@ -28,9 +28,9 @@ func setupCheckHostIsVMContainerCapable(assert *assert.Assertions, cpuInfoFile s
func TestCCCheckCLIFunction(t *testing.T) {
var cpuData []testCPUData
moduleData := []testModuleData{
{filepath.Join(sysModuleDir, "kvm"), true, ""},
{filepath.Join(sysModuleDir, "vhost"), true, ""},
{filepath.Join(sysModuleDir, "vhost_net"), true, ""},
{filepath.Join(sysModuleDir, "kvm"), "", true},
{filepath.Join(sysModuleDir, "vhost"), "", true},
{filepath.Join(sysModuleDir, "vhost_net"), "", true},
}
genericCheckCLIFunction(t, cpuData, moduleData)

View File

@@ -10,7 +10,7 @@ vendor_id : IBM/S390
# processors : 4
bogomips per cpu: 20325.00
max thread id : 0
features : esan3 zarch stfle msa ldisp eimm dfp edat etf3eh highgprs te vx sie
features : esan3 zarch stfle msa ldisp eimm dfp edat etf3eh highgprs te vx sie
cache0 : level=1 type=Data scope=Private size=128K line_size=256 associativity=8
cache1 : level=1 type=Instruction scope=Private size=96K line_size=256 associativity=6
cache2 : level=2 type=Data scope=Private size=2048K line_size=256 associativity=8

View File

@@ -39,7 +39,8 @@ func testSetCPUTypeGeneric(t *testing.T) {
_, config, err := makeRuntimeConfig(tmpdir)
assert.NoError(err)
setCPUtype(config.HypervisorType)
err = setCPUtype(config.HypervisorType)
assert.NoError(err)
assert.Equal(archRequiredCPUFlags, savedArchRequiredCPUFlags)
assert.Equal(archRequiredCPUAttribs, savedArchRequiredCPUAttribs)

View File

@@ -47,8 +47,8 @@ func TestCCCheckCLIFunction(t *testing.T) {
}
moduleData := []testModuleData{
{filepath.Join(sysModuleDir, "kvm"), false, "Y"},
{filepath.Join(sysModuleDir, "kvm_hv"), false, "Y"},
{filepath.Join(sysModuleDir, "kvm"), "", true},
{filepath.Join(sysModuleDir, "kvm_hv"), "", true},
}
genericCheckCLIFunction(t, cpuData, moduleData)
@@ -58,51 +58,51 @@ func TestArchKernelParamHandler(t *testing.T) {
assert := assert.New(t)
type testData struct {
onVMM bool
expectIgnore bool
fields logrus.Fields
msg string
onVMM bool
expectIgnore bool
}
data := []testData{
{true, false, logrus.Fields{}, ""},
{false, false, logrus.Fields{}, ""},
{logrus.Fields{}, "", true, false},
{logrus.Fields{}, "", false, false},
{
false,
false,
logrus.Fields{
// wrong type
"parameter": 123,
},
"foo",
false,
false,
},
{
false,
false,
logrus.Fields{
"parameter": "unrestricted_guest",
},
"",
false,
false,
},
{
true,
true,
logrus.Fields{
"parameter": "unrestricted_guest",
},
"",
true,
true,
},
{
false,
true,
logrus.Fields{
"parameter": "nested",
},
"",
false,
true,
},
}

View File

@@ -47,7 +47,7 @@ func TestCCCheckCLIFunction(t *testing.T) {
}
moduleData := []testModuleData{
{filepath.Join(sysModuleDir, "kvm"), false, "Y"},
{filepath.Join(sysModuleDir, "kvm"), "", true},
}
genericCheckCLIFunction(t, cpuData, moduleData)
@@ -57,51 +57,51 @@ func TestArchKernelParamHandler(t *testing.T) {
assert := assert.New(t)
type testData struct {
onVMM bool
expectIgnore bool
fields logrus.Fields
msg string
onVMM bool
expectIgnore bool
}
data := []testData{
{true, false, logrus.Fields{}, ""},
{false, false, logrus.Fields{}, ""},
{logrus.Fields{}, "", true, false},
{logrus.Fields{}, "", false, false},
{
false,
false,
logrus.Fields{
// wrong type
"parameter": 123,
},
"foo",
false,
false,
},
{
false,
false,
logrus.Fields{
"parameter": "unrestricted_guest",
},
"",
false,
false,
},
{
true,
true,
logrus.Fields{
"parameter": "unrestricted_guest",
},
"",
true,
true,
},
{
false,
true,
logrus.Fields{
"parameter": "nested",
},
"",
false,
true,
},
}

View File

@@ -17,8 +17,10 @@ import (
"strings"
"testing"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils"
ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
"github.com/urfave/cli"
@@ -247,6 +249,13 @@ func genericCheckCLIFunction(t *testing.T, cpuData []testCPUData, moduleData []t
flagSet := &flag.FlagSet{}
ctx := createCLIContext(flagSet)
ctx.App.Name = "foo"
if katatestutils.IsInGitHubActions() {
// only set to mock if on GitHub
t.Logf("running tests under GitHub actions")
config.HypervisorType = vc.MockHypervisor
}
ctx.App.Metadata["runtimeConfig"] = config
// create buffer to save logger output

View File

@@ -13,14 +13,16 @@ import (
"strings"
"github.com/BurntSushi/toml"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/prometheus/procfs"
"github.com/urfave/cli"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
"github.com/kata-containers/kata-containers/src/runtime/pkg/utils"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
exp "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/experimental"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/oci"
vcUtils "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/prometheus/procfs"
"github.com/urfave/cli"
)
// Semantic version for the output of the command.
@@ -169,8 +171,8 @@ func getMetaInfo() MetaInfo {
}
func getRuntimeInfo(configFile string, config oci.RuntimeConfig) RuntimeInfo {
runtimeVersionInfo := constructVersionInfo(version)
runtimeVersionInfo.Commit = commit
runtimeVersionInfo := constructVersionInfo(katautils.VERSION)
runtimeVersionInfo.Commit = katautils.COMMIT
runtimeVersion := RuntimeVersionInfo{
Version: runtimeVersionInfo,

View File

@@ -314,8 +314,8 @@ func getExpectedKernel(config oci.RuntimeConfig) KernelInfo {
func getExpectedRuntimeDetails(config oci.RuntimeConfig, configFile string) RuntimeInfo {
runtimePath, _ := os.Executable()
runtimeVersionInfo := constructVersionInfo(version)
runtimeVersionInfo.Commit = commit
runtimeVersionInfo := constructVersionInfo(katautils.VERSION)
runtimeVersionInfo.Commit = katautils.COMMIT
return RuntimeInfo{
Version: RuntimeVersionInfo{
Version: runtimeVersionInfo,

View File

@@ -39,18 +39,18 @@ const arch = goruntime.GOARCH
var usage = fmt.Sprintf(`%s runtime
%s is a command line program for running applications packaged
according to the Open Container Initiative (OCI).`, name, name)
according to the Open Container Initiative (OCI).`, katautils.NAME, katautils.NAME)
var notes = fmt.Sprintf(`
NOTES:
- Commands starting "%s-" and options starting "--%s-" are `+project+` extensions.
- Commands starting "%s-" and options starting "--%s-" are `+katautils.PROJECT+` extensions.
URL:
The canonical URL for this project is: %s
`, projectPrefix, projectPrefix, projectURL)
`, katautils.PROJECTPREFIX, katautils.PROJECTPREFIX, katautils.PROJECTURL)
// kataLog is the logger used to record all messages
var kataLog *logrus.Entry
@@ -82,7 +82,7 @@ var defaultErrorFile = os.Stderr
var runtimeFlags = []cli.Flag{
cli.StringFlag{
Name: "config, kata-config",
Usage: project + " config file path",
Usage: katautils.PROJECT + " config file path",
},
cli.StringFlag{
Name: "log",
@@ -96,7 +96,7 @@ var runtimeFlags = []cli.Flag{
},
cli.StringFlag{
Name: "root",
Value: defaultRootDirectory,
Value: katautils.DEFAULTROOTDIRECTORY,
Usage: "root directory for storage of container state (this should be located in tmpfs)",
},
cli.StringFlag{
@@ -145,7 +145,7 @@ var savedCLIErrWriter = cli.ErrWriter
func init() {
kataLog = logrus.WithFields(logrus.Fields{
"name": name,
"name": katautils.NAME,
"source": "runtime",
"arch": arch,
"pid": os.Getpid(),
@@ -222,7 +222,7 @@ func beforeSubcommands(c *cli.Context) error {
var runtimeConfig oci.RuntimeConfig
var err error
katautils.SetConfigOptions(name, defaultRuntimeConfiguration, defaultSysConfRuntimeConfiguration)
katautils.SetConfigOptions(katautils.NAME, katautils.DEFAULTRUNTIMECONFIGURATION, katautils.DEFAULTSYSCONFRUNTIMECONFIGURATION)
handleShowConfig(c)
@@ -302,8 +302,8 @@ func beforeSubcommands(c *cli.Context) error {
args := strings.Join(c.Args(), " ")
fields := logrus.Fields{
"version": version,
"commit": commit,
"version": katautils.VERSION,
"commit": katautils.COMMIT,
"arguments": `"` + args + `"`,
}
@@ -365,14 +365,14 @@ func commandNotFound(c *cli.Context, command string) {
func makeVersionString() string {
v := make([]string, 0, 3)
versionStr := version
versionStr := katautils.VERSION
if versionStr == "" {
versionStr = unknown
}
v = append(v, name+" : "+versionStr)
v = append(v, katautils.NAME+" : "+versionStr)
commitStr := commit
commitStr := katautils.COMMIT
if commitStr == "" {
commitStr = unknown
}
@@ -411,7 +411,7 @@ func setCLIGlobals() {
func createRuntimeApp(ctx context.Context, args []string) error {
app := cli.NewApp()
app.Name = name
app.Name = katautils.NAME
app.Writer = defaultOutputFile
app.Usage = usage
app.CommandNotFound = runtimeCommandNotFound

View File

@@ -57,19 +57,19 @@ var (
var testingImpl = &vcmock.VCMock{}
func init() {
if version == "" {
if katautils.VERSION == "" {
panic("ERROR: invalid build: version not set")
}
if commit == "" {
if katautils.COMMIT == "" {
panic("ERROR: invalid build: commit not set")
}
if defaultSysConfRuntimeConfiguration == "" {
if katautils.DEFAULTSYSCONFRUNTIMECONFIGURATION == "" {
panic("ERROR: invalid build: defaultSysConfRuntimeConfiguration not set")
}
if defaultRuntimeConfiguration == "" {
if katautils.DEFAULTRUNTIMECONFIGURATION == "" {
panic("ERROR: invalid build: defaultRuntimeConfiguration not set")
}
@@ -82,7 +82,7 @@ func init() {
var err error
fmt.Printf("INFO: creating test directory\n")
testDir, err = ioutil.TempDir("", fmt.Sprintf("%s-", name))
testDir, err = ioutil.TempDir("", fmt.Sprintf("%s-", katautils.NAME))
if err != nil {
panic(fmt.Sprintf("ERROR: failed to create test directory: %v", err))
}
@@ -153,8 +153,8 @@ func runUnitTests(m *testing.M) {
func TestMain(m *testing.M) {
// If the test binary name is kata-runtime.coverage, we've are being asked to
// run the coverage-instrumented kata-runtime.
if path.Base(os.Args[0]) == name+".coverage" ||
path.Base(os.Args[0]) == name {
if path.Base(os.Args[0]) == katautils.NAME+".coverage" ||
path.Base(os.Args[0]) == katautils.NAME {
main()
exit(0)
}
@@ -210,7 +210,7 @@ func newTestHypervisorConfig(dir string, create bool) (vc.HypervisorConfig, erro
KernelPath: kernelPath,
ImagePath: imagePath,
HypervisorPath: hypervisorPath,
HypervisorMachineType: "pc-lite",
HypervisorMachineType: "q35",
}, nil
}
@@ -666,9 +666,9 @@ func TestMainBeforeSubCommandsShowCCConfigPaths(t *testing.T) {
for i, line := range lines {
switch i {
case 0:
assert.Equal(line, defaultSysConfRuntimeConfiguration)
assert.Equal(line, katautils.DEFAULTSYSCONFRUNTIMECONFIGURATION)
case 1:
assert.Equal(line, defaultRuntimeConfiguration)
assert.Equal(line, katautils.DEFAULTRUNTIMECONFIGURATION)
}
}
}
@@ -715,7 +715,7 @@ func testVersionString(assert *assert.Assertions, versionString, expectedVersion
foundCommit := false
foundOCIVersion := false
versionRE := regexp.MustCompile(fmt.Sprintf(`%s\s*:\s*%v`, name, expectedVersion))
versionRE := regexp.MustCompile(fmt.Sprintf(`%s\s*:\s*%v`, katautils.NAME, expectedVersion))
commitRE := regexp.MustCompile(fmt.Sprintf(`%s\s*:\s*%v`, "commit", expectedCommit))
ociRE := regexp.MustCompile(fmt.Sprintf(`%s\s*:\s*%v`, "OCI specs", expectedOCIVersion))
@@ -753,37 +753,37 @@ func TestMainMakeVersionString(t *testing.T) {
v := makeVersionString()
testVersionString(assert, v, version, commit, specs.Version)
testVersionString(assert, v, katautils.VERSION, katautils.COMMIT, specs.Version)
}
func TestMainMakeVersionStringNoVersion(t *testing.T) {
assert := assert.New(t)
savedVersion := version
version = ""
savedVersion := katautils.VERSION
katautils.VERSION = ""
defer func() {
version = savedVersion
katautils.VERSION = savedVersion
}()
v := makeVersionString()
testVersionString(assert, v, unknown, commit, specs.Version)
testVersionString(assert, v, unknown, katautils.COMMIT, specs.Version)
}
func TestMainMakeVersionStringNoCommit(t *testing.T) {
assert := assert.New(t)
savedCommit := commit
commit = ""
savedCommit := katautils.COMMIT
katautils.COMMIT = ""
defer func() {
commit = savedCommit
katautils.COMMIT = savedCommit
}()
v := makeVersionString()
testVersionString(assert, v, version, unknown, specs.Version)
testVersionString(assert, v, katautils.VERSION, unknown, specs.Version)
}
func TestMainMakeVersionStringNoOCIVersion(t *testing.T) {
@@ -798,7 +798,7 @@ func TestMainMakeVersionStringNoOCIVersion(t *testing.T) {
v := makeVersionString()
testVersionString(assert, v, version, commit, unknown)
testVersionString(assert, v, katautils.VERSION, katautils.COMMIT, unknown)
}
func TestMainCreateRuntimeApp(t *testing.T) {
@@ -824,7 +824,7 @@ func TestMainCreateRuntimeApp(t *testing.T) {
defaultOutputFile = savedOutputFile
}()
args := []string{name}
args := []string{katautils.NAME}
err = createRuntimeApp(context.Background(), args)
assert.NoError(err, "%v", args)
@@ -849,7 +849,7 @@ func TestMainCreateRuntimeAppInvalidSubCommand(t *testing.T) {
}()
// calls fatal() so no return
_ = createRuntimeApp(context.Background(), []string{name, "i-am-an-invalid-sub-command"})
_ = createRuntimeApp(context.Background(), []string{katautils.NAME, "i-am-an-invalid-sub-command"})
assert.NotEqual(exitStatus, 0)
}
@@ -869,7 +869,7 @@ func TestMainCreateRuntime(t *testing.T) {
savedBefore := runtimeBeforeSubcommands
savedCommands := runtimeCommands
os.Args = []string{name, cmd}
os.Args = []string{katautils.NAME, cmd}
exitFunc = func(status int) { exitStatus = status }
// disable
@@ -920,10 +920,10 @@ func TestMainVersionPrinter(t *testing.T) {
setCLIGlobals()
err = createRuntimeApp(context.Background(), []string{name, "--version"})
err = createRuntimeApp(context.Background(), []string{katautils.NAME, "--version"})
assert.NoError(err)
err = grep(fmt.Sprintf(`%s\s*:\s*%s`, name, version), output)
err = grep(fmt.Sprintf(`%s\s*:\s*%s`, katautils.NAME, katautils.VERSION), output)
assert.NoError(err)
}
@@ -968,7 +968,7 @@ func TestMainFatalWriter(t *testing.T) {
setCLIGlobals()
err := createRuntimeApp(context.Background(), []string{name, cmd})
err := createRuntimeApp(context.Background(), []string{katautils.NAME, cmd})
assert.Error(err)
re := regexp.MustCompile(

View File

@@ -16,6 +16,8 @@ import (
"strings"
"github.com/blang/semver"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
)
type ReleaseCmd int
@@ -29,7 +31,7 @@ type releaseDetails struct {
const (
// A release URL is expected to be prefixed with this value
projectAPIURL = "https://api.github.com/repos/" + projectORG
projectAPIURL = "https://api.github.com/repos/" + katautils.PROJECTORG
releasesSuffix = "/releases"
downloadsSuffix = releasesSuffix + "/download"
@@ -37,12 +39,12 @@ const (
// Kata 1.x
kata1xRepo = "runtime"
kataLegacyReleaseURL = projectAPIURL + "/" + kata1xRepo + releasesSuffix
kataLegacyDownloadURL = projectURL + "/" + kata1xRepo + downloadsSuffix
kataLegacyDownloadURL = katautils.PROJECTURL + "/" + kata1xRepo + downloadsSuffix
// Kata 2.x or newer
kata2xRepo = "kata-containers"
kataReleaseURL = projectAPIURL + "/" + kata2xRepo + releasesSuffix
kataDownloadURL = projectURL + "/" + kata2xRepo + downloadsSuffix
kataDownloadURL = katautils.PROJECTURL + "/" + kata2xRepo + downloadsSuffix
// Environment variable that can be used to override a release URL
ReleaseURLEnvVar = "KATA_RELEASE_URL"
@@ -377,7 +379,7 @@ func HandleReleaseVersions(cmd ReleaseCmd, currentVersion string, includeAll boo
currentSemver, err := semver.Make(currentVersion)
if err != nil {
return fmt.Errorf("BUG: Current version of %s (%s) has invalid SemVer version: %v", name, currentVersion, err)
return fmt.Errorf("BUG: Current version of %s (%s) has invalid SemVer version: %v", katautils.NAME, currentVersion, err)
}
releaseURL, err := getReleaseURL(currentSemver)

View File

@@ -12,6 +12,7 @@ import (
"testing"
"github.com/blang/semver"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
"github.com/stretchr/testify/assert"
)
@@ -20,7 +21,7 @@ var expectedReleasesURL string
func init() {
var err error
currentSemver, err = semver.Make(version)
currentSemver, err = semver.Make(katautils.VERSION)
if err != nil {
panic(fmt.Sprintf("failed to create semver for testing: %v", err))
@@ -307,7 +308,7 @@ func TestDownloadURLIsValid(t *testing.T) {
{"foo", true},
{"foo bar", true},
{"https://google.com", true},
{projectURL, true},
{katautils.PROJECTURL, true},
{validKata1xDownload, false},
{validKata2xDownload, false},
}

View File

@@ -170,7 +170,7 @@ func newNetmon(params netmonParams) (*netmon, error) {
func (n *netmon) cleanup() {
os.RemoveAll(n.storagePath)
n.netHandler.Delete()
n.netHandler.Close()
close(n.linkDoneCh)
close(n.rtDoneCh)
}

View File

@@ -348,7 +348,7 @@ func TestScanNetwork(t *testing.T) {
handler, err := netlink.NewHandle(netlinkFamily)
assert.Nil(t, err)
assert.NotNil(t, handler)
defer handler.Delete()
defer handler.Close()
idx, expected := testCreateDummyNetwork(t, handler)
@@ -480,7 +480,7 @@ func TestActionsCLI(t *testing.T) {
handler, err := netlink.NewHandle(netlinkFamily)
assert.Nil(t, err)
assert.NotNil(t, handler)
defer handler.Delete()
defer handler.Close()
n.netHandler = handler
@@ -569,7 +569,7 @@ func TestHandleRTMNewLink(t *testing.T) {
handler, err := netlink.NewHandle(netlinkFamily)
assert.Nil(t, err)
assert.NotNil(t, handler)
defer handler.Delete()
defer handler.Close()
n.netHandler = handler
err = n.handleRTMNewLink(ev)
assert.NotNil(t, err)
@@ -690,7 +690,7 @@ func TestHandleRouteEvent(t *testing.T) {
handler, err := netlink.NewHandle(netlinkFamily)
assert.Nil(t, err)
assert.NotNil(t, handler)
defer handler.Delete()
defer handler.Close()
n.netHandler = handler

View File

@@ -109,6 +109,11 @@ virtio_fs_cache = "@DEFVIRTIOFSCACHE@"
# or nvdimm.
block_device_driver = "virtio-blk"
# Enable huge pages for VM RAM, default false
# Enabling this will result in the VM memory
# being allocated using huge pages.
#enable_hugepages = true
# This option changes the default hypervisor and kernel parameters
# to enable debug output where available.
#

View File

@@ -24,6 +24,11 @@ machine_type = "@MACHINETYPE@"
# Default false
# confidential_guest = true
# Enable running QEMU VMM as a non-root user.
# By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as
# a non-root random user. See documentation for the limitations of this mode.
# rootless = true
# List of valid annotation names for the hypervisor
# Each member of the list is a regular expression, which is the base name
# of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path"
@@ -360,7 +365,7 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
# When enable_guest_swap is enabled, insert a raw file to the guest as the swap device
# if the swappiness of a container (set by annotation "io.katacontainers.container.resource.swappiness")
# is bigger than 0.
# The size of the swap device should be
# The size of the swap device should be
# swap_in_bytes (set by annotation "io.katacontainers.container.resource.swap_in_bytes") - memory_limit_in_bytes.
# If swap_in_bytes is not set, the size should be memory_limit_in_bytes.
# If swap_in_bytes and memory_limit_in_bytes is not set, the size should

View File

@@ -8,13 +8,14 @@ require (
github.com/blang/semver/v4 v4.0.0
github.com/containerd/cgroups v1.0.1
github.com/containerd/console v1.0.2
github.com/containerd/containerd v1.5.4
github.com/containerd/containerd v1.5.7
github.com/containerd/cri-containerd v1.11.1-0.20190125013620-4dd6735020f5
github.com/containerd/fifo v1.0.0
github.com/containerd/ttrpc v1.0.2
github.com/containerd/typeurl v1.0.2
github.com/containernetworking/plugins v0.9.1
github.com/cri-o/cri-o v1.0.0-rc2.0.20170928185954-3394b3b2d6af
github.com/fsnotify/fsnotify v1.4.9
github.com/go-ini/ini v1.28.2
github.com/go-openapi/errors v0.18.0
github.com/go-openapi/runtime v0.18.0
@@ -24,9 +25,9 @@ require (
github.com/gogo/protobuf v1.3.2
github.com/hashicorp/go-multierror v1.0.0
github.com/intel-go/cpuid v0.0.0-20210602155658-5747e5cec0d9
github.com/kata-containers/govmm v0.0.0-20210804035756-3c64244cbb48
github.com/kata-containers/govmm v0.0.0-20210909155007-1b60b536f3c7
github.com/mdlayher/vsock v0.0.0-20191108225356-d9c65923cb8f
github.com/opencontainers/runc v1.0.1
github.com/opencontainers/runc v1.0.2
github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417
github.com/opencontainers/selinux v1.8.2
github.com/pkg/errors v0.9.1
@@ -39,9 +40,8 @@ require (
github.com/smartystreets/goconvey v1.6.4 // indirect
github.com/stretchr/testify v1.6.1
github.com/urfave/cli v1.22.2
github.com/vishvananda/netlink v1.1.1-0.20201029203352-d40f9887b852
github.com/vishvananda/netlink v1.1.1-0.20210924202909-187053b97868
github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb
go.opentelemetry.io/otel v0.15.0
go.opentelemetry.io/otel/exporters/trace/jaeger v0.15.0
go.opentelemetry.io/otel/sdk v0.15.0
@@ -54,7 +54,7 @@ require (
)
replace (
github.com/containerd/containerd => github.com/containerd/containerd v1.5.4
github.com/containerd/containerd => github.com/containerd/containerd v1.5.7
github.com/opencontainers/runc => github.com/opencontainers/runc v1.0.1
github.com/uber-go/atomic => go.uber.org/atomic v1.5.1
google.golang.org/genproto => google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8

View File

@@ -53,8 +53,8 @@ github.com/Microsoft/go-winio v0.4.17 h1:iT12IBVClFevaf8PuVyi3UmZOVh4OqnaLxDTW2O
github.com/Microsoft/go-winio v0.4.17/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84=
github.com/Microsoft/hcsshim v0.8.6/go.mod h1:Op3hHsoHPAvb6lceZHDtd9OkTew38wNoXnJs8iY7rUg=
github.com/Microsoft/hcsshim v0.8.16/go.mod h1:o5/SZqmR7x9JNKsW3pu+nqHm0MF8vbA+VxGOoXdC600=
github.com/Microsoft/hcsshim v0.8.18 h1:cYnKADiM1869gvBpos3YCteeT6sZLB48lB5dmMMs8Tg=
github.com/Microsoft/hcsshim v0.8.18/go.mod h1:+w2gRZ5ReXQhFOrvSQeNfhrYB/dg3oDwTOcER2fw4I4=
github.com/Microsoft/hcsshim v0.8.21 h1:btRfUDThBE5IKcvI8O8jOiIkujUsAMBSRsYDYmEi6oM=
github.com/Microsoft/hcsshim v0.8.21/go.mod h1:+w2gRZ5ReXQhFOrvSQeNfhrYB/dg3oDwTOcER2fw4I4=
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ=
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
github.com/PuerkitoBio/purell v1.1.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
@@ -91,14 +91,12 @@ github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko=
github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY=
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/checkpoint-restore/go-criu/v5 v5.0.0 h1:TW8f/UvntYoVDMN1K2HlT82qH1rb0sOjpGw3m6Ym+i4=
github.com/checkpoint-restore/go-criu/v5 v5.0.0/go.mod h1:cfwC0EG7HMUenopBsUf9d89JlCLQIfgVcNsNN0t6T2M=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/cilium/ebpf v0.2.0/go.mod h1:To2CFviqOWL/M0gIMsvSMlqe7em/l1ALkX1PyjrX2Qs=
github.com/cilium/ebpf v0.4.0/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs=
github.com/cilium/ebpf v0.6.2 h1:iHsfF/t4aW4heW2YKfeHrVPGdtYTL4C4KocpM8KTSnI=
github.com/cilium/ebpf v0.6.2/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
@@ -111,8 +109,8 @@ github.com/containerd/cgroups v1.0.1/go.mod h1:0SJrPIenamHDcZhEcJMNBB85rHcUsw4f2
github.com/containerd/console v1.0.1/go.mod h1:XUsP6YE/mKtz6bxc+I8UiKKTP04qjQL4qcS3XoQ5xkw=
github.com/containerd/console v1.0.2 h1:Pi6D+aZXM+oUw1czuKgH5IJ+y0jhYcwBJfx5/Ghn9dE=
github.com/containerd/console v1.0.2/go.mod h1:ytZPjGgY2oeTkAONYafi2kSj0aYggsf8acV1PGKCbzQ=
github.com/containerd/containerd v1.5.4 h1:uPF0og3ByFzDnaStfiQj3fVGTEtaSNyU+bW7GR/nqGA=
github.com/containerd/containerd v1.5.4/go.mod h1:sx18RgvW6ABJ4iYUw7Q5x7bgFOAB9B6G7+yO0XBc4zw=
github.com/containerd/containerd v1.5.7 h1:rQyoYtj4KddB3bxG6SAqd4+08gePNyJjRqvOIfV3rkM=
github.com/containerd/containerd v1.5.7/go.mod h1:gyvv6+ugqY25TiXxcZC3L5yOeYgEw0QMhscqVp1AR9c=
github.com/containerd/continuity v0.0.0-20210208174643-50096c924a4e/go.mod h1:EXlVlkqNba9rJe3j7w3Xa924itAMLgZH4UD/Q4PExuQ=
github.com/containerd/continuity v0.1.0 h1:UFRRY5JemiAhPZrr/uE0n8fMTLcZsUvySPr1+D7pgr8=
github.com/containerd/continuity v0.1.0/go.mod h1:ICJu0PwR54nI0yPEnJ6jcS+J7CZAUXrLh8lPo2knzsM=
@@ -158,7 +156,6 @@ github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsr
github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY=
github.com/cri-o/cri-o v1.0.0-rc2.0.20170928185954-3394b3b2d6af h1:H6nLV96F1LkWizYLQtrMtqJBrlJxnpjgisHsTsOS2HU=
github.com/cri-o/cri-o v1.0.0-rc2.0.20170928185954-3394b3b2d6af/go.mod h1:POmDVglzQ2jWTlL9ZCfZ8d1QjLhmk0oB36O8T0oG75Y=
github.com/cyphar/filepath-securejoin v0.2.2 h1:jCwT2GTP+PY5nBz3c/YL5PAIbusElVrPujOBSCj8xRg=
github.com/cyphar/filepath-securejoin v0.2.2/go.mod h1:FpkQEhXnPnOthhzymB7CGsFk2G9VLXONKD9G7QGMM+4=
github.com/d2g/dhcp4 v0.0.0-20170904100407-a1d1b6c41b1c/go.mod h1:Ct2BUK8SB0YC1SMSibvLzxjeJLnrYEVLULFNiHY9YfQ=
github.com/d2g/dhcp4client v1.0.0/go.mod h1:j0hNfjhrt2SxUOw55nL0ATM/z4Yt3t2Kd1mW34z5W5s=
@@ -188,7 +185,6 @@ github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7
github.com/evanphx/json-patch v4.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk=
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
github.com/form3tech-oss/jwt-go v3.2.2+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k=
github.com/frankban/quicktest v1.11.3 h1:8sXhOn0uLys67V8EsXLc6eszDs8VXWxL3iRvebPhedY=
github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
@@ -347,7 +343,7 @@ github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/imdario/mergo v0.3.5/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA=
github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA=
github.com/imdario/mergo v0.3.12/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/intel-go/cpuid v0.0.0-20210602155658-5747e5cec0d9 h1:x9HFDMDCsaxTvC4X3o0ZN6mw99dT/wYnTItGwhBRmg0=
github.com/intel-go/cpuid v0.0.0-20210602155658-5747e5cec0d9/go.mod h1:RmeVYf9XrPRbRc3XIx0gLYA8qOFvNoPOfaEZduRlEp4=
@@ -361,8 +357,8 @@ github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/X
github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo=
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
github.com/kata-containers/govmm v0.0.0-20210804035756-3c64244cbb48 h1:+tb5btBYMjZ1C5zBqK7ygCb03yqZtC5Mz0W6riq6T5k=
github.com/kata-containers/govmm v0.0.0-20210804035756-3c64244cbb48/go.mod h1:A6QaNB6N6PRQ9mTRpFtUxiF5T5CJpzLALjxBrUQPlFI=
github.com/kata-containers/govmm v0.0.0-20210909155007-1b60b536f3c7 h1:lrtaReMyoviyn/Gtd9iAmQ9qNSTaS3QC1NgQ+h5fliI=
github.com/kata-containers/govmm v0.0.0-20210909155007-1b60b536f3c7/go.mod h1:A6QaNB6N6PRQ9mTRpFtUxiF5T5CJpzLALjxBrUQPlFI=
github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q=
github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
@@ -409,7 +405,6 @@ github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJ
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/mrunalp/fileutils v0.5.0 h1:NKzVxiH7eSk+OQ4M+ZYW1K6h27RUV3MI6NUTsHhU6Z4=
github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
@@ -439,7 +434,6 @@ github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/
github.com/opencontainers/runtime-spec v1.0.3-0.20200929063507-e6143ca7d51d/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 h1:3snG66yBm59tKhhSPQrQ/0bCrv1LQbKt40LnUPiUxdc=
github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/selinux v1.8.0/go.mod h1:RScLhm78qiWa2gbVCcGkC7tCGdgk3ogry1nUQF8Evvo=
github.com/opencontainers/selinux v1.8.2 h1:c4ca10UMgRcvZ6h0K4HtS15UaVSBEaE+iln2LVpAuGc=
github.com/opencontainers/selinux v1.8.2/go.mod h1:MUIHuUEvKB1wtJjQdOyYRgOnLD2xAPP8dBsCoU0KuF8=
github.com/pborman/uuid v1.2.0 h1:J7Q5mO4ysT1dv8hyrUGHb9+ooztCXu1D8MY8DZYsu3g=
@@ -488,7 +482,6 @@ github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQD
github.com/safchain/ethtool v0.0.0-20190326074333-42ed695e3de8 h1:2c1EFnZHIPCW8qKWgHMH/fX2PkSabFc5mrVzfUNdg5U=
github.com/safchain/ethtool v0.0.0-20190326074333-42ed695e3de8/go.mod h1:Z0q5wiBQGYcxhMZ6gUqHn6pYNLypFAvaL3UvgZLR0U4=
github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=
github.com/seccomp/libseccomp-golang v0.9.1 h1:NJjM5DNFOs0s3kYE1WUOr6G8V97sdt46rlXTMfXGWBo=
github.com/seccomp/libseccomp-golang v0.9.1/go.mod h1:GbW5+tmTXfcxTToHLXlScSlAvWlF4P2Ca7zGrPiEpWo=
github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
@@ -528,7 +521,6 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI=
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
github.com/tchap/go-patricia v2.2.6+incompatible/go.mod h1:bmLyhP68RS6kStMGxByiQ23RP/odRBOTVjwp2cDyi6I=
github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
@@ -539,14 +531,12 @@ github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtX
github.com/urfave/cli v1.22.2 h1:gsqYFH8bb9ekPA12kRo0hfjngWQjkJPlN9R0N78BoUo=
github.com/urfave/cli v1.22.2/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE=
github.com/vishvananda/netlink v1.1.1-0.20201029203352-d40f9887b852 h1:cPXZWzzG0NllBLdjWoD1nDfaqu98YMv+OneaKc8sPOA=
github.com/vishvananda/netlink v1.1.1-0.20201029203352-d40f9887b852/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho=
github.com/vishvananda/netlink v1.1.1-0.20210924202909-187053b97868 h1:FFT5/l13iFxg+2dzyoiXZPmMtoclsyBKnUqTEzYpDXw=
github.com/vishvananda/netlink v1.1.1-0.20210924202909-187053b97868/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho=
github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU=
github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae h1:4hwBBUfQCFe3Cym0ZtKyq7L16eZUtYKs+BaHDN6mAns=
github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0=
github.com/willf/bitset v1.1.11/go.mod h1:83CECat5yLh5zVOf4P1ErAgKA5UDvKtgyUABdr3+MjI=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=

View File

@@ -11,12 +11,18 @@ import (
"context"
"fmt"
"os"
"os/user"
"path"
"path/filepath"
"strconv"
"syscall"
containerd_types "github.com/containerd/containerd/api/types"
"github.com/containerd/containerd/mount"
taskAPI "github.com/containerd/containerd/runtime/v2/task"
"github.com/containerd/typeurl"
"github.com/kata-containers/kata-containers/src/runtime/pkg/utils"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
@@ -33,6 +39,13 @@ import (
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/oci"
)
type startManagementServerFunc func(s *service, ctx context.Context, ociSpec *specs.Spec)
var defaultStartManagementServerFunc startManagementServerFunc = func(s *service, ctx context.Context, ociSpec *specs.Spec) {
go s.startManagementServer(ctx, ociSpec)
shimLog.Info("management server started")
}
func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest) (*container, error) {
rootFs := vc.RootFs{}
if len(r.Rootfs) == 1 {
@@ -103,6 +116,12 @@ func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest) (*con
}()
katautils.HandleFactory(ctx, vci, s.config)
rootless.SetRootless(s.config.HypervisorConfig.Rootless)
if rootless.IsRootless() {
if err := configureNonRootHypervisor(s.config); err != nil {
return nil, err
}
}
// Pass service's context instead of local ctx to CreateSandbox(), since local
// ctx will be canceled after this rpc service call, but the sandbox will live
@@ -119,7 +138,9 @@ func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest) (*con
}
s.hpid = uint32(pid)
go s.startManagementServer(ctx, ociSpec)
if defaultStartManagementServerFunc != nil {
defaultStartManagementServerFunc(s, ctx, ociSpec)
}
case vc.PodContainer:
span, ctx := katatrace.Trace(s.ctx, shimLog, "create", shimTracingTags)
@@ -259,3 +280,69 @@ func doMount(mounts []*containerd_types.Mount, rootfs string) error {
}
return nil
}
func configureNonRootHypervisor(runtimeConfig *oci.RuntimeConfig) error {
userName, err := utils.CreateVmmUser()
if err != nil {
return err
}
defer func() {
if err != nil {
if err2 := utils.RemoveVmmUser(userName); err2 != nil {
shimLog.WithField("userName", userName).WithError(err).Warn("failed to remove user")
}
}
}()
u, err := user.Lookup(userName)
if err != nil {
return err
}
uid, err := strconv.Atoi(u.Uid)
if err != nil {
return err
}
gid, err := strconv.Atoi(u.Gid)
if err != nil {
return err
}
runtimeConfig.HypervisorConfig.Uid = uint32(uid)
runtimeConfig.HypervisorConfig.Gid = uint32(gid)
userTmpDir := path.Join("/run/user/", fmt.Sprint(uid))
dir, err := os.Stat(userTmpDir)
if os.IsNotExist(err) {
if err = os.Mkdir(userTmpDir, vc.DirMode); err != nil {
return err
}
defer func() {
if err != nil {
if err = os.RemoveAll(userTmpDir); err != nil {
shimLog.WithField("userTmpDir", userTmpDir).WithError(err).Warn("failed to remove userTmpDir")
}
}
}()
if err = syscall.Chown(userTmpDir, uid, gid); err != nil {
return err
}
}
if dir != nil && !dir.IsDir() {
return fmt.Errorf("%s is expected to be a directory", userTmpDir)
}
if err := os.Setenv("XDG_RUNTIME_DIR", userTmpDir); err != nil {
return err
}
info, err := os.Stat("/dev/kvm")
if err != nil {
return err
}
if stat, ok := info.Sys().(*syscall.Stat_t); ok {
// Add the kvm group to the hypervisor supplemental group so that the hypervisor process can access /dev/kvm
runtimeConfig.HypervisorConfig.Groups = append(runtimeConfig.HypervisorConfig.Groups, stat.Gid)
return nil
}
return fmt.Errorf("failed to get the gid of /dev/kvm")
}

Some files were not shown because too many files have changed in this diff Show More