Compare commits

...

210 Commits

Author SHA1 Message Date
Bin Liu
aa295c91f2 Merge pull request #992 from bergwolf/2.0.0-branch-bump
# Kata Containers 2.0.0
2020-10-19 16:02:09 +08:00
Ubuntu
6648c8c7fc release: Kata Containers 2.0.0
- backport 2.0-dev commits to stable-2.0.0

dbfe85e snap: install libseccomp-dev
0c3b6a9 package: drop qemu-virtiofs shim
f751c98 packaging: install virtiofsd for normal qemu build as well
08361c5 runtime: enable virtiofs by default
da9bfb2 runtime: Pass `--thread-pool-size=1` to virtiofsd
7347d43 packaging: Apply virtiofs performance related fixes to 5.x
c7bb1e2 tools: Improve agent-ctl README
e6f7ddd tools: Make agent-ctl support more APIs
46cfed5 tools: Remove commented out code in agent-ctl
81fb2c9 tools: Log request in agent-ctl tool if debug enabled
0c43215 tools: Rename agent-ctl command to GetGuestDetails
6511ffe tools: Fix comment in agent-ctl
ee59378 kernel: update to 5.4.71
ef11213 config: make virtio-fs part of standard kernel
1fb6730 agent: remove `unwrap()` for `e.as_errno()`
05e9fe0 agent: Use `?` instead of `match` when the error returns directly
d658129 kata-monitor: use regexp to check if runtime is kata containers
ae2d89e agent: use anyhow `context` to attach context to `Error` instead of `match`
095d4ad agent: remove useless match
bd816df agent: Use `ok_or_else` instead of match for Option -> Result
d413bf7 agent: Fix crasher if AddARPNeighbors request empty
76408c0 agent: Fix crasher if UpdateRoutes request empty
6e4da19 agent: Fix crasher if UpdateInterface request empty
8f8061d agent: replace `match Result` with `or_else`
64e4b2f agent: replace unnecessary `match Result` with `map_err`
7c0d68f agent: replace check! with map_err for readability
82ed34a agent: remove `check!` in child process because we cant' see logs.
9def624 agent: replace `if let Err` with `or_else`
6926914 agent: refactor namespace::setup to optimize error handling
e733c13 agent: replace `if let Err` with `map_err`
ba069f9 rustjail: add length check for uid_mappings in rootless euid mapping
cc8ec7b versions: Update Kubernetes, containerd, cri-o and cri-tools
8a364d2 annotations: Correct unit tests to validate new protections
0cc6297 annotations: Split addHypervisorOverrides to reduce complexity
b6059f3 annotations: Add unit test for checkPathIsInGlobs
c6afad2 annotations: Add unit test for regexpContains function
451608f makefile: Add missing generated vars to `USER_VARS`
8328136 makefile: Improve names of config entries for annotation checks
a92a630 annotations: Give better names to local variabes in search functions
997f7c4 annotations: Rename checkPathIsInGlobList with checkPathIsInGlobs
74d4065 config: Add better comments in the template files
73bb3fd config: Whitelist hypervisor annotations by name
5a587ba config: Use glob instead of regexp to match paths in annotations
29f5dec annotations: Fix typo in comment
d71f9e1 config: Add makefile variables for path lists
28c386c config: Protect file_mem_backend against annotation attacks
c2a186b config: Protect vhost_user_store_path against annotation attacks
8cd094c config: Add security warning on configuration examples
b5f2a1e config: Protect ctlpath from annotation attack
2d65b3b config: Protect jailer_path annotation
fe5e1cf config: Add examples for path_list configuration
3f7bcf5 annotations: Simplify negative logic
80144fc config: Add hypervisor path override through annotations
2f5f356 config: Fix typo in function name
2faafbd config: Protect virtio_fs_daemon annotation
9e5ed41 config: Add 'List' alternates for hypervisor configuration paths
b33d4fe agent: fix panic on malformed device resource in container update
1838233 cpuset: add cpuset pkg
bfbbe8b cpuset: don't set cpuset.mems in the guest
5c21ec2 sandbox: consider cpusets if quota is not enforced
9bb0d48 cpuset: support setting mems for sandbox
64a2ef6 virtcontainers: add method for calculating cpuset for sandbox
a441f21 cpuset: add cpuset pkg
ce54090 docs: Update upgrading guide
e884fef docs: update the build kata containers kernel document
9c16643 agent/device: Check type as well as major:minor when looking up devices
4978c90 agent/device: Index all devices in spec before updating them
a7ba362 agent/device: Forward port update_spec_device_list() unit test
230a983 agent/device: update_spec_device_list() should error if dev not found
a6d9fd4 sandbox: don't constrain cpus, mem only cpuset, devices
8f0cb2f cgroups: add ability to update CPUSet
cbdae44 agent: fix errorneous parsing for guest block size
97acaa8 docs: Add containerd install guide
2324666 agent: use ok_or/map_err instead of match
ebe5ad1 rustjail: use Iterator to manipulate vector elements
c9497c8 rustjail: delete codes commented out
d5d9928 rustjail: delete unused test code
f70892a agent: use chain of Result to avoid early return
ab64780 agent: update not accurate comments
9e064ba agent: use macro to simplify parse_cmdline function in config.rs
42c48f5 agent: add blank lines between methods
d3a36fa agent: delete unused field in agentService
fa54660 agent: use no-named closure to reduce codes
efddcb4 agent: use a local fn to reduce duplicated codes
7bb3e56 packaging: fix cloud-hypervisor binary path
7b53041 packaging: fix missing cloud_hypervisor_repo
38212ba packaging: apply qemu v5.1 stable fixes
fb7e9b4 agent: fix aarch64 build
0cfcbf7 docs: add namespace key to pod/container config files
997f1f6 docs: Add crictl example json files
f60f43a runtime: Clear the VCMock 1.x API Methods from 2.0
1789527 ci: snap: add event filtering
999f67d agent: do not follow link when mounting container proc and sysfs
cb2255f agent: set init process non-dumpable
2a6c9ee agent-ctl: include cargo lock updates
eaff5de versions: add plugins section
4f1d23b virtiofs: Disable DAX
6d80df9 snap: specify python version
a116ce0 osbuilder: Create target directory for agent
4dc3bc0 rust-agent: Treat warnings as error
8f7a484 rust-agent: Identify unused results in tests
ce54e5d rust-agent: Log returned errors rather than ignore them
9adb7b7 rust-agent: Remove unused imports
73ab9b1 rust-agent: Report errors to caller if possible
4db3f9e rust-agent: Ignore write errors while writing to the logs
19cb657 rust-agent: Remove unused code that has undefined behavior
86bc151 rust-agent: Remove 'mut' where not needed
8d8adb6 rust-agent: Remove uses of deprecated functions
76298c1 rust-agent: Remove or rename unused parameters
7d303ec rust-agent: Remove or rename unused variables
e0b79eb rust-agent: Remove unused functions
8ed61b1 rust-agent: Remove useless braces
cc4f02e rust-agent: Remove unused macros
ace6f1e clh: Support VFIO device unplug
47cfeaa clh: Remove unnecessary VmmPing
63c4757 versions: cloud-hypervisor: Bump to version 6d30fe05
059b89c docs: Change kata_tap0 to tap0_kata
4ff3ed5 docs: update networking description
de8dcb1 dev-guide: update kata-agent install details
c488cc4 docs: Update docs for enabling agent debug console
e5acb12 docs: update dev guide for agent build
1bddde7 ci: add github action to test the snap
9517b0a versions: cloud-hypervisor: bump version
f5a7175 runtime: cloud-hypervisor: tag openapi-generator-cli container

Signed-off-by: Ubuntu <ubuntu@ip-172-31-19-197.ap-southeast-1.compute.internal>
2020-10-19 06:18:08 +00:00
Xu Wang
49776f76bf Merge pull request #984 from bergwolf/prepare-release
backport 2.0-dev commits to stable-2.0.0
2020-10-18 13:46:16 +08:00
Peng Tao
dbfe85e705 snap: install libseccomp-dev
To build qemu with virtio-fs support.

Depends-on: github.com/kata-containers/tests#2979
Fixes: #982
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-18 00:43:15 +08:00
Peng Tao
0c3b6a94b3 package: drop qemu-virtiofs shim
We have enabled qemu-virtiofs by default.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-18 00:43:15 +08:00
Peng Tao
f751c98da3 packaging: install virtiofsd for normal qemu build as well
For experimental-virtiofs, we use it to test virtiofs with DAX. Let's
rename its virtiofsd to virtiofsd-dax.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-18 00:43:15 +08:00
Peng Tao
08361c5948 runtime: enable virtiofs by default
We've been shipping it for a long time. It's time to make it default
replacing the old obsolet 9pfs.

Fixes: #935
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-18 00:43:15 +08:00
Fabiano Fidêncio
da9bfb27ed runtime: Pass --thread-pool-size=1 to virtiofsd
Dave Gilbert brough up that passing --thread-pool-size=1 to virtiofsd
may result in a performance improvement especially when using
`cache=none`. While our current default is `cache=auto`, Dave mentioned
that he seems no harm in having it set and he also mentiond that it may
use a lot less stack space on aarch/arm.

Fixes: #943

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2020-10-18 00:43:15 +08:00
Fabiano Fidêncio
7347d43cf9 packaging: Apply virtiofs performance related fixes to 5.x
Vivek Goyal found out that using "shared" thread pool, instead of
"exclusive" results in better performance.

Knowning that and with the plan to have virtio-fs as the default fs for
the 2.0, let's bring this patch in for both 5.0 and 5.1.

Fixes: #944

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2020-10-18 00:43:15 +08:00
James O. D. Hunt
c7bb1e2790 tools: Improve agent-ctl README
Add a summary to help understand how to use the `agent-ctl` tool.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-18 00:43:15 +08:00
James O. D. Hunt
e6f7ddd9a2 tools: Make agent-ctl support more APIs
Added new `agent-ctl` commands to allow the following agent API calls to
be made:

- `AddARPNeighborsRequest`
- `CloseStdinRequest`
- `CopyFileRequest`
- `GetMetricsRequest`
- `GetOOMEventRequest`
- `MemHotplugByProbeRequest`
- `OnlineCPUMemRequest`
- `ReadStreamRequest`
- `ReseedRandomDevRequest`
- `SetGuestDateTimeRequest`
- `TtyWinResizeRequest`
- `UpdateContainerRequest`
- `WriteStreamRequest`

Fixes: #969.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-18 00:43:15 +08:00
James O. D. Hunt
46cfed5025 tools: Remove commented out code in agent-ctl
Remove a few lines of commented out code.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-18 00:43:15 +08:00
James O. D. Hunt
81fb2c9980 tools: Log request in agent-ctl tool if debug enabled
Display the API request before making the call so users can see what is
sent to the agent.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-18 00:43:15 +08:00
James O. D. Hunt
0c432153df tools: Rename agent-ctl command to GetGuestDetails
Rename the `GuestDetails` command to `GetGuestDetails` to match the
actual agent API name.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-18 00:43:15 +08:00
James O. D. Hunt
6511ffe89d tools: Fix comment in agent-ctl
Correct a comment in the agent control tool.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-18 00:43:15 +08:00
Eric Ernst
ee59378232 kernel: update to 5.4.71
vsock fix was backported to 5.4 stable, so we can drop this patch.

Fixes: #973

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-18 00:43:15 +08:00
Eric Ernst
ef11213a4e config: make virtio-fs part of standard kernel
Basic virtio-fs support has made it upstream in the Linux kernel, as
well as in QEMU and Cloud Hypervisor. Let's go ahead and add it to the
standard configuration.

Since the device driver / DAX handling is still in progress for
upstream, we will want to still build a seperate experimental kernel for
those who are comfortable trading off bleeding edge stability/kernel
updates for improved FIO numbers.

Fixes: #963

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-18 00:43:15 +08:00
Tim Zhang
1fb6730984 agent: remove unwrap() for e.as_errno()
Use `{:?}` to print `e.as_errno()` instead of using `{}`
to print `e.as_errno().unwrap().desc()`.

Avoid panic only caused by error's content.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-18 00:43:15 +08:00
Tim Zhang
05e9fe0591 agent: Use ? instead of match when the error returns directly
It's more clear and more readable.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-18 00:43:15 +08:00
bin liu
d658129695 kata-monitor: use regexp to check if runtime is kata containers
To support a few common configurations for Kata, including:

- `io.containerd.kata.v2`
- `io.containerd.kata-qemu.v2`
- `io.containerd.kata-clh.v2`

`kata-monintor` changes to use regexp instead of direct string comparison.

Fixes: #957

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-18 00:43:15 +08:00
Tim Zhang
ae2d89e95e agent: use anyhow context to attach context to Error instead of match
Context is clearer than match for these situations.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-18 00:43:15 +08:00
Tim Zhang
095d4ad08d agent: remove useless match
Remove useless match.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-18 00:43:15 +08:00
Tim Zhang
bd816dfcec agent: Use ok_or_else instead of match for Option -> Result
Using ok_or is clearer than match.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-18 00:43:15 +08:00
James O. D. Hunt
d413bf7d44 agent: Fix crasher if AddARPNeighbors request empty
Check if the ARP neighbours specified in the `AddARPNeighbors` API is
set before using it to avoid crashing the agent.

Fixes: #955.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-18 00:43:15 +08:00
James O. D. Hunt
76408c0f13 agent: Fix crasher if UpdateRoutes request empty
Check if the routes specified in the `UpdateRoutes` API is set before
using it to avoid crashing the agent.

Fixes: #949.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-18 00:43:15 +08:00
James O. D. Hunt
6e4da19fa5 agent: Fix crasher if UpdateInterface request empty
Check if the interface specified in the `UpdateInterface` API is set
before using it to avoid crashing the agent.

Fixes: #950.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-18 00:43:15 +08:00
Tim Zhang
8f8061da08 agent: replace match Result with or_else
`or_else` is suitable for more complicated situations.
We can use it to return Ok in Err handling.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-18 00:43:15 +08:00
Tim Zhang
64e4b2fa83 agent: replace unnecessary match Result with map_err
Replace `match Result` whose Ok hand is useless.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-18 00:43:15 +08:00
Tim Zhang
7c0d68f7f7 agent: replace check! with map_err for readability
It's ambiguous and not easy to read to call method use macro.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-18 00:43:15 +08:00
Tim Zhang
82ed34aee1 agent: remove check! in child process because we cant' see logs.
The check macro will log the errors but the log in child process can't
be seen, just ignore it.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-18 00:43:15 +08:00
Tim Zhang
9def624c05 agent: replace if let Err with or_else
Fixes #934

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-18 00:43:15 +08:00
Tim Zhang
6926914683 agent: refactor namespace::setup to optimize error handling
- Replace the return value with anyhow::Result.
- Remove if let Err.
- Remove match.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-18 00:43:15 +08:00
Tim Zhang
e733c13cf7 agent: replace if let Err with map_err
Fixes #934

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-18 00:43:15 +08:00
bin liu
ba069f9baa rustjail: add length check for uid_mappings in rootless euid mapping
This might be a copy miss, gid_mappings is checked twice, one should
be uid_mappings.

Fixes: #952

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-18 00:43:15 +08:00
Salvador Fuentes
cc8ec7b0e9 versions: Update Kubernetes, containerd, cri-o and cri-tools
Kubernetes: from 1.17.3 to 1.18.9
CRI-O: from 0eec454168e381e460b3d6de07bf50bfd9b0d082 (1.17) to 1.18.3
Containerd: from 3a4acfbc99aa976849f51a8edd4af20ead51d8d7 (1.3.3) to 1.3.7
cri-tools: from 1.17.0 to 1.18.0

Fixes: #960.
Signed-off-by: Salvador Fuentes <salvador.fuentes@intel.com>
2020-10-18 00:43:15 +08:00
Christophe de Dinechin
8a364d2145 annotations: Correct unit tests to validate new protections
Add the verification of some basic protections, namely that:
- EnableAnnotations is honored
- Dangerous paths cannot be modified if no match
- Errors are returned when expected

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:43:15 +08:00
Christophe de Dinechin
0cc6297716 annotations: Split addHypervisorOverrides to reduce complexity
Warning from gocyclo during make check:
 virtcontainers/pkg/oci/utils.go:404:1: cyclomatic complexity 37 of func `addHypervisorConfigOverrides` is high (> 30) (gocyclo)
 func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig, runtime RuntimeConfig) error {
^

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:43:15 +08:00
Christophe de Dinechin
b6059f3566 annotations: Add unit test for checkPathIsInGlobs
There are a few interesting corner cases to consider for this
function.

Fixes: #901

Suggested-by: James O.D. Hunt <james.o.hunt@intel.com>
Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:43:15 +08:00
Christophe de Dinechin
c6afad2a06 annotations: Add unit test for regexpContains function
James O.D Hunt: "But also, regexpContains() and
checkPathIsInGlobList() seem like good candidates for some unit
tests. The "look" obvious, but a few boundary condition tests would be
useful I think (filenames with spaces, backslashes, special
characters, and relative & absolute paths are also an interesting
thought here)."

There aren't that many boundary conditions on a list with regexps,
if you assume the regexp match function itself works. However, the
tests is useful in documenting expectations.

Fixes: #901

Suggested-by: James O.D. Hunt <james.o.hunt@intel.com>
Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:43:15 +08:00
Christophe de Dinechin
451608fb28 makefile: Add missing generated vars to USER_VARS
This was discovered while checking a massive change in variables.
The root cause for the error is a very long list of manual
replacements, that is best replaced with a $(foreach).

All individual variables in the output configuration files were
checked against the old build using diff.

This is a forward port of a makefile fix included in
PR https://github.com/kata-containers/runtime/issues/3004
for issue https://github.com/kata-containers/runtime/issues/2943

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:43:15 +08:00
Christophe de Dinechin
8328136575 makefile: Improve names of config entries for annotation checks
The entries used to be things like PATH_LIST, which are too generic.
Replace them with more precise name with a distinguishing keyword,
namely VALID. For example valid_hypervisor_paths.

Fixes: #901

Suggested-by: James O.D. Hunt <james.o.hunt@intel.com>
Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:43:15 +08:00
Christophe de Dinechin
a92a63031d annotations: Give better names to local variabes in search functions
Use more meaningful variable names for clarity.

Fixes: #901

Suggested-by: James O.D. Hunt james.o.hunt@intel.com>
Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:43:15 +08:00
Christophe de Dinechin
997f7c4433 annotations: Rename checkPathIsInGlobList with checkPathIsInGlobs
The name is shorter and more specific

Fixes: #901

Suggested-by: James O.D. Hunt <james.o.hunt@intel.com>
Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:43:15 +08:00
Christophe de Dinechin
74d4065197 config: Add better comments in the template files
When there is a default value from the code (usually empty) that
differs from a possible suggested value from the distro, then the
wording "default: empty" is confusing.

Fixes: #901

Suggested-by: Julio Montes <julio.montes@intel.com>
Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:43:15 +08:00
Christophe de Dinechin
73bb3fdbee config: Whitelist hypervisor annotations by name
Add a field "enable_annotations" to the runtime configuration that can
be used to whitelist annotations using a list of regular expressions,
which are used to match any part of the base annotation name, i.e. the
part after "io.katacontainers.config.hypervisor."

For example, the following configuraiton will match "virtio_fs_daemon",
"initrd" and "jailer_path", but not "path" nor "firmware":

  enable_annotations = [ "virtio.*", "initrd", "_path" ]

The default is an empty list of enabled annotations, which disables
annotations entirely.

If an anontation is rejected, the message is something like:

  annotation io.katacontainers.config.hypervisor.virtio_fs_daemon is not enabled

Fixes: #901

Suggested-by: Peng Tao <tao.peng@linux.alibaba.com>
Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:43:10 +08:00
Christophe de Dinechin
5a587ba506 config: Use glob instead of regexp to match paths in annotations
When filtering annotations that correspond to paths,
e.g. hypervisor.path, it is better to use a glob syntax than a regexp
syntax, as it is more usual for paths, and prevents classes of matches
that are undesirable in our case, such as matching .. against .*

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:40:16 +08:00
Christophe de Dinechin
29f5dec38f annotations: Fix typo in comment
A comment talking about runtime related annotations describes them as
being related to the agent. A similar comment for the agent
annotations is missing.

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:40:16 +08:00
Christophe de Dinechin
d71f9e1155 config: Add makefile variables for path lists
Add variables to override defaults at build time for the various lists
used to control path annotations.

Fixes: #901

Suggested-by: Fabiano Fidencio <fidencio@redhat.com>
Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:40:16 +08:00
Christophe de Dinechin
28c386c51f config: Protect file_mem_backend against annotation attacks
This one could theoretically be used to overwrite data on the host.
It seems somewhat less risky than the earlier ones for a number
of reasons, but worth protecting a little anyway.

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:40:16 +08:00
Christophe de Dinechin
c2a186b18c config: Protect vhost_user_store_path against annotation attacks
This path could be used to overwrite data on the host.

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:40:16 +08:00
Christophe de Dinechin
8cd094cf06 config: Add security warning on configuration examples
Add the following text explaining the risk of using regular
expressions in path lists:

Each member of the list can be a regular expression, but prefer names.
Otherwise, please read and understand the following carefully.
SECURITY WARNING: If you use regular expressions, be mindful that
an attacker could craft an annotation that uses .. to escape the paths
you gave. For example, if your regexp is /bin/qemu.* then if there is
a directory named /bin/qemu.d/, then an attacker can pass an annotation
containing /bin/qemu.d/../put-any-binary-name-here and attack your host.

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:40:16 +08:00
Christophe de Dinechin
b5f2a1e8c4 config: Protect ctlpath from annotation attack
This also adds annotation for ctlpath which were not present
before. It's better to implement the code consistenly right now to make
sure that we don't end up with a leaky implementation tacked on later.

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:40:16 +08:00
Christophe de Dinechin
2d65b3bfd8 config: Protect jailer_path annotation
The jailer_path annotation can be used to execute arbitrary code on
the host. Add a jailer_path_list configuration entry providing a list
of regular expressions that can be used to filter annotations that
represent valid file names.

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:40:16 +08:00
Christophe de Dinechin
fe5e1cf2e1 config: Add examples for path_list configuration
The path_list configuration gives a series of regular expressions that
limit which values are acceptable through annotations in order to
avoid kata launching arbitrary binaries on the host when receiving an
annotation.

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:40:16 +08:00
Christophe de Dinechin
3f7bcf54f0 annotations: Simplify negative logic
Replace strange negative logic  (!ok -> continue) with positive
logic (ok -> do it)

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:40:16 +08:00
Christophe de Dinechin
80144fc415 config: Add hypervisor path override through annotations
The annotation is provided, so it should be respected.
Furthermore, it is important to implement it with the appropriate
protetions similar to what was done for virtiofsd.

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:40:16 +08:00
Christophe de Dinechin
2f5f35608a config: Fix typo in function name
There was an extra 'p' in addHypervisorVirtioFsOverrides.

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:40:16 +08:00
Christophe de Dinechin
2faafbdd3a config: Protect virtio_fs_daemon annotation
Sending the virtio_fs_daemon annotation can be used to execute
arbitrary code on the host. In order to prevent this, restrict the
values of the annotation to a list provided by the configuration
file.

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:40:16 +08:00
Christophe de Dinechin
9e5ed41511 config: Add 'List' alternates for hypervisor configuration paths
Paths mentioned in the hypervisor configuration can be overriden
using annotations, which is potentially dangerous. For each path,
add a 'List' variant that specifies the list of acceptable values
from annotations.

Bug: https://bugs.launchpad.net/katacontainers.io/+bug/1878234

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:40:16 +08:00
Peng Tao
b33d4fe708 agent: fix panic on malformed device resource in container update
Somehow containerd is sending a malformed device in update API. While it
should not happen, we should not panic either.

Fixes: #946
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-18 00:40:16 +08:00
Eric Ernst
183823398d cpuset: add cpuset pkg
Pulled from 1.18.4 Kubernetes, adding the cpuset pkg for managing
CPUSet calculations on the host. Go mod'ing the original code from
k8s.io/kubernetes was very painful, and this is very static, so let's
just pull in what we need.

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-18 00:40:16 +08:00
Eric Ernst
bfbbe8ba6b cpuset: don't set cpuset.mems in the guest
Kata doesn't map any numa topologies in the guest. Let's make sure we
clear the Cpuset fields before passing container updates to the
guest.

Note, in the future we may want to have a vCPU to guest CPU mapping and
still include the cpuset.Cpus. Until we have this support, clear this as
well.

Fixes: #932

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-18 00:40:16 +08:00
Eric Ernst
5c21ec278c sandbox: consider cpusets if quota is not enforced
CPUSet cgroup allows for pinning the memory associated with a cpuset to
a given numa node. Similar to cpuset.cpus, we should take cpuset.mems
into account for the sandbox-cgroup that Kata creates.

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-18 00:40:16 +08:00
Eric Ernst
9bb0d48d56 cpuset: support setting mems for sandbox
CPUSet cgroup allows for pinning the memory associated with a cpuset to
a given numa node. Similar to cpuset.cpus, we should take cpuset.mems
into account for the sandbox-cgroup that Kata creates.

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-18 00:40:16 +08:00
Eric Ernst
64a2ef62e0 virtcontainers: add method for calculating cpuset for sandbox
Calculate sandbox's CPUSet as the union of each of the container's
CPUSets.

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-18 00:40:16 +08:00
Eric Ernst
a441f21c40 cpuset: add cpuset pkg
Pulled from 1.18.4 Kubernetes, adding the cpuset pkg for managing
CPUSet calculations on the host. Go mod'ing the original code from
k8s.io/kubernetes was very painful, and this is very static, so let's
just pull in what we need.

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-18 00:40:16 +08:00
James O. D. Hunt
ce54090f25 docs: Update upgrading guide
Update the upgrading guide for 2.0.

Fixes: #928.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-18 00:40:16 +08:00
Ychau Wang
e884fef483 docs: update the build kata containers kernel document
Update the build kata containers kernel document for 2.0 release. Fixed
the 1.x release project paths and urls, using the kata-containers
project file paths and urls.

Fixes: #929

Signed-off-by: Ychau Wang <wangyongchao.bj@inspur.com>
2020-10-18 00:40:16 +08:00
David Gibson
9c16643c12 agent/device: Check type as well as major:minor when looking up devices
To update device resource entries from host to guest, we search for
the right entry by host major:minor numbers, then later update it.
However block and character devices exist in separate major:minor
namespaces so we could have one block and one character device with
matching major:minor and thus incorrectly update both with the details
for whichever device is processed second.

Add a check on device type to prevent this.

Port from the Kata 1 Go agent
https://github.com/kata-containers/agent/commit/27ebdc9d2761

Fixes: #703

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2020-10-18 00:40:16 +08:00
David Gibson
4978c9092c agent/device: Index all devices in spec before updating them
The agent needs to update device entries in the OCI spec so that it
has the correct major:minor numbers for the guest, which may differ
from the host.

Entries in the main device list are looked up by device path, but
entries in the device resources list are looked up by (host)
major:minor.  This is done one device at a time, updating as we go in
update_spec_device_list().

But since the host and guest have different namespaces, one device
might have the same major:minor as a different device on the host.  In
that case we could update one resource entry to the correct guest
values, then mistakenly update it again because it now matches a
different host device.

To avoid this, rather than looking up and updating one by one, we make
all the lookups in advance, creating a map from (host) device path to
the indices in the spec where the device and resource entries can be
found.

Port from the Go agent in Kata 1,
https://github.com/kata-containers/agent/commit/d88d46849130

Fixes: #703

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2020-10-18 00:40:16 +08:00
David Gibson
a7ba362f92 agent/device: Forward port update_spec_device_list() unit test
The Kata 1 Go agent included a unit test for updateSpecDeviceList, but no
such unit test exists for the Rust agent's equivalent
update_spec_device_list().  Port the Kata1 test to Rust.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2020-10-18 00:40:16 +08:00
David Gibson
230a9833f8 agent/device: update_spec_device_list() should error if dev not found
If update_spec_device_list() is given a device that can't be found in the
OCI spec, it currently does nothing, and returns Ok(()).  That doesn't
seem like what we'd expect and is not what the Go agent in Kata 1 does.

Change it to return an error in that case, like Kata 1.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2020-10-18 00:40:16 +08:00
Eric Ernst
a6d9fd4118 sandbox: don't constrain cpus, mem only cpuset, devices
Allow for constraining the cpuset as well as the devices-whitelist . Revert
sandbox constraints for cpu/memory, as they break the K8S use case. Can
re-add behind a non-default flag in the future.

The sandbox CPUSet should be updated every time a container is created,
updated, or removed.

To facilitate this without rewriting the 'non constrained cgroup'
handling, let's add to the Sandbox's cgroupsUpdate function.

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-18 00:40:16 +08:00
Eric Ernst
8f0cb2f1ea cgroups: add ability to update CPUSet
Add function for applying a cpuset change to a cgroup

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-18 00:40:16 +08:00
Eric Ernst
cbdae44992 agent: fix errorneous parsing for guest block size
We were assuming base 10 string before, when the block size from sysfs
is actually a hex string. Let's fix that.

Fixes: #908

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-18 00:40:16 +08:00
James O. D. Hunt
97acaa8124 docs: Add containerd install guide
Create a containerd installation guide and a new `kata-manager` script
for 2.0 that automated the steps outlined in the guide.

Also cleaned up and improved the installation documentation in various
ways, the most significant being:

- Added legacy install link for 1.x installs.
- Official packages section:
  - Removed "Contact" column (since it was empty!)
  - Reworded "Versions" column to clarify the versions are a minimum
    (to reduce maintenance burden).
  - Add a column to show which installation methods receive automatic updates.
  - Modified order of installation options in table and document to
    de-emphasise automatic installation and promote official packages
    and snap more.
- Removed sections no longer relevant for 2.0.

Fixes: #738.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-18 00:40:16 +08:00
bin liu
23246662b2 agent: use ok_or/map_err instead of match
Sometimes `Option.or_or` and `Result.map_err` may be simpler
than match statement. Especially in rpc.rs, there are
many `ctr.get_process` and `sandbox.get_container` which
are using `match`.

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-18 00:40:16 +08:00
bin liu
ebe5ad1386 rustjail: use Iterator to manipulate vector elements
Use Iterator can save codes, and make code more readable

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-18 00:40:16 +08:00
bin liu
c9497c88e4 rustjail: delete codes commented out
There are some uses/codes/struct fields are commented out, and
may not turn into  un-comment these codes, so delete these comments.

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-18 00:40:16 +08:00
bin liu
d5d9928f97 rustjail: delete unused test code
The auto generated test code is no meanings, delete it.

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-18 00:40:16 +08:00
bin liu
f70892a5bb agent: use chain of Result to avoid early return
Use rust `Result`'s `or_else`/`and_then` can write clean codes.
And can avoid early return by check wether the `Result`
is `Ok` or `Err`.

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-18 00:40:16 +08:00
bin liu
ab64780a0b agent: update not accurate comments
This commit includes:
- update comments that not matched the function name
- file path with doubled slash

Fixes: #922

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-18 00:40:16 +08:00
bin liu
9e064ba192 agent: use macro to simplify parse_cmdline function in config.rs
In function parse_cmdline there are some similar codes, if we want
to add more commandline arguments, the code will grow too long.
Use macro can reduce some codes with the same logic/processing.

Fixes: #914

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-18 00:40:16 +08:00
bin liu
42c48f54ed agent: add blank lines between methods
In rpc.rs, there are no blank lines between methods, this commit
add blank lines for these methods.

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-18 00:40:16 +08:00
bin liu
d3a36fa06f agent: delete unused field in agentService
The code is for test, and not needed now.

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-18 00:40:16 +08:00
bin liu
fa546600ff agent: use no-named closure to reduce codes
For simple closures, inline closures can save codes.

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-18 00:40:16 +08:00
bin liu
efddcb4ab8 agent: use a local fn to reduce duplicated codes
The same codes used twices, aggregated into a function can
reduce codes.

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-18 00:40:16 +08:00
Peng Tao
7bb3e562bc packaging: fix cloud-hypervisor binary path
1. ensure build-static-clh.sh puts cloud-hypervisor under ./cloud-hypervisor directory
2. install cloud-hypervisor/cloud-hypervisor binary

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-18 00:40:16 +08:00
Peng Tao
7b53041bad packaging: fix missing cloud_hypervisor_repo
It is needed in order to build from source.

Fixes: #916
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-18 00:40:16 +08:00
Peng Tao
38212ba6d8 packaging: apply qemu v5.1 stable fixes
Qemu v5.1 was released with an affending commit 9b3a35ec82
(virtio: verify that legacy support is not accidentally on).
As a result, it breaks commandline compatiblilities for old qemu
users. Upstream qemu has fixed it but no release has been put out yet.
Let's apply these fixes by hand for now.

Refs: https://www.mail-archive.com/qemu-devel@nongnu.org/msg729556.html

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-18 00:40:16 +08:00
Jianyong Wu
fb7e9b4f32 agent: fix aarch64 build
aarch64 needs libgcc to resolve some non-builtin symbols.

Fixes: #909
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-18 00:40:16 +08:00
bin liu
0cfcbf79b8 docs: add namespace key to pod/container config files
If no namespace field in config files, CRI-O will failed:
 setting pod sandbox name and id: cannot generate pod name without namespace

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-18 00:40:16 +08:00
bin liu
997f1f6cd0 docs: Add crictl example json files
Add basic sample pod/container config files to show
how to use `crictl` with Kata containers.

Fixes: #881

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-18 00:40:16 +08:00
Ychau Wang
f60f43af6b runtime: Clear the VCMock 1.x API Methods from 2.0
Clear the 1.x branch api methods in the 2.0. Keep the same methods to
the VC interface, like the VCImpl struct.

Fixes: #751

Signed-off-by: Ychau Wang <wangyongchao.bj@inspur.com>
2020-10-18 00:40:16 +08:00
Julio Montes
1789527d61 ci: snap: add event filtering
Run the snap CI on every PR is not needed. Don't run the snap CI
on PRs that don't change the source code (*.go/*.rs), a configuration
file or Makefile.

fixes #896

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-18 00:40:16 +08:00
Peng Tao
999f67d573 agent: do not follow link when mounting container proc and sysfs
Attackers might use it to explore other containers in the same pod.
While it is still safe to allow it, we can just close the race window
like runc does.

Fixes: #885
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-18 00:40:16 +08:00
Peng Tao
cb2255f199 agent: set init process non-dumpable
On old kernels (like v4.9), kernel applies CLOECEC in wrong order w.r.t.
dumpable task flags. As a result, we might leak guest file descriptor to
containers. This is a former runc CVE-2016-9962 and still applies to
kata agent. Although Kata container is still valid at protecting the
host, we should not leak extra resources to user containers.

This sets the init processes that join and setup the container's
namespaces as non-dumpable before they setns to the container's pid (or
any other ) namespace.

This settings is automatically reset to the default after the Exec in
the container so that it does not change functionality for the
applications that are running inside, just our init processes.

This prevents parent processes, the pid 1 of the container, to ptrace
the init process before it drops caps and other sets LSMs.

The order during the exec syscall is that the process is set back to
dumpable before O_CLOEXEC are processed.

Refs:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=613cc2b6f272c1a8ad33aefa21cad77af23139f7
https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318
opencontainers/runc@50a19c6
https://nvd.nist.gov/vuln/detail/CVE-2016-9962

Fixes: #890
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-18 00:40:16 +08:00
Peng Tao
2a6c9eec74 agent-ctl: include cargo lock updates
Simply running `make` would generate some cargo lock updates for
agent-ctl. Let's include them so that we have fixed dependencies.

Fixes: #883
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-18 00:40:16 +08:00
Julio Montes
eaff5de37a versions: add plugins section
plugins sections contains the details of plugins required for
the components or testing.

Add sriov-network-device-plugin url and version that are consumed
by the VFIO test in the tests repository.

fixes #879

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-18 00:40:16 +08:00
Jose Carlos Venegas Munoz
4f1d23b651 virtiofs: Disable DAX
virtiofs DAX support is not stable today, there are
a few corner cases to make it default.

Fixes: #862
Fixes: #875

Signed-off-by: Jose Carlos Venegas Munoz <jose.carlos.venegas.munoz@intel.com>
2020-10-18 00:40:16 +08:00
Julio Montes
6d80df9831 snap: specify python version
In order to avoid `unmet dependencies` error in the CI,
the python version must be specified in the yaml.

fixes #877

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-18 00:40:16 +08:00
Ralf Haferkamp
a116ce0b75 osbuilder: Create target directory for agent
When building with AGENT_SOURCE_BIN pointing to an already built
kata-agent binary, the target directory needs to be created in the
rootfs tree.

Fixes #873

Signed-off-by: Ralf Haferkamp <rhafer@suse.com>
2020-10-18 00:40:16 +08:00
Christophe de Dinechin
4dc3bc0020 rust-agent: Treat warnings as error
Avoid the accumulation of warnings we had, as reported in #750.

Fixes: #750

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:40:16 +08:00
Christophe de Dinechin
8f7a4842c2 rust-agent: Identify unused results in tests
Assign unused results to _ in order to silence warnings.

This addresses the following warnings:

    warning: unused `std::result::Result` that must be used
        --> rustjail/src/mount.rs:1182:16
         |
    1182 |         defer!(unistd::chdir(&olddir););
         |                ^^^^^^^^^^^^^^^^^^^^^^^
         |
         = note: `#[warn(unused_must_use)]` on by default
         = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
        --> rustjail/src/mount.rs:1183:9
         |
    1183 |         unistd::chdir(tempdir.path());
         |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
         |
         = note: this `Result` may be an `Err` variant, which should be handled

While in regular code, we want to log possible errors, in test code
it's OK to simply ignore the returned value.

Fixes: #750

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:40:16 +08:00
Christophe de Dinechin
ce54e5dd57 rust-agent: Log returned errors rather than ignore them
In a number of cases, we have functions that return a Result<...>
and where the possible error case is simply ignored. This is a bit
unhealthy.

Add a `check!` macro that allows us to not ignore error values
that we want to log, while not interrupting the flow by returning
them. This is useful for low-level functions such as `signal::kill` or
`unistd::close` where an error is probably significant, but should not
necessarily interrupt the flow of the program (i.e. using `call()?` is
not the right answer.

The check! macro is then used on low-level calls. This addresses the
following warnings from #750:

This addresses the following warning:

    warning: unused `std::result::Result` that must be used
       --> /home/ddd/go/src/github.com/kata-containers-2.0/src/agent/rustjail/src/container.rs:903:17
        |
    903 |                 signal::kill(Pid::from_raw(p.pid), Some(Signal::SIGKILL));
        |                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: `#[warn(unused_must_use)]` on by default
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> /home/ddd/go/src/github.com/kata-containers-2.0/src/agent/rustjail/src/container.rs:916:17
        |
    916 |                 signal::kill(Pid::from_raw(child.id() as i32), Some(Signal::SIGKILL));
        |                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:340:13
        |
    340 |             write_sync(cwfd, SYNC_FAILED, format!("{:?}", e).as_str());
        |             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: `#[warn(unused_must_use)]` on by default
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:554:13
        |
    554 | /             write_sync(
    555 | |                 cwfd,
    556 | |                 SYNC_FAILED,
    557 | |                 format!("setgroups failed: {:?}", e).as_str(),
    558 | |             );
        | |______________^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:340:13
        |
    340 |             write_sync(cwfd, SYNC_FAILED, format!("{:?}", e).as_str());
        |             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:340:13
        |
    340 |             write_sync(cwfd, SYNC_FAILED, format!("{:?}", e).as_str());
        |             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: `#[warn(unused_must_use)]` on by default
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:554:13
        |
    554 | /             write_sync(
    555 | |                 cwfd,
    556 | |                 SYNC_FAILED,
    557 | |                 format!("setgroups failed: {:?}", e).as_str(),
    558 | |             );
        | |______________^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:626:5
        |
    626 |     unistd::close(cfd_log);
        |     ^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: `#[warn(unused_must_use)]` on by default
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:627:5
        |
    627 |     unistd::close(crfd);
        |     ^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:628:5
        |
    628 |     unistd::close(cwfd);
        |     ^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:770:9
        |
    770 |         fcntl::fcntl(pfd_log, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC));
        |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: `#[warn(unused_must_use)]` on by default
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:799:9
        |
    799 |         fcntl::fcntl(prfd, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC));
        |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:800:9
        |
    800 |         fcntl::fcntl(pwfd, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC));
        |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:803:13
        |
    803 |             unistd::close(prfd);
        |             ^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:930:9
        |
    930 |         log_handler.join();
        |         ^^^^^^^^^^^^^^^^^^^
        |
        = note: `#[warn(unused_must_use)]` on by default
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:803:13
        |
    803 |             unistd::close(prfd);
        |             ^^^^^^^^^^^^^^^^^^^^
        |
        = note: `#[warn(unused_must_use)]` on by default
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:804:13
        |
    804 |             unistd::close(pwfd);
        |             ^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:842:13
        |
    842 |             sched::setns(old_pid_ns, CloneFlags::CLONE_NEWPID);
        |             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:843:13
        |
    843 |             unistd::close(old_pid_ns);
        |             ^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

Fixes: #844
Fixes: #750

Suggested-by: Tim Zhang <tim@hyper.sh>
Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:40:16 +08:00
Christophe de Dinechin
9adb7b7c28 rust-agent: Remove unused imports
This addresses the following warnings (and similar ones)::

    Compiling rustjail v0.1.0 (/home/ddd/go/src/github.com/kata-containers-2.0/src/agent/rustjail)
    warning: unused import: `debug`
      --> rustjail/src/container.rs:57:12
       |
    57 | use slog::{debug, info, o, Logger};
       |            ^^^^^

    warning: unused imports: `AddressFamily`, `SockFlag`, `SockType`, `self`
      --> rustjail/src/process.rs:18:24
       |
    18 | use nix::sys::socket::{self, AddressFamily, SockFlag, SockType};
       |                        ^^^^  ^^^^^^^^^^^^^  ^^^^^^^^  ^^^^^^^^

    warning: unused import: `nix::Error`
      --> rustjail/src/process.rs:23:5
       |
    23 | use nix::Error;
       |     ^^^^^^^^^^

    warning: unused import: `protobuf::RepeatedField`
      --> rustjail/src/validator.rs:11:5
       |
    11 | use protobuf::RepeatedField;
       |     ^^^^^^^^^^^^^^^^^^^^^^^

Fixes: #750

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:40:16 +08:00
Christophe de Dinechin
73ab9b1d6d rust-agent: Report errors to caller if possible
Various recently added error-causing calls

This addresses the following warning:

    warning: unused `std::result::Result` that must be used
      --> rustjail/src/cgroups/fs/mod.rs:93:9
       |
    93 |         cg.add_task(CgroupPid::from(pid as u64));
       |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
       |
       = note: `#[warn(unused_must_use)]` on by default
       = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/cgroups/fs/mod.rs:196:17
        |
    196 |                 freezer_controller.thaw();
        |                 ^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/cgroups/fs/mod.rs:199:17
        |
    199 |                 freezer_controller.freeze();
        |                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/cgroups/fs/mod.rs:365:9
        |
    365 |         cpuset_controller.set_cpus(&cpu.cpus);
        |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/cgroups/fs/mod.rs:369:9
        |
    369 |         cpuset_controller.set_mems(&cpu.mems);
        |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/cgroups/fs/mod.rs:381:13
        |
    381 |             cpu_controller.set_shares(shares);
        |             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/cgroups/fs/mod.rs:385:5
        |
    385 |     cpu_controller.set_cfs_quota_and_period(cpu.quota, cpu.period);
        |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
        --> rustjail/src/cgroups/fs/mod.rs:1061:13
         |
    1061 |             cpuset_controller.set_cpus(cpuset_cpus);
         |             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
         |
         = note: this `Result` may be an `Err` variant, which should be handled

The specific case of cpu_controller.set_cfs_quota_and_period is
addressed in a way that changes the logic following a suggestion by
Liu Bin, who had just added the code.

Fixes: #750

Suggested-by: Liu Bin <bin@hyper.sh>
Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:40:16 +08:00
Christophe de Dinechin
4db3f9e226 rust-agent: Ignore write errors while writing to the logs
When we are writing to the logs and there is an error doing so, there
is not much we can do. Chances are that a panic would make things
worse. So let it go through.

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/sync.rs:26:9
        |
    26  |         write_count(lfd, log_str.as_bytes(), log_str.len());
        |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
       ::: rustjail/src/container.rs:339:13
        |
    339 |             log_child!(cfd_log, "child exit: {:?}", e);
        |             ------------------------------------------- in this macro invocation
        |
        = note: this `Result` may be an `Err` variant, which should be handled
        = note: this warning originates in a macro (in Nightly builds, run with -Z macro-backtrace for more info)

Fixes: #750

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:40:16 +08:00
Christophe de Dinechin
19cb657299 rust-agent: Remove unused code that has undefined behavior
Some functions have undefined behavior and are not actually used.

This addresses the following warning:
    warning: the type `oci::User` does not permit zero-initialization
      --> rustjail/src/lib.rs:99:18
       |
    99 |         unsafe { MaybeUninit::zeroed().assume_init() }
       |                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
       |                  |
       |                  this code causes undefined behavior when executed
       |                  help: use `MaybeUninit<T>` instead, and only call `assume_init` after initialization is done
       |
       = note: `#[warn(invalid_value)]` on by default
    note: `std::ptr::Unique<u32>` must be non-null (in this struct field)

    warning: the type `protocols::oci::Process` does not permit zero-initialization
       --> rustjail/src/lib.rs:146:14
        |
    146 |     unsafe { MaybeUninit::zeroed().assume_init() }
        |              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |              |
        |              this code causes undefined behavior when executed
        |              help: use `MaybeUninit<T>` instead, and only call `assume_init` after initialization is done
        |
    note: `std::ptr::Unique<std::string::String>` must be non-null (in this struct field)

Fixes: #750

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:40:16 +08:00
Christophe de Dinechin
86bc151787 rust-agent: Remove 'mut' where not needed
Addresses the following warning (and a few similar ones):
    warning: variable does not need to be mutable
       --> rustjail/src/container.rs:369:9
        |
    369 |     let mut oci_process: oci::Process = serde_json::from_str(process_str)?;
        |         ----^^^^^^^^^^^
        |         |
        |         help: remove this `mut`
        |
        = note: `#[warn(unused_mut)]` on by default

Fixes: #750

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:40:16 +08:00
Christophe de Dinechin
8d8adb6887 rust-agent: Remove uses of deprecated functions
This addresses the following:

    warning: use of deprecated item 'std::error::Error::description': use the Display impl or to_string()
        --> rustjail/src/container.rs:1598:31
         |
    1598 | ...                   e.description(),
         |                         ^^^^^^^^^^^
         |
         = note: `#[warn(deprecated)]` on by default

Fixes: #750

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:40:16 +08:00
Christophe de Dinechin
76298c12b7 rust-agent: Remove or rename unused parameters
Parameters that are never used were removed.
Parameters that are unused, but necessary because of some common
interface were renamed with a _ prefix.
In one case, consume the parameter by adding an info! call, and fix a
minor typo in a message in the same function.

This addresses the following warning:

    warning: unused variable: `child`
        --> rustjail/src/container.rs:1128:5
         |
    1128 |     child: &mut Child,
         |     ^^^^^ help: if this is intentional, prefix it with an underscore: `_child`

    warning: unused variable: `logger`
        --> rustjail/src/container.rs:1049:22
         |
    1049 | fn update_namespaces(logger: &Logger, spec: &mut Spec, init_pid: RawFd) -> Result<()> {
         |                      ^^^^^^ help: if this is intentional, prefix it with an underscore: `_logger`

Fixes: #750

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:40:16 +08:00
Christophe de Dinechin
7d303ec2d0 rust-agent: Remove or rename unused variables
Remove variables that are simply not used.
Rename as _ variables where only initialization matters.

This addresses the following warnings:

    warning: unused variable: `writer`
       --> src/main.rs:130:9
        |
    130 |     let writer = unsafe { File::from_raw_fd(wfd) };
        |         ^^^^^^ help: if this is intentional, prefix it with an underscore: `_writer`
        |
        = note: `#[warn(unused_variables)]` on by default

    warning: unused variable: `ctx`
       --> src/rpc.rs:782:9
        |
    782 |         ctx: &ttrpc::TtrpcContext,
        |         ^^^ help: if this is intentional, prefix it with an underscore: `_ctx`

    warning: unused variable: `ctx`
       --> src/rpc.rs:808:9
        |
    808 |         ctx: &ttrpc::TtrpcContext,
        |         ^^^ help: if this is intentional, prefix it with an underscore: `_ctx`

    warning: unused variable: `dns_list`
        --> src/rpc.rs:1152:16
         |
    1152 |             Ok(dns_list) => {
         |                ^^^^^^^^ help: if this is intentional, prefix it with an underscore: `_dns_list`

    warning: value assigned to `child_stdin` is never read
       --> rustjail/src/container.rs:807:13
        |
    807 |         let mut child_stdin = std::process::Stdio::null();
        |             ^^^^^^^^^^^^^^^
        |
        = note: `#[warn(unused_assignments)]` on by default
        = help: maybe it is overwritten before being read?

    warning: value assigned to `child_stdout` is never read
       --> rustjail/src/container.rs:808:13
        |
    808 |         let mut child_stdout = std::process::Stdio::null();
        |             ^^^^^^^^^^^^^^^^
        |
        = help: maybe it is overwritten before being read?

    warning: value assigned to `child_stderr` is never read
       --> rustjail/src/container.rs:809:13
        |
    809 |         let mut child_stderr = std::process::Stdio::null();
        |             ^^^^^^^^^^^^^^^^
        |
        = help: maybe it is overwritten before being read?

    warning: value assigned to `stdin` is never read
       --> rustjail/src/container.rs:810:13
        |
    810 |         let mut stdin = -1;
        |             ^^^^^^^^^
        |
        = help: maybe it is overwritten before being read?

    warning: value assigned to `stdout` is never read
       --> rustjail/src/container.rs:811:13
        |
    811 |         let mut stdout = -1;
        |             ^^^^^^^^^^
        |
        = help: maybe it is overwritten before being read?

    warning: value assigned to `stderr` is never read
       --> rustjail/src/container.rs:812:13
        |
    812 |         let mut stderr = -1;
        |             ^^^^^^^^^^
        |
        = help: maybe it is overwritten before being read?

Fixes: #750

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:40:16 +08:00
Christophe de Dinechin
e0b79eb57f rust-agent: Remove unused functions
Fixes the following warning:

   Compiling logging v0.1.0 (/home/ddd/go/src/github.com/kata-containers-2.0/pkg/logging)
   warning: associated function is never used: `set_level`
      --> /home/ddd/go/src/github.com/kata-containers-2.0/pkg/logging/src/lib.rs:186:8
       |
   186 |     fn set_level(&self, level: slog::Level) {
       |        ^^^^^^^^^
       |
       = note: `#[warn(dead_code)]` on by default

Fixes: #750

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:40:16 +08:00
Christophe de Dinechin
8ed61b1bb9 rust-agent: Remove useless braces
This addresses the following warning:

    warning: unnecessary braces around assigned value
        --> src/rpc.rs:1411:26
         |
    1411 |     detail.init_daemon = { unistd::getpid() == Pid::from_raw(1) };
         |                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: remove these braces
         |
         = note: `#[warn(unused_braces)]` on by default

Fixes: #750

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:40:16 +08:00
Christophe de Dinechin
cc4f02e2b6 rust-agent: Remove unused macros
This addresses the following warnings:

   Compiling rustjail v0.1.0 (/home/ddd/go/src/github.com/kata-containers-2.0/src/agent/rustjail)
   warning: unused `#[macro_use]` import
     --> rustjail/src/lib.rs:15:1
      |
   15 | #[macro_use]
      | ^^^^^^^^^^^^
      |
      = note: `#[warn(unused_imports)]` on by default

   warning: unused macro definition
     --> rustjail/src/lib.rs:38:1
      |
   38 | / macro_rules! sl {
   39 | |     () => {
   40 | |         slog_scope::logger().new(o!("subsystem" => "rustjail"))
   41 | |     };
   42 | | }
      | |_^
      |
      = note: `#[warn(unused_macros)]` on by default

Fixes: #750

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-18 00:40:16 +08:00
Bo Chen
ace6f1e66e clh: Support VFIO device unplug
This patch adds the support of VFIO device unplug when using
cloud-hypervisor.

Fixes: #860

Signed-off-by: Bo Chen <chen.bo@intel.com>
2020-10-18 00:40:16 +08:00
Bo Chen
47cfeaaf18 clh: Remove unnecessary VmmPing
We can rely on the error handling of the actual HTTP API calls to catch
errors, and don't need to call VmmPing explicitly in advance.

Signed-off-by: Bo Chen <chen.bo@intel.com>
2020-10-18 00:40:16 +08:00
Bo Chen
63c475786f versions: cloud-hypervisor: Bump to version 6d30fe05
The cloud-hypervisor commit `6d30fe05` introduced a fix on its API for
VFIO device hotplug (`VmAddDevice`), which is required for supporting
VFIO unplug through openAPI calls in kata.

Signed-off-by: Bo Chen <chen.bo@intel.com>
2020-10-18 00:40:16 +08:00
Chelsea Mafrica
059b89cd03 docs: Change kata_tap0 to tap0_kata
Tap device's should be tap0_kata for architecture.md

Fixes #797

Signed-off-by: duanquanfeng <duanquanfeng_yewu@cmss.chinamobile.com>
Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2020-10-18 00:40:16 +08:00
Chelsea Mafrica
4ff3ed5101 docs: update networking description
First, most people don't care about CNM. Move that out of main doc.

Second, tc-filter is the default. Let's add a bit more background on
our usage of tc-filter (and clarify why we use this instead of macvtap).

Fixes #797

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2020-10-18 00:40:16 +08:00
Eric Ernst
de8dcb1549 dev-guide: update kata-agent install details
Install paths were wrong. Updated based on new agent...

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-18 00:40:16 +08:00
Archana Shinde
c488cc48a2 docs: Update docs for enabling agent debug console
The systemd method of adding a debug console is not really
user friendly. Since we have added a much more straightforward
method to enable agent debug console, update developer guide to
reflect this.

Fixes #834

Signed-off-by: Archana Shinde <archana.m.shinde@intel.com>
2020-10-18 00:40:16 +08:00
Eric Ernst
e5acb1257f docs: update dev guide for agent build
Include details on setting up rust.

Fixes: #851

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-18 00:40:16 +08:00
Julio Montes
1bddde729b ci: add github action to test the snap
Add github action to test that the snap package was generated
correctly, this CI don't test the snap, it just build it.

fixes #838

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-18 00:40:16 +08:00
Julio Montes
9517b0a933 versions: cloud-hypervisor: bump version
Use commit c54452c08a467a3e35d8d72f2a91d424e9718c57 as
version for cloud-hypervisor.
Bring openapi fix cloud-hypervisor/cloud-hypervisor#1760 to
support SGX.

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-18 00:40:16 +08:00
Julio Montes
f5a7175f92 runtime: cloud-hypervisor: tag openapi-generator-cli container
Tag openapi-generator-cli container to v4.3.1 that is the latest
stable, this way we can have reproducible builds and the same
generated code in all the systems

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-18 00:40:16 +08:00
Eric Ernst
9b969bb7da packaging: fix image build script
Relative paths are error prone. Fix error.

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-06 17:57:28 -07:00
Eric Ernst
fb2f3cfce2 release: Kata Containers 2.0.0-rc1
ae6ccbe8 rust-agent: Update README
3faef791 docs: drop docker installation guide
f3466b87 docs: fix static check errors in docs/install/README.md
89ec614d docs: update architecture.md
1ed73179 qemu: upgrade qemu version to 5.1.0 for arm64.
cb79dddf agent: Fix OCI Windows network shared container name typo
c50aee9d github: Remove issue template and use central one
2a4c3e6a docs: fix broken links
9e2a314e Packaging: release notes script using error kernel path urls
aed20f43 rust-agent: Replaces improper use of match for non-constant patterns
868d0248 devices: fix go test warning in manager_test.go
14164392 action: Allow long lines if non-alphabetic
2ece152c agent: remove unreachable code
033925f9 agent: Change do_exec return type to ! because it will never return
c90fff82 agent: propagate the internal detail errors to users
c0ea9102 packaging: Stop providing OBS packages
ca54edef install: Add contacts to the distribution packages
b5ece037 install: Update information about Community Packages
378e429d install: Update SUSE information
567f8587 install: Update openSUSE information
18f32d13 install: Update RHEL information
8280523c install: Update Fedora information
578db2fc install: Update CentOS information
781d6eca ci: fix clone_tests_repo function
c18c5e2c agent: Set LIBC=gnu for ppc64le arch by default
a378ba53 fc: integrate Firecracker's metrics
9991f4b5 static-build/qemu-virtiofs: Refactor apply virtiofs patches
4a0fd6c2 packaging/qemu: Add common code to apply patches
37acc030 static-build/qemu-virtiofs: Fix to apply QEMU patches
6c275c92 runtime: fix TestNewConsole UT failure
0479a4cb travis: skip static checker for ppc64
b3e52844 runtime: fix golint errors
d36d3486 agent: fix cargo fmt
e1094d7f ci: always checkout 2.0-dev of test repository
c8ba30f9 docs: fix static check errors
eaa5c433 runtime: fix make check
07caa2f2 gitignore: ignore agent service file
f34e2e66 agent: fix UT failures due to chdir
442e5906 agent: Only allow proc mount if it is procfs
f2850668 rustjail: make the mount error info much more clear
73414554 runtime: add enable_debug_console configuration item for agent
0b62f5a9 runtime: add debug console service
c23a401e runtime: Call s.newStore.Destroy if globalSandboxList.addSandbox
80879197 shimv2: add a comment in checkAndMount()
b6066cbc osbuilder: specify default toolchain verion in rust-init.
1290d007 runtime: Update cloud-hypervisor client pkg to version v0.10.0
afeece42 agent/oci: Don't use deprecated Error::description() method
a4075f0f runtime: Fix linter errors in release files
01df3c1d packaging: Build from source if the clh release binary is missing
bacd41bb runtime: add podman configuration to data collection script
d9746f31 ci: use export command to export envs instead of env config item
ca2a1176 ci: use Travis cache to reduce build time
67af593a agent: update cgroups crate
cabc60f3 docs: Update the reference path of kata-deploy in the packaging
a5859197 runtime: make kata-check check for newer release
08d194b8 how-to: add privileged_without_host_devices to containerd guide
89ade8f3 travis: enable RUST_BACKTRACE
4b30001d agent/rustjail: add more unit tests
232c8213 agent/rustjail: remove makedev function
74bcd510 agent/rustjail: add unit tests for ms_move_rootfs and mask_path
a36f93c9 agent/rustjail: implement functions to chroot
fe0f2198 agent/rustjail: add unit test for pivot_rootfs
5770c2a2 agent/rustjail: implement functions to pivot_root
838b1794 agent/rustjail: add unit test for mount_cgroups
1a60c1de agent/rustjail: add unit test for init_rootfs
77ecfed2 agent/rustjail/mount: don't use unwrap
fa7079bc agent/rustjail: add tempfile crate as depedency
c23bac5c rustjail: implement functions to mount and umount files
e99f3e79 docs: Fix the kata-pkgsync tool's docs script path
d05a7cda docs: fix k8s containerd howto links
f6877fa4 docs: fix up developer guide for 2.0
6d326f21 gitignore: ignore agent version.rs
407cb9a3 agent: fix agent panic running as init
38eb1df4 packaging: use local version file for kata 2.0 in Makefile
313dfee3 docs: fix release process doc
0c4e7b21 packaging: fix release notes

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2020-10-06 17:54:13 -07:00
Eric Ernst
f32a741c76 actions: add kata deploy test
Pull over kata-deploy-test from the 1.x packaging repository. This is
intended to be used for testing any changes to the kata-deploy
scripting, and does not exercise any new source code changes.

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-06 17:54:13 -07:00
Eric Ernst
512e79f61a packaging: cleaning, updating based on new filepaths
Update scripts to take into account some files being moved, and some
general cleanup.

Fixes: #866

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-06 17:54:13 -07:00
Eric Ernst
aa70080423 packaging: remove obs-packaging
No longer required -- let's remove them.

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-06 17:54:13 -07:00
Eric Ernst
34015bae12 packaging: pull versions, build-image out from obs dir
These are still required; let's pull them out.

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-06 17:54:13 -07:00
Eric Ernst
93b60a8327 packaging: Revert "packaging: Stop providing OBS packages"
This reverts commit c0ea910273.

Two scripts are still required for release and testing, which should
have never been under obs-packaging dir in the first place.  Let's
revert, move the scripts / update references to it, and then we can
remove the remaining obs-packaging/ tooling.

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-06 17:54:13 -07:00
Yang Bo
aa9951f2cd rust-agent: Update README
rust agent does not use grpc as submodule for a while, update README
to reflect the change.

Fixes: #196
Signed-off-by: Yang Bo <bo@hyper.sh>
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-06 17:54:13 -07:00
Peng Tao
9d8c72998b docs: drop docker installation guide
We have removed cli support and that means dockder support is dropped
for now. Also it doesn't make sense to have so many duplications on each
distribution as we can simply refer to the official docker guide on how
to install docker.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-06 17:54:13 -07:00
Peng Tao
033ed13202 docs: fix static check errors in docs/install/README.md
It was merged in while the static checker is disabled.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-06 17:54:13 -07:00
Peng Tao
c058d04b94 docs: update architecture.md
To match the current architecture of Kata Containers 2.0.

Fixes: #831
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-06 17:54:13 -07:00
Edmond AK Dantes
9d2bb0c452 qemu: upgrade qemu version to 5.1.0 for arm64.
Now, the qemu version used in arm is so old. As some new features have merged
in current qemu, so it's time to upgrade it. As obs-packaging has been removed,
I put the qemu patch under qemu/patch/5.1.x.
As vxfs has been Deprecated in qemu-5.1, it will be no longer exist in
configuration-hyperversior.sh when qemu version larger than 5.0.

Fixes: #816
Signed-off-by: Edmond AK Dantes <edmond.dantes.ak47@outlook.com>
2020-10-06 17:54:13 -07:00
James O. D. Hunt
627d062fb2 agent: Fix OCI Windows network shared container name typo
Correct the typo which would break the Windows-specific OCI network
shared container name feature.

See:

- https://github.com/opencontainers/runtime-spec/blob/master/config-windows.md#network

Fixes: #685.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-06 17:54:13 -07:00
James O. D. Hunt
96afe62576 github: Remove issue template and use central one
Remove the GitHub issue template from this repository. We already have a
central set of templates [1] that are being used so the template in this
repository is redundant.

[1] - https://github.com/kata-containers/.github/tree/master/.github/ISSUE_TEMPLATE/

Fixes: #728.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-06 17:54:13 -07:00
Julio Montes
d946016eb7 docs: fix broken links
Some sections and files were removed in a previous commit,
remove all reference to such sections and files to fix the
check-markdown test.

fixes #826

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-06 17:54:13 -07:00
Ychau Wang
37f1a77a6a Packaging: release notes script using error kernel path urls
2.0 Packaging runtime-release-notes.sh script is using 1.x Packaging
kernel urls. Fix these urls to 2.0 branch Packaging urls.

Fixes: #829

Signed-off-by: Ychau Wang <wangyongchao.bj@inspur.com>
2020-10-06 17:54:13 -07:00
Christophe de Dinechin
450a81cc54 rust-agent: Replaces improper use of match for non-constant patterns
The code used `match` as a switch with variable patterns `ev_fd` and
`cf_fd`, but the way Rust interprets the code is that the first
pattern matches all values. The code does not perform as expected.

This addresses the following warning:

   warning: unreachable pattern
      --> rustjail/src/cgroups/notifier.rs:114:21
       |
   107 |                     ev_fd => {
       |                     ----- matches any value
   ...
   114 |                     cg_fd => {
       |                     ^^^^^ unreachable pattern
       |
       = note: `#[warn(unreachable_patterns)]` on by default

Fixes: #750
Fixes: #793

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-06 17:54:13 -07:00
zhanghj
c09f02e6f6 devices: fix go test warning in manager_test.go
Create "class" and "config" file in temporary device BDF dir,
and remove dir created  by ioutil.TempDir() when test finished.

fixes: #746

Signed-off-by: zhanghj <zhanghj.lc@inspur.com>
2020-10-06 17:54:13 -07:00
James O. D. Hunt
58c7469110 action: Allow long lines if non-alphabetic
Overly long commit lines are annoying. But sometimes,
we need to be able to force the use of long lines
(for example to reference a URL).

Ironically, I can't refer to the URL that explains this
because of ... the long line check! Hence:

```sh
$ cat <<EOT | tr -d '\n'; echo
See: https://github.com/kata-containers/tests/tree/master/
cmd/checkcommits#handling-long-lines
EOT
```

Maximum body length updated to 150 bytes for parity with:

https://github.com/kata-containers/tests/pull/2848

Fixes: #687.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-06 17:54:13 -07:00
Tim Zhang
c36ea0968d agent: remove unreachable code
The code in the end of init_child is unreachable and need to be removed.
The code after do_exec is unreachable and need to be removed.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-06 17:54:13 -07:00
Tim Zhang
ba197302e2 agent: Change do_exec return type to ! because it will never return
Indicates unreachable code.

Fixes #819

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-06 17:54:13 -07:00
fupan.lfp
725ad067c1 agent: propagate the internal detail errors to users
It's should propagate the detail errors to users when
the rpc call failed.

Fixes: #824

Signed-off-by: fupan.lfp <fupan.lfp@antfin.com>
2020-10-06 17:54:13 -07:00
Fabiano Fidêncio
9858c23c59 packaging: Stop providing OBS packages
The community has discussed and took the decision in favour of promoting
kata-deploy as the way of distributing and using kata for distros that
officially don't maintain the project.

Fixes: #623
Fixes: https://github.com/kata-containers/packaging/issues/1120

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2020-10-06 17:54:13 -07:00
Fabiano Fidêncio
fc8f1ff03c install: Add contacts to the distribution packages
Let's add a new column to the Official packages table, and let the
maintainers of the official distro packages to jump in and add their
names there.

This will help us to ping & redirect to the right people possible issues
that are reported against the official packages.

Fixes: #623

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2020-10-06 17:54:13 -07:00
Fabiano Fidêncio
f7b4f76082 install: Update information about Community Packages
Kata Containers will stop distributing the community packages in favour
of kata-deploy.

Fixes: #623

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2020-10-06 17:54:13 -07:00
Fabiano Fidêncio
4fd66fa689 install: Update SUSE information
Following up a conversation with Ralf Haferkamp, we can safely drop the
instructions for using Kata Containers on SLES 12 SP3 in favour of using
the official builds provided for SLE 15 SP1, and SLE 15 SP2.

Fixes: #623

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2020-10-06 17:54:13 -07:00
Fabiano Fidêncio
e6ff42b8ad install: Update openSUSE information
Let's update the openSUSE Installation Guide to reflect the current
information on how to install kata packages provided by the distro
itself.

The official packages are present on Leap 15.2 and Tumbleweed, and can
be just installed. Leap 15.1 is slightly different, as the .repo file
has to be added before the packages can be installed.

Leap 15.0 has been removed as it already reached its EOL.

Fixes: #623

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2020-10-06 17:54:13 -07:00
Fabiano Fidêncio
6710d87c6a install: Update RHEL information
Although the community packages are present for RHEL, everything about
them is extremely unsupported on the Red Hat side.

Knowing this, we'd be better to simply not mentioned those and, if users
really want to try kata-containers on RHEL, they can simply follow the
CentOS installation guide.

In the future, if the Fedora packages make their way to RHEL, we can add
the information here. However, if we're recommending something
unsupported we'd be better recommending kata-deploy instead.

Fixes: #623

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2020-10-06 17:54:13 -07:00
Fabiano Fidêncio
178b79f122 install: Update Fedora information
Let's update the Fedora Installation Guide to reflect the current
information on how to install kata packages provided by the distro
itself.

These are official packages and we, as Fedora members, recommend using
kata-containers on Fedora 32 and onwards, as from this version
everything works out-of-the-box. Also, Fedora 31 will reach its EOL as
soon as Fedora 33 is out, which should happen on October.

Fixes: #623

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2020-10-06 17:54:13 -07:00
Fabiano Fidêncio
bc545c6549 install: Update CentOS information
Let's update the CentOS Installation Guide to reflect the current
information on how to install kata packages provided by the
Virtualiation Special Interest Group.

These are not official CentOS packages, as those are not coming from Red
Hat Enterprise Linux. These are the same packages we have on Fedora and
we have decided to keep them up-to-date and sync'ed on both Fedora and
CentOS, so people can give Kata Containers a try also on CentOS.

The nature of these packages makes me think that those are "as official
as they can be", so that's the reason I've decided to add the
instructions to the "official" table.

Together with the change in the Installation Guide, let's also update
the README and reflect the fact we **strongly recommend** using CentOS
8, with the packages provided by the Virtualization Special Interest
Group, instead of using the CentOS 7 with packages built on OBS.

Fixes: #623

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2020-10-06 17:54:13 -07:00
Salvador Fuentes
585481990a ci: fix clone_tests_repo function
We should not checkout to 2.0-dev branch in the clone_tests_repo
function when running in Jenkins CI as it discards changes from
tests repo.

Fixes: #818.

Signed-off-by: Salvador Fuentes <salvador.fuentes@intel.com>
2020-10-06 17:54:13 -07:00
Pradipta Kr. Banerjee
0057f86cfa agent: Set LIBC=gnu for ppc64le arch by default
Fixes: #812

Signed-off-by: Pradipta Kr. Banerjee <pradipta.banerjee@gmail.com>
2020-10-06 17:54:13 -07:00
bin liu
fa0401793f fc: integrate Firecracker's metrics
Firecracker expose metrics through fifo file
and using a JSON format. This PR will parse the
Firecracker's metrics and convert to Prometheus metrics.

Fixes: #472

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-06 17:54:13 -07:00
Wainer dos Santos Moschetta
60b7265961 static-build/qemu-virtiofs: Refactor apply virtiofs patches
In static-build/qemu-virtiofs/Dockerfile the code which
applies the virtiofs specific patches is spread in several
RUN instructions. Refactor this code so that it runs in a
single RUN and produce a single overlay image.

Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2020-10-06 17:54:13 -07:00
Wainer dos Santos Moschetta
57b53dbae8 packaging/qemu: Add common code to apply patches
The qemu and qemu-virtiofs Dockerfile files repeat the code to apply
patches based on QEMU stable branch being built. Instead, this adds
a common script (qemu/apply_patches.sh) and make it called by the
respective Dockerfile files.

Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2020-10-06 17:54:13 -07:00
Wainer dos Santos Moschetta
ddf1a545d1 static-build/qemu-virtiofs: Fix to apply QEMU patches
Fix a bug on qemu-virtiofs Dockerfile which end up not applying
the QEMU patches.

Fixes #786

Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2020-10-06 17:54:13 -07:00
Peng Tao
cbdf6400ae runtime: fix TestNewConsole UT failure
It needs root.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-06 17:54:13 -07:00
Peng Tao
ceeecf9c66 travis: skip static checker for ppc64
As we have already run it on x64.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-06 17:54:13 -07:00
Peng Tao
7c53baea8a runtime: fix golint errors
Need to run gofmt -s on them.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-06 17:54:13 -07:00
Peng Tao
b549d354bf agent: fix cargo fmt
Otherwise travis fails.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-06 17:54:13 -07:00
Peng Tao
9f3113e1f6 ci: always checkout 2.0-dev of test repository
We use 2.0-dev in the tests repository now. Always make sure
we use the right branch.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-06 17:54:13 -07:00
Peng Tao
ef94742320 docs: fix static check errors
Somehow we are not running static checks for a long time.
And that ended up with a lot for errors.

* Ensure debug options are valid is dropped
* fix snap links
* drop extra CONTRIBUTING.md
* reference kata-pkgsync
* move CODEOWNERS to proper place
* remove extra CODE_OF_CONDUCT.md.
* fix spell checker error on Developer-Guide.md

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-06 17:54:13 -07:00
Peng Tao
d71764985d runtime: fix make check
Need to use the correct script path.

Fixes: #802
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-06 17:54:13 -07:00
Peng Tao
0fc04a269d gitignore: ignore agent service file
As it is auto-generated.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-06 17:54:13 -07:00
Peng Tao
8d7ac5f01c agent: fix UT failures due to chdir
Current working directory is a process level resource. We cannot call
chdir in parallel from multiple threads, which would cause cwd confusion
and result in UT failures.

The agent code itself is correct that chdir is only called from spawned
child init process. Well, there is one exception that it is also called
in do_create_container() but it is safe to assume that containers are
never created in parallel (at least for now).

Fixes: #782
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-06 17:54:13 -07:00
fupan.lfp
612acbe319 agent: Only allow proc mount if it is procfs
This only allows some whitelists files bind mounted under proc
and prevent other malicious mount to procfs.

Fixes: #807

Signed-off-by: fupan.lfp <fupan.lfp@antfin.com>
2020-10-06 17:54:13 -07:00
fupan.lfp
f3a487cd41 rustjail: make the mount error info much more clear
Make the invalid mount destination's error info much
more clear.

Signed-off-by: fupan.lfp <fupan.lfp@antfin.com>
2020-10-06 17:54:13 -07:00
bin liu
3a559521d1 runtime: add enable_debug_console configuration item for agent
Set enable_debug_console=true in Kata's congiguration file,
runtime will pass `agent.debug_console`
and `agent.debug_console_vport=1026` to agent.

Fixes: #245

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-06 17:54:13 -07:00
bin liu
567daf5a42 runtime: add debug console service
Add `kata-runtime exec` to enter guest OS
through shell started by agent

Fixes: #245

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-06 17:54:13 -07:00
Shukui Yang
c7d913f436 runtime: Call s.newStore.Destroy if globalSandboxList.addSandbox
Fixes: #696

Signed-off-by: Shukui Yang <keloyangsk@gmail.com>
2020-10-06 17:54:13 -07:00
Qian Cai
7bd410c725 shimv2: add a comment in checkAndMount()
In checkAndMount(), it is not clear why we check IsBlockDevice() and if
DisableBlockDeviceUse == false and then only return "false, nil" instead
of "false, err". Adding a comment to make it a bit more readable.

Fixes: #732
Signed-off-by: Qian Cai <cai@redhat.com>
2020-10-06 17:54:13 -07:00
zhanghj
7fbc789855 osbuilder: specify default toolchain verion in rust-init.
Specify default toolchain version in rust-init.

Fixes: #799

Signed-off-by: zhanghj <zhanghj.lc@inspur.com>
2020-10-06 17:54:13 -07:00
Bo Chen
7fc41a771a runtime: Update cloud-hypervisor client pkg to version v0.10.0
The latest release of cloud-hypervisor v0.10.0 contains the following
updates: 1) `virtio-block` Support for Multiple Descriptors; 2) Memory
Zones; 3) `Seccomp` Sandbox Improvements; 4) Preliminary KVM HyperV
Emulation Control; 5) various bug fixes and refactoring.

Note that this patch updates the client code of clh's HTTP API in kata,
while the 'versions.yaml' file was updated in an earlier PR.

Fixes: #789

Signed-off-by: Bo Chen <chen.bo@intel.com>
2020-10-06 17:54:13 -07:00
David Gibson
a31d82fec2 agent/oci: Don't use deprecated Error::description() method
We shouldn't use it, and we don't need to implement it.

fixes #791

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2020-10-06 17:54:13 -07:00
James O. D. Hunt
9ef4c80340 runtime: Fix linter errors in release files
Fix the linter errors caught in the `runtime` repos `master` branch [1],
but not in the `2.0-dev` branch [2]. See [3] for further details.

[1] - https://github.com/kata-containers/runtime/pull/2976
[2] - https://github.com/kata-containers/kata-containers/pull/735
[3] - https://github.com/kata-containers/tests/issues/2870

Fixes: #783.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-06 17:54:13 -07:00
Bo Chen
6a4e413758 packaging: Build from source if the clh release binary is missing
This patch add fall-back code path that builds cloud-hypervisor static
binary from source, when the downloading of cloud-hypervisor binary is
failing. This is useful when we experience network issues, and also
useful for upgrading clh to non-released version.

Together with the changes in the tests repo
(https://github.com/kata-containers/tests/pull/2862), the Jenkins config
file is also updated with new Execute shell script for the clh CI in the
kata-containers repo. Those two changes fix the regression on clh CI
here. Please check details in the issue below.

Fixes: #781
Fixes: https://github.com/kata-containers/tests/issues/2858

Signed-off-by: Bo Chen <chen.bo@intel.com>
2020-10-06 17:54:13 -07:00
Francesco Giudici
678d4d189d runtime: add podman configuration to data collection script
Be more verbose about podman configuration in the output of the data
collection script: get the system configuration as seen by podman and
dump the configuration files when present.

Fixes: #243
Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2020-10-06 17:54:13 -07:00
bin liu
718f718764 ci: use export command to export envs instead of env config item
Config item env is used as a Matrix Expansion key, so these envs
will export to build jobs individually.

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-06 17:54:13 -07:00
bin liu
d860ded3f0 ci: use Travis cache to reduce build time
This PR includes these changes:
- use Rust installed by Travis
- install x86_64-unknown-linux-musl
- install rustfmt
- use Travis cache
- delete ci/install_vc.sh

Fixes: #748

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-06 17:54:13 -07:00
fupan.lfp
a141da8a20 agent: update cgroups crate
Update cgroups crate to fix the building issue
on Aarch64.

Fixes: #770

Signed-off-by: fupan.lfp <fupan.lfp@antfin.com>
2020-10-06 17:54:13 -07:00
Ychau Wang
aaaaee7a4b docs: Update the reference path of kata-deploy in the packaging
Use the relative path of kata-deploy to replace the 1.x packaging url in
the kata-deploy/README.md file. Fixed the path issue, producted by
creating new branch.

Fixes: #777

Signed-off-by: Ychau Wang <wangyongchao.bj@inspur.com>
2020-10-06 17:54:13 -07:00
James O. D. Hunt
21efaf1fca runtime: make kata-check check for newer release
Update `kata-check` to see if there is a newer version available for
download. Useful for users installing static packages (without a package
manager).

Fixes: #734.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-06 17:54:13 -07:00
Peng Tao
2056623e13 how-to: add privileged_without_host_devices to containerd guide
It should be set by default for Kata containers working with containerd.

Fixes: #775
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-06 17:54:13 -07:00
Julio Montes
34126ee704 travis: enable RUST_BACKTRACE
RUST_BACKTRACE=1 will help us a lot to debug unit tests when
a test is failing

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-06 17:54:13 -07:00
Julio Montes
980a338454 agent/rustjail: add more unit tests
Add unit tests for finish_root, read_only_path and mknod_dev
increasing code coverage of mount.rs

fixes #284

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-06 17:54:13 -07:00
Julio Montes
e14f766895 agent/rustjail: remove makedev function
remove `makedev` function, use `nix`'s implementation instead

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-06 17:54:13 -07:00
Julio Montes
2e0731f479 agent/rustjail: add unit tests for ms_move_rootfs and mask_path
Increase code coverage of mount.rs

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-06 17:54:13 -07:00
Julio Montes
addf62087c agent/rustjail: implement functions to chroot
Use conditional compilation (#[cfg]) to change chroot behaviour
at compilation time. For example, such function will just return
`Ok(())` when the unit tests are being compiled, otherwise real
chroot operation is performed.

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-06 17:54:13 -07:00
Julio Montes
c24b68dc4f agent/rustjail: add unit test for pivot_rootfs
Add unit test for pivot_rootfs increasing the code coverage of
mount.rs

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-06 17:54:13 -07:00
Julio Montes
24677d7484 agent/rustjail: implement functions to pivot_root
Use conditional compilation (#[cfg]) to change pivot_root behaviour
at compilation time. For example, such function will just return
`Ok(())` when the unit tests are being compiled, otherwise real
pivot_root operation is performed.

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-06 17:54:13 -07:00
Julio Montes
9e74c28158 agent/rustjail: add unit test for mount_cgroups
Add a unit test for `mount_cgroups` increasing the code coverage
of mount.rs from 44% to 52%

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-06 17:54:13 -07:00
Julio Montes
b7aae33cc1 agent/rustjail: add unit test for init_rootfs
Add a unit test for `init_rootfs` increasing the code coverage
of mount.rs from 0% to 44%.

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-06 17:54:13 -07:00
Julio Montes
6d9d58278e agent/rustjail/mount: don't use unwrap
Don't use unwrap in `init_rootfs` instead return an Error, this way
we can write unit tests that don't panic.

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-06 17:54:13 -07:00
Julio Montes
1bc6fbda8c agent/rustjail: add tempfile crate as depedency
Add tempfile crate as depedency, it will be used in the following
commits to create temporary directories for unit testing.

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-06 17:54:13 -07:00
Julio Montes
d39f5a85e6 rustjail: implement functions to mount and umount files
Use conditional compilation (#[cfg]) to change mount and umount
behaviours at compilation time. For example, such functions will just
return `Ok(())` when the unit tests are being compiled, otherwise real
mount and umount operations are performed.

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-06 17:54:13 -07:00
Ychau Wang
d90a0eefbe docs: Fix the kata-pkgsync tool's docs script path
Fix the kata-pkgsync tool's docs, change the download path of the
packaging tool in 2.0 release.

Fixes: #773

Signed-off-by: Ychau Wang <wangyongchao.bj@inspur.com>
2020-10-06 17:54:13 -07:00
Peng Tao
2618c014a0 docs: fix k8s containerd howto links
It should points to the internal versions.yaml file.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-06 17:54:13 -07:00
Peng Tao
5c4878f37e docs: fix up developer guide for 2.0
1. Until we restore docker/moby support, we should use crictl as
developer example.
2. Most of the hyperlinks should point to kata-containers repository.
3. There is no more standalone mode.

Fixes: #767
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-06 17:54:13 -07:00
Peng Tao
bd6b169e98 gitignore: ignore agent version.rs
It is auto-generated.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-06 17:54:13 -07:00
Peng Tao
5770336572 agent: fix agent panic running as init
We should mount procfs before trying to parse kernel command lines.

Fixes: #771
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-06 17:54:13 -07:00
zhanghj
45daec7b37 packaging: use local version file for kata 2.0 in Makefile
Use local version file instead of downloading from upstream repo.

Fixes: #756

Signed-off-by: zhanghj <zhanghj.lc@inspur.com>
2020-10-06 17:54:13 -07:00
Peng Tao
ed5a7dc022 docs: fix release process doc
We no longer build OBS packages. And we use
kata-containers/tools/packaging/release to do release.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-06 17:54:13 -07:00
Peng Tao
6fc7c77721 packaging: fix release notes
Should mention the 2.0 branch docs.

Fixes: #763
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-06 17:54:13 -07:00
276 changed files with 10214 additions and 10848 deletions

View File

@@ -1,17 +0,0 @@
# Description of problem
(replace this text with the list of steps you followed)
# Expected result
(replace this text with an explanation of what you thought would happen)
# Actual result
(replace this text with details of what actually happened)
---
(replace this text with the output of the `kata-collect-data.sh` script, after
you have reviewed its content to ensure it does not contain any private
information).

View File

@@ -48,7 +48,25 @@ jobs:
uses: tim-actions/commit-message-checker-with-regex@v0.3.1
with:
commits: ${{ steps.get-pr-commits.outputs.commits }}
pattern: '^.+(\n.{0,72})*$|^.+\n\s*[^a-zA-Z\s\n]|^.+\n\S+$'
# Notes:
#
# - The subject line is not enforced here (see other check), but has
# to be specified at the start of the regex as the action is passed
# the entire commit message.
#
# - Body lines *can* be longer than the maximum if they start
# with a non-alphabetic character.
#
# This allows stack traces, log files snippets, emails, long URLs,
# etc to be specified. Some of these naturally "work" as they start
# with numeric timestamps or addresses. Emails can but quoted using
# the normal ">" character, markdown bullets ("-", "*") are also
# useful for lists of URLs, but it is always possible to override
# the check by simply space indenting the content you need to add.
#
# - A SoB comment can be any length (as it is unreasonable to penalise
# people with long names/email addresses :)
pattern: '^.+(\n([a-zA-Z].{0,149}|[^a-zA-Z\n].*|Signed-off-by:.*|))+$'
error: 'Body line too long (max 72)'
post_error: ${{ env.error_msg }}

View File

@@ -18,9 +18,9 @@ main() {
fi
tag=$(echo $GITHUB_REF | cut -d/ -f3-)
pushd $GITHUB_WORKSPACE/tools/packaging/obs-packaging
pushd $GITHUB_WORKSPACE/tools/packaging
git checkout $tag
./gen_versions_txt.sh $tag
./scripts/gen_versions_txt.sh $tag
popd
pushd $GITHUB_WORKSPACE/tools/packaging/release

53
.github/workflows/kata-deploy-test.yaml vendored Normal file
View File

@@ -0,0 +1,53 @@
on: issue_comment
name: test-kata-deploy
jobs:
check_comments:
runs-on: ubuntu-latest
steps:
- name: Check for Command
id: command
uses: kata-containers/slash-command-action@v1
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
command: "test-kata-deploy"
reaction: "true"
reaction-type: "eyes"
allow-edits: "false"
permission-level: admin
- name: verify command arg is kata-deploy
run: |
echo "The command was '${{ steps.command.outputs.command-name }}' with arguments '${{ steps.command.outputs.command-arguments }}'"
create-and-test-container:
needs: check_comments
runs-on: ubuntu-latest
steps:
- name: get-PR-ref
id: get-PR-ref
run: |
ref=$(cat $GITHUB_EVENT_PATH | jq -r '.issue.pull_request.url' | sed 's#^.*\/pulls#refs\/pull#' | sed 's#$#\/merge#')
echo "reference for PR: " ${ref}
echo "##[set-output name=pr-ref;]${ref}"
- uses: actions/checkout@v2-beta
with:
ref: ${{ steps.get-PR-ref.outputs.pr-ref }}
- name: build-container-image
id: build-container-image
run: |
PR_SHA=$(git log --format=format:%H -n1)
VERSION=$(curl https://raw.githubusercontent.com/kata-containers/kata-containers/2.0-dev/VERSION)
ARTIFACT_URL="https://github.com/kata-containers/kata-containers/releases/download/${VERSION}/kata-static-${VERSION}-x86_64.tar.xz"
wget "${ARTIFACT_URL}" -O ./kata-deploy/kata-static.tar.xz
docker build --build-arg KATA_ARTIFACTS=kata-static.tar.xz -t katadocker/kata-deploy-ci:${PR_SHA} ./kata-deploy
docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }}
docker push katadocker/kata-deploy-ci:$PR_SHA
echo "##[set-output name=pr-sha;]${PR_SHA}"
- name: test-kata-deploy-ci-in-aks
uses: ./kata-deploy/action
with:
packaging-sha: ${{ steps.build-container-image.outputs.pr-sha }}
env:
PKG_SHA: ${{ steps.build-container-image.outputs.pr-sha }}
AZ_APPID: ${{ secrets.AZ_APPID }}
AZ_PASSWORD: ${{ secrets.AZ_PASSWORD }}
AZ_SUBSCRIPTION_ID: ${{ secrets.AZ_SUBSCRIPTION_ID }}
AZ_TENANT_ID: ${{ secrets.AZ_TENANT_ID }}

25
.github/workflows/snap.yaml vendored Normal file
View File

@@ -0,0 +1,25 @@
name: snap CI
on:
pull_request:
paths:
- "**/Makefile"
- "**/*.go"
- "**/*.mk"
- "**/*.rs"
- "**/*.sh"
- "**/*.toml"
- "**/*.yaml"
- "**/*.yml"
jobs:
test:
runs-on: ubuntu-20.04
steps:
- name: Check out
uses: actions/checkout@v2
- name: Install Snapcraft
uses: samuelmeuli/action-snapcraft@v1
- name: Build snap
run: |
snapcraft -d snap --destructive-mode

2
.gitignore vendored
View File

@@ -3,3 +3,5 @@
**/*.rej
**/target
**/.vscode
src/agent/src/version.rs
src/agent/kata-agent.service

View File

@@ -5,28 +5,43 @@
dist: bionic
os: linux
language: go
go: 1.14.4
env: target_branch=$TRAVIS_BRANCH
# set cache directories manually, because
# we are using a non-standard directory struct
# cargo root is in srs/agent
#
# If needed, caches can be cleared
# by ways documented in
# https://docs.travis-ci.com/user/caching#clearing-caches
language: rust
rust:
- 1.44.1
cache:
cargo: true
directories:
- src/agent/target
before_install:
- git remote set-branches --add origin "${TRAVIS_BRANCH}"
- git fetch
- export RUST_BACKTRACE=1
- export target_branch=$TRAVIS_BRANCH
- "ci/setup.sh"
# we use install to run check agent
# so that it is easy to skip for non-amd64 platform
install:
- "ci/install_rust.sh"
- export PATH=$PATH:"$HOME/.cargo/bin"
- export RUST_AGENT=yes
- rustup target add x86_64-unknown-linux-musl
- sudo ln -sf /usr/bin/g++ /bin/musl-g++
- rustup component add rustfmt
- make -C ${TRAVIS_BUILD_DIR}/src/agent
- make -C ${TRAVIS_BUILD_DIR}/src/agent check
- sudo -E PATH=$PATH make -C ${TRAVIS_BUILD_DIR}/src/agent check
before_script:
- "ci/install_go.sh"
- "ci/install_vc.sh"
- make -C ${TRAVIS_BUILD_DIR}/src/runtime
- make -C ${TRAVIS_BUILD_DIR}/src/runtime test
- sudo -E PATH=$PATH GOPATH=$GOPATH make -C ${TRAVIS_BUILD_DIR}/src/runtime test
@@ -41,6 +56,7 @@ jobs:
- name: ppc64le test
os: linux-ppc64le
install: skip
script: skip
allow_failures:
- name: ppc64le test
fast_finish: true

View File

@@ -1,4 +1,4 @@
# Copyright 2019 Intel Corporation.
# Copyright (c) 2019 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -10,4 +10,3 @@
# used. See https://help.github.com/articles/about-code-owners/
*.md @kata-containers/documentation

View File

@@ -1 +1 @@
2.0.0-rc0
2.0.0

30
ci/go-no-os-exit.sh Executable file
View File

@@ -0,0 +1,30 @@
#!/bin/bash
# Copyright (c) 2018 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
# Check there are no os.Exit() calls creeping into the code
# We don't use that exit path in the Kata codebase.
# Allow the path to check to be over-ridden.
# Default to the current directory.
go_packages=${1:-.}
echo "Checking for no os.Exit() calls for package [${go_packages}]"
candidates=`go list -f '{{.Dir}}/*.go' $go_packages`
for f in $candidates; do
filename=`basename $f`
# skip all go test files
[[ $filename == *_test.go ]] && continue
# skip exit.go where, the only file we should call os.Exit() from.
[[ $filename == "exit.go" ]] && continue
files="$f $files"
done
[ -z "$files" ] && echo "No files to check, skipping" && exit 0
if egrep -n '\<os\.Exit\>' $files; then
echo "Direct calls to os.Exit() are forbidden, please use exit() so atexit() works"
exit 1
fi

View File

@@ -5,26 +5,26 @@
export tests_repo="${tests_repo:-github.com/kata-containers/tests}"
export tests_repo_dir="$GOPATH/src/$tests_repo"
export branch="${branch:-2.0-dev}"
clone_tests_repo()
{
# KATA_CI_NO_NETWORK is (has to be) ignored if there is
# no existing clone.
if [ -d "$tests_repo_dir" -a -n "$KATA_CI_NO_NETWORK" ]
if [ -d "$tests_repo_dir" -a -n "$CI" ]
then
return
fi
go get -d -u "$tests_repo" || true
if [ -n "${TRAVIS_BRANCH:-}" ]; then
( cd "${tests_repo_dir}" && git checkout "${TRAVIS_BRANCH}" )
fi
pushd "${tests_repo_dir}" && git checkout "${branch}" && popd
}
run_static_checks()
{
clone_tests_repo
# Make sure we have the targeting branch
git remote set-branches --add origin "${branch}"
git fetch -a
bash "$tests_repo_dir/.ci/static-checks.sh" "github.com/kata-containers/kata-containers"
}

View File

@@ -1,3 +0,0 @@
## Kata Containers Documentation Code of Conduct
Kata Containers follows the [OpenStack Foundation Code of Conduct](https://www.openstack.org/legal/community-code-of-conduct/).

View File

@@ -1,5 +0,0 @@
# Contributing
## This repo is part of [Kata Containers](https://katacontainers.io)
For details on how to contribute to the Kata Containers project, please see the main [contributing document](https://github.com/kata-containers/community/blob/master/CONTRIBUTING.md).

View File

@@ -13,7 +13,6 @@
* [journald rate limiting](#journald-rate-limiting)
* [`systemd-journald` suppressing messages](#systemd-journald-suppressing-messages)
* [Disabling `systemd-journald` rate limiting](#disabling-systemd-journald-rate-limiting)
* [Build and install Kata shim](#build-and-install-kata-shim)
* [Create and install rootfs and initrd image](#create-and-install-rootfs-and-initrd-image)
* [Build a custom Kata agent - OPTIONAL](#build-a-custom-kata-agent---optional)
* [Get the osbuilder](#get-the-osbuilder)
@@ -30,26 +29,27 @@
* [Install a hypervisor](#install-a-hypervisor)
* [Build a custom QEMU](#build-a-custom-qemu)
* [Build a custom QEMU for aarch64/arm64 - REQUIRED](#build-a-custom-qemu-for-aarch64arm64---required)
* [Run Kata Containers with Docker](#run-kata-containers-with-docker)
* [Update the Docker systemd unit file](#update-the-docker-systemd-unit-file)
* [Create a container using Kata](#create-a-container-using-kata)
* [Run Kata Containers with Containerd](#run-kata-containers-with-containerd)
* [Run Kata Containers with Kubernetes](#run-kata-containers-with-kubernetes)
* [Troubleshoot Kata Containers](#troubleshoot-kata-containers)
* [Appendices](#appendices)
* [Checking Docker default runtime](#checking-docker-default-runtime)
* [Set up a debug console](#set-up-a-debug-console)
* [Create a custom image containing a shell](#create-a-custom-image-containing-a-shell)
* [Create a debug systemd service](#create-a-debug-systemd-service)
* [Build the debug image](#build-the-debug-image)
* [Configure runtime for custom debug image](#configure-runtime-for-custom-debug-image)
* [Ensure debug options are valid](#ensure-debug-options-are-valid)
* [Create a container](#create-a-container)
* [Connect to the virtual machine using the debug console](#connect-to-the-virtual-machine-using-the-debug-console)
* [Obtain details of the image](#obtain-details-of-the-image)
* [Simple debug console setup](#simple-debug-console-setup)
* [Enable agent debug console](#enable-agent-debug-console)
* [Start `kata-monitor`](#start-kata-monitor)
* [Connect to debug console](#connect-to-debug-console)
* [Traditional debug console setup](#traditional-debug-console-setup)
* [Create a custom image containing a shell](#create-a-custom-image-containing-a-shell)
* [Build the debug image](#build-the-debug-image)
* [Configure runtime for custom debug image](#configure-runtime-for-custom-debug-image)
* [Connect to the virtual machine using the debug console](#connect-to-the-virtual-machine-using-the-debug-console)
* [Enabling debug console for QEMU](#enabling-debug-console-for-qemu)
* [Enabling debug console for cloud-hypervisor / firecracker](#enabling-debug-console-for-cloud-hypervisor--firecracker)
* [Create a container](#create-a-container)
* [Connect to the virtual machine using the debug console](#connect-to-the-virtual-machine-using-the-debug-console)
* [Obtain details of the image](#obtain-details-of-the-image)
* [Capturing kernel boot logs](#capturing-kernel-boot-logs)
* [Running standalone](#running-standalone)
* [Create an OCI bundle](#create-an-oci-bundle)
* [Launch the runtime to create a container](#launch-the-runtime-to-create-a-container)
# Warning
@@ -66,7 +66,7 @@ The recommended way to create a development environment is to first
to create a working system.
The installation guide instructions will install all required Kata Containers
components, plus Docker*, the hypervisor, and the Kata Containers image and
components, plus *Docker*, the hypervisor, and the Kata Containers image and
guest kernel.
# Requirements to build individual components
@@ -76,7 +76,12 @@ You need to install the following to build Kata Containers components:
- [golang](https://golang.org/dl)
To view the versions of go known to work, see the `golang` entry in the
[versions database](https://github.com/kata-containers/runtime/blob/master/versions.yaml).
[versions database](../versions.yaml).
- [rust](https://www.rust-lang.org/tools/install)
To view the versions of rust known to work, see the `rust` entry in the
[versions database](../versions.yaml).
- `make`.
- `gcc` (required for building the shim and runtime).
@@ -84,14 +89,14 @@ You need to install the following to build Kata Containers components:
# Build and install the Kata Containers runtime
```
$ go get -d -u github.com/kata-containers/runtime
$ cd $GOPATH/src/github.com/kata-containers/runtime
$ go get -d -u github.com/kata-containers/kata-containers
$ cd $GOPATH/src/github.com/kata-containers/kata-containers/src/runtime
$ make && sudo -E PATH=$PATH make install
```
The build will create the following:
- runtime binary: `/usr/local/bin/kata-runtime`
- runtime binary: `/usr/local/bin/kata-runtime` and `/usr/local/bin/containerd-shim-kata-v2`
- configuration file: `/usr/share/defaults/kata-containers/configuration.toml`
# Check hardware requirements
@@ -242,13 +247,6 @@ Restart `systemd-journald` for the changes to take effect:
$ sudo systemctl restart systemd-journald
```
# Build and install Kata shim
```
$ go get -d -u github.com/kata-containers/shim
$ cd $GOPATH/src/github.com/kata-containers/shim && make && sudo make install
```
# Create and install rootfs and initrd image
## Build a custom Kata agent - OPTIONAL
@@ -257,15 +255,25 @@ $ cd $GOPATH/src/github.com/kata-containers/shim && make && sudo make install
>
> - You should only do this step if you are testing with the latest version of the agent.
The rust-agent is built with a static linked `musl.` To configure this:
```
$ go get -d -u github.com/kata-containers/agent
$ cd $GOPATH/src/github.com/kata-containers/agent && make
rustup target add x86_64-unknown-linux-musl
sudo ln -s /usr/bin/g++ /bin/musl-g++
```
To build the agent:
```
$ go get -d -u github.com/kata-containers/kata-containers
$ cd $GOPATH/src/github.com/kata-containers/kata-containers/src/agent && make
```
## Get the osbuilder
```
$ go get -d -u github.com/kata-containers/osbuilder
$ go get -d -u github.com/kata-containers/kata-containers
$ cd $GOPATH/src/github.com/kata-containers/kata-containers/tools/osbuilder
```
## Create a rootfs image
@@ -276,16 +284,16 @@ able to run the `rootfs.sh` script with `USE_DOCKER=true` as expected in
the following example.
```
$ export ROOTFS_DIR=${GOPATH}/src/github.com/kata-containers/osbuilder/rootfs-builder/rootfs
$ export ROOTFS_DIR=${GOPATH}/src/github.com/kata-containers/kata-containers/tools/osbuilder/rootfs-builder/rootfs
$ sudo rm -rf ${ROOTFS_DIR}
$ cd $GOPATH/src/github.com/kata-containers/osbuilder/rootfs-builder
$ cd $GOPATH/src/github.com/kata-containers/kata-containers/tools/osbuilder/rootfs-builder
$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true SECCOMP=no ./rootfs.sh ${distro}'
```
You MUST choose one of `alpine`, `centos`, `clearlinux`, `debian`, `euleros`, `fedora`, `suse`, and `ubuntu` for `${distro}`. By default `seccomp` packages are not included in the rootfs image. Set `SECCOMP` to `yes` to include them.
> **Note:**
>
> - Check the [compatibility matrix](https://github.com/kata-containers/osbuilder#platform-distro-compatibility-matrix) before creating rootfs.
> - Check the [compatibility matrix](../tools/osbuilder/README.md#platform-distro-compatibility-matrix) before creating rootfs.
> - You must ensure that the *default Docker runtime* is `runc` to make use of
> the `USE_DOCKER` variable. If that is not the case, remove the variable
> from the previous command. See [Checking Docker default runtime](#checking-docker-default-runtime).
@@ -297,15 +305,15 @@ You MUST choose one of `alpine`, `centos`, `clearlinux`, `debian`, `euleros`, `f
> - You should only do this step if you are testing with the latest version of the agent.
```
$ sudo install -o root -g root -m 0550 -t ${ROOTFS_DIR}/bin ../../agent/kata-agent
$ sudo install -o root -g root -m 0440 ../../agent/kata-agent.service ${ROOTFS_DIR}/usr/lib/systemd/system/
$ sudo install -o root -g root -m 0440 ../../agent/kata-containers.target ${ROOTFS_DIR}/usr/lib/systemd/system/
$ sudo install -o root -g root -m 0550 -t ${ROOTFS_DIR}/bin ../../../src/agent/target/x86_64-unknown-linux-musl/release/kata-agent
$ sudo install -o root -g root -m 0440 ../../../src/agent/kata-agent.service ${ROOTFS_DIR}/usr/lib/systemd/system/
$ sudo install -o root -g root -m 0440 ../../../src/agent/kata-containers.target ${ROOTFS_DIR}/usr/lib/systemd/system/
```
### Build a rootfs image
```
$ cd $GOPATH/src/github.com/kata-containers/osbuilder/image-builder
$ cd $GOPATH/src/github.com/kata-containers/kata-containers/tools/osbuilder/image-builder
$ script -fec 'sudo -E USE_DOCKER=true ./image_builder.sh ${ROOTFS_DIR}'
```
@@ -332,9 +340,9 @@ $ (cd /usr/share/kata-containers && sudo ln -sf "$image" kata-containers.img)
## Create an initrd image - OPTIONAL
### Create a local rootfs for initrd image
```
$ export ROOTFS_DIR="${GOPATH}/src/github.com/kata-containers/osbuilder/rootfs-builder/rootfs"
$ export ROOTFS_DIR="${GOPATH}/src/github.com/kata-containers/kata-containers/tools/osbuilder/rootfs-builder/rootfs"
$ sudo rm -rf ${ROOTFS_DIR}
$ cd $GOPATH/src/github.com/kata-containers/osbuilder/rootfs-builder
$ cd $GOPATH/src/github.com/kata-containers/kata-containers/tools/osbuilder/rootfs-builder
$ script -fec 'sudo -E GOPATH=$GOPATH AGENT_INIT=yes USE_DOCKER=true SECCOMP=no ./rootfs.sh ${distro}'
```
`AGENT_INIT` controls if the guest image uses the Kata agent as the guest `init` process. When you create an initrd image,
@@ -344,7 +352,7 @@ You MUST choose one of `alpine`, `centos`, `clearlinux`, `euleros`, and `fedora`
> **Note:**
>
> - Check the [compatibility matrix](https://github.com/kata-containers/osbuilder#platform-distro-compatibility-matrix) before creating rootfs.
> - Check the [compatibility matrix](../tools/osbuilder/README.md#platform-distro-compatibility-matrix) before creating rootfs.
Optionally, add your custom agent binary to the rootfs with the following:
```
@@ -354,7 +362,7 @@ $ sudo install -o root -g root -m 0550 -T ../../agent/kata-agent ${ROOTFS_DIR}/s
### Build an initrd image
```
$ cd $GOPATH/src/github.com/kata-containers/osbuilder/initrd-builder
$ cd $GOPATH/src/github.com/kata-containers/kata-containers/tools/osbuilder/initrd-builder
$ script -fec 'sudo -E AGENT_INIT=yes USE_DOCKER=true ./initrd_builder.sh ${ROOTFS_DIR}'
```
@@ -382,7 +390,7 @@ Your QEMU directory need to be prepared with source code. Alternatively, you can
```
$ go get -d github.com/kata-containers/qemu
$ qemu_branch=$(grep qemu-lite- ${GOPATH}/src/github.com/kata-containers/runtime/versions.yaml | cut -d '"' -f2)
$ qemu_branch=$(grep qemu-lite- ${GOPATH}/src/github.com/kata-containers/kata-containers/versions.yaml | cut -d '"' -f2)
$ cd ${GOPATH}/src/github.com/kata-containers/qemu
$ git checkout -b $qemu_branch remotes/origin/$qemu_branch
$ your_qemu_directory=${GOPATH}/src/github.com/kata-containers/qemu
@@ -391,9 +399,9 @@ $ your_qemu_directory=${GOPATH}/src/github.com/kata-containers/qemu
To build a version of QEMU using the same options as the default `qemu-lite` version , you could use the `configure-hypervisor.sh` script:
```
$ go get -d github.com/kata-containers/packaging
$ go get -d github.com/kata-containers/kata-containers/tools/packaging
$ cd $your_qemu_directory
$ ${GOPATH}/src/github.com/kata-containers/packaging/scripts/configure-hypervisor.sh qemu > kata.cfg
$ ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/scripts/configure-hypervisor.sh qemu > kata.cfg
$ eval ./configure "$(cat kata.cfg)"
$ make -j $(nproc)
$ sudo -E make install
@@ -412,27 +420,11 @@ $ go get -d github.com/kata-containers/tests
$ script -fec 'sudo -E ${GOPATH}/src/github.com/kata-containers/tests/.ci/install_qemu.sh'
```
# Run Kata Containers with Docker
## Update the Docker systemd unit file
```
$ dockerUnit=$(systemctl show -p FragmentPath docker.service | cut -d "=" -f 2)
$ unitFile=${dockerUnit:-/etc/systemd/system/docker.service.d/kata-containers.conf}
$ test -e "$unitFile" || { sudo mkdir -p "$(dirname $unitFile)"; echo -e "[Service]\nType=simple\nExecStart=\nExecStart=/usr/bin/dockerd -D --default-runtime runc" | sudo tee "$unitFile"; }
$ grep -q "kata-runtime=" $unitFile || sudo sed -i 's!^\(ExecStart=[^$].*$\)!\1 --add-runtime kata-runtime=/usr/local/bin/kata-runtime!g' "$unitFile"
$ sudo systemctl daemon-reload
$ sudo systemctl restart docker
```
## Create a container using Kata
```
$ sudo docker run -ti --runtime kata-runtime busybox sh
```
# Run Kata Containers with Containerd
Refer to the [How to use Kata Containers and Containerd](how-to/containerd-kata.md) how-to guide.
# Run Kata Containers with Kubernetes
Refer to to the [Run Kata Containers with Kubernetes](how-to/run-kata-with-k8s.md) how-to guide.
Refer to the [Run Kata Containers with Kubernetes](how-to/run-kata-with-k8s.md) how-to guide.
# Troubleshoot Kata Containers
@@ -452,18 +444,6 @@ To perform analysis on Kata logs, use the
[`kata-log-parser`](https://github.com/kata-containers/tests/tree/master/cmd/log-parser)
tool, which can convert the logs into formats (e.g. JSON, TOML, XML, and YAML).
To obtain a full backtrace for the agent, proxy, runtime, or shim send the
`SIGUSR1` signal to the process ID of the component. The component will send a
backtrace to the system log on the host system and continue to run without
interruption.
For example, to obtain a backtrace for `kata-proxy`:
```
$ sudo kill -USR1 $kata_proxy_pid
$ sudo journalctl -t kata-proxy
```
See [Set up a debug console](#set-up-a-debug-console).
# Appendices
@@ -473,9 +453,56 @@ See [Set up a debug console](#set-up-a-debug-console).
```
$ sudo docker info 2>/dev/null | grep -i "default runtime" | cut -d: -f2- | grep -q runc && echo "SUCCESS" || echo "ERROR: Incorrect default Docker runtime"
```
## Set up a debug console
Kata containers provides two ways to connect to the guest. One is using traditional login service, which needs additional works. In contrast the simple debug console is easy to setup.
### Simple debug console setup
Kata Containers 2.0 supports a shell simulated *console* for quick debug purpose. This approach uses VSOCK to
connect to the shell running inside the guest which the agent starts. This method only requires the guest image to
contain either `/bin/sh` or `/bin/bash`.
#### Enable agent debug console
Enable debug_console_enabled in the `configuration.toml` configuration file:
```
[agent.kata]
debug_console_enabled = true
```
This will pass `agent.debug_console agent.debug_console_vport=1026` to agent as kernel parameters, and sandboxes created using this parameters will start a shell in guest if new connection is accept from VSOCK.
#### Start `kata-monitor`
The `kata-runtime exec` command needs `kata-monitor` to get the sandbox's `vsock` address to connect to, first start `kata-monitor`.
```
$ sudo kata-monitor
```
`kata-monitor` will serve at `localhost:8090` by default.
#### Connect to debug console
Command `kata-runtime exec` is used to connect to the debug console.
```
$ kata-runtime exec 1a9ab65be63b8b03dfd0c75036d27f0ed09eab38abb45337fea83acd3cd7bacd
bash-4.2# id
uid=0(root) gid=0(root) groups=0(root)
bash-4.2# pwd
/
bash-4.2# exit
exit
```
If you want to access guest OS through a traditional way, see [Traditional debug console setup)](#traditional-debug-console-setup).
### Traditional debug console setup
By default you cannot login to a virtual machine, since this can be sensitive
from a security perspective. Also, allowing logins would require additional
packages in the rootfs, which would increase the size of the image used to
@@ -486,12 +513,12 @@ the following steps (using rootfs or initrd image).
> **Note:** The following debug console instructions assume a systemd-based guest
> O/S image. This means you must create a rootfs for a distro that supports systemd.
> Currently, all distros supported by [osbuilder](https://github.com/kata-containers/osbuilder) support systemd
> Currently, all distros supported by [osbuilder](../tools/osbuilder) support systemd
> except for Alpine Linux.
>
> Look for `INIT_PROCESS=systemd` in the `config.sh` osbuilder rootfs config file
> to verify an osbuilder distro supports systemd for the distro you want to build rootfs for.
> For an example, see the [Clear Linux config.sh file](https://github.com/kata-containers/osbuilder/blob/master/rootfs-builder/clearlinux/config.sh).
> For an example, see the [Clear Linux config.sh file](../tools/osbuilder/rootfs-builder/clearlinux/config.sh).
>
> For a non-systemd-based distro, create an equivalent system
> service using that distros init system syntax. Alternatively, you can build a distro
@@ -501,7 +528,7 @@ the following steps (using rootfs or initrd image).
>
> Once these steps are taken you can connect to the virtual machine using the [debug console](Developer-Guide.md#connect-to-the-virtual-machine-using-the-debug-console).
### Create a custom image containing a shell
#### Create a custom image containing a shell
To login to a virtual machine, you must
[create a custom rootfs](#create-a-rootfs-image) or [custom initrd](#create-an-initrd-image---optional)
@@ -511,46 +538,17 @@ an additional `coreutils` package.
For example using CentOS:
```
$ cd $GOPATH/src/github.com/kata-containers/osbuilder/rootfs-builder
$ export ROOTFS_DIR=${GOPATH}/src/github.com/kata-containers/osbuilder/rootfs-builder/rootfs
$ cd $GOPATH/src/github.com/kata-containers/kata-containers/tools/osbuilder/rootfs-builder
$ export ROOTFS_DIR=${GOPATH}/src/github.com/kata-containers/kata-containers/tools/osbuilder/rootfs-builder/rootfs
$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true EXTRA_PKGS="bash coreutils" ./rootfs.sh centos'
```
### Create a debug systemd service
Create the service file that starts the shell in the rootfs directory:
```
$ cat <<EOT | sudo tee ${ROOTFS_DIR}/lib/systemd/system/kata-debug.service
[Unit]
Description=Kata Containers debug console
[Service]
Environment=PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
StandardInput=tty
StandardOutput=tty
# Must be disabled to allow the job to access the real console
PrivateDevices=no
Type=simple
ExecStart=/bin/bash
Restart=always
EOT
```
**Note**: You might need to adjust the `ExecStart=` path.
Add a dependency to start the debug console:
```
$ sudo sed -i '$a Requires=kata-debug.service' ${ROOTFS_DIR}/lib/systemd/system/kata-containers.target
```
### Build the debug image
#### Build the debug image
Follow the instructions in the [Build a rootfs image](#build-a-rootfs-image)
section when using rootfs, or when using initrd, complete the steps in the [Build an initrd image](#build-an-initrd-image) section.
### Configure runtime for custom debug image
#### Configure runtime for custom debug image
Install the image:
@@ -575,33 +573,65 @@ $ (cd /usr/share/kata-containers && sudo ln -sf "$name" kata-containers.img)
**Note**: You should take care to undo this change after you finish debugging
to avoid all subsequently created containers from using the debug image.
### Ensure debug options are valid
#### Create a container
For the debug console to work, you **must** ensure that proxy debug is
**disabled** in the configuration file. If proxy debug is enabled, you will
not see any output when you connect to the virtual machine:
Create a container as normal. For example using `crictl`:
```
$ sudo mkdir -p /etc/kata-containers/
$ sudo install -o root -g root -m 0640 /usr/share/defaults/kata-containers/configuration.toml /etc/kata-containers
$ sudo awk '{if (/^\[proxy\.kata\]/) {got=1}; if (got == 1 && /^.*enable_debug/) {print "#enable_debug = true"; got=0; next; } else {print}}' /etc/kata-containers/configuration.toml > /tmp/configuration.toml
$ sudo install -o root -g root -m 0640 /tmp/configuration.toml /etc/kata-containers/
$ sudo crictl run -r kata container.yaml pod.yaml
```
### Create a container
#### Connect to the virtual machine using the debug console
Create a container as normal. For example using Docker:
The steps required to enable debug console for QEMU slightly differ with
those for firecracker / cloud-hypervisor.
##### Enabling debug console for QEMU
Add `agent.debug_console` to the guest kernel command line to allow the agent process to start a debug console.
```
$ sudo docker run -ti busybox sh
$ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.debug_console"/g' "${kata_configuration_file}"
```
### Connect to the virtual machine using the debug console
Here `kata_configuration_file` could point to `/etc/kata-containers/configuration.toml`
or `/usr/share/defaults/kata-containers/configuration.toml`
or `/opt/kata/share/defaults/kata-containers/configuration-{hypervisor}.toml`, if
you installed Kata Containers using `kata-deploy`.
##### Enabling debug console for cloud-hypervisor / firecracker
Slightly different configuration is required in case of firecracker and cloud hypervisor.
Firecracker and cloud-hypervisor don't have a UNIX socket connected to `/dev/console`.
Hence, the kernel command line option `agent.debug_console` will not work for them.
These hypervisors support `hybrid vsocks`, which can be used for communication
between the host and the guest. The kernel command line option `agent.debug_console_vport`
was added to allow developers specify on which `vsock` port the debugging console should be connected.
Add the parameter `agent.debug_console_vport=1026` to the kernel command line
as shown below:
```
$ id=$(sudo docker ps -q --no-trunc)
$ console="/var/run/vc/vm/${id}/console.sock"
$ sudo socat "stdin,raw,echo=0,escape=0x11" "unix-connect:${console}"
sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.debug_console_vport=1026"/g' "${kata_configuration_file}"
```
> **Note** Ports 1024 and 1025 are reserved for communication with the agent
> and gathering of agent logs respectively.
Next, connect to the debug console. The VSOCKS paths vary slightly between
cloud-hypervisor and firecracker.
In case of cloud-hypervisor, connect to the `vsock` as shown:
```
$ sudo su -c 'cd /var/run/vc/vm/{sandbox_id}/root/ && socat stdin unix-connect:clh.sock'
CONNECT 1026
```
**Note**: You need to type `CONNECT 1026` and press `RETURN` key after entering the `socat` command.
For firecracker, connect to the `hvsock` as shown:
```
$ sudo su -c 'cd /var/run/vc/firecracker/{sandbox_id}/root/ && socat stdin unix-connect:kata.hvsock'
CONNECT 1026
```
**Note**: You need to press the `RETURN` key to see the shell prompt.
@@ -609,10 +639,10 @@ $ sudo socat "stdin,raw,echo=0,escape=0x11" "unix-connect:${console}"
To disconnect from the virtual machine, type `CONTROL+q` (hold down the
`CONTROL` key and press `q`).
### Obtain details of the image
## Obtain details of the image
If the image is created using
[osbuilder](https://github.com/kata-containers/osbuilder), the following YAML
[osbuilder](../tools/osbuilder), the following YAML
file exists and contains details of the image and how it was created:
```
@@ -629,54 +659,22 @@ command inside the container to view the kernel boot logs.
If however you are unable to `exec` into the container, you can enable some debug
options to have the kernel boot messages logged into the system journal.
Which debug options you enable depends on if you are using the hypervisor `vsock` mode
or not, as defined by the `use_vsock` setting in the `[hypervisor.qemu]` section of
the configuration file. The following details the settings:
- For `use_vsock = false`:
- Set `enable_debug = true` in both the `[hypervisor.qemu]` and `[proxy.kata]` sections
- For `use_vsock = true`:
- Set `enable_debug = true` in both the `[hypervisor.qemu]` and `[shim.kata]` sections
- Set `enable_debug = true` in the `[hypervisor.qemu]` and `[runtime]` sections
For generic information on enabling debug in the configuration file, see the
[Enable full debug](#enable-full-debug) section.
The kernel boot messages will appear in the `kata-proxy` or `kata-shim` log appropriately,
The kernel boot messages will appear in the `containerd` or `CRI-O` log appropriately,
such as:
```bash
$ sudo journalctl -t kata-proxy
$ sudo journalctl -t containerd
-- Logs begin at Thu 2020-02-13 16:20:40 UTC, end at Thu 2020-02-13 16:30:23 UTC. --
...
Feb 13 16:20:56 minikube kata-proxy[17371]: time="2020-02-13T16:20:56.608714324Z" level=info msg="[ 1.418768] brd: module loaded\n" name=kata-proxy pid=17371 sandbox=a13ffb2b9b5a66f7787bdae9a427fa954a4d21ec4031d0179eee2573986a8a6e source=agent
Feb 13 16:20:56 minikube kata-proxy[17371]: time="2020-02-13T16:20:56.628493231Z" level=info msg="[ 1.438612] loop: module loaded\n" name=kata-proxy pid=17371 sandbox=a13ffb2b9b5a66f7787bdae9a427fa954a4d21ec4031d0179eee2573986a8a6e source=agent
Feb 13 16:20:56 minikube kata-proxy[17371]: time="2020-02-13T16:20:56.67707956Z" level=info msg="[ 1.487165] pmem0: p1\n" name=kata-proxy pid=17371 sandbox=a13ffb2b9b5a66f7787bdae9a427fa954a4d21ec4031d0179eee2573986a8a6e source=agent
time="2020-09-15T14:56:23.095113803+08:00" level=debug msg="reading guest console" console-protocol=unix console-url=/run/vc/vm/ab9f633385d4987828d342e47554fc6442445b32039023eeddaa971c1bb56791/console.sock pid=107642 sandbox=ab9f633385d4987828d342e47554fc6442445b32039023eeddaa971c1bb56791 source=virtcontainers subsystem=sandbox vmconsole="[ 0.395399] brd: module loaded"
time="2020-09-15T14:56:23.102633107+08:00" level=debug msg="reading guest console" console-protocol=unix console-url=/run/vc/vm/ab9f633385d4987828d342e47554fc6442445b32039023eeddaa971c1bb56791/console.sock pid=107642 sandbox=ab9f633385d4987828d342e47554fc6442445b32039023eeddaa971c1bb56791 source=virtcontainers subsystem=sandbox vmconsole="[ 0.402845] random: fast init done"
time="2020-09-15T14:56:23.103125469+08:00" level=debug msg="reading guest console" console-protocol=unix console-url=/run/vc/vm/ab9f633385d4987828d342e47554fc6442445b32039023eeddaa971c1bb56791/console.sock pid=107642 sandbox=ab9f633385d4987828d342e47554fc6442445b32039023eeddaa971c1bb56791 source=virtcontainers subsystem=sandbox vmconsole="[ 0.403544] random: crng init done"
time="2020-09-15T14:56:23.105268162+08:00" level=debug msg="reading guest console" console-protocol=unix console-url=/run/vc/vm/ab9f633385d4987828d342e47554fc6442445b32039023eeddaa971c1bb56791/console.sock pid=107642 sandbox=ab9f633385d4987828d342e47554fc6442445b32039023eeddaa971c1bb56791 source=virtcontainers subsystem=sandbox vmconsole="[ 0.405599] loop: module loaded"
time="2020-09-15T14:56:23.121121598+08:00" level=debug msg="reading guest console" console-protocol=unix console-url=/run/vc/vm/ab9f633385d4987828d342e47554fc6442445b32039023eeddaa971c1bb56791/console.sock pid=107642 sandbox=ab9f633385d4987828d342e47554fc6442445b32039023eeddaa971c1bb56791 source=virtcontainers subsystem=sandbox vmconsole="[ 0.421324] memmap_init_zone_device initialised 32768 pages in 12ms"
...
```
## Running standalone
It is possible to start the runtime without a container manager. This is
mostly useful for testing and debugging purposes.
### Create an OCI bundle
To build an
[OCI bundle](https://github.com/opencontainers/runtime-spec/blob/master/bundle.md),
required by the runtime:
```
$ bundle="/tmp/bundle"
$ rootfs="$bundle/rootfs"
$ mkdir -p "$rootfs" && (cd "$bundle" && kata-runtime spec)
$ sudo docker export $(sudo docker create busybox) | tar -C "$rootfs" -xvf -
```
### Launch the runtime to create a container
Run the runtime standalone by providing it with the path to the
previously-created [OCI bundle](#create-an-oci-bundle):
```
$ sudo kata-runtime --log=/dev/stdout run --bundle "$bundle" foo
```

View File

@@ -54,7 +54,7 @@ Documents that help to understand and contribute to Kata Containers.
* [Developer Guide](Developer-Guide.md): Setup the Kata Containers developing environments
* [How to contribute to Kata Containers](https://github.com/kata-containers/community/blob/master/CONTRIBUTING.md)
* [Code of Conduct](CODE_OF_CONDUCT.md)
* [Code of Conduct](../CODE_OF_CONDUCT.md)
### Code Licensing

View File

@@ -10,7 +10,6 @@
- [Merge all bump version Pull requests](#merge-all-bump-version-pull-requests)
- [Tag all Kata repositories](#tag-all-kata-repositories)
- [Check Git-hub Actions](#check-git-hub-actions)
- [Create OBS Packages](#create-obs-packages)
- [Create release notes](#create-release-notes)
- [Announce the release](#announce-the-release)
<!-- TOC END -->
@@ -42,7 +41,7 @@
Alternatively, you can also bump the repositories using a script in the Kata packaging repo
```
$ cd ${GOPATH}/src/github.com/kata-containers/packaging/release
$ cd ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/release
$ export NEW_VERSION=<the-new-kata-version>
$ export BRANCH=<the-branch-you-want-to-bump>
$ ./update-repository-version.sh -p "$NEW_VERSION" "$BRANCH"
@@ -59,7 +58,7 @@
Once all the pull requests to bump versions in all Kata repositories are merged,
tag all the repositories as shown below.
```
$ cd ${GOPATH}/src/github.com/kata-containers/packaging/release
$ cd ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/release
$ git checkout <kata-branch-to-release>
$ git pull
$ ./tag_repos.sh -p -b "$BRANCH" tag
@@ -71,33 +70,6 @@
Check the [actions status page](https://github.com/kata-containers/kata-containers/actions) to verify all steps in the actions workflow have completed successfully. On success, a static tarball containing Kata release artifacts will be uploaded to the [Release page](https://github.com/kata-containers/kata-containers/releases).
### Create OBS Packages
- We have set up an [Azure Pipelines](https://azure.microsoft.com/en-us/services/devops/pipelines/) job
to trigger generation of Kata packages in [OBS](https://build.opensuse.org/).
Go to the [Azure Pipelines job that creates OBS packages](https://dev.azure.com/kata-containers/release-process/_release?_a=releases&view=mine&definitionId=1).
- Click on "Create release" (blue button, at top right corner).
It should prompt you for variables to be passed to the release job. They should look like:
```
BRANCH="the-kata-branch-that-is-release"
BUILD_HEAD=false
OBS_BRANCH="the-kata-branch-that-is-release"
```
Note: If the release is `Alpha` , `Beta` , or `RC` (that is part of a `master` release), please use `OBS_BRANCH=master`.
The above step shall create OBS packages for Kata for various distributions that Kata supports and test them as well.
- Verify that the packages have built successfully by checking the [Kata OBS project page](https://build.opensuse.org/project/subprojects/home:katacontainers).
- Make sure packages work correctly. This can be done manually or via the [package testing pipeline](http://jenkins.katacontainers.io/job/package-release-testing).
You have to make sure the packages are already published by OBS before this step.
It should prompt you for variables to be passed to the pipeline:
```
BRANCH="<kata-branch-to-release>"
NEW_VERSION=<the-version-you-expect-to-be-packaged|latest>
```
Note: `latest` will verify that a package provides the latest Kata tag in that branch.
### Create release notes
We have a script in place in the packaging repository to create release notes that include a short-log of the commits across Kata components.
@@ -105,12 +77,12 @@
Run the script as shown below:
```
$ cd ${GOPATH}/src/github.com/kata-containers/packaging/release
$ cd ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/release
# Note: OLD_VERSION is where the script should start to get changes.
$ ./runtime-release-notes.sh ${OLD_VERSION} ${NEW_VERSION} > notes.md
# Edit the `notes.md` file to review and make any changes to the release notes.
# Add the release notes in GitHub runtime.
$ hub -C "${GOPATH}/src/github.com/kata-containers/runtime" release edit -F notes.md "${NEW_VERSION}"
$ hub release edit -F notes.md "${NEW_VERSION}"
```
### Announce the release

View File

@@ -1,185 +1,140 @@
* [Introduction](#introduction)
* [Unsupported scenarios](#unsupported-scenarios)
* [Maintenance Warning](#maintenance-warning)
* [Upgrade from Clear Containers](#upgrade-from-clear-containers)
* [Stop all running Clear Container instances](#stop-all-running-clear-container-instances)
* [Configuration migration](#configuration-migration)
* [Remove Clear Containers packages](#remove-clear-containers-packages)
* [Fedora](#fedora)
* [Ubuntu](#ubuntu)
* [Disable old container manager configuration](#disable-old-container-manager-configuration)
* [Install Kata Containers](#install-kata-containers)
* [Create a Kata Container](#create-a-kata-container)
* [Upgrade from runV](#upgrade-from-runv)
* [Maintenance warning](#maintenance-warning)
* [Determine current version](#determine-current-version)
* [Determine latest version](#determine-latest-version)
* [Configuration changes](#configuration-changes)
* [Upgrade Kata Containers](#upgrade-kata-containers)
* [Appendices](#appendices)
* [Assets](#assets)
* [Guest kernel](#guest-kernel)
* [Image](#image)
* [Determining asset versions](#determining-asset-versions)
* [Upgrade native distribution packaged version](#upgrade-native-distribution-packaged-version)
* [Static installation](#static-installation)
* [Determine if you are using a static installation](#determine-if-you-are-using-a-static-installation)
* [Remove a static installation](#remove-a-static-installation)
* [Upgrade a static installation](#upgrade-a-static-installation)
* [Custom assets](#custom-assets)
# Introduction
This document explains how to upgrade from
[Clear Containers](https://github.com/clearcontainers) and [runV](https://github.com/hyperhq/runv) to
[Kata Containers](https://github.com/kata-containers) and how to upgrade an existing
Kata Containers system to the latest version.
This document outlines the options for upgrading from a
[Kata Containers 1.x release](https://github.com/kata-containers/runtime/releases) to a
[Kata Containers 2.x release](https://github.com/kata-containers/kata-containers/releases).
# Unsupported scenarios
# Maintenance warning
Upgrading a Clear Containers system on the following distributions is **not**
supported since the installation process for these distributions makes use of
unpackaged components:
Kata Containers 2.x is the new focus for the Kata Containers development
community.
- [CentOS](https://github.com/clearcontainers/runtime/blob/master/docs/centos-installation-guide.md)
- [BCLinux](https://github.com/clearcontainers/runtime/blob/master/docs/bclinux-installation-guide.md)
- [RHEL](https://github.com/clearcontainers/runtime/blob/master/docs/rhel-installation-guide.md)
- [SLES](https://github.com/clearcontainers/runtime/blob/master/docs/sles-installation-guide.md)
Although Kata Containers 1.x releases will continue to be published for a
period of time, once a stable release for Kata Containers 2.x is published,
Kata Containers 1.x stable users should consider switching to the Kata 2.x
release.
Additionally, upgrading
[Clear Linux](https://github.com/clearcontainers/runtime/blob/master/docs/clearlinux-installation-guide.md)
is not supported as Kata Containers packages do not yet exist.
See the [stable branch strategy documentation](Stable-Branch-Strategy.md) for
further details.
# Maintenance Warning
# Determine current version
The Clear Containers codebase is no longer being developed. Only new releases
will be considered for significant bug fixes.
To display the current Kata Containers version, run one of the following:
The main development focus is now on Kata Containers. All Clear Containers
users are encouraged to switch to Kata Containers.
# Upgrade from Clear Containers
Since Kata Containers can co-exist on the same system as Clear Containers, if
you already have Clear Containers installed, the upgrade process is simply to
install Kata Containers. However, since Clear Containers is
[no longer being actively developed](#maintenance-warning),
you are encouraged to remove Clear Containers from your systems.
## Stop all running Clear Container instances
Assuming a Docker\* system, to stop all currently running Clear Containers:
```
$ for container in $(sudo docker ps -q); do sudo docker stop $container; done
```bash
$ kata-runtime --version
$ containerd-shim-kata-v2 --version
```
## Configuration migration
# Determine latest version
The automatic migration of
[Clear Containers configuration](https://github.com/clearcontainers/runtime#configuration) to
[Kata Containers configuration](../src/runtime/README.md#configuration) is
not supported.
Kata Containers 2.x releases are published on the
[Kata Containers GitHub releases page](https://github.com/kata-containers/kata-containers/releases).
If you have made changes to your Clear Containers configuration, you should
review those changes and decide whether to manually apply those changes to the
Kata Containers configuration.
Alternatively, if you are using Kata Containers version 1.12.0 or newer, you
can check for newer releases using the command line:
> **Note**: This step must be completed before continuing to
> [remove the Clear Containers packages](#remove-clear-containers-packages) since doing so will
> *delete the default Clear Containers configuration file from your system*.
## Remove Clear Containers packages
> **Warning**: If you have modified your
> [Clear Containers configuration](https://github.com/clearcontainers/runtime#configuration),
> you might want to make a safe copy of the configuration file before removing the
> packages since doing so will *delete the default configuration file*
### Fedora
```
$ sudo -E dnf remove cc-runtime\* cc-proxy\* cc-shim\* linux-container clear-containers-image qemu-lite cc-ksm-throttler
$ sudo rm /etc/yum.repos.d/home:clearcontainers:clear-containers-3.repo
```bash
$ kata-runtime kata-check --check-version-only
```
### Ubuntu
There are various other related options. Run `kata-runtime kata-check --help`
for further details.
```
$ sudo apt-get purge cc-runtime\* cc-proxy\* cc-shim\* linux-container clear-containers-image qemu-lite cc-ksm-throttler
$ sudo rm /etc/apt/sources.list.d/clear-containers.list
```
# Configuration changes
## Disable old container manager configuration
The [Kata Containers 2.x configuration file](/src/runtime/README.md#configuration)
is compatible with the
[Kata Containers 1.x configuration file](https://github.com/kata-containers/runtime/blob/master/README.md#configuration).
Assuming a Docker installation, remove the docker configuration for Clear
Containers:
However, if you have created a local configuration file
(`/etc/kata-containers/configuration.toml`), this will mask the newer Kata
Containers 2.x configuration file.
```
$ sudo rm /etc/systemd/system/docker.service.d/clear-containers.conf
```
## Install Kata Containers
Follow one of the [installation guides](install).
## Create a Kata Container
```
$ sudo docker run -ti busybox sh
```
# Upgrade from runV
runV and Kata Containers can run together on the same system without affecting each other, as long as they are
not configured to use the same container root storage. Currently, runV defaults to `/run/runv` and Kata Containers
defaults to `/var/run/kata-containers`.
Now, to upgrade from runV you need to fresh install Kata Containers by following one of
the [installation guides](install).
Since Kata Containers 2.x introduces a number of new options and changes
some default values, we recommend that you disable the local configuration
file (by moving or renaming it) until you have reviewed the changes to the
official configuration file and applied them to your local file if required.
# Upgrade Kata Containers
## Upgrade native distribution packaged version
As shown in the
[installation instructions](install),
Kata Containers provide binaries for popular distributions in their native
packaging formats. This allows Kata Containers to be upgraded using the
standard package management tools for your distribution.
# Appendices
> **Note:**
>
> Users should prefer the distribution packaged version of Kata Containers
> unless they understand the implications of a manual installation.
## Assets
## Static installation
Kata Containers requires additional resources to create a virtual machine
container. These resources are called
[Kata Containers assets](./design/architecture.md#assets),
which comprise a guest kernel and a root filesystem or initrd image. This
section describes when these components are updated.
> **Note:**
>
> Unless you are an advanced user, if you are using a static installation of
> Kata Containers, we recommend you remove it and install a
> [native distribution packaged version](#upgrade-native-distribution-packaged-version)
> instead.
Since the official assets are packaged, they are automatically upgraded when
new package versions are published.
### Determine if you are using a static installation
> **Warning**: Note that if you use custom assets (by modifying the
> [Kata Runtime configuration > file](../src/runtime/README.md#configuration)),
> it is your responsibility to ensure they are updated as necessary.
### Guest kernel
The `kata-linux-container` package contains a Linux\* kernel based on the
latest vanilla version of the
[long-term kernel](https://www.kernel.org/)
plus a small number of
[patches](../tools/packaging/kernel).
The `Longterm` branch is only updated with
[important bug fixes](https://www.kernel.org/category/releases.html)
meaning this package is only updated when necessary.
The guest kernel package is updated when a new long-term kernel is released
and when any patch updates are required.
### Image
The `kata-containers-image` package is updated only when critical updates are
available for the packages used to create it, such as:
- systemd
- [Kata Containers Agent](../src/agent)
### Determining asset versions
To see which versions of the assets being used:
If the following command displays the output "static", you are using a static
version of Kata Containers:
```bash
$ ls /opt/kata/bin/kata-runtime &>/dev/null && echo static
```
$ kata-runtime kata-env
```
### Remove a static installation
Static installations are installed in `/opt/kata/`, so to uninstall simply
remove this directory.
### Upgrade a static installation
If you understand the implications of using a static installation, to upgrade
first
[remove the existing static installation](#remove-a-static-installation), then
[install the latest release](#determine-latest-version).
See the
[manual installation installation documentation](install/README.md#manual-installation)
for details on how to automatically install and configuration a static release
with containerd.
# Custom assets
> **Note:**
>
> This section only applies to advanced users who have built their own guest
> kernel or image.
If you are using custom
[guest assets](design/architecture.md#guest-assets),
you must upgrade them to work with Kata Containers 2.x since Kata
Containers 1.x assets will **not** work.
See the following for further details:
- [Guest kernel documentation](/tools/packaging/kernel)
- [Guest image and initrd documentation](/tools/osbuilder)
The official assets are packaged meaning they are automatically included in
new releases.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 46 KiB

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 14 KiB

View File

@@ -1,72 +1,56 @@
# Kata Containers Architecture
* [Overview](#overview)
* [Virtualization](#virtualization)
* [Guest assets](#guest-assets)
* [Guest kernel](#guest-kernel)
* [Guest Image](#guest-image)
* [Root filesystem image](#root-filesystem-image)
* [Initrd image](#initrd-image)
* [Agent](#agent)
* [Runtime](#runtime)
* [Configuration](#configuration)
* [Significant OCI commands](#significant-oci-commands)
* [create](#create)
* [start](#start)
* [exec](#exec)
* [kill](#kill)
* [delete](#delete)
* [Networking](#networking)
* [Storage](#storage)
* [Kubernetes Support](#kubernetes-support)
* [Problem Statement](#problem-statement)
* [Containerd](#containerd)
* [CRI-O](#cri-o)
* [OCI Annotations](#oci-annotations)
* [Mixing VM based and namespace based runtimes](#mixing-vm-based-and-namespace-based-runtimes)
* [Appendices](#appendices)
* [DAX](#dax)
- [Kata Containers Architecture](#kata-containers-architecture)
- [Overview](#overview)
- [Virtualization](#virtualization)
- [Guest assets](#guest-assets)
- [Guest kernel](#guest-kernel)
- [Guest image](#guest-image)
- [Root filesystem image](#root-filesystem-image)
- [Initrd image](#initrd-image)
- [Agent](#agent)
- [Runtime](#runtime)
- [Configuration](#configuration)
- [Networking](#networking)
- [Network Hotplug](#network-hotplug)
- [Storage](#storage)
- [Kubernetes support](#kubernetes-support)
- [OCI annotations](#oci-annotations)
- [Mixing VM based and namespace based runtimes](#mixing-vm-based-and-namespace-based-runtimes)
- [Appendices](#appendices)
- [DAX](#dax)
## Overview
This is an architectural overview of Kata Containers, based on the 1.5.0 release.
This is an architectural overview of Kata Containers, based on the 2.0 release.
The two primary deliverables of the Kata Containers project are a container runtime
and a CRI friendly shim. There is also a CRI friendly library API behind them.
The primary deliverable of the Kata Containers project is a CRI friendly shim. There is also a CRI friendly library API behind them.
The [Kata Containers runtime (`kata-runtime`)](../../src/runtime)
The [Kata Containers runtime](../../src/runtime)
is compatible with the [OCI](https://github.com/opencontainers) [runtime specification](https://github.com/opencontainers/runtime-spec)
and therefore works seamlessly with the
[Docker\* Engine](https://www.docker.com/products/docker-engine) pluggable runtime
architecture. It also supports the [Kubernetes\* Container Runtime Interface (CRI)](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-node/container-runtime-interface.md)
and therefore works seamlessly with the [Kubernetes\* Container Runtime Interface (CRI)](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-node/container-runtime-interface.md)
through the [CRI-O\*](https://github.com/kubernetes-incubator/cri-o) and
[Containerd CRI Plugin\*](https://github.com/containerd/cri) implementation. In other words, you can transparently
select between the [default Docker and CRI shim runtime (runc)](https://github.com/opencontainers/runc)
and `kata-runtime`.
[Containerd\*](https://github.com/containerd/containerd) implementation.
`kata-runtime` creates a QEMU\*/KVM virtual machine for each container or pod,
the Docker engine or `kubelet` (Kubernetes) creates respectively.
![Docker and Kata Containers](arch-images/docker-kata.png)
Kata Containers creates a QEMU\*/KVM virtual machine for pod that `kubelet` (Kubernetes) creates respectively.
The [`containerd-shim-kata-v2` (shown as `shimv2` from this point onwards)](../../src/runtime/containerd-shim-v2)
is another Kata Containers entrypoint, which
is the Kata Containers entrypoint, which
implements the [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2) for Kata.
With `shimv2`, Kubernetes can launch Pod and OCI compatible containers with one shim (the `shimv2`) per Pod instead
of `2N+1` shims (a `containerd-shim` and a `kata-shim` for each container and the Pod sandbox itself), and no standalone
`kata-proxy` process even if no VSOCK is available.
Before `shimv2` (as done in [Kata Containers 1.x releases](https://github.com/kata-containers/runtime/releases)), we need to create a `containerd-shim` and a [`kata-shim`](https://github.com/kata-containers/shim) for each container and the Pod sandbox itself, plus an optional [`kata-proxy`](https://github.com/kata-containers/proxy) when VSOCK is not available. With `shimv2`, Kubernetes can launch Pod and OCI compatible containers with one shim (the `shimv2`) per Pod instead of `2N+1` shims, and no standalone `kata-proxy` process even if no VSOCK is available.
![Kubernetes integration with shimv2](arch-images/shimv2.svg)
The container process is then spawned by
[agent](../../src/agent), an agent process running
as a daemon inside the virtual machine. `kata-agent` runs a gRPC server in
[`kata-agent`](../../src/agent), an agent process running
as a daemon inside the virtual machine. `kata-agent` runs a [`ttRPC`](https://github.com/containerd/ttrpc-rust) server in
the guest using a VIRTIO serial or VSOCK interface which QEMU exposes as a socket
file on the host. `kata-runtime` uses a gRPC protocol to communicate with
file on the host. `shimv2` uses a `ttRPC` protocol to communicate with
the agent. This protocol allows the runtime to send container management
commands to the agent. The protocol is also used to carry the I/O streams (stdout,
stderr, stdin) between the containers and the manage engines (e.g. Docker Engine).
stderr, stdin) between the containers and the manage engines (e.g. CRI-O or containerd).
For any given container, both the init process and all potentially executed
commands within that container, together with their related I/O streams, need
@@ -111,7 +95,7 @@ The only services running in the context of the mini O/S are the init daemon
is created using libcontainer, creating a container in the same manner that is done
by `runc`.
For example, when `docker run -ti ubuntu date` is run:
For example, when `ctr run -ti ubuntu date` is run:
- The hypervisor will boot the mini-OS image using the guest kernel.
- `systemd`, running inside the mini-OS context, will launch the `kata-agent` in
@@ -130,170 +114,37 @@ The only service running in the context of the initrd is the [Agent](#agent) as
## Agent
[`kata-agent`](../../src/agent) is a process running in the
guest as a supervisor for managing containers and processes running within
those containers.
[`kata-agent`](../../src/agent) is a process running in the guest as a supervisor for managing containers and processes running within those containers.
The `kata-agent` execution unit is the sandbox. A `kata-agent` sandbox is a container sandbox defined by a set of namespaces (NS, UTS, IPC and PID). `kata-runtime` can
For the 2.0 release, the `kata-agent` is rewritten in the [RUST programming language](https://www.rust-lang.org/) so that we can minimize its memory footprint while keeping the memory safety of the original GO version of [`kata-agent` used in Kata Container 1.x](https://github.com/kata-containers/agent). This memory footprint reduction is pretty impressive, from tens of megabytes down to less than 100 kilobytes, enabling Kata Containers in more use cases like functional computing and edge computing.
The `kata-agent` execution unit is the sandbox. A `kata-agent` sandbox is a container sandbox defined by a set of namespaces (NS, UTS, IPC and PID). `shimv2` can
run several containers per VM to support container engines that require multiple
containers running inside a pod. In the case of docker, `kata-runtime` creates a
single container per pod.
containers running inside a pod.
`kata-agent` communicates with the other Kata components over ttRPC.
### Agent gRPC protocol
placeholder
`kata-agent` communicates with the other Kata components over `ttRPC`.
## Runtime
`kata-runtime` is an OCI compatible container runtime and is responsible for handling
all commands specified by
[the OCI runtime specification](https://github.com/opencontainers/runtime-spec)
and launching `kata-shim` instances.
`containerd-shim-kata-v2` is a [containerd runtime shimv2](https://github.com/containerd/containerd/blob/v1.4.1/runtime/v2/README.md) implementation and is responsible for handling the `runtime v2 shim APIs`, which is similar to [the OCI runtime specification](https://github.com/opencontainers/runtime-spec) but simplifies the architecture by loading the runtime once and making RPC calls to handle the various container lifecycle commands. This refinement is an improvement on the OCI specification which requires the container manager call the runtime binary multiple times, at least once for each lifecycle command.
`kata-runtime` heavily utilizes the
[virtcontainers project](https://github.com/containers/virtcontainers), which
provides a generic, runtime-specification agnostic, hardware-virtualized containers
library.
`containerd-shim-kata-v2` heavily utilizes the
[virtcontainers package](../../src/runtime/virtcontainers/), which provides a generic, runtime-specification agnostic, hardware-virtualized containers library.
### Configuration
The runtime uses a TOML format configuration file called `configuration.toml`. By
default this file is installed in the `/usr/share/defaults/kata-containers`
directory and contains various settings such as the paths to the hypervisor,
the guest kernel and the mini-OS image.
The runtime uses a TOML format configuration file called `configuration.toml`. By default this file is installed in the `/usr/share/defaults/kata-containers` directory and contains various settings such as the paths to the hypervisor, the guest kernel and the mini-OS image.
The actual configuration file paths can be determined by running:
```
$ kata-runtime --kata-show-default-config-paths
```
Most users will not need to modify the configuration file.
The file is well commented and provides a few "knobs" that can be used to modify
the behavior of the runtime.
The file is well commented and provides a few "knobs" that can be used to modify the behavior of the runtime and your chosen hypervisor.
The configuration file is also used to enable runtime [debug output](../Developer-Guide.md#enable-full-debug).
### Significant OCI commands
Here we describe how `kata-runtime` handles the most important OCI commands.
#### `create`
When handling the OCI
[`create`](https://github.com/kata-containers/runtime/blob/master/cli/create.go)
command, `kata-runtime` goes through the following steps:
1. Create the network namespace where we will spawn VM and shims processes.
2. Call into the pre-start hooks. One of them should be responsible for creating
the `veth` network pair between the host network namespace and the network namespace
freshly created.
3. Scan the network from the new network namespace, and create a MACVTAP connection
between the `veth` interface and a `tap` interface into the VM.
4. Start the VM inside the network namespace by providing the `tap` interface
previously created.
5. Wait for the VM to be ready.
6. Start `kata-proxy`, which will connect to the created VM. The `kata-proxy` process
will take care of proxying all communications with the VM. Kata has a single proxy
per VM.
7. Communicate with `kata-agent` (through the proxy) to configure the sandbox
inside the VM.
8. Communicate with `kata-agent` to create the container, relying on the OCI
configuration file `config.json` initially provided to `kata-runtime`. This
spawns the container process inside the VM, leveraging the `libcontainer` package.
9. Start `kata-shim`, which will connect to the gRPC server socket provided by the `kata-proxy`. `kata-shim` will spawn a few Go routines to parallelize blocking calls `ReadStdout()` , `ReadStderr()` and `WaitProcess()`. Both `ReadStdout()` and `ReadStderr()` are run through infinite loops since `kata-shim` wants the output of those until the container process terminates. `WaitProcess()` is a unique call which returns the exit code of the container process when it terminates inside the VM. Note that `kata-shim` is started inside the network namespace, to allow upper layers to determine which network namespace has been created and by checking the `kata-shim` process. It also creates a new PID namespace by entering into it. This ensures that all `kata-shim` processes belonging to the same container will get killed when the `kata-shim` representing the container process terminates.
At this point the container process is running inside of the VM, and it is represented
on the host system by the `kata-shim` process.
![`kata-oci-create`](arch-images/kata-oci-create.svg)
#### `start`
With traditional containers, [`start`](https://github.com/kata-containers/runtime/blob/master/cli/start.go) launches a container process in its own set of namespaces. With Kata Containers, the main task of `kata-runtime` is to ask [`kata-agent`](#agent) to start the container workload inside the virtual machine. `kata-runtime` will run through the following steps:
1. Communicate with `kata-agent` (through the proxy) to start the container workload
inside the VM. If, for example, the command to execute inside of the container is `top`,
the `kata-shim`'s `ReadStdOut()` will start returning text output for top, and
`WaitProcess()` will continue to block as long as the `top` process runs.
2. Call into the post-start hooks. Usually, this is a no-op since nothing is provided
(this needs clarification)
![`kata-oci-start`](arch-images/kata-oci-start.svg)
#### `exec`
OCI [`exec`](https://github.com/kata-containers/runtime/blob/master/cli/exec.go) allows you to run an additional command within an already running
container. In Kata Containers, this is handled as follows:
1. A request is sent to the `kata agent` (through the proxy) to start a new process
inside an existing container running within the VM.
2. A new `kata-shim` is created within the same network and PID namespaces as the
original `kata-shim` representing the container process. This new `kata-shim` is
used for the new exec process.
Now the process started with `exec` is running within the VM, sharing `uts`, `pid`, `mnt` and `ipc` namespaces with the container process.
![`kata-oci-exec`](arch-images/kata-oci-exec.svg)
#### `kill`
When sending the OCI [`kill`](https://github.com/kata-containers/runtime/blob/master/cli/kill.go) command, the container runtime should send a
[UNIX signal](https://en.wikipedia.org/wiki/Unix_signal) to the container process.
A `kill` sending a termination signal such as `SIGKILL` or `SIGTERM` is expected
to terminate the container process. In the context of a traditional container,
this means stopping the container. For `kata-runtime`, this translates to stopping
the container and the VM associated with it.
1. Send a request to kill the container process to the `kata-agent` (through the proxy).
2. Wait for `kata-shim` process to exit.
3. Force kill the container process if `kata-shim` process didn't return after a
timeout. This is done by communicating with `kata-agent` (connecting the proxy),
sending `SIGKILL` signal to the container process inside the VM.
4. Wait for `kata-shim` process to exit, and return an error if we reach the
timeout again.
5. Communicate with `kata-agent` (through the proxy) to remove the container
configuration from the VM.
6. Communicate with `kata-agent` (through the proxy) to destroy the sandbox
configuration from the VM.
7. Stop the VM.
8. Remove all network configurations inside the network namespace and delete the
namespace.
9. Execute post-stop hooks.
If `kill` was invoked with a non-termination signal, this simply signals the container process. Otherwise, everything has been torn down, and the VM has been removed.
#### `delete`
[`delete`](https://github.com/kata-containers/runtime/blob/master/cli/delete.go) removes all internal resources related to a container. A running container
cannot be deleted unless the OCI runtime is explicitly being asked to, by using
`--force` flag.
If the sandbox is not stopped, but the particular container process returned on
its own already, the `kata-runtime` will first go through most of the steps a `kill`
would go through for a termination signal. After this process, or if the `sandboxID` was already stopped to begin with, then `kata-runtime` will:
1. Remove container resources. Every file kept under `/var/{lib,run}/virtcontainers/sandboxes/<sandboxID>/<containerID>`.
2. Remove sandbox resources. Every file kept under `/var/{lib,run}/virtcontainers/sandboxes/<sandboxID>`.
At this point, everything related to the container should have been removed from the host system, and no related process should be running.
#### `state`
[`state`](https://github.com/kata-containers/runtime/blob/master/cli/state.go)
returns the status of the container. For `kata-runtime`, this means being
able to detect if the container is still running by looking at the state of `kata-shim`
process representing this container process.
1. Ask the container status by checking information stored on disk. (clarification needed)
2. Check `kata-shim` process representing the container.
3. In case the container status on disk was supposed to be `ready` or `running`,
and the `kata-shim` process no longer exists, this involves the detection of a
stopped container. This means that before returning the container status,
the container has to be properly stopped. Here are the steps involved in this detection:
1. Wait for `kata-shim` process to exit.
2. Force kill the container process if `kata-shim` process didn't return after a timeout. This is done by communicating with `kata-agent` (connecting the proxy), sending `SIGKILL` signal to the container process inside the VM.
3. Wait for `kata-shim` process to exit, and return an error if we reach the timeout again.
4. Communicate with `kata-agent` (connecting the proxy) to remove the container configuration from the VM.
4. Return container status.
## Networking
Containers will typically live in their own, possibly shared, networking namespace.
@@ -305,66 +156,31 @@ In order to do so, container engines will usually add one end of a virtual
ethernet (`veth`) pair into the container networking namespace. The other end of
the `veth` pair is added to the host networking namespace.
This is a very namespace-centric approach as many hypervisors (in particular QEMU)
cannot handle `veth` interfaces. Typically, `TAP` interfaces are created for VM
connectivity.
This is a very namespace-centric approach as many hypervisors/VMMs cannot handle `veth`
interfaces. Typically, `TAP` interfaces are created for VM connectivity.
To overcome incompatibility between typical container engines expectations
and virtual machines, `kata-runtime` networking transparently connects `veth`
interfaces with `TAP` ones using MACVTAP:
and virtual machines, Kata Containers networking transparently connects `veth`
interfaces with `TAP` ones using Traffic Control:
![Kata Containers networking](arch-images/network.png)
With a TC filter in place, a redirection is created between the container network and the
virtual machine. As an example, the CNI may create a device, `eth0`, in the container's network
namespace, which is a VETH device. Kata Containers will create a tap device for the VM, `tap0_kata`,
and setup a TC redirection filter to mirror traffic from `eth0`'s ingress to `tap0_kata`'s egress,
and a second to mirror traffic from `tap0_kata`'s ingress to `eth0`'s egress.
Kata Containers maintains support for MACVTAP, which was an earlier implementation used in Kata. TC-filter
is the default because it allows for simpler configuration, better CNI plugin compatibility, and performance
on par with MACVTAP.
Kata Containers has deprecated support for bridge due to lacking performance relative to TC-filter and MACVTAP.
Kata Containers supports both
[CNM](https://github.com/docker/libnetwork/blob/master/docs/design.md#the-container-network-model)
and [CNI](https://github.com/containernetworking/cni) for networking management.
### CNM
![High-level CNM Diagram](arch-images/CNM_overall_diagram.png)
__CNM lifecycle__
1. `RequestPool`
2. `CreateNetwork`
3. `RequestAddress`
4. `CreateEndPoint`
5. `CreateContainer`
6. Create `config.json`
7. Create PID and network namespace
8. `ProcessExternalKey`
9. `JoinEndPoint`
10. `LaunchContainer`
11. Launch
12. Run container
![Detailed CNM Diagram](arch-images/CNM_detailed_diagram.png)
__Runtime network setup with CNM__
1. Read `config.json`
2. Create the network namespace
3. Call the `prestart` hook (from inside the netns)
4. Scan network interfaces inside netns and get the name of the interface
created by prestart hook
5. Create bridge, TAP, and link all together with network interface previously
created
### Network Hotplug
Kata Containers has developed a set of network sub-commands and APIs to add, list and
@@ -375,35 +191,14 @@ The following diagram illustrates the Kata Containers network hotplug workflow.
![Network Hotplug](arch-images/kata-containers-network-hotplug.png)
## Storage
Container workloads are shared with the virtualized environment through [9pfs](https://www.kernel.org/doc/Documentation/filesystems/9p.txt).
The devicemapper storage driver is a special case. The driver uses dedicated block
devices rather than formatted filesystems, and operates at the block level rather
than the file level. This knowledge is used to directly use the underlying block
device instead of the overlay file system for the container root file system. The
block device maps to the top read-write layer for the overlay. This approach gives
much better I/O performance compared to using 9pfs to share the container file system.
Container workloads are shared with the virtualized environment through [virtio-fs](https://virtio-fs.gitlab.io/).
The approach above does introduce a limitation in terms of dynamic file copy
in/out of the container using the `docker cp` operations. The copy operation from
host to container accesses the mounted file system on the host-side. This is
not expected to work and may lead to inconsistencies as the block device will
be simultaneously written to from two different mounts. The copy operation from
container to host will work, provided the user calls `sync(1)` from within the
container prior to the copy to make sure any outstanding cached data is written
to the block device.
The [devicemapper `snapshotter`](https://github.com/containerd/containerd/tree/master/snapshots/devmapper) is a special case. The `snapshotter` uses dedicated block devices rather than formatted filesystems, and operates at the block level rather than the file level. This knowledge is used to directly use the underlying block device instead of the overlay file system for the container root file system. The block device maps to the top read-write layer for the overlay. This approach gives much better I/O performance compared to using `virtio-fs` to share the container file system.
```
docker cp [OPTIONS] CONTAINER:SRC_PATH HOST:DEST_PATH
docker cp [OPTIONS] HOST:SRC_PATH CONTAINER:DEST_PATH
```
Kata Containers has the ability to hotplug and remove block devices, which makes it possible to use block devices for containers started after the VM has been launched.
Kata Containers has the ability to hotplug and remove block devices, which makes it
possible to use block devices for containers started after the VM has been launched.
Users can check to see if the container uses the devicemapper block device as its
rootfs by calling `mount(8)` within the container. If the devicemapper block device
is used, `/` will be mounted on `/dev/vda`. Users can disable direct mounting
of the underlying block device through the runtime configuration.
Users can check to see if the container uses the devicemapper block device as its rootfs by calling `mount(8)` within the container. If the devicemapper block device
is used, `/` will be mounted on `/dev/vda`. Users can disable direct mounting of the underlying block device through the runtime configuration.
## Kubernetes support
@@ -424,44 +219,13 @@ lifecycle management from container execution through the dedicated
In other words, a Kubelet is a CRI client and expects a CRI implementation to
handle the server side of the interface.
[CRI-O\*](https://github.com/kubernetes-incubator/cri-o) and [Containerd CRI Plugin\*](https://github.com/containerd/cri) are CRI implementations that rely on [OCI](https://github.com/opencontainers/runtime-spec)
[CRI-O\*](https://github.com/kubernetes-incubator/cri-o) and [Containerd\*](https://github.com/containerd/containerd/) are CRI implementations that rely on [OCI](https://github.com/opencontainers/runtime-spec)
compatible runtimes for managing container instances.
Kata Containers is an officially supported CRI-O and Containerd CRI Plugin runtime. It is OCI compatible and therefore aligns with project's architecture and requirements.
However, due to the fact that Kubernetes execution units are sets of containers (also
known as pods) rather than single containers, the Kata Containers runtime needs to
get extra information to seamlessly integrate with Kubernetes.
### Problem statement
The Kubernetes\* execution unit is a pod that has specifications detailing constraints
such as namespaces, groups, hardware resources, security contents, *etc* shared by all
the containers within that pod.
By default the Kubelet will send a container creation request to its CRI runtime for
each pod and container creation. Without additional metadata from the CRI runtime,
the Kata Containers runtime will thus create one virtual machine for each pod and for
each containers within a pod. However the task of providing the Kubernetes pod semantics
when creating one virtual machine for each container within the same pod is complex given
the resources of these virtual machines (such as networking or PID) need to be shared.
The challenge with Kata Containers when working as a Kubernetes\* runtime is thus to know
when to create a full virtual machine (for pods) and when to create a new container inside
a previously created virtual machine. In both cases it will get called with very similar
arguments, so it needs the help of the Kubernetes CRI runtime to be able to distinguish a
pod creation request from a container one.
### Containerd
As of Kata Containers 1.5, using `shimv2` with containerd 1.2.0 or above is the preferred
way to run Kata Containers with Kubernetes ([see the howto](../how-to/how-to-use-k8s-with-cri-containerd-and-kata.md#configure-containerd-to-use-kata-containers)).
The CRI-O will catch up soon ([`kubernetes-sigs/cri-o#2024`](https://github.com/kubernetes-sigs/cri-o/issues/2024)).
Refer to the following how-to guides:
Kata Containers is an officially supported CRI-O and Containerd runtime. Refer to the following guides on how to set up Kata Containers with Kubernetes:
- [How to use Kata Containers and Containerd](../how-to/containerd-kata.md)
- [How to use Kata Containers and CRI (containerd plugin) with Kubernetes](../how-to/how-to-use-k8s-with-cri-containerd-and-kata.md)
### CRI-O
- [Run Kata Containers with Kubernetes](../how-to/run-kata-with-k8s.md)
#### OCI annotations
@@ -506,36 +270,10 @@ with a Kubernetes pod:
#### Mixing VM based and namespace based runtimes
> **Note:** Since Kubernetes 1.12, the [`Kubernetes RuntimeClass`](../how-to/containerd-kata.md#kubernetes-runtimeclass)
> **Note:** Since Kubernetes 1.12, the [`Kubernetes RuntimeClass`](https://kubernetes.io/docs/concepts/containers/runtime-class/)
> has been supported and the user can specify runtime without the non-standardized annotations.
One interesting evolution of the CRI-O support for `kata-runtime` is the ability
to run virtual machine based pods alongside namespace ones. With CRI-O and Kata
Containers, one can introduce the concept of workload trust inside a Kubernetes
cluster.
A cluster operator can now tag (through Kubernetes annotations) container workloads
as `trusted` or `untrusted`. The former labels known to be safe workloads while
the latter describes potentially malicious or misbehaving workloads that need the
highest degree of isolation. In a software development context, an example of a `trusted` workload would be a containerized continuous integration engine whereas all
developers applications would be `untrusted` by default. Developers workloads can
be buggy, unstable or even include malicious code and thus from a security perspective
it makes sense to tag them as `untrusted`. A CRI-O and Kata Containers based
Kubernetes cluster handles this use case transparently as long as the deployed
containers are properly tagged. All `untrusted` containers will be handled by Kata Containers and thus run in a hardware virtualized secure sandbox while `runc`, for
example, could handle the `trusted` ones.
CRI-O's default behavior is to trust all pods, except when they're annotated with
`io.kubernetes.cri-o.TrustedSandbox` set to `false`. The default CRI-O trust level
is set through its `configuration.toml` configuration file. Generally speaking,
the CRI-O runtime selection between its trusted runtime (typically `runc`) and its untrusted one (`kata-runtime`) is a function of the pod `Privileged` setting, the `io.kubernetes.cri-o.TrustedSandbox` annotation value, and the default CRI-O trust
level. When a pod is `Privileged`, the runtime will always be `runc`. However, when
a pod is **not** `Privileged` the runtime selection is done as follows:
| | `io.kubernetes.cri-o.TrustedSandbox` not set | `io.kubernetes.cri-o.TrustedSandbox` = `true` | `io.kubernetes.cri-o.TrustedSandbox` = `false` |
| :--- | :---: | :---: | :---: |
| Default CRI-O trust level: `trusted` | runc | runc | Kata Containers |
| Default CRI-O trust level: `untrusted` | Kata Containers | Kata Containers | Kata Containers |
With `RuntimeClass`, users can define Kata Containers as a `RuntimeClass` and then explicitly specify that a pod being created as a Kata Containers pod. For details, please refer to [How to use Kata Containers and Containerd](../../docs/how-to/containerd-kata.md).
# Appendices

View File

@@ -220,6 +220,566 @@ components:
fixed: false
values: []
since: 2.0.0
- prefix: kata_firecracker
title: Firecracker vmm metrics
desc: Metrics for Firecracker vmm
metrics:
- name: kata_firecracker_api_server
type: GAUGE
unit: ""
help: Metrics related to the internal API server.
labels:
- name: item
desc: ""
manually_edit: false
fixed: true
values:
- value: process_startup_time_cpu_us
desc: ""
- value: process_startup_time_us
desc: ""
- value: sync_response_fails
desc: ""
- value: sync_vmm_send_timeout_count
desc: ""
- name: sandbox_id
desc: ""
manually_edit: false
fixed: false
values: []
since: 2.0.0
- name: kata_firecracker_block
type: GAUGE
unit: ""
help: Block Device associated metrics.
labels:
- name: item
desc: ""
manually_edit: false
fixed: true
values:
- value: activate_fails
desc: ""
- value: cfg_fails
desc: ""
- value: event_fails
desc: ""
- value: execute_fails
desc: ""
- value: flush_count
desc: ""
- value: invalid_reqs_count
desc: ""
- value: no_avail_buffer
desc: ""
- value: queue_event_count
desc: ""
- value: rate_limiter_event_count
desc: ""
- value: rate_limiter_throttled_events
desc: ""
- value: read_bytes
desc: ""
- value: read_count
desc: ""
- value: update_count
desc: ""
- value: update_fails
desc: ""
- value: write_bytes
desc: ""
- value: write_count
desc: ""
- name: sandbox_id
desc: ""
manually_edit: false
fixed: false
values: []
since: 2.0.0
- name: kata_firecracker_get_api_requests
type: GAUGE
unit: ""
help: Metrics specific to GET API Requests for counting user triggered actions and/or failures.
labels:
- name: item
desc: ""
manually_edit: false
fixed: true
values:
- value: instance_info_count
desc: ""
- value: instance_info_fails
desc: ""
- value: machine_cfg_count
desc: ""
- value: machine_cfg_fails
desc: ""
- name: sandbox_id
desc: ""
manually_edit: false
fixed: false
values: []
since: 2.0.0
- name: kata_firecracker_i8042
type: GAUGE
unit: ""
help: Metrics specific to the i8042 device.
labels:
- name: item
desc: ""
manually_edit: false
fixed: true
values:
- value: error_count
desc: ""
- value: missed_read_count
desc: ""
- value: missed_write_count
desc: ""
- value: read_count
desc: ""
- value: reset_count
desc: ""
- value: write_count
desc: ""
- name: sandbox_id
desc: ""
manually_edit: false
fixed: false
values: []
since: 2.0.0
- name: kata_firecracker_latencies_us
type: GAUGE
unit: ""
help: Performance metrics related for the moment only to snapshots.
labels:
- name: item
desc: ""
manually_edit: false
fixed: true
values:
- value: diff_create_snapshot
desc: ""
- value: full_create_snapshot
desc: ""
- value: load_snapshot
desc: ""
- value: pause_vm
desc: ""
- value: resume_vm
desc: ""
- value: vmm_diff_create_snapshot
desc: ""
- value: vmm_full_create_snapshot
desc: ""
- value: vmm_load_snapshot
desc: ""
- value: vmm_pause_vm
desc: ""
- value: vmm_resume_vm
desc: ""
- name: sandbox_id
desc: ""
manually_edit: false
fixed: false
values: []
since: 2.0.0
- name: kata_firecracker_logger
type: GAUGE
unit: ""
help: Metrics for the logging subsystem.
labels:
- name: item
desc: ""
manually_edit: false
fixed: true
values:
- value: log_fails
desc: ""
- value: metrics_fails
desc: ""
- value: missed_log_count
desc: ""
- value: missed_metrics_count
desc: ""
- name: sandbox_id
desc: ""
manually_edit: false
fixed: false
values: []
since: 2.0.0
- name: kata_firecracker_mmds
type: GAUGE
unit: ""
help: Metrics for the MMDS functionality.
labels:
- name: item
desc: ""
manually_edit: false
fixed: true
values:
- value: connections_created
desc: ""
- value: connections_destroyed
desc: ""
- value: rx_accepted
desc: ""
- value: rx_accepted_err
desc: ""
- value: rx_accepted_unusual
desc: ""
- value: rx_bad_eth
desc: ""
- value: rx_count
desc: ""
- value: tx_bytes
desc: ""
- value: tx_count
desc: ""
- value: tx_errors
desc: ""
- value: tx_frames
desc: ""
- name: sandbox_id
desc: ""
manually_edit: false
fixed: false
values: []
since: 2.0.0
- name: kata_firecracker_net
type: GAUGE
unit: ""
help: Network-related metrics.
labels:
- name: item
desc: ""
manually_edit: false
fixed: true
values:
- value: activate_fails
desc: ""
- value: cfg_fails
desc: ""
- value: event_fails
desc: ""
- value: mac_address_updates
desc: ""
- value: no_rx_avail_buffer
desc: ""
- value: no_tx_avail_buffer
desc: ""
- value: rx_bytes_count
desc: ""
- value: rx_count
desc: ""
- value: rx_event_rate_limiter_count
desc: ""
- value: rx_fails
desc: ""
- value: rx_packets_count
desc: ""
- value: rx_partial_writes
desc: ""
- value: rx_queue_event_count
desc: ""
- value: rx_rate_limiter_throttled
desc: ""
- value: rx_tap_event_count
desc: ""
- value: tap_read_fails
desc: ""
- value: tap_write_fails
desc: ""
- value: tx_bytes_count
desc: ""
- value: tx_count
desc: ""
- value: tx_fails
desc: ""
- value: tx_malformed_frames
desc: ""
- value: tx_packets_count
desc: ""
- value: tx_partial_reads
desc: ""
- value: tx_queue_event_count
desc: ""
- value: tx_rate_limiter_event_count
desc: ""
- value: tx_rate_limiter_throttled
desc: ""
- value: tx_spoofed_mac_count
desc: ""
- name: sandbox_id
desc: ""
manually_edit: false
fixed: false
values: []
since: 2.0.0
- name: kata_firecracker_patch_api_requests
type: GAUGE
unit: ""
help: Metrics specific to PATCH API Requests for counting user triggered actions and/or failures.
labels:
- name: item
desc: ""
manually_edit: false
fixed: true
values:
- value: drive_count
desc: ""
- value: drive_fails
desc: ""
- value: machine_cfg_count
desc: ""
- value: machine_cfg_fails
desc: ""
- value: network_count
desc: ""
- value: network_fails
desc: ""
- name: sandbox_id
desc: ""
manually_edit: false
fixed: false
values: []
since: 2.0.0
- name: kata_firecracker_put_api_requests
type: GAUGE
unit: ""
help: Metrics specific to PUT API Requests for counting user triggered actions and/or failures.
labels:
- name: item
desc: ""
manually_edit: false
fixed: true
values:
- value: actions_count
desc: ""
- value: actions_fails
desc: ""
- value: boot_source_count
desc: ""
- value: boot_source_fails
desc: ""
- value: drive_count
desc: ""
- value: drive_fails
desc: ""
- value: logger_count
desc: ""
- value: logger_fails
desc: ""
- value: machine_cfg_count
desc: ""
- value: machine_cfg_fails
desc: ""
- value: metrics_count
desc: ""
- value: metrics_fails
desc: ""
- value: network_count
desc: ""
- value: network_fails
desc: ""
- name: sandbox_id
desc: ""
manually_edit: false
fixed: false
values: []
since: 2.0.0
- name: kata_firecracker_rtc
type: GAUGE
unit: ""
help: Metrics specific to the RTC device.
labels:
- name: item
desc: ""
manually_edit: false
fixed: true
values:
- value: error_count
desc: ""
- value: missed_read_count
desc: ""
- value: missed_write_count
desc: ""
- name: sandbox_id
desc: ""
manually_edit: false
fixed: false
values: []
since: 2.0.0
- name: kata_firecracker_seccomp
type: GAUGE
unit: ""
help: Metrics for the seccomp filtering.
labels:
- name: item
desc: ""
manually_edit: false
fixed: true
values:
- value: num_faults
desc: ""
- name: sandbox_id
desc: ""
manually_edit: false
fixed: false
values: []
since: 2.0.0
- name: kata_firecracker_signals
type: GAUGE
unit: ""
help: Metrics related to signals.
labels:
- name: item
desc: ""
manually_edit: false
fixed: true
values:
- value: sigbus
desc: ""
- value: sigsegv
desc: ""
- name: sandbox_id
desc: ""
manually_edit: false
fixed: false
values: []
since: 2.0.0
- name: kata_firecracker_uart
type: GAUGE
unit: ""
help: Metrics specific to the UART device.
labels:
- name: item
desc: ""
manually_edit: false
fixed: true
values:
- value: error_count
desc: ""
- value: flush_count
desc: ""
- value: missed_read_count
desc: ""
- value: missed_write_count
desc: ""
- value: read_count
desc: ""
- value: write_count
desc: ""
- name: sandbox_id
desc: ""
manually_edit: false
fixed: false
values: []
since: 2.0.0
- name: kata_firecracker_vcpu
type: GAUGE
unit: ""
help: Metrics specific to VCPUs' mode of functioning.
labels:
- name: item
desc: ""
manually_edit: false
fixed: true
values:
- value: exit_io_in
desc: ""
- value: exit_io_out
desc: ""
- value: exit_mmio_read
desc: ""
- value: exit_mmio_write
desc: ""
- value: failures
desc: ""
- value: filter_cpuid
desc: ""
- name: sandbox_id
desc: ""
manually_edit: false
fixed: false
values: []
since: 2.0.0
- name: kata_firecracker_vmm
type: GAUGE
unit: ""
help: Metrics specific to the machine manager as a whole.
labels:
- name: item
desc: ""
manually_edit: false
fixed: true
values:
- value: device_events
desc: ""
- value: panic_count
desc: ""
- name: sandbox_id
desc: ""
manually_edit: false
fixed: false
values: []
since: 2.0.0
- name: kata_firecracker_vsock
type: GAUGE
unit: ""
help: Vsock-related metrics.
labels:
- name: item
desc: ""
manually_edit: false
fixed: true
values:
- value: activate_fails
desc: ""
- value: cfg_fails
desc: ""
- value: conn_event_fails
desc: ""
- value: conns_added
desc: ""
- value: conns_killed
desc: ""
- value: conns_removed
desc: ""
- value: ev_queue_event_fails
desc: ""
- value: killq_resync
desc: ""
- value: muxer_event_fails
desc: ""
- value: rx_bytes_count
desc: ""
- value: rx_packets_count
desc: ""
- value: rx_queue_event_count
desc: ""
- value: rx_queue_event_fails
desc: ""
- value: rx_read_fails
desc: ""
- value: tx_bytes_count
desc: ""
- value: tx_flush_fails
desc: ""
- value: tx_packets_count
desc: ""
- value: tx_queue_event_count
desc: ""
- value: tx_queue_event_fails
desc: ""
- value: tx_write_fails
desc: ""
- name: sandbox_id
desc: ""
manually_edit: false
fixed: false
values: []
since: 2.0.0
- prefix: kata_guest
title: Kata guest OS metrics
desc: Guest OS's metrics in hypervisor.

View File

@@ -9,6 +9,7 @@
* [Metrics list](#metrics-list)
* [Metric types](#metric-types)
* [Kata agent metrics](#kata-agent-metrics)
* [Firecracker metrics](#firecracker-metrics)
* [Kata guest OS metrics](#kata-guest-os-metrics)
* [Hypervisor metrics](#hypervisor-metrics)
* [Kata monitor metrics](#kata-monitor-metrics)
@@ -152,6 +153,7 @@ Metrics is categorized by component where metrics are collected from and for.
* [Metric types](#metric-types)
* [Kata agent metrics](#kata-agent-metrics)
* [Firecracker metrics](#firecracker-metrics)
* [Kata guest OS metrics](#kata-guest-os-metrics)
* [Hypervisor metrics](#hypervisor-metrics)
* [Kata monitor metrics](#kata-monitor-metrics)
@@ -198,6 +200,30 @@ Agent's metrics contains metrics about agent process.
| `kata_agent_total_time`: <br> Agent process total time | `GAUGE` | | <ul><li>`sandbox_id`</li></ul> | 2.0.0 |
| `kata_agent_total_vm`: <br> Agent process total `vm` size | `GAUGE` | | <ul><li>`sandbox_id`</li></ul> | 2.0.0 |
### Firecracker metrics
Metrics for Firecracker vmm.
| Metric name | Type | Units | Labels | Introduced in Kata version |
|---|---|---|---|---|
| `kata_firecracker_api_server`: <br> Metrics related to the internal API server. | `GAUGE` | | <ul><li>`item`<ul><li>`process_startup_time_cpu_us`</li><li>`process_startup_time_us`</li><li>`sync_response_fails`</li><li>`sync_vmm_send_timeout_count`</li></ul></li><li>`sandbox_id`</li></ul> | 2.0.0 |
| `kata_firecracker_block`: <br> Block Device associated metrics. | `GAUGE` | | <ul><li>`item`<ul><li>`activate_fails`</li><li>`cfg_fails`</li><li>`event_fails`</li><li>`execute_fails`</li><li>`flush_count`</li><li>`invalid_reqs_count`</li><li>`no_avail_buffer`</li><li>`queue_event_count`</li><li>`rate_limiter_event_count`</li><li>`rate_limiter_throttled_events`</li><li>`read_bytes`</li><li>`read_count`</li><li>`update_count`</li><li>`update_fails`</li><li>`write_bytes`</li><li>`write_count`</li></ul></li><li>`sandbox_id`</li></ul> | 2.0.0 |
| `kata_firecracker_get_api_requests`: <br> Metrics specific to GET API Requests for counting user triggered actions and/or failures. | `GAUGE` | | <ul><li>`item`<ul><li>`instance_info_count`</li><li>`instance_info_fails`</li><li>`machine_cfg_count`</li><li>`machine_cfg_fails`</li></ul></li><li>`sandbox_id`</li></ul> | 2.0.0 |
| `kata_firecracker_i8042`: <br> Metrics specific to the i8042 device. | `GAUGE` | | <ul><li>`item`<ul><li>`error_count`</li><li>`missed_read_count`</li><li>`missed_write_count`</li><li>`read_count`</li><li>`reset_count`</li><li>`write_count`</li></ul></li><li>`sandbox_id`</li></ul> | 2.0.0 |
| `kata_firecracker_latencies_us`: <br> Performance metrics related for the moment only to snapshots. | `GAUGE` | | <ul><li>`item`<ul><li>`diff_create_snapshot`</li><li>`full_create_snapshot`</li><li>`load_snapshot`</li><li>`pause_vm`</li><li>`resume_vm`</li><li>`vmm_diff_create_snapshot`</li><li>`vmm_full_create_snapshot`</li><li>`vmm_load_snapshot`</li><li>`vmm_pause_vm`</li><li>`vmm_resume_vm`</li></ul></li><li>`sandbox_id`</li></ul> | 2.0.0 |
| `kata_firecracker_logger`: <br> Metrics for the logging subsystem. | `GAUGE` | | <ul><li>`item`<ul><li>`log_fails`</li><li>`metrics_fails`</li><li>`missed_log_count`</li><li>`missed_metrics_count`</li></ul></li><li>`sandbox_id`</li></ul> | 2.0.0 |
| `kata_firecracker_mmds`: <br> Metrics for the MMDS functionality. | `GAUGE` | | <ul><li>`item`<ul><li>`connections_created`</li><li>`connections_destroyed`</li><li>`rx_accepted`</li><li>`rx_accepted_err`</li><li>`rx_accepted_unusual`</li><li>`rx_bad_eth`</li><li>`rx_count`</li><li>`tx_bytes`</li><li>`tx_count`</li><li>`tx_errors`</li><li>`tx_frames`</li></ul></li><li>`sandbox_id`</li></ul> | 2.0.0 |
| `kata_firecracker_net`: <br> Network-related metrics. | `GAUGE` | | <ul><li>`item`<ul><li>`activate_fails`</li><li>`cfg_fails`</li><li>`event_fails`</li><li>`mac_address_updates`</li><li>`no_rx_avail_buffer`</li><li>`no_tx_avail_buffer`</li><li>`rx_bytes_count`</li><li>`rx_count`</li><li>`rx_event_rate_limiter_count`</li><li>`rx_fails`</li><li>`rx_packets_count`</li><li>`rx_partial_writes`</li><li>`rx_queue_event_count`</li><li>`rx_rate_limiter_throttled`</li><li>`rx_tap_event_count`</li><li>`tap_read_fails`</li><li>`tap_write_fails`</li><li>`tx_bytes_count`</li><li>`tx_count`</li><li>`tx_fails`</li><li>`tx_malformed_frames`</li><li>`tx_packets_count`</li><li>`tx_partial_reads`</li><li>`tx_queue_event_count`</li><li>`tx_rate_limiter_event_count`</li><li>`tx_rate_limiter_throttled`</li><li>`tx_spoofed_mac_count`</li></ul></li><li>`sandbox_id`</li></ul> | 2.0.0 |
| `kata_firecracker_patch_api_requests`: <br> Metrics specific to PATCH API Requests for counting user triggered actions and/or failures. | `GAUGE` | | <ul><li>`item`<ul><li>`drive_count`</li><li>`drive_fails`</li><li>`machine_cfg_count`</li><li>`machine_cfg_fails`</li><li>`network_count`</li><li>`network_fails`</li></ul></li><li>`sandbox_id`</li></ul> | 2.0.0 |
| `kata_firecracker_put_api_requests`: <br> Metrics specific to PUT API Requests for counting user triggered actions and/or failures. | `GAUGE` | | <ul><li>`item`<ul><li>`actions_count`</li><li>`actions_fails`</li><li>`boot_source_count`</li><li>`boot_source_fails`</li><li>`drive_count`</li><li>`drive_fails`</li><li>`logger_count`</li><li>`logger_fails`</li><li>`machine_cfg_count`</li><li>`machine_cfg_fails`</li><li>`metrics_count`</li><li>`metrics_fails`</li><li>`network_count`</li><li>`network_fails`</li></ul></li><li>`sandbox_id`</li></ul> | 2.0.0 |
| `kata_firecracker_rtc`: <br> Metrics specific to the RTC device. | `GAUGE` | | <ul><li>`item`<ul><li>`error_count`</li><li>`missed_read_count`</li><li>`missed_write_count`</li></ul></li><li>`sandbox_id`</li></ul> | 2.0.0 |
| `kata_firecracker_seccomp`: <br> Metrics for the seccomp filtering. | `GAUGE` | | <ul><li>`item`<ul><li>`num_faults`</li></ul></li><li>`sandbox_id`</li></ul> | 2.0.0 |
| `kata_firecracker_signals`: <br> Metrics related to signals. | `GAUGE` | | <ul><li>`item`<ul><li>`sigbus`</li><li>`sigsegv`</li></ul></li><li>`sandbox_id`</li></ul> | 2.0.0 |
| `kata_firecracker_uart`: <br> Metrics specific to the UART device. | `GAUGE` | | <ul><li>`item`<ul><li>`error_count`</li><li>`flush_count`</li><li>`missed_read_count`</li><li>`missed_write_count`</li><li>`read_count`</li><li>`write_count`</li></ul></li><li>`sandbox_id`</li></ul> | 2.0.0 |
| `kata_firecracker_vcpu`: <br> Metrics specific to VCPUs' mode of functioning. | `GAUGE` | | <ul><li>`item`<ul><li>`exit_io_in`</li><li>`exit_io_out`</li><li>`exit_mmio_read`</li><li>`exit_mmio_write`</li><li>`failures`</li><li>`filter_cpuid`</li></ul></li><li>`sandbox_id`</li></ul> | 2.0.0 |
| `kata_firecracker_vmm`: <br> Metrics specific to the machine manager as a whole. | `GAUGE` | | <ul><li>`item`<ul><li>`device_events`</li><li>`panic_count`</li></ul></li><li>`sandbox_id`</li></ul> | 2.0.0 |
| `kata_firecracker_vsock`: <br> Vsock-related metrics. | `GAUGE` | | <ul><li>`item`<ul><li>`activate_fails`</li><li>`cfg_fails`</li><li>`conn_event_fails`</li><li>`conns_added`</li><li>`conns_killed`</li><li>`conns_removed`</li><li>`ev_queue_event_fails`</li><li>`killq_resync`</li><li>`muxer_event_fails`</li><li>`rx_bytes_count`</li><li>`rx_packets_count`</li><li>`rx_queue_event_count`</li><li>`rx_queue_event_fails`</li><li>`rx_read_fails`</li><li>`tx_bytes_count`</li><li>`tx_flush_fails`</li><li>`tx_packets_count`</li><li>`tx_queue_event_count`</li><li>`tx_queue_event_fails`</li><li>`tx_write_fails`</li></ul></li><li>`sandbox_id`</li></ul> | 2.0.0 |
### Kata guest OS metrics
Guest OS's metrics in hypervisor.

View File

@@ -6,6 +6,7 @@
* [Advanced Topics](#advanced-topics)
## Kubernetes Integration
- [Run Kata containers with `crictl`](run-kata-with-crictl.md)
- [Run Kata Containers with Kubernetes](run-kata-with-k8s.md)
- [How to use Kata Containers and Containerd](containerd-kata.md)
- [How to use Kata Containers and CRI (containerd plugin) with Kubernetes](how-to-use-k8s-with-cri-containerd-and-kata.md)

View File

@@ -193,13 +193,16 @@ From Containerd v1.2.4 and Kata v1.6.0, there is a new runtime option supported,
```toml
[plugins.cri.containerd.runtimes.kata]
runtime_type = "io.containerd.kata.v2"
privileged_without_host_devices = true
[plugins.cri.containerd.runtimes.kata.options]
ConfigPath = "/etc/kata-containers/config.toml"
```
`privileged_without_host_devices` tells containerd that a privileged Kata container should not have direct access to all host devices. If unset, containerd will pass all host devices to Kata container, which may cause security issues.
This `ConfigPath` option is optional. If you do not specify it, shimv2 first tries to get the configuration file from the environment variable `KATA_CONF_FILE`. If neither are set, shimv2 will use the default Kata configuration file paths (`/etc/kata-containers/configuration.toml` and `/usr/share/defaults/kata-containers/configuration.toml`).
If you use Containerd older than v1.2.4 or a version of Kata older than v1.6.0 and also want to specify a configuration file, you can use the following workaround, since the shimv2 accepts an environment variable, `KATA_CONF_FILE` for the configuration file path. Then, you can create a
If you use Containerd older than v1.2.4 or a version of Kata older than v1.6.0 and also want to specify a configuration file, you can use the following workaround, since the shimv2 accepts an environment variable, `KATA_CONF_FILE` for the configuration file path. Then, you can create a
shell script with the following:
```bash

View File

@@ -0,0 +1,19 @@
{
"metadata": {
"name": "busybox-container",
"namespace": "test.kata"
},
"image": {
"image": "docker.io/library/busybox:latest"
},
"command": [
"sleep",
"9999"
],
"args": [],
"working_dir": "/",
"log_path": "",
"stdin": false,
"stdin_once": false,
"tty": false
}

View File

@@ -0,0 +1,20 @@
{
"metadata": {
"name": "busybox-pod",
"uid": "busybox-pod",
"namespace": "test.kata"
},
"hostname": "busybox_host",
"log_directory": "",
"dns_config": {
},
"port_mappings": [],
"resources": {
},
"labels": {
},
"annotations": {
},
"linux": {
}
}

View File

@@ -0,0 +1,39 @@
{
"metadata": {
"name": "redis-client",
"namespace": "test.kata"
},
"image": {
"image": "docker.io/library/redis:6.0.8-alpine"
},
"command": [
"tail", "-f", "/dev/null"
],
"envs": [
{
"key": "PATH",
"value": "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
},
{
"key": "TERM",
"value": "xterm"
}
],
"labels": {
"tier": "backend"
},
"annotations": {
"pod": "redis-client-pod"
},
"log_path": "",
"stdin": false,
"stdin_once": false,
"tty": false,
"linux": {
"resources": {
"memory_limit_in_bytes": 524288000
},
"security_context": {
}
}
}

View File

@@ -0,0 +1,28 @@
{
"metadata": {
"name": "redis-client-pod",
"uid": "test-redis-client-pod",
"namespace": "test.kata"
},
"hostname": "redis-client",
"log_directory": "",
"dns_config": {
"searches": [
"8.8.8.8"
]
},
"port_mappings": [],
"resources": {
"cpu": {
"limits": 1,
"requests": 1
}
},
"labels": {
"tier": "backend"
},
"annotations": {
},
"linux": {
}
}

View File

@@ -0,0 +1,36 @@
{
"metadata": {
"name": "redis-server",
"namespace": "test.kata"
},
"image": {
"image": "docker.io/library/redis:6.0.8-alpine"
},
"envs": [
{
"key": "PATH",
"value": "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
},
{
"key": "TERM",
"value": "xterm"
}
],
"labels": {
"tier": "backend"
},
"annotations": {
"pod": "redis-server-pod"
},
"log_path": "",
"stdin": false,
"stdin_once": false,
"tty": false,
"linux": {
"resources": {
"memory_limit_in_bytes": 524288000
},
"security_context": {
}
}
}

View File

@@ -0,0 +1,28 @@
{
"metadata": {
"name": "redis-server-pod",
"uid": "test-redis-server-pod",
"namespace": "test.kata"
},
"hostname": "redis-server",
"log_directory": "",
"dns_config": {
"searches": [
"8.8.8.8"
]
},
"port_mappings": [],
"resources": {
"cpu": {
"limits": 1,
"requests": 1
}
},
"labels": {
"tier": "backend"
},
"annotations": {
},
"linux": {
}
}

View File

@@ -29,7 +29,7 @@ to launch Kata Containers. For the previous version of Kata Containers, the Pods
> **Note:** For information about the supported versions of these components,
> see the Kata Containers
> [`versions.yaml`](https://github.com/kata-containers/runtime/blob/master/versions.yaml)
> [`versions.yaml`](../../versions.yaml)
> file.
## Install and configure containerd

View File

@@ -0,0 +1,150 @@
# Working with `crictl`
* [What's `cri-tools`](#whats-cri-tools)
* [Use `crictl` run Pods in Kata containers](#use-crictl-run-pods-in-kata-containers)
* [Run `busybox` Pod](#run-busybox-pod)
* [Run pod sandbox with config file](#run-pod-sandbox-with-config-file)
* [Create container in the pod sandbox with config file](#create-container-in-the-pod-sandbox-with-config-file)
* [Start container](#start-container)
* [Run `redis` Pod](#run-redis-pod)
* [Create `redis-server` Pod](#create-redis-server-pod)
* [Create `redis-client` Pod](#create-redis-client-pod)
* [Check `redis` server is working](#check-redis-server-is-working)
## What's `cri-tools`
[`cri-tools`](https://github.com/kubernetes-sigs/cri-tools) provides debugging and validation tools for Kubelet Container Runtime Interface (CRI).
`cri-tools` includes two tools: `crictl` and `critest`. `crictl` is the CLI for Kubelet CRI, in this document, we will show how to use `crictl` to run Pods in Kata containers.
> **Note:** `cri-tools` is only used for debugging and validation purpose, and don't use it to run production workloads.
> **Note:** For how to install and configure `cri-tools` with CRI runtimes like `containerd` or CRI-O, please also refer to other [howtos](./README.md).
## Use `crictl` run Pods in Kata containers
Sample config files in this document can be found [here](./data/crictl/).
### Run `busybox` Pod
#### Run pod sandbox with config file
```bash
$ sudo crictl runp -r kata sandbox_config.json
16a62b035940f9c7d79fd53e93902d15ad21f7f9b3735f1ac9f51d16539b836b
$ sudo crictl pods
POD ID CREATED STATE NAME NAMESPACE ATTEMPT
16a62b035940f 21 seconds ago Ready busybox-pod 0
```
#### Create container in the pod sandbox with config file
```bash
$ sudo crictl create 16a62b035940f container_config.json sandbox_config.json
e6ca0e0f7f532686236b8b1f549e4878e4fe32ea6b599a5d684faf168b429202
```
List containers and check the container is in `Created` state:
```bash
$ sudo crictl ps -a
CONTAINER IMAGE CREATED STATE NAME ATTEMPT POD ID
e6ca0e0f7f532 docker.io/library/busybox:latest 19 seconds ago Created busybox-container 0 16a62b035940f
```
#### Start container
```bash
$ sudo crictl start e6ca0e0f7f532
e6ca0e0f7f532
```
List containers and we can see that the container state has changed from `Created` to `Running`:
```bash
$ sudo crictl ps
CONTAINER IMAGE CREATED STATE NAME ATTEMPT POD ID
e6ca0e0f7f532 docker.io/library/busybox:latest About a minute ago Running busybox-container 0 16a62b035940f
```
And last we can `exec` into `busybox` container:
```bash
$ sudo crictl exec -it e6ca0e0f7f532 sh
```
And run commands in it:
```
/ # hostname
busybox_host
/ # id
uid=0(root) gid=0(root)
```
### Run `redis` Pod
In this example, we will create two Pods: one is for `redis` server, and another one is `redis` client.
#### Create `redis-server` Pod
It's also possible to start a container within a single command:
```bash
$ sudo crictl run -r kata redis_server_container_config.json redis_server_sandbox_config.json
bb36e05c599125842c5193909c4de186b1cee3818f5d17b951b6a0422681ce4b
```
#### Create `redis-client` Pod
```bash
$ sudo crictl run -r kata redis_client_container_config.json redis_client_sandbox_config.json
e344346c5414e3f51f97f20b2262e0b7afe457750e94dc0edb109b94622fc693
```
After the new container started, we can check the running Pods and containers.
```bash
$ sudo crictl pods
POD ID CREATED STATE NAME NAMESPACE ATTEMPT
469d08a7950e3 30 seconds ago Ready redis-client-pod 0
02c12fdb08219 About a minute ago Ready redis-server-pod 0
$ sudo crictl ps
CONTAINER IMAGE CREATED STATE NAME ATTEMPT POD ID
e344346c5414e docker.io/library/redis:6.0.8-alpine 35 seconds ago Running redis-client 0 469d08a7950e3
bb36e05c59912 docker.io/library/redis:6.0.8-alpine About a minute ago Running redis-server 0 02c12fdb08219
```
#### Check `redis` server is working
To connect to the `redis-server`. First we need to get the `redis-server`'s IP address.
```bash
$ server=$(sudo crictl inspectp 02c12fdb08219 | jq .status.network.ip | tr -d '"' )
$ echo $server
172.19.0.118
```
Launch `redis-cli` in the new Pod and connect server running at `172.19.0.118`.
```bash
$ sudo crictl exec -it e344346c5414e redis-cli -h $server
172.19.0.118:6379> get test-key
(nil)
172.19.0.118:6379> set test-key test-value
OK
172.19.0.118:6379> get test-key
"test-value"
```
Then back to `redis-server`, check if the `test-key` is set in server.
```bash
$ sudo crictl exec -it bb36e05c59912 redis-cli get test-key
"test-val"
```
Returned `test-val` is just set by `redis-cli` in `redis-client` Pod.

View File

@@ -1,109 +1,82 @@
# Kata Containers installation user guides
* [Prerequisites](#prerequisites)
* [Packaged installation methods](#packaged-installation-methods)
* [Supported Distributions](#supported-distributions)
* [Official packages](#official-packages)
* [Automatic Installation](#automatic-installation)
* [Snap Installation](#snap-installation)
* [Scripted Installation](#scripted-installation)
* [Manual Installation](#manual-installation)
* [Build from source installation](#build-from-source-installation)
* [Installing on a Cloud Service Platform](#installing-on-a-cloud-service-platform)
* [Further information](#further-information)
* [Kata Containers installation user guides](#kata-containers-installation-user-guides)
* [Prerequisites](#prerequisites)
* [Legacy installation](#legacy-installation)
* [Packaged installation methods](#packaged-installation-methods)
* [Official packages](#official-packages)
* [Snap Installation](#snap-installation)
* [Automatic Installation](#automatic-installation)
* [Manual Installation](#manual-installation)
* [Build from source installation](#build-from-source-installation)
* [Installing on a Cloud Service Platform](#installing-on-a-cloud-service-platform)
* [Further information](#further-information)
The following is an overview of the different installation methods available. All of these methods equally result
in a system configured to run Kata Containers.
## Prerequisites
Kata Containers requires nested virtualization or bare metal.
See the
[hardware requirements](../../src/runtime/README.md#hardware-requirements)
[hardware requirements](/src/runtime/README.md#hardware-requirements)
to see if your system is capable of running Kata Containers.
## Legacy installation
If you wish to install a legacy 1.x version of Kata Containers, see
[the Kata Containers 1.x installation documentation](https://github.com/kata-containers/documentation/tree/master/install/).
## Packaged installation methods
> **Notes:**
>
> - Packaged installation methods uses your distribution's native package format (such as RPM or DEB).
> - You are strongly encouraged to choose an installation method that provides
> automatic updates, to ensure you benefit from security updates and bug fixes.
| Installation method | Description | Distributions supported |
|------------------------------------------------------|-----------------------------------------------------------------------------------------|--------------------------------------|
| [Automatic](#automatic-installation) |Run a single command to install a full system |[see table](#supported-distributions) |
| [Using snap](#snap-installation) |Easy to install and automatic updates |any distro that supports snapd |
| [Using official distro packages](#official-packages) |Kata packages provided by Linux distributions official repositories |[see table](#supported-distributions) |
| [Scripted](#scripted-installation) |Generates an installation script which will result in a working system when executed |[see table](#supported-distributions) |
| [Manual](#manual-installation) |Allows the user to read a brief document and execute the specified commands step-by-step |[see table](#supported-distributions) |
| Installation method | Description | Automatic updates | Use case |
|------------------------------------------------------|---------------------------------------------------------------------|-------------------|----------------------------------------------------------|
| [Using official distro packages](#official-packages) | Kata packages provided by Linux distributions official repositories | yes | Recommended for most users. |
| [Using snap](#snap-installation) | Easy to install | yes | Good alternative to official distro packages. |
| [Automatic](#automatic-installation) | Run a single command to install a full system | **No!** | For those wanting the latest release quickly. |
| [Manual](#manual-installation) | Follow a guide step-by-step to install a working system | **No!** | For those who want the latest release with more control. |
| [Build from source](#build-from-source-installation) | Build the software components manually | **No!** | Power users and developers only. |
### Supported Distributions
Kata is packaged by the Kata community for:
|Distribution (link to installation guide) | Versions |
|-----------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------|
|[CentOS](centos-installation-guide.md) | 7 |
|[Debian](debian-installation-guide.md) | 9, 10 |
|[Fedora](fedora-installation-guide.md) | 28, 29, 30 |
|[openSUSE](opensuse-installation-guide.md) | [Leap](opensuse-leap-installation-guide.md) (15, 15.1)<br>[Tumbleweed](opensuse-tumbleweed-installation-guide.md) |
|[Red Hat Enterprise Linux (RHEL)](rhel-installation-guide.md) | 7 |
|[SUSE Linux Enterprise Server (SLES)](sles-installation-guide.md)| SLES 12 SP3 |
|[Ubuntu](ubuntu-installation-guide.md) | 16.04, 18.04 |
#### Official packages
### Official packages
Kata packages are provided by official distribution repositories for:
|Distribution (link to packages) | Versions |
|-----------------------------------------------------------------|------------|
|[openSUSE](https://software.opensuse.org/package/katacontainers) | Tumbleweed |
| Distribution (link to installation guide) | Minimum versions |
|----------------------------------------------------------|--------------------------------------------------------------------------------|
| [CentOS](centos-installation-guide.md) | 8 |
| [Fedora](fedora-installation-guide.md) | 32, Rawhide |
| [openSUSE](opensuse-installation-guide.md) | [Leap 15.1](opensuse-leap-15.1-installation-guide.md)<br>Leap 15.2, Tumbleweed |
| [SUSE Linux Enterprise (SLE)](sle-installation-guide.md) | SLE 15 SP1, 15 SP2 |
### Automatic Installation
[Use `kata-manager`](installing-with-kata-manager.md) to automatically install Kata packages.
> **Note::**
>
> All users are encouraged to uses the official distribution versions of Kata
> Containers unless they understand the implications of alternative methods.
### Snap Installation
> **Note:** The snap installation is available for all distributions which support `snapd`.
[![Get it from the Snap Store](https://snapcraft.io/static/images/badges/en/snap-store-black.svg)](https://snapcraft.io/kata-containers)
[Use snap](snap-installation-guide.md) to install Kata Containers from https://snapcraft.io.
### Scripted Installation
[Use `kata-doc-to-script`](installing-with-kata-doc-to-script.md) to generate installation scripts that can be reviewed before they are executed.
### Automatic Installation
[Use `kata-manager`](/utils/README.md) to automatically install a working Kata Containers system.
### Manual Installation
Manual installation instructions are available for [these distributions](#supported-distributions) and document how to:
1. Add the Kata Containers repository to your distro package manager, and import the packages signing key.
2. Install the Kata Containers packages.
3. Install a supported container manager.
4. Configure the container manager to use `kata-runtime` as the default OCI runtime. Or, for Kata Containers 1.5.0 or above, configure the
`io.containerd.kata.v2` to be the runtime shim (see [containerd runtime v2 (shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2)
and [How to use Kata Containers and CRI (containerd plugin) with Kubernetes](../how-to/how-to-use-k8s-with-cri-containerd-and-kata.md)).
> **Notes on upgrading**:
> - If you are installing Kata Containers on a system that already has Clear Containers or `runv` installed,
> first read [the upgrading document](../Upgrading.md).
> **Notes on releases**:
> - [This download server](http://download.opensuse.org/repositories/home:/katacontainers:/releases:/)
> hosts the Kata Containers packages built by OBS for all the supported architectures.
> Packages are available for the latest and stable releases (more info [here](../Stable-Branch-Strategy.md)).
>
> - The following guides apply to the latest Kata Containers release
> (a.k.a. `master` release).
>
> - When choosing a stable release, replace all `master` occurrences in the URLs
> with a `stable-x.y` version available on the [download server](http://download.opensuse.org/repositories/home:/katacontainers:/releases:/).
> **Notes on packages source verification**:
> - The Kata packages hosted on the download server are signed with GPG to ensure integrity and authenticity.
>
> - The public key used to sign packages is available [at this link](https://raw.githubusercontent.com/kata-containers/tests/master/data/rpm-signkey.pub); the fingerprint is `9FDC0CB6 3708CF80 3696E2DC D0B37B82 6063F3ED`.
>
> - Only trust the signing key and fingerprint listed in the previous bullet point. Do not disable GPG checks,
> otherwise packages source and authenticity is not guaranteed.
Follow the [containerd installation guide](container-manager/containerd/containerd-install.md).
## Build from source installation
> **Notes:**
>
> - Power users who decide to build from sources should be aware of the
@@ -115,6 +88,7 @@ who are comfortable building software from source to use the latest component
versions. This is not recommended for normal users.
## Installing on a Cloud Service Platform
* [Amazon Web Services (AWS)](aws-installation-guide.md)
* [Google Compute Engine (GCE)](gce-installation-guide.md)
* [Microsoft Azure](azure-installation-guide.md)
@@ -122,6 +96,7 @@ versions. This is not recommended for normal users.
* [VEXXHOST OpenStack Cloud](vexxhost-installation-guide.md)
## Further information
* The [upgrading document](../Upgrading.md).
* The [developer guide](../Developer-Guide.md).
* The [runtime documentation](../../src/runtime/README.md).

View File

@@ -15,4 +15,4 @@ Create a new virtual machine with:
## Set up with distribution specific quick start
Follow distribution specific [install guides](../install/README.md#supported-distributions).
Follow distribution specific [install guides](../install/README.md#packaged-installation-methods).

View File

@@ -4,14 +4,25 @@
```bash
$ source /etc/os-release
$ sudo yum -y install yum-utils
$ ARCH=$(arch)
$ BRANCH="${BRANCH:-master}"
$ sudo -E yum-config-manager --add-repo "http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/CentOS_${VERSION_ID}/home:katacontainers:releases:${ARCH}:${BRANCH}.repo"
$ sudo -E yum -y install kata-runtime kata-proxy kata-shim
$ cat <<EOF | sudo -E tee /etc/yum.repos.d/advanced-virt.repo
[advanced-virt]
name=Advanced Virtualization
baseurl=http://mirror.centos.org/\$contentdir/\$releasever/virt/\$basearch/advanced-virtualization
enabled=1
gpgcheck=1
skip_if_unavailable=1
EOF
$ cat <<EOF | sudo -E tee /etc/yum.repos.d/kata-containers.repo
[kata-containers]
name=Kata Containers
baseurl=http://mirror.centos.org/\$contentdir/\$releasever/virt/\$basearch/kata-containers
enabled=1
gpgcheck=1
skip_if_unavailable=1
EOF
$ sudo -E dnf module disable -y virt:rhel
$ sudo -E dnf install -y kata-runtime
```
2. Decide which container manager to use and select the corresponding link that follows:
- [Docker](docker/centos-docker-install.md)
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)

View File

@@ -0,0 +1,128 @@
# Install Kata Containers with containerd
> **Note:**
>
> - If Kata Containers and / or containerd are packaged by your distribution,
> we recommend you install these versions to ensure they are updated when
> new releases are available.
> **Warning:**
>
> - These instructions install the **newest** versions of Kata Containers and
> containerd from binary release packages. These versions may not have been
> tested with your distribution version.
>
> - Since your package manager is not being used, it is **your**
> responsibility to ensure these packages are kept up-to-date when new
> versions are released.
>
> - If you decide to proceed and install a Kata Containers release, you can
> still check for the latest version of Kata Containers by running
> `kata-runtime kata-check --only-list-releases`.
>
> - These instructions will not work for Fedora 31 and higher since those
> distribution versions only support cgroups version 2 by default. However,
> Kata Containers currently requires cgroups version 1 (on the host side). See
> https://github.com/kata-containers/kata-containers/issues/927 for further
> details.
## Install Kata Containers
> **Note:**
>
> If your distribution packages Kata Containers, we recommend you install that
> version. If it does not, or you wish to perform a manual installation,
> continue with the steps below.
- Download a release from:
- https://github.com/kata-containers/kata-containers/releases
Note that Kata Containers uses [semantic versioning](https://semver.org) so
you should install a version that does *not* include a dash ("-"), since this
indicates a pre-release version.
- Unpack the downloaded archive.
Kata Containers packages use a `/opt/kata/` prefix so either add that to
your `PATH`, or create symbolic links for the following commands. The
advantage of using symbolic links is that the `systemd(1)` configuration file
for containerd will not need to be modified to allow the daemon to find this
binary (see the [section on installing containerd](#install-containerd) below).
| Command | Description |
|-|-|
| `/opt/kata/bin/containerd-shim-kata-v2` | The main Kata 2.x binary |
| `/opt/kata/bin/kata-collect-data.sh` | Data collection script used for [raising issues](https://github.com/kata-containers/kata-containers/issues) |
| `/opt/kata/bin/kata-runtime` | Utility command |
- Check installation by showing version details:
```bash
$ kata-runtime --version
```
## Install containerd
> **Note:**
>
> If your distribution packages containerd, we recommend you install that
> version. If it does not, or you wish to perform a manual installation,
> continue with the steps below.
- Download a release from:
- https://github.com/containerd/containerd/releases
- Unpack the downloaded archive.
- Configure containerd
- Download the standard `systemd(1)` service file and install to
`/etc/systemd/system/`:
- https://raw.githubusercontent.com/containerd/containerd/master/containerd.service
> **Notes:**
>
> - You will need to reload the systemd configuration after installing this
> file.
>
> - If you have not created a symbolic link for
> `/opt/kata/bin/containerd-shim-kata-v2`, you will need to modify this
> file to ensure the containerd daemon's `PATH` contains `/opt/kata/`.
> See the `Environment=` command in `systemd.exec(5)` for further
> details.
- Add the Kata Containers configuration to the containerd configuration file:
```toml
[plugins]
[plugins.cri]
[plugins.cri.containerd]
default_runtime_name = "kata"
[plugins.cri.containerd.runtimes.kata]
runtime_type = "io.containerd.kata.v2"
```
> **Note:**
>
> The containerd daemon needs to be able to find the
> `containerd-shim-kata-v2` binary to allow Kata Containers to be created.
- Start the containerd service.
## Test the installation
You are now ready to run Kata Containers. You can perform a simple test by
running the following commands:
```bash
$ image="docker.io/library/busybox:latest"
$ sudo ctr image pull "$image"
$ sudo ctr run --runtime "io.containerd.kata.v2" --rm -t "$image" test-kata uname -r
```
The last command above shows details of the kernel version running inside the
container, which will likely be different to the host kernel version.

View File

@@ -1,22 +0,0 @@
# Install Kata Containers on Debian
1. Install the Kata Containers components with the following commands:
```bash
$ export DEBIAN_FRONTEND=noninteractive
$ ARCH=$(arch)
$ BRANCH="${BRANCH:-master}"
$ source /etc/os-release
$ [ "$ID" = debian ] && [ -z "$VERSION_ID" ] && echo >&2 "ERROR: Debian unstable not supported.
You can try stable packages here:
http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}" && exit 1
$ sudo sh -c "echo 'deb http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/Debian_${VERSION_ID}/ /' > /etc/apt/sources.list.d/kata-containers.list"
$ curl -sL http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/Debian_${VERSION_ID}/Release.key | sudo apt-key add -
$ sudo -E apt-get update
$ sudo -E apt-get -y install kata-runtime kata-proxy kata-shim
```
2. Decide which container manager to use and select the corresponding link that follows:
- [Docker](docker/debian-docker-install.md)
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)

View File

@@ -1,75 +0,0 @@
# Install Docker for Kata Containers on CentOS
> **Note:**
>
> - This guide assumes you have
> [already installed the Kata Containers packages](../centos-installation-guide.md).
1. Install the latest version of Docker with the following commands:
> **Notes:**
>
> - This step is only required if Docker is not installed on the system.
> - Docker version 18.09 [removed devicemapper support](https://github.com/kata-containers/documentation/issues/373).
> If you wish to use a block based backend, see the options listed on https://github.com/kata-containers/documentation/issues/407.
```bash
$ sudo yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo
$ sudo yum -y install docker-ce
```
For more information on installing Docker please refer to the
[Docker Guide](https://docs.docker.com/engine/installation/linux/centos).
2. Configure Docker to use Kata Containers by default with **ONE** of the following methods:
1. systemd (this is the default and is applied automatically if you select the
[automatic installation](../../install/README.md#automatic-installation) option)
```bash
$ sudo mkdir -p /etc/systemd/system/docker.service.d/
$ cat <<EOF | sudo tee /etc/systemd/system/docker.service.d/kata-containers.conf
[Service]
ExecStart=
ExecStart=/usr/bin/dockerd -D --add-runtime kata-runtime=/usr/bin/kata-runtime --default-runtime=kata-runtime
EOF
```
2. Docker `daemon.json`
Create docker configuration folder.
```
$ sudo mkdir -p /etc/docker
```
Add the following definitions to `/etc/docker/daemon.json`:
```json
{
"default-runtime": "kata-runtime",
"runtimes": {
"kata-runtime": {
"path": "/usr/bin/kata-runtime"
}
}
}
```
3. Restart the Docker systemd service with the following commands:
```bash
$ sudo systemctl daemon-reload
$ sudo systemctl restart docker
```
4. Run Kata Containers
You are now ready to run Kata Containers:
```bash
$ sudo docker run busybox uname -a
```
The previous command shows details of the kernel version running inside the
container, which is different to the host kernel version.

View File

@@ -1,103 +0,0 @@
# Install Docker for Kata Containers on Debian
> **Note:**
>
> - This guide assumes you have
> [already installed the Kata Containers packages](../debian-installation-guide.md).
> - This guide allows for installation with `systemd` or `sysVinit` init systems.
1. Install the latest version of Docker with the following commands:
> **Notes:**
>
> - This step is only required if Docker is not installed on the system.
> - Docker version 18.09 [removed devicemapper support](https://github.com/kata-containers/documentation/issues/373).
> If you wish to use a block based backend, see the options listed on https://github.com/kata-containers/documentation/issues/407.
```bash
$ sudo apt-get -y install apt-transport-https ca-certificates curl gnupg2 software-properties-common
$ curl -fsSL https://download.docker.com/linux/$(. /etc/os-release; echo "$ID")/gpg | sudo apt-key add -
$ sudo add-apt-repository "deb https://download.docker.com/linux/$(. /etc/os-release; echo "$ID") $(lsb_release -cs) stable"
$ sudo apt-get update
$ sudo -E apt-get -y install docker-ce
```
For more information on installing Docker please refer to the
[Docker Guide](https://docs.docker.com/engine/installation/linux/debian).
2. Configure Docker to use Kata Containers by default with **ONE** of the following methods:
a. `sysVinit`
- with `sysVinit`, docker config is stored in `/etc/default/docker`, edit the options similar to the following:
```sh
$ sudo sh -c "echo '# specify docker runtime for kata-containers
DOCKER_OPTS=\"-D --add-runtime kata-runtime=/usr/bin/kata-runtime --default-runtime=kata-runtime\"' >> /etc/default/docker"
```
b. systemd (this is the default and is applied automatically if you select the
[automatic installation](../../install/README.md#automatic-installation) option)
```bash
$ sudo mkdir -p /etc/systemd/system/docker.service.d/
$ cat <<EOF | sudo tee /etc/systemd/system/docker.service.d/kata-containers.conf
[Service]
ExecStart=
ExecStart=/usr/bin/dockerd -D --add-runtime kata-runtime=/usr/bin/kata-runtime --default-runtime=kata-runtime
EOF
```
c. Docker `daemon.json`
Create docker configuration folder.
```
$ sudo mkdir -p /etc/docker
```
Add the following definitions to `/etc/docker/daemon.json`:
```json
{
"default-runtime": "kata-runtime",
"runtimes": {
"kata-runtime": {
"path": "/usr/bin/kata-runtime"
}
}
}
```
3. Restart the Docker systemd service with one of the following (depending on init choice):
a. `sysVinit`
```sh
$ sudo /etc/init.d/docker stop
$ sudo /etc/init.d/docker start
```
To watch for errors:
```sh
$ tail -f /var/log/docker.log
```
b. systemd
```bash
$ sudo systemctl daemon-reload
$ sudo systemctl restart docker
```
4. Run Kata Containers
You are now ready to run Kata Containers:
```bash
$ sudo docker run busybox uname -a
```
The previous command shows details of the kernel version running inside the
container, which is different to the host kernel version.

View File

@@ -1,77 +0,0 @@
# Install Docker for Kata Containers on Fedora
> **Note:**
>
> - This guide assumes you have
> [already installed the Kata Containers packages](../fedora-installation-guide.md).
1. Install the latest version of Docker with the following commands:
> **Notes:**
>
> - This step is only required if Docker is not installed on the system.
> - Docker version 18.09 [removed devicemapper support](https://github.com/kata-containers/documentation/issues/373).
> If you wish to use a block based backend, see the options listed on https://github.com/kata-containers/documentation/issues/407.
```bash
$ source /etc/os-release
$ sudo dnf config-manager --add-repo https://download.docker.com/linux/fedora/docker-ce.repo
$ sudo dnf makecache
$ sudo dnf -y install docker-ce
```
For more information on installing Docker please refer to the
[Docker Guide](https://docs.docker.com/engine/installation/linux/fedora).
2. Configure Docker to use Kata Containers by default with **ONE** of the following methods:
1. systemd (this is the default and is applied automatically if you select the
[automatic installation](../../install/README.md#automatic-installation) option)
```bash
$ sudo mkdir -p /etc/systemd/system/docker.service.d/
$ cat <<EOF | sudo tee /etc/systemd/system/docker.service.d/kata-containers.conf
[Service]
ExecStart=
ExecStart=/usr/bin/dockerd -D --add-runtime kata-runtime=/usr/bin/kata-runtime --default-runtime=kata-runtime
EOF
```
2. Docker `daemon.json`
Create docker configuration folder.
```
$ sudo mkdir -p /etc/docker
```
Add the following definitions to `/etc/docker/daemon.json`:
```json
{
"default-runtime": "kata-runtime",
"runtimes": {
"kata-runtime": {
"path": "/usr/bin/kata-runtime"
}
}
}
```
3. Restart the Docker systemd service with the following commands:
```bash
$ sudo systemctl daemon-reload
$ sudo systemctl restart docker
```
4. Run Kata Containers
You are now ready to run Kata Containers:
```bash
$ sudo docker run busybox uname -a
```
The previous command shows details of the kernel version running inside the
container, which is different to the host kernel version.

View File

@@ -1,75 +0,0 @@
# Install Docker for Kata Containers on openSUSE
> **Note:**
>
> - This guide assumes you have
> [already installed the Kata Containers packages](../opensuse-installation-guide.md).
1. Install the latest version of Docker with the following commands:
> **Notes:**
>
> - This step is only required if Docker is not installed on the system.
> - Docker version 18.09 [removed devicemapper support](https://github.com/kata-containers/documentation/issues/373).
> If you wish to use a block based backend, see the options listed on https://github.com/kata-containers/documentation/issues/407.
```bash
$ sudo zypper -n install docker
```
For more information on installing Docker please refer to the
[Docker Guide](https://software.opensuse.org/package/docker).
2. Configure Docker to use Kata Containers by default with **ONE** of the following methods:
1. Specify the runtime options in `/etc/sysconfig/docker` (this is the default and is applied automatically if you select the
[automatic installation](../../install/README.md#automatic-installation) option)
```bash
$ DOCKER_SYSCONFIG=/etc/sysconfig/docker
# Add kata-runtime to the list of available runtimes, if not already listed
$ grep -qE "^ *DOCKER_OPTS=.+--add-runtime[= ] *kata-runtime" $DOCKER_SYSCONFIG || sudo -E sed -i -E "s|^( *DOCKER_OPTS=.+)\" *$|\1 --add-runtime kata-runtime=/usr/bin/kata-runtime\"|g" $DOCKER_SYSCONFIG
# If a current default runtime is specified, overwrite it with kata-runtime
$ sudo -E sed -i -E "s|^( *DOCKER_OPTS=.+--default-runtime[= ] *)[^ \"]+(.*\"$)|\1kata-runtime\2|g" $DOCKER_SYSCONFIG
# Add kata-runtime as default runtime, if no default runtime is specified
$ grep -qE "^ *DOCKER_OPTS=.+--default-runtime" $DOCKER_SYSCONFIG || sudo -E sed -i -E "s|^( *DOCKER_OPTS=.+)(\"$)|\1 --default-runtime=kata-runtime\2|g" $DOCKER_SYSCONFIG
```
2. Docker `daemon.json`
Create docker configuration folder.
```
$ sudo mkdir -p /etc/docker
```
Add the following definitions to `/etc/docker/daemon.json`:
```json
{
"default-runtime": "kata-runtime",
"runtimes": {
"kata-runtime": {
"path": "/usr/bin/kata-runtime"
}
}
}
```
3. Restart the Docker systemd service with the following commands:
```bash
$ sudo systemctl daemon-reload
$ sudo systemctl restart docker
```
4. Run Kata Containers
You are now ready to run Kata Containers:
```bash
$ sudo docker run busybox uname -a
```
The previous command shows details of the kernel version running inside the
container, which is different to the host kernel version.

View File

@@ -1,14 +0,0 @@
# Install Docker for Kata Containers on openSUSE Leap
Follow the instructions in the generic [openSUSE Docker install guide](opensuse-docker-install.md).
<!--
You can ignore the content of this comment.
(test code run by test-install-docs.sh to validate code blocks this document)
```bash
$ echo "NOTE: this document is just a link to the generic openSUSE install guide located at:
https://raw.githubusercontent.com/kata-containers/documentation/master/install/docker/opensuse-docker-install.md
Please download this file and run kata-doc-to-script.sh again."
```
-->

View File

@@ -1,14 +0,0 @@
# Install Docker for Kata Containers on openSUSE Tumbleweed
Follow the instructions in the generic [openSUSE Docker install guide](opensuse-docker-install.md).
<!--
You can ignore the content of this comment.
(test code run by test-install-docs.sh to validate code blocks this document)
```bash
$ echo "NOTE: this document is just a link to the generic openSUSE install guide located at:
https://raw.githubusercontent.com/kata-containers/documentation/master/install/docker/opensuse-docker-install.md
Please download this file and run kata-doc-to-script.sh again."
```
-->

View File

@@ -1,76 +0,0 @@
# Install Docker for Kata Containers on RHEL
> **Note:**
>
> - This guide assumes you have
> [already installed the Kata Containers packages](../rhel-installation-guide.md).
1. Install the latest version of Docker with the following commands:
> **Notes:**
>
> - This step is only required if Docker is not installed on the system.
> - Docker version 18.09 [removed devicemapper support](https://github.com/kata-containers/documentation/issues/373).
> If you wish to use a block based backend, see the options listed on https://github.com/kata-containers/documentation/issues/407.
```bash
$ export rhel_devtoolset_version="7"
$ sudo subscription-manager repos --enable=rhel-${rhel_devtoolset_version}-server-extras-rpms
$ sudo yum -y install docker && systemctl enable --now docker
```
For more information on installing Docker please refer to the
[Docker Guide](https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux_atomic_host/7/html-single/getting_started_with_containers/#getting_docker_in_rhel_7).
2. Configure Docker to use Kata Containers by default with **ONE** of the following methods:
1. systemd (this is the default and is applied automatically if you select the
[automatic installation](../../install/README.md#automatic-installation) option)
```bash
$ sudo mkdir -p /etc/systemd/system/docker.service.d/
$ cat <<EOF | sudo tee /etc/systemd/system/docker.service.d/kata-containers.conf
[Service]
ExecStart=
ExecStart=/usr/bin/dockerd -D --add-runtime kata-runtime=/usr/bin/kata-runtime --default-runtime=kata-runtime
EOF
```
2. Docker `daemon.json`
Create docker configuration folder.
```
$ sudo mkdir -p /etc/docker
```
Add the following definitions to `/etc/docker/daemon.json`:
```json
{
"default-runtime": "kata-runtime",
"runtimes": {
"kata-runtime": {
"path": "/usr/bin/kata-runtime"
}
}
}
```
3. Restart the Docker systemd service with the following commands:
```bash
$ sudo systemctl daemon-reload
$ sudo systemctl restart docker
```
4. Run Kata Containers
You are now ready to run Kata Containers:
```bash
$ sudo docker run busybox uname -a
```
The previous command shows details of the kernel version running inside the
container, which is different to the host kernel version.

View File

@@ -1,74 +0,0 @@
# Install Docker for Kata Containers on SLES
> **Note:**
>
> - This guide assumes you have
> [already installed the Kata Containers packages](../sles-installation-guide.md).
1. Install the latest version of Docker with the following commands:
> **Notes:**
>
> - This step is only required if Docker is not installed on the system.
> - Docker version 18.09 [removed devicemapper support](https://github.com/kata-containers/documentation/issues/373).
> If you wish to use a block based backend, see the options listed on https://github.com/kata-containers/documentation/issues/407.
```bash
$ sudo zypper -n install docker
```
For more information on installing Docker please refer to the
[Docker Guide](https://www.suse.com/documentation/sles-12/singlehtml/book_sles_docker/book_sles_docker.html).
2. Configure Docker to use Kata Containers by default with **ONE** of the following methods:
1. systemd (this is the default and is applied automatically if you select the
[automatic installation](../../install/README.md#automatic-installation) option)
```bash
$ sudo mkdir -p /etc/systemd/system/docker.service.d/
$ cat <<EOF | sudo tee /etc/systemd/system/docker.service.d/kata-containers.conf
[Service]
ExecStart=
ExecStart=/usr/bin/dockerd -D --add-runtime kata-runtime=/usr/bin/kata-runtime --default-runtime=kata-runtime
EOF
```
2. Docker `daemon.json`
Create docker configuration folder.
```
$ sudo mkdir -p /etc/docker
```
Add the following definitions to `/etc/docker/daemon.json`:
```json
{
"default-runtime": "kata-runtime",
"runtimes": {
"kata-runtime": {
"path": "/usr/bin/kata-runtime"
}
}
}
```
3. Restart the Docker systemd service with the following commands:
```bash
$ sudo systemctl daemon-reload
$ sudo systemctl restart docker
```
4. Run Kata Containers
You are now ready to run Kata Containers:
```bash
$ sudo docker run busybox uname -a
```
The previous command shows details of the kernel version running inside the
container, which is different to the host kernel version.

View File

@@ -1,79 +0,0 @@
# Install Docker for Kata Containers on Ubuntu
> **Note:**
>
> - This guide assumes you have
> [already installed the Kata Containers packages](../ubuntu-installation-guide.md).
1. Install the latest version of Docker with the following commands:
> **Notes:**
>
> - This step is only required if Docker is not installed on the system.
> - Docker version 18.09 [removed devicemapper support](https://github.com/kata-containers/documentation/issues/373).
> If you wish to use a block based backend, see the options listed on https://github.com/kata-containers/documentation/issues/407.
```bash
$ sudo -E apt-get -y install apt-transport-https ca-certificates software-properties-common
$ curl -sL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
$ arch=$(dpkg --print-architecture)
$ sudo -E add-apt-repository "deb [arch=${arch}] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
$ sudo -E apt-get update
$ sudo -E apt-get -y install docker-ce
```
For more information on installing Docker please refer to the
[Docker Guide](https://docs.docker.com/engine/installation/linux/ubuntu).
2. Configure Docker to use Kata Containers by default with **ONE** of the following methods:
1. systemd (this is the default and is applied automatically if you select the
[automatic installation](../../install/README.md#automatic-installation) option)
```bash
$ sudo mkdir -p /etc/systemd/system/docker.service.d/
$ cat <<EOF | sudo tee /etc/systemd/system/docker.service.d/kata-containers.conf
[Service]
ExecStart=
ExecStart=/usr/bin/dockerd -D --add-runtime kata-runtime=/usr/bin/kata-runtime --default-runtime=kata-runtime
EOF
```
2. Docker `daemon.json`
Create docker configuration folder.
```
$ sudo mkdir -p /etc/docker
```
Add the following definitions to `/etc/docker/daemon.json`:
```json
{
"default-runtime": "kata-runtime",
"runtimes": {
"kata-runtime": {
"path": "/usr/bin/kata-runtime"
}
}
}
```
3. Restart the Docker systemd service with the following commands:
```bash
$ sudo systemctl daemon-reload
$ sudo systemctl restart docker
```
4. Run Kata Containers
You are now ready to run Kata Containers:
```bash
$ sudo docker run busybox uname -a
```
The previous command shows details of the kernel version running inside the
container, which is different to the host kernel version.

View File

@@ -3,15 +3,8 @@
1. Install the Kata Containers components with the following commands:
```bash
$ source /etc/os-release
$ ARCH=$(arch)
$ BRANCH="${BRANCH:-master}"
$ sudo dnf -y install dnf-plugins-core
$ sudo -E dnf config-manager --add-repo "http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/Fedora_${VERSION_ID}/home:katacontainers:releases:${ARCH}:${BRANCH}.repo"
$ sudo -E dnf -y install kata-runtime kata-proxy kata-shim
$ sudo -E dnf -y install kata-runtime
```
2. Decide which container manager to use and select the corresponding link that follows:
- [Docker](docker/fedora-docker-install.md)
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)

View File

@@ -1,47 +0,0 @@
# Installing with `kata-doc-to-script`
* [Introduction](#introduction)
* [Packages Installation](#packages-installation)
* [Docker Installation and Setup](#docker-installation-and-setup)
## Introduction
Use [these installation instructions](README.md#supported-distributions) together with
[`kata-doc-to-script`](https://github.com/kata-containers/tests/blob/master/.ci/kata-doc-to-script.sh)
to generate installation bash scripts.
> Note:
> - Only the Docker container manager installation can be scripted. For other setups you must
> install and configure the container manager manually.
## Packages Installation
```bash
$ source /etc/os-release
$ curl -fsSL -O https://raw.githubusercontent.com/kata-containers/documentation/master/install/${ID}-installation-guide.md
$ bash -c "$(curl -fsSL https://raw.githubusercontent.com/kata-containers/tests/master/.ci/kata-doc-to-script.sh) ${ID}-installation-guide.md ${ID}-install.sh"
```
For example, if your distribution is CentOS, the previous example will generate a runnable shell script called `centos-install.sh`.
To proceed with the installation, run:
```bash
$ source /etc/os-release
$ bash "./${ID}-install.sh"
```
## Docker Installation and Setup
```bash
$ source /etc/os-release
$ curl -fsSL -O https://raw.githubusercontent.com/kata-containers/documentation/master/install/docker/${ID}-docker-install.md
$ bash -c "$(curl -fsSL https://raw.githubusercontent.com/kata-containers/tests/master/.ci/kata-doc-to-script.sh) ${ID}-docker-install.md ${ID}-docker-install.sh"
```
For example, if your distribution is CentOS, this will generate a runnable shell script called `centos-docker-install.sh`.
To proceed with the Docker installation, run:
```bash
$ source /etc/os-release
$ bash "./${ID}-docker-install.sh"
```

View File

@@ -1,47 +0,0 @@
# Installing with `kata-manager`
* [Introduction](#introduction)
* [Full Installation](#full-installation)
* [Install the Kata packages only](#install-the-kata-packages-only)
* [Further Information](#further-information)
## Introduction
`kata-manager` automates the Kata Containers installation procedure documented for [these Linux distributions](README.md#supported-distributions).
> **Note**:
> - `kata-manager` requires `curl` and `sudo` installed on your system.
>
> - Full installation mode is only available for Docker container manager. For other setups, you
> can still use `kata-manager` to [install Kata package](#install-the-kata-packages-only), and then setup your container manager manually.
>
> - You can run `kata-manager` in dry run mode by passing the `-n` flag. Dry run mode allows you to review the
> commands that `kata-manager` would run, without doing any change to your system.
## Full Installation
This command does the following:
1. Installs Kata Containers packages
2. Installs Docker
3. Configure Docker to use the Kata OCI runtime by default
```bash
$ bash -c "$(curl -fsSL https://raw.githubusercontent.com/kata-containers/tests/master/cmd/kata-manager/kata-manager.sh) install-docker-system"
```
<!--
You can ignore the content of this comment.
(test code run by test-install-docs.sh to validate code blocks this document)
```bash
$ bash -c "$(curl -fsSL https://raw.githubusercontent.com/kata-containers/tests/master/cmd/kata-manager/kata-manager.sh) remove-packages"
```
-->
## Install the Kata packages only
Use the following command to only install Kata Containers packages.
```bash
$ bash -c "$(curl -fsSL https://raw.githubusercontent.com/kata-containers/tests/master/cmd/kata-manager/kata-manager.sh) install-packages"
```
## Further Information
For more information on what `kata-manager` can do, refer to the [`kata-manager` page](https://github.com/kata-containers/tests/blob/master/cmd/kata-manager).

View File

@@ -3,21 +3,8 @@
1. Install the Kata Containers components with the following commands:
```bash
$ source /etc/os-release
$ DISTRO_REPO=$(sed "s/ /_/g" <<< "$NAME")
$ [ -n "$VERSION" ] && DISTRO_REPO+="_${VERSION}"
$ DISTRO_REPO=$(echo $DISTRO_REPO | tr -d ' ')
$ ARCH=$(arch)
$ BRANCH="${BRANCH:-master}"
$ REPO_ALIAS="kata-${BRANCH}"
$ PUBKEY="/tmp/rpm-signkey.pub"
$ curl -SsL -o "$PUBKEY" "https://raw.githubusercontent.com/kata-containers/tests/master/data/rpm-signkey.pub"
$ sudo -E rpm --import "$PUBKEY"
$ zypper lr "$REPO_ALIAS" && sudo -E zypper -n removerepo "$REPO_ALIAS"
$ sudo -E zypper addrepo --refresh "http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/${DISTRO_REPO}/" "$REPO_ALIAS"
$ sudo -E zypper -n install kata-runtime
$ sudo -E zypper -n install katacontainers
```
2. Decide which container manager to use and select the corresponding link that follows:
- [Docker](docker/opensuse-docker-install.md)
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)

View File

@@ -0,0 +1,11 @@
# Install Kata Containers on openSUSE Leap 15.1
1. Install the Kata Containers components with the following commands:
```bash
$ sudo -E zypper addrepo --refresh "https://download.opensuse.org/repositories/devel:/kubic/openSUSE_Leap_15.1/devel:kubic.repo"
$ sudo -E zypper -n --gpg-auto-import-keys install katacontainers
```
2. Decide which container manager to use and select the corresponding link that follows:
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)

View File

@@ -1,19 +0,0 @@
# Install Kata Containers on openSUSE Leap
1. Install Kata Containers on openSUSE by following the instructions in the
[openSUSE install guide](opensuse-installation-guide.md).
<!--
You can ignore the content of this comment.
(test code run by test-install-docs.sh to validate code blocks this document)
```bash
$ echo "NOTE: this document is just a link to the generic openSUSE install guide located at:
https://raw.githubusercontent.com/kata-containers/documentation/master/install/opensuse-installation-guide.md
Please download this file and run kata-doc-to-script.sh again."
```
-->
2. Decide which container manager to use and select the corresponding link that follows:
- [Docker](docker/opensuse-leap-docker-install.md)
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)

View File

@@ -1,19 +0,0 @@
# Install Kata Containers on openSUSE Tumbleweed
1. Install Kata Containers on openSUSE by following the instructions in the
[openSUSE install guide](opensuse-installation-guide.md).
<!--
You can ignore the content of this comment.
(test code run by test-install-docs.sh to validate code blocks this document)
```bash
$ echo "NOTE: this document is just a link to the generic openSUSE install guide located at:
https://raw.githubusercontent.com/kata-containers/documentation/master/install/opensuse-installation-guide.md
Please download this file and run kata-doc-to-script.sh again."
```
-->
2. Decide which container manager to use and select the corresponding link that follows:
- [Docker](docker/opensuse-tumbleweed-docker-install.md)
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)

View File

@@ -1,16 +0,0 @@
# Install Kata Containers on RHEL
1. Install the Kata Containers components with the following commands:
```bash
$ source /etc/os-release
$ ARCH=$(arch)
$ BRANCH="${BRANCH:-master}"
$ sudo -E yum-config-manager --add-repo "http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/RHEL_${VERSION_ID}/home:katacontainers:releases:${ARCH}:${BRANCH}.repo"
$ sudo -E yum -y install kata-runtime kata-proxy kata-shim
```
2. Decide which container manager to use and select the corresponding link that follows:
- [Docker](docker/rhel-docker-install.md)
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)

View File

@@ -0,0 +1,13 @@
# Install Kata Containers on SLE
1. Install the Kata Containers components with the following commands:
```bash
$ source /etc/os-release
$ DISTRO_VERSION=$(sed "s/-/_/g" <<< "$VERSION")
$ sudo -E zypper addrepo --refresh "https://download.opensuse.org/repositories/devel:/kubic/SLE_${DISTRO_VERSION}_Backports/devel:kubic.repo"
$ sudo -E zypper -n --gpg-auto-import-keys install katacontainers
```
2. Decide which container manager to use and select the corresponding link that follows:
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)

View File

@@ -1,15 +0,0 @@
# Install Kata Containers on SLES
1. Install the Kata Containers components with the following commands:
```bash
$ ARCH=$(arch)
$ BRANCH="${BRANCH:-master}"
$ sudo -E zypper addrepo "http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/SLE_15_SP1/home:katacontainers:releases:${ARCH}:${BRANCH}.repo"
$ sudo -E zypper -n --no-gpg-checks install kata-runtime kata-proxy kata-shim
```
2. Decide which container manager to use and select the corresponding link that follows:
- [Docker](docker/sles-docker-install.md)
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)

View File

@@ -12,6 +12,4 @@
```
2. Decide which container manager to use and select the corresponding link that follows:
- [Docker](docker/ubuntu-docker-install.md)
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)

View File

@@ -13,4 +13,4 @@ with v2). The recommended machine type for container workloads is `v2-highcpu`
## Set up with distribution specific quick start
Follow distribution specific [install guides](../install/README.md#supported-distributions).
Follow distribution specific [install guides](../install/README.md#packaged-installation-methods).

View File

@@ -182,12 +182,6 @@ impl<D> RuntimeLevelFilter<D> {
level: Mutex::new(level),
}
}
fn set_level(&self, level: slog::Level) {
let mut log_level = self.level.lock().unwrap();
*log_level = level;
}
}
impl<D> Drain for RuntimeLevelFilter<D>

View File

@@ -84,12 +84,12 @@ then a new configuration file can be [created](#configure-kata-containers)
and [configured][7].
[1]: https://docs.snapcraft.io/snaps/intro
[2]: ../../../docs/design/architecture.md#root-filesystem-image
[2]: ../docs/design/architecture.md#root-filesystem-image
[3]: https://docs.snapcraft.io/reference/confinement#classic
[4]: https://github.com/kata-containers/runtime#configuration
[5]: https://docs.docker.com/engine/reference/commandline/dockerd
[6]: ../../../docs/install/docker/ubuntu-docker-install.md
[7]: ../../../docs/Developer-Guide.md#configure-to-use-initrd-or-rootfs-image
[6]: ../docs/install/docker/ubuntu-docker-install.md
[7]: ../docs/Developer-Guide.md#configure-to-use-initrd-or-rootfs-image
[8]: https://snapcraft.io/kata-containers
[9]: ../../../docs/Developer-Guide.md#run-kata-containers-with-docker
[10]: ../../../docs/Developer-Guide.md#run-kata-containers-with-kubernetes
[9]: ../docs/Developer-Guide.md#run-kata-containers-with-docker
[10]: ../docs/Developer-Guide.md#run-kata-containers-with-kubernetes

View File

@@ -224,7 +224,7 @@ parts:
after: [godeps, runtime]
build-packages:
- gcc
- python
- python3
- zlib1g-dev
- libcap-ng-dev
- libglib2.0-dev
@@ -284,7 +284,7 @@ parts:
done
# Only x86_64 supports libpmem
[ "$(uname -m)" = "x86_64" ] && sudo apt-get --no-install-recommends install -y apt-utils ca-certificates libpmem-dev
[ "$(uname -m)" = "x86_64" ] && sudo apt-get --no-install-recommends install -y apt-utils ca-certificates libpmem-dev libseccomp-dev
configure_hypervisor=${kata_dir}/tools/packaging/scripts/configure-hypervisor.sh
chmod +x ${configure_hypervisor}

76
src/agent/Cargo.lock generated
View File

@@ -100,7 +100,7 @@ checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
[[package]]
name = "cgroups"
version = "0.1.1-alpha.0"
source = "git+https://github.com/kata-containers/cgroups-rs?tag=0.1.1#3852d7c1805499cd6a0e37ec400d81a7085d91a7"
source = "git+https://github.com/kata-containers/cgroups-rs?branch=stable-0.1.1#8717524f2c95aacd30768b6f0f7d7f2fddef5cac"
dependencies = [
"libc",
"log",
@@ -119,6 +119,15 @@ dependencies = [
"time",
]
[[package]]
name = "cloudabi"
version = "0.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
dependencies = [
"bitflags",
]
[[package]]
name = "constant_time_eq"
version = "0.1.5"
@@ -305,6 +314,15 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3286f09f7d4926fc486334f28d8d2e6ebe4f7f9994494b6dab27ddfad2c9b11b"
[[package]]
name = "lock_api"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4da24a77a3d8a6d4862d95f72e6fdb9c09a643ecdb402d754004a557f2bec75"
dependencies = [
"scopeguard",
]
[[package]]
name = "log"
version = "0.4.8"
@@ -416,6 +434,30 @@ dependencies = [
"serde_json",
]
[[package]]
name = "parking_lot"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3a704eb390aafdc107b0e392f56a82b668e3a71366993b5340f5833fd62505e"
dependencies = [
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d58c7c768d4ba344e3e8d72518ac13e259d7c7ade24167003b8488e10b6740a3"
dependencies = [
"cfg-if",
"cloudabi",
"libc",
"redox_syscall",
"smallvec",
"winapi",
]
[[package]]
name = "path-absolutize"
version = "1.2.1"
@@ -448,7 +490,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "059a34f111a9dee2ce1ac2826a68b24601c4298cfeb1a587c3cb493d5ab46f52"
dependencies = [
"libc",
"nix 0.17.0",
"nix 0.18.0",
]
[[package]]
@@ -659,8 +701,10 @@ dependencies = [
"serde",
"serde_derive",
"serde_json",
"serial_test",
"slog",
"slog-scope",
"tempfile",
]
[[package]]
@@ -712,6 +756,28 @@ dependencies = [
"serde",
]
[[package]]
name = "serial_test"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b15f74add9a9d4a3eb2bf739c9a427d266d3895b53d992c3a7c234fec2ff1f1"
dependencies = [
"lazy_static",
"parking_lot",
"serial_test_derive",
]
[[package]]
name = "serial_test_derive"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "65f59259be9fc1bf677d06cc1456e97756004a1a5a577480f71430bd7c17ba33"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "signal-hook"
version = "0.1.15"
@@ -779,6 +845,12 @@ dependencies = [
"slog",
]
[[package]]
name = "smallvec"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fbee7696b84bbf3d89a1c2eccff0850e3047ed46bfcd2e92c29a2d074d57e252"
[[package]]
name = "spin"
version = "0.5.2"

View File

@@ -32,7 +32,7 @@ tempfile = "3.1.0"
prometheus = { version = "0.9.0", features = ["process"] }
procfs = "0.7.9"
anyhow = "1.0.32"
cgroups = { git = "https://github.com/kata-containers/cgroups-rs", tag = "0.1.1"}
cgroups = { git = "https://github.com/kata-containers/cgroups-rs", branch = "stable-0.1.1"}
[workspace]
members = [

View File

@@ -42,6 +42,15 @@ endif
ifeq ($(ARCH), ppc64le)
override ARCH = powerpc64le
override LIBC = gnu
$(warning "WARNING: powerpc64le-unknown-linux-musl target is unavailable")
endif
EXTRA_RUSTFLAGS :=
ifeq ($(ARCH), aarch64)
override EXTRA_RUSTFLAGS = -C link-arg=-lgcc
$(warning "WARNING: aarch64-musl needs extra symbols from libgcc")
endif
TRIPLE = $(ARCH)-unknown-linux-$(LIBC)
@@ -104,10 +113,10 @@ default: $(TARGET) show-header
$(TARGET): $(GENERATED_CODE) $(TARGET_PATH)
$(TARGET_PATH): $(SOURCES) | show-summary
@cargo build --target $(TRIPLE) --$(BUILD_TYPE)
@RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE)
optimize: $(SOURCES) | show-summary show-header
@RUSTFLAGS='-C link-arg=-s' cargo build --target $(TRIPLE) --$(BUILD_TYPE)
@RUSTFLAGS="-C link-arg=-s $(EXTRA_RUSTFLAGS) --deny-warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE)
show-header:
@printf "%s - version %s (commit %s)\n\n" "$(TARGET)" "$(VERSION)" "$(COMMIT_MSG)"

View File

@@ -1,10 +1,10 @@
# Kata Agent in Rust
This is a rust version of the [`kata-agent`](https://github.com/kata-containers/kata-agent).
This is a rust version of the [`kata-agent`](https://github.com/kata-containers/agent).
In Denver PTG, [we discussed about re-writing agent in rust](https://etherpad.openstack.org/p/katacontainers-2019-ptg-denver-agenda):
> In general, we all think about re-write agent in rust to reduce the footprint of agent. Moreover, Eric mentioned the possibility to stop using gRPC, which may have some impact on footprint. We may begin to do some PoC to show how much we could save by re-writing agent in rust.
> In general, we all think about re-write agent in rust to reduce the footprint of agent. Moreover, Eric mentioned the possibility to stop using gRPC, which may have some impact on footprint. We may begin to do some POC to show how much we could save by re-writing agent in rust.
After that, we drafted the initial code here, and any contributions are welcome.
@@ -18,7 +18,7 @@ After that, we drafted the initial code here, and any contributions are welcome.
| exec/list process | :white_check_mark: |
| I/O stream | :white_check_mark: |
| Cgroups | :white_check_mark: |
| Capabilities, rlimit, readonly path, masked path, users | :white_check_mark: |
| Capabilities, `rlimit`, readonly path, masked path, users | :white_check_mark: |
| container stats (`stats_container`) | :white_check_mark: |
| Hooks | :white_check_mark: |
| **Agent Features & APIs** |
@@ -28,7 +28,7 @@ After that, we drafted the initial code here, and any contributions are welcome.
| network, interface/routes (`update_container`) | :white_check_mark: |
| File transfer API (`copy_file`) | :white_check_mark: |
| Device APIs (`reseed_random_device`, , `online_cpu_memory`, `mem_hotplug_probe`, `set_guet_data_time`) | :white_check_mark: |
| vsock support | :white_check_mark: |
| VSOCK support | :white_check_mark: |
| virtio-serial support | :heavy_multiplication_x: |
| OCI Spec validator | :white_check_mark: |
| **Infrastructures**|
@@ -39,25 +39,24 @@ After that, we drafted the initial code here, and any contributions are welcome.
## Getting Started
### Build from Source
The rust-agent need to be built with rust nightly, and static linked with musl.
The rust-agent need to be built with rust newer than 1.37, and static linked with `musl`.
```bash
rustup target add x86_64-unknown-linux-musl
git submodule update --init --recursive
sudo ln -s /usr/bin/g++ /bin/musl-g++
cargo build --target x86_64-unknown-linux-musl --release
```
## Run Kata CI with rust-agent
* Firstly, install kata as noted by ["how to install Kata"](../../docs/install/README.md)
* Secondly, build your own kata initrd/image following the steps in ["how to build your own initrd/image"](../../docs/Developer-Guide.md#create-and-install-rootfs-and-initrd-image).
* Firstly, install Kata as noted by ["how to install Kata"](../../docs/install/README.md)
* Secondly, build your own Kata initrd/image following the steps in ["how to build your own initrd/image"](../../docs/Developer-Guide.md#create-and-install-rootfs-and-initrd-image).
notes: Please use your rust agent instead of the go agent when building your initrd/image.
* Clone the kata ci test cases from: https://github.com/kata-containers/tests.git, and then run the cri test with:
* Clone the Kata CI test cases from: https://github.com/kata-containers/tests.git, and then run the CRI test with:
```bash
$sudo -E PATH=$PATH -E GOPATH=$GOPATH integration/containerd/shimv2/shimv2-tests.sh
```
## Mini Benchmark
The memory of 'RssAnon' consumed by the go-agent and rust-agent as below:
The memory of `RssAnon` consumed by the go-agent and rust-agent as below:
go-agent: about 11M
rust-agent: about 1.1M

View File

@@ -653,7 +653,7 @@ pub struct WindowsNetwork {
#[serde(
default,
skip_serializing_if = "String::is_empty",
rename = "nwtworkSharedContainerName"
rename = "networkSharedContainerName"
)]
pub network_shared_container_name: String,
}

View File

@@ -29,13 +29,6 @@ impl Display for Error {
}
impl error::Error for Error {
fn description(&self) -> &str {
match *self {
Error::Io(ref e) => e.description(),
Error::Json(ref e) => e.description(),
}
}
fn cause(&self) -> Option<&dyn error::Error> {
match *self {
Error::Io(ref e) => Some(e),

View File

@@ -6,11 +6,11 @@
pub mod agent;
pub mod agent_ttrpc;
pub mod empty;
pub mod health;
pub mod health_ttrpc;
pub mod oci;
pub mod types;
pub mod empty;
#[cfg(test)]
mod tests {

View File

@@ -24,4 +24,8 @@ regex = "1.1"
path-absolutize = "1.2.0"
dirs = "3.0.1"
anyhow = "1.0.32"
cgroups = { git = "https://github.com/kata-containers/cgroups-rs", tag = "0.1.1"}
cgroups = { git = "https://github.com/kata-containers/cgroups-rs", branch = "stable-0.1.1"}
tempfile = "3.1.0"
[dev-dependencies]
serial_test = "0.5.0"

View File

@@ -126,13 +126,12 @@ pub fn drop_privileges(cfd_log: RawFd, caps: &LinuxCapabilities) -> Result<()> {
)
.map_err(|e| anyhow!(e.to_string()))?;
if let Err(_) = caps::set(
let _ = caps::set(
None,
CapSet::Ambient,
to_capshashset(cfd_log, caps.ambient.as_ref()),
) {
log_child!(cfd_log, "failed to set ambient capability");
}
)
.map_err(|_| log_child!(cfd_log, "failed to set ambient capability"));
Ok(())
}

View File

@@ -3,7 +3,7 @@
// SPDX-License-Identifier: Apache-2.0
//
use cgroups::blkio::{BlkIo, BlkIoController, BlkIoData, IoService};
use cgroups::blkio::{BlkIoController, BlkIoData, IoService};
use cgroups::cpu::CpuController;
use cgroups::cpuacct::CpuAcctController;
use cgroups::cpuset::CpuSetController;
@@ -15,18 +15,18 @@ use cgroups::memory::MemController;
use cgroups::pid::PidController;
use cgroups::{
BlkIoDeviceResource, BlkIoDeviceThrottleResource, Cgroup, CgroupPid, Controller,
DeviceResource, DeviceResources, HugePageResource, MaxValue, NetworkPriority,
DeviceResource, HugePageResource, MaxValue, NetworkPriority,
};
use crate::cgroups::Manager as CgroupManager;
use crate::container::DEFAULT_DEVICES;
use anyhow::{anyhow, Context, Error, Result};
use anyhow::{anyhow, Context, Result};
use lazy_static;
use libc::{self, pid_t};
use nix::errno::Errno;
use oci::{
LinuxBlockIO, LinuxCPU, LinuxDevice, LinuxDeviceCgroup, LinuxHugepageLimit, LinuxMemory,
LinuxNetwork, LinuxPids, LinuxResources, LinuxThrottleDevice, LinuxWeightDevice,
LinuxNetwork, LinuxPids, LinuxResources,
};
use protobuf::{CachedSize, RepeatedField, SingularPtrField, UnknownFields};
@@ -34,7 +34,6 @@ use protocols::agent::{
BlkioStats, BlkioStatsEntry, CgroupStats, CpuStats, CpuUsage, HugetlbStats, MemoryData,
MemoryStats, PidsStats, ThrottlingData,
};
use regex::Regex;
use std::collections::HashMap;
use std::fs;
use std::path::Path;
@@ -91,7 +90,7 @@ impl CgroupManager for Manager {
let h = cgroups::hierarchies::auto();
let h = Box::new(&*h);
let cg = load_or_create(h, &self.cpath);
cg.add_task(CgroupPid::from(pid as u64));
cg.add_task(CgroupPid::from(pid as u64))?;
Ok(())
}
@@ -194,10 +193,10 @@ impl CgroupManager for Manager {
let freezer_controller: &FreezerController = cg.controller_of().unwrap();
match state {
FreezerState::Thawed => {
freezer_controller.thaw();
freezer_controller.thaw()?;
}
FreezerState::Frozen => {
freezer_controller.freeze();
freezer_controller.freeze()?;
}
_ => {
return Err(nix::Error::Sys(Errno::EINVAL).into());
@@ -230,7 +229,7 @@ impl CgroupManager for Manager {
}
fn set_network_resources(
cg: &cgroups::Cgroup,
_cg: &cgroups::Cgroup,
network: &LinuxNetwork,
res: &mut cgroups::Resources,
) -> Result<()> {
@@ -259,7 +258,7 @@ fn set_network_resources(
}
fn set_devices_resources(
cg: &cgroups::Cgroup,
_cg: &cgroups::Cgroup,
device_resources: &Vec<LinuxDeviceCgroup>,
res: &mut cgroups::Resources,
) -> Result<()> {
@@ -267,18 +266,21 @@ fn set_devices_resources(
let mut devices = vec![];
for d in device_resources.iter() {
let dev = linux_device_group_to_cgroup_device(&d);
devices.push(dev);
if let Some(dev) = linux_device_group_to_cgroup_device(&d) {
devices.push(dev);
}
}
for d in DEFAULT_DEVICES.iter() {
let dev = linux_device_to_cgroup_device(&d);
devices.push(dev);
if let Some(dev) = linux_device_to_cgroup_device(&d) {
devices.push(dev);
}
}
for d in DEFAULT_ALLOWED_DEVICES.iter() {
let dev = linux_device_group_to_cgroup_device(&d);
devices.push(dev);
if let Some(dev) = linux_device_group_to_cgroup_device(&d) {
devices.push(dev);
}
}
res.devices.update_values = true;
@@ -288,7 +290,7 @@ fn set_devices_resources(
}
fn set_hugepages_resources(
cg: &cgroups::Cgroup,
_cg: &cgroups::Cgroup,
hugepage_limits: &Vec<LinuxHugepageLimit>,
res: &mut cgroups::Resources,
) -> Result<()> {
@@ -363,11 +365,11 @@ fn set_cpu_resources(cg: &cgroups::Cgroup, cpu: &LinuxCPU) -> Result<()> {
let cpuset_controller: &CpuSetController = cg.controller_of().unwrap();
if !cpu.cpus.is_empty() {
cpuset_controller.set_cpus(&cpu.cpus);
cpuset_controller.set_cpus(&cpu.cpus)?;
}
if !cpu.mems.is_empty() {
cpuset_controller.set_mems(&cpu.mems);
cpuset_controller.set_mems(&cpu.mems)?;
}
let cpu_controller: &CpuController = cg.controller_of().unwrap();
@@ -379,11 +381,12 @@ fn set_cpu_resources(cg: &cgroups::Cgroup, cpu: &LinuxCPU) -> Result<()> {
shares
};
if shares != 0 {
cpu_controller.set_shares(shares);
cpu_controller.set_shares(shares)?;
}
}
cpu_controller.set_cfs_quota_and_period(cpu.quota, cpu.period);
set_resource!(cpu_controller, set_cfs_quota, cpu, quota);
set_resource!(cpu_controller, set_cfs_period, cpu, period);
set_resource!(cpu_controller, set_rt_runtime, cpu, realtime_runtime);
set_resource!(cpu_controller, set_rt_period_us, cpu, realtime_period);
@@ -465,26 +468,32 @@ fn build_blk_io_device_throttle_resource(
blk_io_device_throttle_resources
}
fn linux_device_to_cgroup_device(d: &LinuxDevice) -> DeviceResource {
let dev_type = DeviceType::from_char(d.r#type.chars().next()).unwrap();
fn linux_device_to_cgroup_device(d: &LinuxDevice) -> Option<DeviceResource> {
let dev_type = match DeviceType::from_char(d.r#type.chars().next()) {
Some(t) => t,
None => return None,
};
let mut permissions = vec![
let permissions = vec![
DevicePermissions::Read,
DevicePermissions::Write,
DevicePermissions::MkNod,
];
DeviceResource {
Some(DeviceResource {
allow: true,
devtype: dev_type,
major: d.major,
minor: d.minor,
access: permissions,
}
})
}
fn linux_device_group_to_cgroup_device(d: &LinuxDeviceCgroup) -> DeviceResource {
let dev_type = DeviceType::from_char(d.r#type.chars().next()).unwrap();
fn linux_device_group_to_cgroup_device(d: &LinuxDeviceCgroup) -> Option<DeviceResource> {
let dev_type = match DeviceType::from_char(d.r#type.chars().next()) {
Some(t) => t,
None => return None,
};
let mut permissions: Vec<DevicePermissions> = vec![];
for p in d.access.chars().collect::<Vec<char>>() {
@@ -496,13 +505,13 @@ fn linux_device_group_to_cgroup_device(d: &LinuxDeviceCgroup) -> DeviceResource
}
}
DeviceResource {
Some(DeviceResource {
allow: d.allow,
devtype: dev_type,
major: d.major.unwrap_or(0),
minor: d.minor.unwrap_or(0),
access: permissions,
}
})
}
// split space separated values into an vector of u64
@@ -518,7 +527,7 @@ fn lines_to_map(content: &str) -> HashMap<String, u64> {
.lines()
.map(|x| x.split_whitespace().collect::<Vec<&str>>())
.filter(|x| x.len() == 2 && x[1].parse::<u64>().is_ok())
.fold(HashMap::new(), |mut hm, mut x| {
.fold(HashMap::new(), |mut hm, x| {
hm.insert(x[0].to_string(), x[1].parse::<u64>().unwrap());
hm
})
@@ -1059,7 +1068,7 @@ impl Manager {
info!(sl!(), "updating cpuset for path {:?}", &r_path);
let cg = load_or_create(h, &r_path);
let cpuset_controller: &CpuSetController = cg.controller_of().unwrap();
cpuset_controller.set_cpus(cpuset_cpus);
cpuset_controller.set_cpus(cpuset_cpus)?;
}
Ok(())

View File

@@ -3,11 +3,9 @@
// SPDX-License-Identifier: Apache-2.0
//
// use crate::configs::{FreezerState, Config};
use anyhow::{anyhow, Result};
use oci::LinuxResources;
use protocols::agent::CgroupStats;
use std::collections::HashMap;
use cgroups::freezer::FreezerState;

View File

@@ -103,19 +103,16 @@ fn register_memory_event_v2(
}
info!(sl!(), "event.wd: {:?}", event.wd);
match event.wd {
ev_fd => {
let oom = get_value_from_cgroup(&event_control_path, "oom_kill");
if oom.unwrap_or(0) > 0 {
sender.send(containere_id.clone()).unwrap();
return;
}
if event.wd == ev_fd {
let oom = get_value_from_cgroup(&event_control_path, "oom_kill");
if oom.unwrap_or(0) > 0 {
sender.send(containere_id.clone()).unwrap();
return;
}
cg_fd => {
let pids = get_value_from_cgroup(&cgroup_event_control_path, "populated");
if pids.unwrap_or(-1) == 0 {
return;
}
} else if event.wd == cg_fd {
let pids = get_value_from_cgroup(&cgroup_event_control_path, "populated");
if pids.unwrap_or(-1) == 0 {
return;
}
}
}

View File

@@ -366,128 +366,3 @@ impl IfPrioMap {
format!("{} {}", self.interface, self.priority)
}
}
/*
impl Config {
fn new(opts: &CreateOpts) -> Result<Self> {
if opts.spec.is_none() {
return Err(ErrorKind::ErrorCode("invalid createopts!".into()));
}
let root = unistd::getcwd().chain_err(|| "cannot getwd")?;
let root = root.as_path().canonicalize().chain_err(||
"cannot resolve root into absolute path")?;
let mut root = root.into();
let cwd = root.clone();
let spec = opts.spec.as_ref().unwrap();
if spec.root.is_none() {
return Err(ErrorKind::ErrorCode("no root".into()));
}
let rootfs = PathBuf::from(&spec.root.as_ref().unwrap().path);
if rootfs.is_relative() {
root = format!("{}/{}", root, rootfs.into());
}
// handle annotations
let mut label = spec.annotations
.iter()
.map(|(key, value)| format!("{}={}", key, value)).collect();
label.push(format!("bundle={}", cwd));
let mut config = Config {
rootfs: root,
no_pivot_root: opts.no_pivot_root,
readonlyfs: spec.root.as_ref().unwrap().readonly,
hostname: spec.hostname.clone(),
labels: label,
no_new_keyring: opts.no_new_keyring,
rootless_euid: opts.rootless_euid,
rootless_cgroups: opts.rootless_cgroups,
};
config.mounts = Vec::new();
for m in &spec.mounts {
config.mounts.push(Mount::new(&cwd, &m)?);
}
config.devices = create_devices(&spec)?;
config.cgroups = Cgroups::new(&opts)?;
if spec.linux.as_ref().is_none() {
return Err(ErrorKind::ErrorCode("no linux configuration".into()));
}
let linux = spec.linux.as_ref().unwrap();
let propagation = MOUNTPROPAGATIONMAPPING.get(linux.rootfs_propagation);
if propagation.is_none() {
Err(ErrorKind::ErrorCode("rootfs propagation not support".into()));
}
config.root_propagation = propagation.unwrap();
if config.no_pivot_root && (config.root_propagation & MSFlags::MSPRIVATE != 0) {
return Err(ErrorKind::ErrorCode("[r]private is not safe without pivot root".into()));
}
// handle namespaces
let m: HashMap<String, String> = HashMap::new();
for ns in &linux.namespaces {
if NAMESPACEMAPPING.get(&ns.r#type.as_str()).is_none() {
return Err(ErrorKind::ErrorCode("namespace don't exist".into()));
}
if m.get(&ns.r#type).is_some() {
return Err(ErrorKind::ErrorCode(format!("duplicate ns {}", ns.r#type)));
}
m.insert(ns.r#type, ns.path);
}
if m.contains_key(oci::NETWORKNAMESPACE) {
let path = m.get(oci::NETWORKNAMESPACE).unwrap();
if path == "" {
config.networks = vec![Network {
r#type: "loopback",
}];
}
}
if m.contains_key(oci::USERNAMESPACE) {
setup_user_namespace(&spec, &mut config)?;
}
config.namespaces = m.iter().map(|(key, value)| Namespace {
r#type: key,
path: value,
}).collect();
config.mask_paths = linux.mask_paths;
config.readonly_path = linux.readonly_path;
config.mount_label = linux.mount_label;
config.sysctl = linux.sysctl;
config.seccomp = None;
config.intelrdt = None;
if spec.process.is_some() {
let process = spec.process.as_ref().unwrap();
config.oom_score_adj = process.oom_score_adj;
config.process_label = process.selinux_label.clone();
if process.capabilities.as_ref().is_some() {
let cap = process.capabilities.as_ref().unwrap();
config.capabilities = Some(Capabilities {
..cap
})
}
}
config.hooks = None;
config.version = spec.version;
Ok(config)
}
}
impl Mount {
fn new(cwd: &str, m: &oci::Mount) -> Result<Self> {
}
}
*/

View File

@@ -3,35 +3,32 @@
// SPDX-License-Identifier: Apache-2.0
//
use anyhow::{anyhow, Context, Result};
use dirs;
use lazy_static;
use libc::pid_t;
use oci::{Hook, Linux, LinuxNamespace, LinuxResources, POSIXRlimit, Spec};
use oci::{LinuxDevice, LinuxIDMapping};
use serde_json;
use std::clone::Clone;
use std::ffi::{CStr, CString};
use std::fmt;
use std::fmt::Display;
use std::fs;
use std::os::unix::io::RawFd;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::SystemTime;
// use crate::sync::Cond;
use anyhow::{anyhow, bail, Context, Result};
use libc::pid_t;
use oci::{LinuxDevice, LinuxIDMapping};
use std::clone::Clone;
use std::fmt::Display;
use std::process::{Child, Command};
use cgroups::freezer::FreezerState;
use crate::process::Process;
// use crate::intelrdt::Manager as RdtManager;
use crate::capabilities::{self, CAPSMAP};
use crate::cgroups::fs::Manager as FsManager;
use crate::cgroups::Manager;
use crate::log_child;
use crate::process::Process;
use crate::specconv::CreateOpts;
use crate::sync::*;
// use crate::stats::Stats;
use crate::capabilities::{self, CAPSMAP};
use crate::cgroups::fs::{self as fscgroup, Manager as FsManager};
use crate::cgroups::Manager;
use crate::{mount, validator};
use protocols::agent::StatsContainerResponse;
@@ -55,7 +52,7 @@ use std::io::BufRead;
use std::io::BufReader;
use std::os::unix::io::FromRawFd;
use slog::{debug, info, o, Logger};
use slog::{info, o, Logger};
const STATE_FILENAME: &'static str = "state.json";
const EXEC_FIFO_FILENAME: &'static str = "exec.fifo";
@@ -214,11 +211,6 @@ pub struct BaseState {
init_process_pid: i32,
#[serde(default)]
init_process_start: u64,
/*
#[serde(default)]
created: SystemTime,
config: Config,
*/
}
pub trait BaseContainer {
@@ -280,12 +272,8 @@ pub struct SyncPC {
}
pub trait Container: BaseContainer {
// fn checkpoint(&self, opts: &CriuOpts) -> Result<()>;
// fn restore(&self, p: &Process, opts: &CriuOpts) -> Result<()>;
fn pause(&mut self) -> Result<()>;
fn resume(&mut self) -> Result<()>;
// fn notify_oom(&self) -> Result<(Sender, Receiver)>;
// fn notify_memory_pressure(&self, lvl: PressureLevel) -> Result<(Sender, Receiver)>;
}
impl Container for LinuxContainer {
@@ -332,16 +320,11 @@ impl Container for LinuxContainer {
pub fn init_child() {
let cwfd = std::env::var(CWFD_FD).unwrap().parse::<i32>().unwrap();
let cfd_log = std::env::var(CLOG_FD).unwrap().parse::<i32>().unwrap();
match do_init_child(cwfd) {
Ok(_) => (),
Err(e) => {
log_child!(cfd_log, "child exit: {:?}", e);
write_sync(cwfd, SYNC_FAILED, format!("{:?}", e).as_str());
return;
}
}
std::process::exit(-1);
let _ = do_init_child(cwfd).map_err(|e| {
log_child!(cfd_log, "child exit: {:?}", e);
let _ = write_sync(cwfd, SYNC_FAILED, format!("{:?}", e).as_str());
});
}
fn do_init_child(cwfd: RawFd) -> Result<()> {
@@ -366,7 +349,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
let buf = read_sync(crfd)?;
let process_str = std::str::from_utf8(&buf)?;
let mut oci_process: oci::Process = serde_json::from_str(process_str)?;
let oci_process: oci::Process = serde_json::from_str(process_str)?;
log_child!(cfd_log, "notify parent to send cgroup manager");
write_sync(cwfd, SYNC_SUCCESS, "")?;
@@ -387,7 +370,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
let linux = spec.linux.as_ref().unwrap();
// get namespace vector to join/new
let nses = get_namespaces(&linux)?;
let nses = get_namespaces(&linux);
let mut userns = false;
let mut to_new = CloneFlags::empty();
@@ -406,19 +389,17 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
to_new.set(*s, true);
}
} else {
let fd = match fcntl::open(ns.path.as_str(), OFlag::O_CLOEXEC, Mode::empty()) {
Ok(v) => v,
Err(e) => {
let fd =
fcntl::open(ns.path.as_str(), OFlag::O_CLOEXEC, Mode::empty()).map_err(|e| {
log_child!(
cfd_log,
"cannot open type: {} path: {}",
ns.r#type.clone(),
ns.path.clone()
);
log_child!(cfd_log, "error is : {}", e.as_errno().unwrap().desc());
return Err(e.into());
}
};
log_child!(cfd_log, "error is : {:?}", e.as_errno());
e
})?;
if *s != CloneFlags::CLONE_NEWPID {
to_join.push((*s, fd));
@@ -444,6 +425,23 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
setrlimit(rl)?;
}
//
// Make the process non-dumpable, to avoid various race conditions that
// could cause processes in namespaces we're joining to access host
// resources (or potentially execute code).
//
// However, if the number of namespaces we are joining is 0, we are not
// going to be switching to a different security context. Thus setting
// ourselves to be non-dumpable only breaks things (like rootless
// containers), which is the recommendation from the kernel folks.
//
// Ref: https://github.com/opencontainers/runc/commit/50a19c6ff828c58e5dab13830bd3dacde268afe5
//
if !nses.is_empty() {
prctl::set_dumpable(false)
.map_err(|e| anyhow!(e).context("set process non-dumpable failed"))?;
}
if userns {
log_child!(cfd_log, "enter new user namespace");
sched::unshare(CloneFlags::CLONE_NEWUSER)?;
@@ -470,17 +468,20 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
}
log_child!(cfd_log, "join namespace {:?}", s);
if let Err(e) = sched::setns(fd, s) {
sched::setns(fd, s).or_else(|e| {
if s == CloneFlags::CLONE_NEWUSER {
if e.as_errno().unwrap() != Errno::EINVAL {
write_sync(cwfd, SYNC_FAILED, format!("{:?}", e).as_str());
return Err(e.into());
let _ = write_sync(cwfd, SYNC_FAILED, format!("{:?}", e).as_str());
return Err(e);
}
Ok(())
} else {
write_sync(cwfd, SYNC_FAILED, format!("{:?}", e).as_str());
return Err(e.into());
let _ = write_sync(cwfd, SYNC_FAILED, format!("{:?}", e).as_str());
Err(e)
}
}
})?;
unistd::close(fd)?;
if s == CloneFlags::CLONE_NEWUSER {
@@ -552,20 +553,19 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
if guser.additional_gids.len() > 0 {
setgroups(guser.additional_gids.as_slice()).map_err(|e| {
write_sync(
let _ = write_sync(
cwfd,
SYNC_FAILED,
format!("setgroups failed: {:?}", e).as_str(),
);
e
})?;
}
// NoNewPeiviledges, Drop capabilities
if oci_process.no_new_privileges {
if let Err(_) = prctl::set_no_new_privileges(true) {
return Err(anyhow!("cannot set no new privileges"));
}
prctl::set_no_new_privileges(true).map_err(|_| anyhow!("cannot set no new privileges"))?;
}
if oci_process.capabilities.is_some() {
@@ -588,7 +588,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
fifofd = std::env::var(FIFO_FD)?.parse::<i32>().unwrap();
}
//cleanup the env inherited from parent
// cleanup the env inherited from parent
for (key, _) in env::vars() {
env::remove_var(key);
}
@@ -597,7 +597,6 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
for e in env.iter() {
let v: Vec<&str> = e.splitn(2, "=").collect();
if v.len() != 2 {
//info!(logger, "incorrect env config!");
continue;
}
env::set_var(v[0], v[1]);
@@ -613,20 +612,15 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
let exec_file = Path::new(&args[0]);
log_child!(cfd_log, "process command: {:?}", &args);
if !exec_file.exists() {
match find_file(exec_file) {
Some(_) => (),
None => {
return Err(anyhow!("the file {} is not exist", &args[0]));
}
}
find_file(exec_file).ok_or_else(|| anyhow!("the file {} is not exist", &args[0]))?;
}
// notify parent that the child's ready to start
write_sync(cwfd, SYNC_SUCCESS, "")?;
log_child!(cfd_log, "ready to run exec");
unistd::close(cfd_log);
unistd::close(crfd);
unistd::close(cwfd);
let _ = unistd::close(cfd_log);
let _ = unistd::close(crfd);
let _ = unistd::close(cwfd);
if oci_process.terminal {
unistd::setsid()?;
@@ -647,8 +641,6 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
}
do_exec(&args);
Err(anyhow!("fail to create container"))
}
impl BaseContainer for LinuxContainer {
@@ -743,7 +735,6 @@ impl BaseContainer for LinuxContainer {
return Err(anyhow!("exec fifo exists"));
}
unistd::mkfifo(fifo_file.as_str(), Mode::from_bits(0o622).unwrap())?;
// defer!(fs::remove_file(&fifo_file)?);
fifofd = fcntl::open(
fifo_file.as_str(),
@@ -766,7 +757,9 @@ impl BaseContainer for LinuxContainer {
let st = self.oci_state()?;
let (pfd_log, cfd_log) = unistd::pipe().context("failed to create pipe")?;
fcntl::fcntl(pfd_log, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC));
let _ = fcntl::fcntl(pfd_log, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC))
.map_err(|e| warn!(logger, "fcntl pfd log FD_CLOEXEC {:?}", e));
let child_logger = logger.new(o!("action" => "child process log"));
let log_handler = thread::spawn(move || {
@@ -795,54 +788,57 @@ impl BaseContainer for LinuxContainer {
info!(logger, "exec fifo opened!");
let (prfd, cwfd) = unistd::pipe().context("failed to create pipe")?;
let (crfd, pwfd) = unistd::pipe().context("failed to create pipe")?;
fcntl::fcntl(prfd, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC));
fcntl::fcntl(pwfd, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC));
let _ = fcntl::fcntl(prfd, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC))
.map_err(|e| warn!(logger, "fcntl prfd FD_CLOEXEC {:?}", e));
let _ = fcntl::fcntl(pwfd, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC))
.map_err(|e| warn!(logger, "fcntl pwfd FD_COLEXEC {:?}", e));
defer!({
unistd::close(prfd);
unistd::close(pwfd);
let _ = unistd::close(prfd).map_err(|e| warn!(logger, "close prfd {:?}", e));
let _ = unistd::close(pwfd).map_err(|e| warn!(logger, "close pwfd {:?}", e));
});
let mut child_stdin = std::process::Stdio::null();
let mut child_stdout = std::process::Stdio::null();
let mut child_stderr = std::process::Stdio::null();
let mut stdin = -1;
let mut stdout = -1;
let mut stderr = -1;
let child_stdin: std::process::Stdio;
let child_stdout: std::process::Stdio;
let child_stderr: std::process::Stdio;
if tty {
let pseduo = pty::openpty(None, None)?;
p.term_master = Some(pseduo.master);
fcntl::fcntl(pseduo.master, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC));
fcntl::fcntl(pseduo.slave, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC));
let pseudo = pty::openpty(None, None)?;
p.term_master = Some(pseudo.master);
let _ = fcntl::fcntl(pseudo.master, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC))
.map_err(|e| warn!(logger, "fnctl pseudo.master {:?}", e));
let _ = fcntl::fcntl(pseudo.slave, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC))
.map_err(|e| warn!(logger, "fcntl pseudo.slave {:?}", e));
child_stdin = unsafe { std::process::Stdio::from_raw_fd(pseduo.slave) };
child_stdout = unsafe { std::process::Stdio::from_raw_fd(pseduo.slave) };
child_stderr = unsafe { std::process::Stdio::from_raw_fd(pseduo.slave) };
child_stdin = unsafe { std::process::Stdio::from_raw_fd(pseudo.slave) };
child_stdout = unsafe { std::process::Stdio::from_raw_fd(pseudo.slave) };
child_stderr = unsafe { std::process::Stdio::from_raw_fd(pseudo.slave) };
} else {
stdin = p.stdin.unwrap();
stdout = p.stdout.unwrap();
stderr = p.stderr.unwrap();
let stdin = p.stdin.unwrap();
let stdout = p.stdout.unwrap();
let stderr = p.stderr.unwrap();
child_stdin = unsafe { std::process::Stdio::from_raw_fd(stdin) };
child_stdout = unsafe { std::process::Stdio::from_raw_fd(stdout) };
child_stderr = unsafe { std::process::Stdio::from_raw_fd(stderr) };
}
let old_pid_ns = match fcntl::open(PID_NS_PATH, OFlag::O_CLOEXEC, Mode::empty()) {
Ok(v) => v,
Err(e) => {
let old_pid_ns =
fcntl::open(PID_NS_PATH, OFlag::O_CLOEXEC, Mode::empty()).map_err(|e| {
error!(
logger,
"cannot open pid ns path: {} with error: {:?}", PID_NS_PATH, e
);
return Err(e.into());
}
};
e
})?;
//restore the parent's process's pid namespace.
defer!({
sched::setns(old_pid_ns, CloneFlags::CLONE_NEWPID);
unistd::close(old_pid_ns);
let _ = sched::setns(old_pid_ns, CloneFlags::CLONE_NEWPID)
.map_err(|e| warn!(logger, "settns CLONE_NEWPID {:?}", e));
let _ = unistd::close(old_pid_ns)
.map_err(|e| warn!(logger, "close old pid namespace {:?}", e));
});
let pidns = get_pid_namespace(&self.logger, linux)?;
@@ -872,7 +868,7 @@ impl BaseContainer for LinuxContainer {
child = child.env(FIFO_FD, format!("{}", fifofd));
}
let mut child = child.spawn()?;
let child = child.spawn()?;
unistd::close(crfd)?;
unistd::close(cwfd)?;
@@ -884,29 +880,28 @@ impl BaseContainer for LinuxContainer {
}
if p.init {
unistd::close(fifofd);
let _ = unistd::close(fifofd).map_err(|e| warn!(logger, "close fifofd {:?}", e));
}
info!(logger, "child pid: {}", p.pid);
match join_namespaces(
join_namespaces(
&logger,
&spec,
&p,
self.cgroup_manager.as_ref().unwrap(),
&st,
&mut child,
pwfd,
prfd,
) {
Ok(_) => (),
Err(e) => {
error!(logger, "create container process error {:?}", e);
// kill the child process.
signal::kill(Pid::from_raw(p.pid), Some(Signal::SIGKILL));
return Err(e);
}
};
)
.map_err(|e| {
error!(logger, "create container process error {:?}", e);
// kill the child process.
let _ = signal::kill(Pid::from_raw(p.pid), Some(Signal::SIGKILL))
.map_err(|e| warn!(logger, "signal::kill joining namespaces {:?}", e));
e
})?;
info!(logger, "entered namespaces!");
@@ -916,7 +911,9 @@ impl BaseContainer for LinuxContainer {
let (exit_pipe_r, exit_pipe_w) = unistd::pipe2(OFlag::O_CLOEXEC)
.context("failed to create pipe")
.map_err(|e| {
signal::kill(Pid::from_raw(child.id() as i32), Some(Signal::SIGKILL));
let _ = signal::kill(Pid::from_raw(child.id() as i32), Some(Signal::SIGKILL))
.map_err(|e| warn!(logger, "signal::kill creating pipe {:?}", e));
e
})?;
@@ -930,7 +927,9 @@ impl BaseContainer for LinuxContainer {
self.processes.insert(p.pid, p);
info!(logger, "wait on child log handler");
log_handler.join();
let _ = log_handler
.join()
.map_err(|e| warn!(logger, "joining log handler {:?}", e));
info!(logger, "create process completed");
return Ok(());
}
@@ -1022,7 +1021,7 @@ where
})
}
fn do_exec(args: &[String]) -> Result<()> {
fn do_exec(args: &[String]) -> ! {
let path = &args[0];
let p = CString::new(path.to_string()).unwrap();
let sa: Vec<CString> = args
@@ -1031,25 +1030,22 @@ fn do_exec(args: &[String]) -> Result<()> {
.collect();
let a: Vec<&CStr> = sa.iter().map(|s| s.as_c_str()).collect();
if let Err(e) = unistd::execvp(p.as_c_str(), a.as_slice()) {
// info!(logger, "execve failed!!!");
// info!(logger, "binary: {:?}, args: {:?}, envs: {:?}", p, a, env);
match e {
nix::Error::Sys(errno) => {
std::process::exit(errno as i32);
}
_ => std::process::exit(-2),
let _ = unistd::execvp(p.as_c_str(), a.as_slice()).map_err(|e| match e {
nix::Error::Sys(errno) => {
std::process::exit(errno as i32);
}
}
// should never reach here
Ok(())
_ => std::process::exit(-2),
});
unreachable!()
}
fn update_namespaces(logger: &Logger, spec: &mut Spec, init_pid: RawFd) -> Result<()> {
let linux = match spec.linux.as_mut() {
None => return Err(anyhow!("Spec didn't container linux field")),
Some(l) => l,
};
info!(logger, "updating namespaces");
let linux = spec
.linux
.as_mut()
.ok_or_else(|| anyhow!("Spec didn't contain linux field"))?;
let namespaces = linux.namespaces.as_mut_slice();
for namespace in namespaces.iter_mut() {
@@ -1076,19 +1072,18 @@ fn get_pid_namespace(logger: &Logger, linux: &Linux) -> Result<Option<RawFd>> {
return Ok(None);
}
let fd = match fcntl::open(ns.path.as_str(), OFlag::O_CLOEXEC, Mode::empty()) {
Ok(v) => v,
Err(e) => {
let fd =
fcntl::open(ns.path.as_str(), OFlag::O_CLOEXEC, Mode::empty()).map_err(|e| {
error!(
logger,
"cannot open type: {} path: {}",
ns.r#type.clone(),
ns.path.clone()
);
error!(logger, "error is : {}", e.as_errno().unwrap().desc());
return Err(e.into());
}
};
error!(logger, "error is : {:?}", e.as_errno());
e
})?;
return Ok(Some(fd));
}
@@ -1098,24 +1093,21 @@ fn get_pid_namespace(logger: &Logger, linux: &Linux) -> Result<Option<RawFd>> {
}
fn is_userns_enabled(linux: &Linux) -> bool {
for ns in &linux.namespaces {
if ns.r#type == "user" && ns.path == "" {
return true;
}
}
false
linux
.namespaces
.iter()
.any(|ns| ns.r#type == "user" && ns.path == "")
}
fn get_namespaces(linux: &Linux) -> Result<Vec<LinuxNamespace>> {
let mut ns: Vec<LinuxNamespace> = Vec::new();
for i in &linux.namespaces {
ns.push(LinuxNamespace {
r#type: i.r#type.clone(),
path: i.path.clone(),
});
}
Ok(ns)
fn get_namespaces(linux: &Linux) -> Vec<LinuxNamespace> {
linux
.namespaces
.iter()
.map(|ns| LinuxNamespace {
r#type: ns.r#type.clone(),
path: ns.path.clone(),
})
.collect()
}
fn join_namespaces(
@@ -1124,7 +1116,6 @@ fn join_namespaces(
p: &Process,
cm: &FsManager,
st: &OCIState,
_child: &mut Child,
pwfd: RawFd,
prfd: RawFd,
) -> Result<()> {
@@ -1141,7 +1132,6 @@ fn join_namespaces(
info!(logger, "wait child received oci spec");
// child.try_wait()?;
read_sync(prfd)?;
info!(logger, "send oci process from parent to child");
@@ -1154,7 +1144,7 @@ fn join_namespaces(
let cm_str = serde_json::to_string(cm)?;
write_sync(pwfd, SYNC_DATA, cm_str.as_str())?;
//wait child setup user namespace
// wait child setup user namespace
info!(logger, "wait child setup user namespace");
read_sync(prfd)?;
@@ -1213,7 +1203,7 @@ fn join_namespaces(
read_sync(prfd)?;
info!(logger, "get ready to run poststart hook!");
//run poststart hook
// run poststart hook
if spec.hooks.is_some() {
info!(logger, "poststart hook");
let hooks = spec.hooks.as_ref().unwrap();
@@ -1230,36 +1220,30 @@ fn join_namespaces(
}
fn write_mappings(logger: &Logger, path: &str, maps: &[LinuxIDMapping]) -> Result<()> {
let mut data = String::new();
for m in maps {
if m.size == 0 {
continue;
}
let val = format!("{} {} {}\n", m.container_id, m.host_id, m.size);
data = data + &val;
}
let data = maps
.iter()
.filter(|m| m.size != 0)
.map(|m| format!("{} {} {}\n", m.container_id, m.host_id, m.size))
.collect::<Vec<_>>()
.join("");
info!(logger, "mapping: {}", data);
if !data.is_empty() {
let fd = fcntl::open(path, OFlag::O_WRONLY, Mode::empty())?;
defer!(unistd::close(fd).unwrap());
match unistd::write(fd, data.as_bytes()) {
Ok(_) => {}
Err(e) => {
info!(logger, "cannot write mapping");
return Err(e.into());
}
}
unistd::write(fd, data.as_bytes()).map_err(|e| {
info!(logger, "cannot write mapping");
e
})?;
}
Ok(())
}
fn setid(uid: Uid, gid: Gid) -> Result<()> {
// set uid/gid
if let Err(e) = prctl::set_keep_capabilities(true) {
bail!(anyhow!(e).context("set keep capabilities returned"));
};
prctl::set_keep_capabilities(true)
.map_err(|e| anyhow!(e).context("set keep capabilities returned"))?;
{
unistd::setresgid(gid, gid, gid)?;
}
@@ -1271,9 +1255,9 @@ fn setid(uid: Uid, gid: Gid) -> Result<()> {
capabilities::reset_effective()?;
}
if let Err(e) = prctl::set_keep_capabilities(false) {
bail!(anyhow!(e).context("set keep capabilities returned"));
};
prctl::set_keep_capabilities(false)
.map_err(|e| anyhow!(e).context("set keep capabilities returned"))?;
Ok(())
}
@@ -1291,13 +1275,13 @@ impl LinuxContainer {
// validate oci spec
validator::validate(&config)?;
if let Err(e) = fs::create_dir_all(root.as_str()) {
fs::create_dir_all(root.as_str()).map_err(|e| {
if e.kind() == std::io::ErrorKind::AlreadyExists {
return Err(e).context(format!("container {} already exists", id.as_str()));
return anyhow!(e).context(format!("container {} already exists", id.as_str()));
}
return Err(e).context(format!("fail to create container directory {}", root));
}
anyhow!(e).context(format!("fail to create container directory {}", root))
})?;
unistd::chown(
root.as_str(),
@@ -1432,7 +1416,6 @@ fn set_sysctls(sysctls: &HashMap<String, String>) -> Result<()> {
Ok(())
}
use std::error::Error as StdError;
use std::io::Read;
use std::os::unix::process::ExitStatusExt;
use std::process::Stdio;
@@ -1452,7 +1435,6 @@ fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> {
let args = h.args.clone();
let envs = h.env.clone();
let state = serde_json::to_string(st)?;
// state.push_str("\n");
let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC)?;
defer!({
@@ -1472,9 +1454,6 @@ fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> {
info!(logger, "hook child: {} status: {}", child, status);
// let _ = wait::waitpid(_ch,
// Some(WaitPidFlag::WEXITED | WaitPidFlag::__WALL));
if status != 0 {
if status == -libc::ETIMEDOUT {
return Err(anyhow!(nix::Error::from_errno(Errno::ETIMEDOUT)));
@@ -1515,7 +1494,7 @@ fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> {
.spawn()
.unwrap();
//send out our pid
// send out our pid
tx.send(child.id() as libc::pid_t).unwrap();
info!(logger, "hook grand: {}", child.id());
@@ -1534,7 +1513,7 @@ fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> {
.unwrap()
.read_to_string(&mut out)
.unwrap();
info!(logger, "{}", out.as_str());
info!(logger, "child stdout: {}", out.as_str());
match child.wait() {
Ok(exit) => {
let code: i32 = if exit.success() {
@@ -1553,7 +1532,7 @@ fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> {
info!(
logger,
"wait child error: {} {}",
e.description(),
e,
e.raw_os_error().unwrap()
);
@@ -1604,8 +1583,6 @@ fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> {
SYNC_DATA,
std::str::from_utf8(&status.to_be_bytes()).unwrap_or_default(),
);
// let _ = wait::waitpid(Pid::from_raw(pid),
// Some(WaitPidFlag::WEXITED | WaitPidFlag::__WALL));
std::process::exit(0);
}
}

View File

@@ -13,6 +13,8 @@
#![allow(non_upper_case_globals)]
// #![allow(unused_comparisons)]
#[macro_use]
#[cfg(test)]
extern crate serial_test;
extern crate serde;
extern crate serde_json;
#[macro_use]
@@ -34,13 +36,6 @@ extern crate oci;
extern crate path_absolutize;
extern crate regex;
// Convenience macro to obtain the scope logger
macro_rules! sl {
() => {
slog_scope::logger().new(o!("subsystem" => "rustjail"))
};
}
pub mod capabilities;
pub mod cgroups;
pub mod container;
@@ -74,7 +69,6 @@ use protocols::oci::{
Root as grpcRoot, Spec as grpcSpec,
};
use std::collections::HashMap;
use std::mem::MaybeUninit;
pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess {
let console_size = if p.ConsoleSize.is_some() {
@@ -96,7 +90,12 @@ pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess {
username: u.Username.clone(),
}
} else {
unsafe { MaybeUninit::zeroed().assume_init() }
ociUser {
uid: 0,
gid: 0,
additional_gids: vec![],
username: String::from(""),
}
};
let capabilities = if p.Capabilities.is_some() {
@@ -141,11 +140,6 @@ pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess {
}
}
fn process_oci_to_grpc(_p: ociProcess) -> grpcProcess {
// dont implement it for now
unsafe { MaybeUninit::zeroed().assume_init() }
}
fn root_grpc_to_oci(root: &grpcRoot) -> ociRoot {
ociRoot {
path: root.Path.clone(),
@@ -153,10 +147,6 @@ fn root_grpc_to_oci(root: &grpcRoot) -> ociRoot {
}
}
fn root_oci_to_grpc(_root: &ociRoot) -> grpcRoot {
unsafe { MaybeUninit::zeroed().assume_init() }
}
fn mount_grpc_to_oci(m: &grpcMount) -> ociMount {
ociMount {
destination: m.destination.clone(),
@@ -166,10 +156,6 @@ fn mount_grpc_to_oci(m: &grpcMount) -> ociMount {
}
}
fn mount_oci_to_grpc(_m: &ociMount) -> grpcMount {
unsafe { MaybeUninit::zeroed().assume_init() }
}
use oci::Hook as ociHook;
use protocols::oci::Hook as grpcHook;
@@ -200,10 +186,6 @@ fn hooks_grpc_to_oci(h: &grpcHooks) -> ociHooks {
}
}
fn hooks_oci_to_grpc(_h: &ociHooks) -> grpcHooks {
unsafe { MaybeUninit::zeroed().assume_init() }
}
use oci::{
LinuxDevice as ociLinuxDevice, LinuxIDMapping as ociLinuxIDMapping,
LinuxIntelRdt as ociLinuxIntelRdt, LinuxNamespace as ociLinuxNamespace,
@@ -570,14 +552,16 @@ pub fn grpc_to_oci(grpc: &grpcSpec) -> ociSpec {
}
}
pub fn oci_to_grpc(_oci: &ociSpec) -> grpcSpec {
unsafe { MaybeUninit::zeroed().assume_init() }
}
#[cfg(test)]
mod tests {
#[test]
fn it_works() {
assert_eq!(2 + 2, 4);
#[allow(unused_macros)]
#[macro_export]
macro_rules! skip_if_not_root {
() => {
if !nix::unistd::Uid::effective().is_root() {
println!("INFO: skipping {} which needs root", module_path!());
return;
}
};
}
}

View File

@@ -7,13 +7,16 @@ use anyhow::{anyhow, bail, Context, Error, Result};
use libc::uid_t;
use nix::errno::Errno;
use nix::fcntl::{self, OFlag};
use nix::mount::{self, MntFlags, MsFlags};
#[cfg(not(test))]
use nix::mount;
use nix::mount::{MntFlags, MsFlags};
use nix::sys::stat::{self, Mode, SFlag};
use nix::unistd::{self, Gid, Uid};
use nix::NixPath;
use oci::{LinuxDevice, Mount, Spec};
use std::collections::{HashMap, HashSet};
use std::fs::{self, OpenOptions};
use std::mem::MaybeUninit;
use std::os::unix;
use std::os::unix::io::RawFd;
use std::path::{Path, PathBuf};
@@ -48,6 +51,13 @@ pub struct Info {
}
const MOUNTINFOFORMAT: &'static str = "{d} {d} {d}:{d} {} {} {} {}";
const PROC_PATH: &str = "/proc";
// since libc didn't defined this const for musl, thus redefined it here.
#[cfg(all(target_os = "linux", target_env = "gnu"))]
const PROC_SUPER_MAGIC: libc::c_long = 0x00009fa0;
#[cfg(all(target_os = "linux", target_env = "musl"))]
const PROC_SUPER_MAGIC: libc::c_ulong = 0x00009fa0;
lazy_static! {
static ref PROPAGATION: HashMap<&'static str, MsFlags> = {
@@ -102,6 +112,33 @@ lazy_static! {
};
}
#[inline(always)]
#[allow(unused_variables)]
fn mount<P1: ?Sized + NixPath, P2: ?Sized + NixPath, P3: ?Sized + NixPath, P4: ?Sized + NixPath>(
source: Option<&P1>,
target: &P2,
fstype: Option<&P3>,
flags: MsFlags,
data: Option<&P4>,
) -> std::result::Result<(), nix::Error> {
#[cfg(not(test))]
return mount::mount(source, target, fstype, flags, data);
#[cfg(test)]
return Ok(());
}
#[inline(always)]
#[allow(unused_variables)]
fn umount2<P: ?Sized + NixPath>(
target: &P,
flags: MntFlags,
) -> std::result::Result<(), nix::Error> {
#[cfg(not(test))]
return mount::umount2(target, flags);
#[cfg(test)]
return Ok(());
}
pub fn init_rootfs(
cfd_log: RawFd,
spec: &Spec,
@@ -113,22 +150,34 @@ pub fn init_rootfs(
lazy_static::initialize(&PROPAGATION);
lazy_static::initialize(&LINUXDEVICETYPE);
let linux = spec.linux.as_ref().unwrap();
let linux = &spec
.linux
.as_ref()
.ok_or::<Error>(anyhow!("Could not get linux configuration from spec"))?;
let mut flags = MsFlags::MS_REC;
match PROPAGATION.get(&linux.rootfs_propagation.as_str()) {
Some(fl) => flags |= *fl,
None => flags |= MsFlags::MS_SLAVE,
}
let rootfs = spec.root.as_ref().unwrap().path.as_str();
let root = fs::canonicalize(rootfs)?;
let rootfs = root.to_str().unwrap();
let root = spec
.root
.as_ref()
.ok_or(anyhow!("Could not get rootfs path from spec"))
.and_then(|r| {
fs::canonicalize(r.path.as_str()).context("Could not canonicalize rootfs path")
})?;
mount::mount(None::<&str>, "/", None::<&str>, flags, None::<&str>)?;
let rootfs = (*root)
.to_str()
.ok_or(anyhow!("Could not convert rootfs path to string"))?;
mount(None::<&str>, "/", None::<&str>, flags, None::<&str>)?;
rootfs_parent_mount_private(rootfs)?;
mount::mount(
mount(
Some(rootfs),
rootfs,
None::<&str>,
@@ -139,8 +188,12 @@ pub fn init_rootfs(
for m in &spec.mounts {
let (mut flags, data) = parse_mount(&m);
if !m.destination.starts_with("/") || m.destination.contains("..") {
return Err(anyhow!(nix::Error::Sys(Errno::EINVAL)));
return Err(anyhow!(
"the mount destination {} is invalid",
m.destination
));
}
if m.r#type == "cgroup" {
mount_cgroups(cfd_log, &m, rootfs, flags, &data, cpath, mounts)?;
} else {
@@ -148,6 +201,25 @@ pub fn init_rootfs(
flags &= !MsFlags::MS_RDONLY;
}
if m.r#type == "bind" {
check_proc_mount(m)?;
}
// If the destination already exists and is not a directory, we bail
// out This is to avoid mounting through a symlink or similar -- which
// has been a "fun" attack scenario in the past.
if m.r#type == "proc" || m.r#type == "sysfs" {
if let Ok(meta) = fs::symlink_metadata(&m.destination) {
if !meta.is_dir() {
return Err(anyhow!(
"Mount point {} must be ordinary directory: got {:?}",
m.destination,
meta.file_type()
));
}
}
}
mount_from(cfd_log, &m, &rootfs, flags, &data, "")?;
// bind mount won't change mount options, we need remount to make mount options
// effective.
@@ -157,7 +229,7 @@ pub fn init_rootfs(
for o in &m.options {
if let Some(fl) = PROPAGATION.get(o.as_str()) {
let dest = format!("{}{}", &rootfs, &m.destination);
mount::mount(None::<&str>, dest.as_str(), None::<&str>, *fl, None::<&str>)?;
mount(None::<&str>, dest.as_str(), None::<&str>, *fl, None::<&str>)?;
}
}
}
@@ -176,6 +248,59 @@ pub fn init_rootfs(
Ok(())
}
fn check_proc_mount(m: &Mount) -> Result<()> {
// White list, it should be sub directories of invalid destinations
// These entries can be bind mounted by files emulated by fuse,
// so commands like top, free displays stats in container.
let valid_destinations = [
"/proc/cpuinfo",
"/proc/diskstats",
"/proc/meminfo",
"/proc/stat",
"/proc/swaps",
"/proc/uptime",
"/proc/loadavg",
"/proc/net/dev",
];
for i in valid_destinations.iter() {
if m.destination.as_str() == *i {
return Ok(());
}
}
if m.destination == PROC_PATH {
// only allow a mount on-top of proc if it's source is "proc"
unsafe {
let mut stats = MaybeUninit::<libc::statfs>::uninit();
if let Ok(_) = m
.source
.with_nix_path(|path| libc::statfs(path.as_ptr(), stats.as_mut_ptr()))
{
if stats.assume_init().f_type == PROC_SUPER_MAGIC {
return Ok(());
}
} else {
return Ok(());
}
return Err(anyhow!(format!(
"{} cannot be mounted to {} because it is not of type proc",
m.source, m.destination
)));
}
}
if m.destination.starts_with(PROC_PATH) {
return Err(anyhow!(format!(
"{} cannot be mounted because it is inside /proc",
m.destination
)));
}
return Ok(());
}
fn mount_cgroups_v2(cfd_log: RawFd, m: &Mount, rootfs: &str, flags: MsFlags) -> Result<()> {
let olddir = unistd::getcwd()?;
unistd::chdir(rootfs)?;
@@ -196,7 +321,7 @@ fn mount_cgroups_v2(cfd_log: RawFd, m: &Mount, rootfs: &str, flags: MsFlags) ->
if flags.contains(MsFlags::MS_RDONLY) {
let dest = format!("{}{}", rootfs, m.destination.as_str());
mount::mount(
mount(
Some(dest.as_str()),
dest.as_str(),
None::<&str>,
@@ -282,20 +407,17 @@ fn mount_cgroups(
if key != base {
let src = format!("{}/{}", m.destination.as_str(), key);
match unix::fs::symlink(destination.as_str(), &src[1..]) {
Err(e) => {
log_child!(
cfd_log,
"symlink: {} {} err: {}",
key,
destination.as_str(),
e.to_string()
);
unix::fs::symlink(destination.as_str(), &src[1..]).map_err(|e| {
log_child!(
cfd_log,
"symlink: {} {} err: {}",
key,
destination.as_str(),
e.to_string()
);
return Err(e.into());
}
Ok(_) => {}
}
e
})?;
}
}
@@ -303,7 +425,7 @@ fn mount_cgroups(
if flags.contains(MsFlags::MS_RDONLY) {
let dest = format!("{}{}", rootfs, m.destination.as_str());
mount::mount(
mount(
Some(dest.as_str()),
dest.as_str(),
None::<&str>,
@@ -315,6 +437,17 @@ fn mount_cgroups(
Ok(())
}
#[allow(unused_variables)]
fn pivot_root<P1: ?Sized + NixPath, P2: ?Sized + NixPath>(
new_root: &P1,
put_old: &P2,
) -> anyhow::Result<(), nix::Error> {
#[cfg(not(test))]
return unistd::pivot_root(new_root, put_old);
#[cfg(test)]
return Ok(());
}
pub fn pivot_rootfs<P: ?Sized + NixPath + std::fmt::Debug>(path: &P) -> Result<()> {
let oldroot = fcntl::open("/", OFlag::O_DIRECTORY | OFlag::O_RDONLY, Mode::empty())?;
defer!(unistd::close(oldroot).unwrap());
@@ -323,7 +456,7 @@ pub fn pivot_rootfs<P: ?Sized + NixPath + std::fmt::Debug>(path: &P) -> Result<(
// Change to the new root so that the pivot_root actually acts on it.
unistd::fchdir(newroot)?;
unistd::pivot_root(".", ".").context(format!("failed to pivot_root on {:?}", path))?;
pivot_root(".", ".").context(format!("failed to pivot_root on {:?}", path))?;
// Currently our "." is oldroot (according to the current kernel code).
// However, purely for safety, we will fchdir(oldroot) since there isn't
@@ -336,7 +469,7 @@ pub fn pivot_rootfs<P: ?Sized + NixPath + std::fmt::Debug>(path: &P) -> Result<(
// to races where we still have a reference to a mount while a process in
// the host namespace are trying to operate on something they think has no
// mounts (devicemapper in particular).
mount::mount(
mount(
Some("none"),
".",
Some(""),
@@ -345,7 +478,7 @@ pub fn pivot_rootfs<P: ?Sized + NixPath + std::fmt::Debug>(path: &P) -> Result<(
)?;
// Preform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd.
mount::umount2(".", MntFlags::MNT_DETACH).context("failed to do umount2")?;
umount2(".", MntFlags::MNT_DETACH).context("failed to do umount2")?;
// Switch back to our shiny new root.
unistd::chdir("/")?;
@@ -368,7 +501,7 @@ fn rootfs_parent_mount_private(path: &str) -> Result<()> {
}
if options.contains("shared:") {
mount::mount(
mount(
None::<&str>,
mount_point.as_str(),
None::<&str>,
@@ -436,6 +569,15 @@ fn parse_mount_table() -> Result<Vec<Info>> {
Ok(infos)
}
#[inline(always)]
#[allow(unused_variables)]
fn chroot<P: ?Sized + NixPath>(path: &P) -> Result<(), nix::Error> {
#[cfg(not(test))]
return unistd::chroot(path);
#[cfg(test)]
return Ok(());
}
pub fn ms_move_root(rootfs: &str) -> Result<bool> {
unistd::chdir(rootfs)?;
let mount_infos = parse_mount_table()?;
@@ -463,41 +605,40 @@ pub fn ms_move_root(rootfs: &str) -> Result<bool> {
}
// Be sure umount events are not propagated to the host.
mount::mount(
mount(
None::<&str>,
abs_mount_point,
None::<&str>,
MsFlags::MS_SLAVE | MsFlags::MS_REC,
None::<&str>,
)?;
match mount::umount2(abs_mount_point, MntFlags::MNT_DETACH) {
Ok(_) => (),
Err(e) => {
if e.ne(&nix::Error::from(Errno::EINVAL)) && e.ne(&nix::Error::from(Errno::EPERM)) {
return Err(anyhow!(e));
}
// If we have not privileges for umounting (e.g. rootless), then
// cover the path.
mount::mount(
Some("tmpfs"),
abs_mount_point,
Some("tmpfs"),
MsFlags::empty(),
None::<&str>,
)?;
umount2(abs_mount_point, MntFlags::MNT_DETACH).or_else(|e| {
if e.ne(&nix::Error::from(Errno::EINVAL)) && e.ne(&nix::Error::from(Errno::EPERM)) {
return Err(anyhow!(e));
}
}
// If we have not privileges for umounting (e.g. rootless), then
// cover the path.
mount(
Some("tmpfs"),
abs_mount_point,
Some("tmpfs"),
MsFlags::empty(),
None::<&str>,
)?;
Ok(())
})?;
}
mount::mount(
mount(
Some(abs_root),
"/",
None::<&str>,
MsFlags::MS_MOVE,
None::<&str>,
)?;
unistd::chroot(".")?;
chroot(".")?;
unistd::chdir("/")?;
Ok(true)
@@ -544,18 +685,14 @@ fn mount_from(
Path::new(&dest)
};
// let _ = fs::create_dir_all(&dir);
match fs::create_dir_all(&dir) {
Ok(_) => {}
Err(e) => {
log_child!(
cfd_log,
"creat dir {}: {}",
dir.to_str().unwrap(),
e.to_string()
);
}
}
let _ = fs::create_dir_all(&dir).map_err(|e| {
log_child!(
cfd_log,
"creat dir {}: {}",
dir.to_str().unwrap(),
e.to_string()
)
});
// make sure file exists so we can bind over it
if src.is_file() {
@@ -572,31 +709,26 @@ fn mount_from(
}
};
match stat::stat(dest.as_str()) {
Ok(_) => {}
Err(e) => {
log_child!(
cfd_log,
"dest stat error. {}: {}",
dest.as_str(),
e.as_errno().unwrap().desc()
);
}
}
let _ = stat::stat(dest.as_str()).map_err(|e| {
log_child!(
cfd_log,
"dest stat error. {}: {:?}",
dest.as_str(),
e.as_errno()
)
});
match mount::mount(
mount(
Some(src.as_str()),
dest.as_str(),
Some(m.r#type.as_str()),
flags,
Some(d.as_str()),
) {
Ok(_) => {}
Err(e) => {
log_child!(cfd_log, "mount error: {}", e.as_errno().unwrap().desc());
return Err(e.into());
}
}
)
.map_err(|e| {
log_child!(cfd_log, "mount error: {:?}", e.as_errno());
e
})?;
if flags.contains(MsFlags::MS_BIND)
&& flags.intersects(
@@ -608,24 +740,17 @@ fn mount_from(
| MsFlags::MS_SLAVE),
)
{
match mount::mount(
mount(
Some(dest.as_str()),
dest.as_str(),
None::<&str>,
flags | MsFlags::MS_REMOUNT,
None::<&str>,
) {
Err(e) => {
log_child!(
cfd_log,
"remout {}: {}",
dest.as_str(),
e.as_errno().unwrap().desc()
);
return Err(e.into());
}
Ok(_) => {}
}
)
.map_err(|e| {
log_child!(cfd_log, "remout {}: {:?}", dest.as_str(), e.as_errno());
e
})?;
}
Ok(())
}
@@ -669,10 +794,6 @@ fn ensure_ptmx() -> Result<()> {
Ok(())
}
fn makedev(major: u64, minor: u64) -> u64 {
(minor & 0xff) | ((major & 0xfff) << 8) | ((minor & !0xff) << 12) | ((major & !0xfff) << 32)
}
lazy_static! {
static ref LINUXDEVICETYPE: HashMap<&'static str, SFlag> = {
let mut m = HashMap::new();
@@ -693,7 +814,7 @@ fn mknod_dev(dev: &LinuxDevice) -> Result<()> {
&dev.path[1..],
*f,
Mode::from_bits_truncate(dev.file_mode.unwrap_or(0)),
makedev(dev.major as u64, dev.minor as u64),
nix::sys::stat::makedev(dev.major as u64, dev.minor as u64),
)?;
unistd::chown(
@@ -714,7 +835,7 @@ fn bind_dev(dev: &LinuxDevice) -> Result<()> {
unistd::close(fd)?;
mount::mount(
mount(
Some(&*dev.path),
&dev.path[1..],
None::<&str>,
@@ -744,7 +865,7 @@ pub fn finish_rootfs(cfd_log: RawFd, spec: &Spec) -> Result<()> {
if m.destination == "/dev" {
let (flags, _) = parse_mount(m);
if flags.contains(MsFlags::MS_RDONLY) {
mount::mount(
mount(
Some("/dev"),
"/dev",
None::<&str>,
@@ -758,7 +879,7 @@ pub fn finish_rootfs(cfd_log: RawFd, spec: &Spec) -> Result<()> {
if spec.root.as_ref().unwrap().readonly {
let flags = MsFlags::MS_BIND | MsFlags::MS_RDONLY | MsFlags::MS_NODEV | MsFlags::MS_REMOUNT;
mount::mount(Some("/"), "/", None::<&str>, flags, None::<&str>)?;
mount(Some("/"), "/", None::<&str>, flags, None::<&str>)?;
}
stat::umask(Mode::from_bits_truncate(0o022));
unistd::chdir(&olddir)?;
@@ -771,9 +892,7 @@ fn mask_path(path: &str) -> Result<()> {
return Err(nix::Error::Sys(Errno::EINVAL).into());
}
//info!("{}", path);
match mount::mount(
match mount(
Some("/dev/null"),
path,
None::<&str>,
@@ -788,7 +907,6 @@ fn mask_path(path: &str) -> Result<()> {
}
Err(e) => {
//info!("{}: {}", path, e.as_errno().unwrap().desc());
return Err(e.into());
}
@@ -803,9 +921,7 @@ fn readonly_path(path: &str) -> Result<()> {
return Err(nix::Error::Sys(Errno::EINVAL).into());
}
//info!("{}", path);
match mount::mount(
match mount(
Some(&path[1..]),
path,
None::<&str>,
@@ -822,14 +938,13 @@ fn readonly_path(path: &str) -> Result<()> {
}
Err(e) => {
//info!("{}: {}", path, e.as_errno().unwrap().desc());
return Err(e.into());
}
Ok(_) => {}
}
mount::mount(
mount(
Some(&path[1..]),
&path[1..],
None::<&str>,
@@ -839,3 +954,272 @@ fn readonly_path(path: &str) -> Result<()> {
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use crate::skip_if_not_root;
use std::os::unix::io::AsRawFd;
use tempfile::tempdir;
#[test]
#[serial(chdir)]
fn test_init_rootfs() {
let stdout_fd = std::io::stdout().as_raw_fd();
let mut spec = oci::Spec::default();
let cpath = HashMap::new();
let mounts = HashMap::new();
// there is no spec.linux, should fail
let ret = init_rootfs(stdout_fd, &spec, &cpath, &mounts, true);
assert!(
ret.is_err(),
"Should fail: there is no spec.linux. Got: {:?}",
ret
);
// there is no spec.Root, should fail
spec.linux = Some(oci::Linux::default());
let ret = init_rootfs(stdout_fd, &spec, &cpath, &mounts, true);
assert!(
ret.is_err(),
"should fail: there is no spec.Root. Got: {:?}",
ret
);
let rootfs = tempdir().unwrap();
let ret = fs::create_dir(rootfs.path().join("dev"));
assert!(ret.is_ok(), "Got: {:?}", ret);
spec.root = Some(oci::Root {
path: rootfs.path().to_str().unwrap().to_string(),
readonly: false,
});
// there is no spec.mounts, but should pass
let ret = init_rootfs(stdout_fd, &spec, &cpath, &mounts, true);
assert!(ret.is_ok(), "Should pass. Got: {:?}", ret);
let _ = fs::remove_dir_all(rootfs.path().join("dev"));
let _ = fs::create_dir(rootfs.path().join("dev"));
// Adding bad mount point to spec.mounts
spec.mounts.push(oci::Mount {
destination: "error".into(),
r#type: "bind".into(),
source: "error".into(),
options: vec!["shared".into(), "rw".into(), "dev".into()],
});
// destination doesn't start with /, should fail
let ret = init_rootfs(stdout_fd, &spec, &cpath, &mounts, true);
assert!(
ret.is_err(),
"Should fail: destination doesn't start with '/'. Got: {:?}",
ret
);
spec.mounts.pop();
let _ = fs::remove_dir_all(rootfs.path().join("dev"));
let _ = fs::create_dir(rootfs.path().join("dev"));
// mounting a cgroup
spec.mounts.push(oci::Mount {
destination: "/cgroup".into(),
r#type: "cgroup".into(),
source: "/cgroup".into(),
options: vec!["shared".into()],
});
let ret = init_rootfs(stdout_fd, &spec, &cpath, &mounts, true);
assert!(ret.is_ok(), "Should pass. Got: {:?}", ret);
spec.mounts.pop();
let _ = fs::remove_dir_all(rootfs.path().join("dev"));
let _ = fs::create_dir(rootfs.path().join("dev"));
// mounting /dev
spec.mounts.push(oci::Mount {
destination: "/dev".into(),
r#type: "bind".into(),
source: "/dev".into(),
options: vec!["shared".into()],
});
let ret = init_rootfs(stdout_fd, &spec, &cpath, &mounts, true);
assert!(ret.is_ok(), "Should pass. Got: {:?}", ret);
}
#[test]
#[serial(chdir)]
fn test_mount_cgroups() {
let stdout_fd = std::io::stdout().as_raw_fd();
let mount = oci::Mount {
destination: "/cgroups".to_string(),
r#type: "cgroup".to_string(),
source: "/cgroups".to_string(),
options: vec!["shared".to_string()],
};
let tempdir = tempdir().unwrap();
let rootfs = tempdir.path().to_str().unwrap().to_string();
let flags = MsFlags::MS_RDONLY;
let mut cpath = HashMap::new();
let mut cgroup_mounts = HashMap::new();
cpath.insert("cpu".to_string(), "cpu".to_string());
cpath.insert("memory".to_string(), "memory".to_string());
cgroup_mounts.insert("default".to_string(), "default".to_string());
cgroup_mounts.insert("cpu".to_string(), "cpu".to_string());
cgroup_mounts.insert("memory".to_string(), "memory".to_string());
let ret = fs::create_dir_all(tempdir.path().join("cgroups"));
assert!(ret.is_ok(), "Should pass. Got {:?}", ret);
let ret = fs::create_dir_all(tempdir.path().join("cpu"));
assert!(ret.is_ok(), "Should pass. Got {:?}", ret);
let ret = fs::create_dir_all(tempdir.path().join("memory"));
assert!(ret.is_ok(), "Should pass. Got {:?}", ret);
let ret = mount_cgroups(
stdout_fd,
&mount,
&rootfs,
flags,
"",
&cpath,
&cgroup_mounts,
);
assert!(ret.is_ok(), "Should pass. Got: {:?}", ret);
}
#[test]
#[serial(chdir)]
fn test_pivot_root() {
let ret = pivot_rootfs("/tmp");
assert!(ret.is_ok(), "Should pass. Got: {:?}", ret);
}
#[test]
#[serial(chdir)]
fn test_ms_move_rootfs() {
let ret = ms_move_root("/abc");
assert!(
ret.is_err(),
"Should fail. path doesn't exist. Got: {:?}",
ret
);
let ret = ms_move_root("/tmp");
assert!(ret.is_ok(), "Should pass. Got: {:?}", ret);
}
#[test]
fn test_mask_path() {
let ret = mask_path("abc");
assert!(
ret.is_err(),
"Should fail: path doesn't start with '/'. Got: {:?}",
ret
);
let ret = mask_path("abc/../");
assert!(
ret.is_err(),
"Should fail: path contains '..'. Got: {:?}",
ret
);
let ret = mask_path("/tmp");
assert!(ret.is_ok(), "Should pass. Got: {:?}", ret);
}
#[test]
#[serial(chdir)]
fn test_finish_rootfs() {
let stdout_fd = std::io::stdout().as_raw_fd();
let mut spec = oci::Spec::default();
spec.linux = Some(oci::Linux::default());
spec.linux.as_mut().unwrap().masked_paths = vec!["/tmp".to_string()];
spec.linux.as_mut().unwrap().readonly_paths = vec!["/tmp".to_string()];
spec.root = Some(oci::Root {
path: "/tmp".to_string(),
readonly: true,
});
spec.mounts = vec![oci::Mount {
destination: "/dev".to_string(),
r#type: "bind".to_string(),
source: "/dev".to_string(),
options: vec!["ro".to_string(), "shared".to_string()],
}];
let ret = finish_rootfs(stdout_fd, &spec);
assert!(ret.is_ok(), "Should pass. Got: {:?}", ret);
}
#[test]
fn test_readonly_path() {
let ret = readonly_path("abc");
assert!(ret.is_err(), "Should fail. Got: {:?}", ret);
let ret = readonly_path("../../");
assert!(ret.is_err(), "Should fail. Got: {:?}", ret);
let ret = readonly_path("/tmp");
assert!(ret.is_ok(), "Should pass. Got: {:?}", ret);
}
#[test]
#[serial(chdir)]
fn test_mknod_dev() {
skip_if_not_root!();
let tempdir = tempdir().unwrap();
let olddir = unistd::getcwd().unwrap();
defer!(let _ = unistd::chdir(&olddir););
let _ = unistd::chdir(tempdir.path());
let dev = oci::LinuxDevice {
path: "/fifo".to_string(),
r#type: "c".to_string(),
major: 0,
minor: 0,
file_mode: Some(0660),
uid: Some(unistd::getuid().as_raw()),
gid: Some(unistd::getgid().as_raw()),
};
let ret = mknod_dev(&dev);
assert!(ret.is_ok(), "Should pass. Got: {:?}", ret);
let ret = stat::stat("fifo");
assert!(ret.is_ok(), "Should pass. Got: {:?}", ret);
}
#[test]
fn test_check_proc_mount() {
let mount = oci::Mount {
destination: "/proc".to_string(),
r#type: "bind".to_string(),
source: "/test".to_string(),
options: vec!["shared".to_string()],
};
assert!(check_proc_mount(&mount).is_err());
let mount = oci::Mount {
destination: "/proc/cpuinfo".to_string(),
r#type: "bind".to_string(),
source: "/test".to_string(),
options: vec!["shared".to_string()],
};
assert!(check_proc_mount(&mount).is_ok());
let mount = oci::Mount {
destination: "/proc/test".to_string(),
r#type: "bind".to_string(),
source: "/test".to_string(),
options: vec!["shared".to_string()],
};
assert!(check_proc_mount(&mount).is_err());
}
}

View File

@@ -3,24 +3,17 @@
// SPDX-License-Identifier: Apache-2.0
//
// use std::process::{Stdio, Command, ExitStatus};
use libc::pid_t;
use std::fs::File;
use std::os::unix::io::RawFd;
use std::sync::mpsc::Sender;
// use crate::configs::{Capabilities, Rlimit};
// use crate::cgroups::Manager as CgroupManager;
// use crate::intelrdt::Manager as RdtManager;
use nix::fcntl::{fcntl, FcntlArg, OFlag};
use nix::sys::signal::{self, Signal};
use nix::sys::socket::{self, AddressFamily, SockFlag, SockType};
use nix::sys::wait::{self, WaitStatus};
use nix::unistd::{self, Pid};
use nix::Result;
use nix::Error;
use oci::Process as OCIProcess;
use slog::Logger;
@@ -33,8 +26,6 @@ pub struct Process {
pub exit_pipe_r: Option<RawFd>,
pub exit_pipe_w: Option<RawFd>,
pub extra_files: Vec<File>,
// pub caps: Capabilities,
// pub rlimits: Vec<Rlimit>,
pub term_master: Option<RawFd>,
pub tty: bool,
pub parent_stdin: Option<RawFd>,
@@ -151,11 +142,11 @@ mod tests {
#[test]
fn test_create_extended_pipe() {
// Test the default
let (r, w) = create_extended_pipe(OFlag::O_CLOEXEC, 0).unwrap();
let (_r, _w) = create_extended_pipe(OFlag::O_CLOEXEC, 0).unwrap();
// Test setting to the max size
let max_size = get_pipe_max_size();
let (r, w) = create_extended_pipe(OFlag::O_CLOEXEC, max_size).unwrap();
let (_, w) = create_extended_pipe(OFlag::O_CLOEXEC, max_size).unwrap();
let actual_size = get_pipe_size(w);
assert_eq!(max_size, actual_size);
}

View File

@@ -4,8 +4,6 @@
//
use oci::Spec;
// use crate::configs::namespaces;
// use crate::configs::device::Device;
#[derive(Debug)]
pub struct CreateOpts {
@@ -17,143 +15,3 @@ pub struct CreateOpts {
pub rootless_euid: bool,
pub rootless_cgroup: bool,
}
/*
const WILDCARD: i32 = -1;
lazy_static! {
static ref NAEMSPACEMAPPING: HashMap<&'static str, &'static str> = {
let mut m = HashMap::new();
m.insert(oci::PIDNAMESPACE, namespaces::NEWPID);
m.insert(oci::NETWORKNAMESPACE, namespaces::NEWNET);
m.insert(oci::UTSNAMESPACE, namespaces::NEWUTS);
m.insert(oci::MOUNTNAMESPACE, namespaces::NEWNS);
m.insert(oci::IPCNAMESPACE, namespaces::NEWIPC);
m.insert(oci::USERNAMESPACE, namespaces::NEWUSER);
m.insert(oci::CGROUPNAMESPACE, namespaces::NEWCGROUP);
m
};
static ref MOUNTPROPAGATIONMAPPING: HashMap<&'static str, MsFlags> = {
let mut m = HashMap::new();
m.insert("rprivate", MsFlags::MS_PRIVATE | MsFlags::MS_REC);
m.insert("private", MsFlags::MS_PRIVATE);
m.insert("rslave", MsFlags::MS_SLAVE | MsFlags::MS_REC);
m.insert("slave", MsFlags::MS_SLAVE);
m.insert("rshared", MsFlags::MS_SHARED | MsFlags::MS_REC);
m.insert("shared", MsFlags::MS_SHARED);
m.insert("runbindable", MsFlags::MS_UNBINDABLE | MsFlags::MS_REC);
m.insert("unbindable", MsFlags::MS_UNBINDABLE);
m
};
static ref ALLOWED_DEVICES: Vec<Device> = {
let mut m = Vec::new();
m.push(Device {
r#type: 'c',
major: WILDCARD,
minor: WILDCARD,
permissions: "m",
allow: true,
});
m.push(Device {
r#type: 'b',
major: WILDCARD,
minor: WILDCARD,
permissions: "m",
allow: true,
});
m.push(Device {
r#type: 'c',
path: "/dev/null".to_string(),
major: 1,
minor: 3,
permissions: "rwm",
allow: true,
});
m.push(Device {
r#type: 'c',
path: String::from("/dev/random"),
major: 1,
minor: 8,
permissions: "rwm",
allow: true,
});
m.push(Device {
r#type: 'c',
path: String::from("/dev/full"),
major: 1,
minor: 7,
permissions: "rwm",
allow: true,
});
m.push(Device {
r#type: 'c',
path: String::from("/dev/tty"),
major: 5,
minor: 0,
permissions: "rwm",
allow: true,
});
m.push(Device {
r#type: 'c',
path: String::from("/dev/zero"),
major: 1,
minor: 5,
permissions: "rwm",
allow: true,
});
m.push(Device {
r#type: 'c',
path: String::from("/dev/urandom"),
major: 1,
minor: 9,
permissions: "rwm",
allow: true,
});
m.push(Device {
r#type: 'c',
path: String::from("/dev/console"),
major: 5,
minor: 1,
permissions: "rwm",
allow: true,
});
m.push(Device {
r#type: 'c',
path: String::from(""),
major: 136,
minor: WILDCARD,
permissions: "rwm",
allow: true,
});
m.push(Device {
r#type: 'c',
path: String::from(""),
major: 5,
minor: 2,
permissions: "rwm",
allow: true,
});
m.push(Device {
r#type: 'c',
path: String::from(""),
major: 10,
minor: 200,
permissions: "rwm",
allow: true,
});
m
};
}
*/

View File

@@ -23,7 +23,8 @@ macro_rules! log_child {
let lfd = $fd;
let mut log_str = format_args!($($arg)+).to_string();
log_str.push('\n');
write_count(lfd, log_str.as_bytes(), log_str.len());
// Ignore error writing to the logger, not much we can do
let _ = write_count(lfd, log_str.as_bytes(), log_str.len());
})
}
@@ -142,21 +143,15 @@ pub fn write_sync(fd: RawFd, msg_type: i32, data_str: &str) -> Result<()> {
},
SYNC_DATA => {
let length: i32 = data_str.len() as i32;
match write_count(fd, &length.to_be_bytes(), MSG_SIZE) {
Ok(_count) => (),
Err(e) => {
unistd::close(fd)?;
return Err(anyhow!(e).context("error in send message to process"));
}
}
write_count(fd, &length.to_be_bytes(), MSG_SIZE).or_else(|e| {
unistd::close(fd)?;
Err(anyhow!(e).context("error in send message to process"))
})?;
match write_count(fd, data_str.as_bytes(), data_str.len()) {
Ok(_count) => (),
Err(e) => {
unistd::close(fd)?;
return Err(anyhow!(e).context("error in send message to process"));
}
}
write_count(fd, data_str.as_bytes(), data_str.len()).or_else(|e| {
unistd::close(fd)?;
Err(anyhow!(e).context("error in send message to process"))
})?;
}
_ => (),

View File

@@ -8,7 +8,6 @@ use anyhow::{anyhow, Result};
use lazy_static;
use nix::errno::Errno;
use oci::{LinuxIDMapping, LinuxNamespace, Spec};
use protobuf::RepeatedField;
use std::collections::HashMap;
use std::path::{Component, PathBuf};
@@ -226,7 +225,8 @@ fn rootless_euid_mapping(oci: &Spec) -> Result<()> {
return Err(anyhow!(nix::Error::from_errno(Errno::EINVAL)));
}
if linux.gid_mappings.len() == 0 || linux.gid_mappings.len() == 0 {
if linux.uid_mappings.len() == 0 || linux.gid_mappings.len() == 0 {
// rootless containers requires at least one UID/GID mapping
return Err(anyhow!(nix::Error::from_errno(Errno::EINVAL)));
}

View File

@@ -40,6 +40,36 @@ pub struct agentConfig {
pub unified_cgroup_hierarchy: bool,
}
// parse_cmdline_param parse commandline parameters.
macro_rules! parse_cmdline_param {
// commandline flags, without func to parse the option values
($param:ident, $key:ident, $field:expr) => {
if $param.eq(&$key) {
$field = true;
continue;
}
};
// commandline options, with func to parse the option values
($param:ident, $key:ident, $field:expr, $func:ident) => {
if $param.starts_with(format!("{}=", $key).as_str()) {
let val = $func($param)?;
$field = val;
continue;
}
};
// commandline options, with func to parse the option values, and match func
// to valid the values
($param:ident, $key:ident, $field:expr, $func:ident, $guard:expr) => {
if $param.starts_with(format!("{}=", $key).as_str()) {
let val = $func($param)?;
if $guard(val) {
$field = val;
}
continue;
}
};
}
impl agentConfig {
pub fn new() -> agentConfig {
agentConfig {
@@ -60,51 +90,49 @@ impl agentConfig {
let params: Vec<&str> = cmdline.split_ascii_whitespace().collect();
for param in params.iter() {
// parse cmdline flags
if param.eq(&DEBUG_CONSOLE_FLAG) {
self.debug_console = true;
}
if param.eq(&DEV_MODE_FLAG) {
self.dev_mode = true;
}
parse_cmdline_param!(param, DEBUG_CONSOLE_FLAG, self.debug_console);
parse_cmdline_param!(param, DEV_MODE_FLAG, self.dev_mode);
// parse cmdline options
if param.starts_with(format!("{}=", LOG_LEVEL_OPTION).as_str()) {
let level = get_log_level(param)?;
self.log_level = level;
}
parse_cmdline_param!(param, LOG_LEVEL_OPTION, self.log_level, get_log_level);
if param.starts_with(format!("{}=", HOTPLUG_TIMOUT_OPTION).as_str()) {
let hotplugTimeout = get_hotplug_timeout(param)?;
// ensure the timeout is a positive value
if hotplugTimeout.as_secs() > 0 {
self.hotplug_timeout = hotplugTimeout;
}
}
// ensure the timeout is a positive value
parse_cmdline_param!(
param,
HOTPLUG_TIMOUT_OPTION,
self.hotplug_timeout,
get_hotplug_timeout,
|hotplugTimeout: time::Duration| hotplugTimeout.as_secs() > 0
);
if param.starts_with(format!("{}=", DEBUG_CONSOLE_VPORT_OPTION).as_str()) {
let port = get_vsock_port(param)?;
if port > 0 {
self.debug_console_vport = port;
}
}
// vsock port should be positive values
parse_cmdline_param!(
param,
DEBUG_CONSOLE_VPORT_OPTION,
self.debug_console_vport,
get_vsock_port,
|port| port > 0
);
parse_cmdline_param!(
param,
LOG_VPORT_OPTION,
self.log_vport,
get_vsock_port,
|port| port > 0
);
if param.starts_with(format!("{}=", LOG_VPORT_OPTION).as_str()) {
let port = get_vsock_port(param)?;
if port > 0 {
self.log_vport = port;
}
}
if param.starts_with(format!("{}=", CONTAINER_PIPE_SIZE_OPTION).as_str()) {
let container_pipe_size = get_container_pipe_size(param)?;
self.container_pipe_size = container_pipe_size
}
if param.starts_with(format!("{}=", UNIFIED_CGROUP_HIERARCHY_OPTION).as_str()) {
let b = get_bool_value(param, false);
self.unified_cgroup_hierarchy = b;
}
parse_cmdline_param!(
param,
CONTAINER_PIPE_SIZE_OPTION,
self.container_pipe_size,
get_container_pipe_size
);
parse_cmdline_param!(
param,
UNIFIED_CGROUP_HIERARCHY_OPTION,
self.unified_cgroup_hierarchy,
get_bool_value
);
}
if let Ok(addr) = env::var(SERVER_ADDR_ENV_VAR) {
@@ -185,32 +213,26 @@ fn get_hotplug_timeout(param: &str) -> Result<time::Duration> {
Ok(time::Duration::from_secs(value.unwrap()))
}
fn get_bool_value(param: &str, default: bool) -> bool {
fn get_bool_value(param: &str) -> Result<bool> {
let fields: Vec<&str> = param.split("=").collect();
if fields.len() != 2 {
return default;
return Ok(false);
}
let v = fields[1];
// bool
let t: std::result::Result<bool, std::str::ParseBoolError> = v.parse();
if t.is_ok() {
return t.unwrap();
}
// integer
let i: std::result::Result<u64, std::num::ParseIntError> = v.parse();
if i.is_err() {
return default;
}
// only `0` returns false, otherwise returns true
match i.unwrap() {
0 => false,
_ => true,
}
// first try to parse as bool value
v.parse::<bool>().or_else(|_err1| {
// then try to parse as integer value
v.parse::<u64>().or_else(|_err2| Ok(0)).and_then(|v| {
// only `0` returns false, otherwise returns true
Ok(match v {
0 => false,
_ => true,
})
})
})
}
fn get_container_pipe_size(param: &str) -> Result<i32> {

View File

@@ -28,8 +28,15 @@ macro_rules! sl {
const VM_ROOTFS: &str = "/";
struct DevIndexEntry {
idx: usize,
residx: Vec<usize>,
}
struct DevIndex(HashMap<String, DevIndexEntry>);
// DeviceHandler is the type of callback to be defined to handle every type of device driver.
type DeviceHandler = fn(&Device, &mut Spec, &Arc<Mutex<Sandbox>>) -> Result<()>;
type DeviceHandler = fn(&Device, &mut Spec, &Arc<Mutex<Sandbox>>, &DevIndex) -> Result<()>;
// DeviceHandlerList lists the supported drivers.
#[cfg_attr(rustfmt, rustfmt_skip)]
@@ -130,17 +137,14 @@ fn get_device_name(sandbox: &Arc<Mutex<Sandbox>>, dev_addr: &str) -> Result<Stri
info!(sl!(), "Waiting on channel for device notification\n");
let hotplug_timeout = AGENT_CONFIG.read().unwrap().hotplug_timeout;
let dev_name = match rx.recv_timeout(hotplug_timeout) {
Ok(name) => name,
Err(_) => {
GLOBAL_DEVICE_WATCHER.lock().unwrap().remove_entry(dev_addr);
return Err(anyhow!(
"Timeout reached after {:?} waiting for device {}",
hotplug_timeout,
dev_addr
));
}
};
let dev_name = rx.recv_timeout(hotplug_timeout).map_err(|_| {
GLOBAL_DEVICE_WATCHER.lock().unwrap().remove_entry(dev_addr);
anyhow!(
"Timeout reached after {:?} waiting for device {}",
hotplug_timeout,
dev_addr
)
})?;
Ok(format!("{}/{}", SYSTEM_DEV_PATH, &dev_name))
}
@@ -194,7 +198,7 @@ fn scan_scsi_bus(scsi_addr: &str) -> Result<()> {
// the same device in the list of devices provided through the OCI spec.
// This is needed to update information about minor/major numbers that cannot
// be predicted from the caller.
fn update_spec_device_list(device: &Device, spec: &mut Spec) -> Result<()> {
fn update_spec_device_list(device: &Device, spec: &mut Spec, devidx: &DevIndex) -> Result<()> {
let major_id: c_uint;
let minor_id: c_uint;
@@ -207,10 +211,10 @@ fn update_spec_device_list(device: &Device, spec: &mut Spec) -> Result<()> {
));
}
let linux = match spec.linux.as_mut() {
None => return Err(anyhow!("Spec didn't container linux field")),
Some(l) => l,
};
let linux = spec
.linux
.as_mut()
.ok_or_else(|| anyhow!("Spec didn't container linux field"))?;
if !Path::new(&device.vm_path).exists() {
return Err(anyhow!("vm_path:{} doesn't exist", device.vm_path));
@@ -228,44 +232,44 @@ fn update_spec_device_list(device: &Device, spec: &mut Spec) -> Result<()> {
"got the device: dev_path: {}, major: {}, minor: {}\n", &device.vm_path, major_id, minor_id
);
let devices = linux.devices.as_mut_slice();
for dev in devices.iter_mut() {
if dev.path == device.container_path {
let host_major = dev.major;
let host_minor = dev.minor;
if let Some(idxdata) = devidx.0.get(device.container_path.as_str()) {
let dev = &mut linux.devices[idxdata.idx];
let host_major = dev.major;
let host_minor = dev.minor;
dev.major = major_id as i64;
dev.minor = minor_id as i64;
dev.major = major_id as i64;
dev.minor = minor_id as i64;
info!(
sl!(),
"change the device from major: {} minor: {} to vm device major: {} minor: {}",
host_major,
host_minor,
major_id,
minor_id
);
// Resources must be updated since they are used to identify
// the device in the devices cgroup.
for ridx in &idxdata.residx {
// unwrap is safe, because residx would be empty if there
// were no resources
let res = &mut linux.resources.as_mut().unwrap().devices[*ridx];
res.major = Some(major_id as i64);
res.minor = Some(minor_id as i64);
info!(
sl!(),
"change the device from major: {} minor: {} to vm device major: {} minor: {}",
host_major,
host_minor,
major_id,
minor_id
"set resources for device major: {} minor: {}\n", major_id, minor_id
);
// Resources must be updated since they are used to identify the
// device in the devices cgroup.
if let Some(res) = linux.resources.as_mut() {
let ds = res.devices.as_mut_slice();
for d in ds.iter_mut() {
if d.major == Some(host_major) && d.minor == Some(host_minor) {
d.major = Some(major_id as i64);
d.minor = Some(minor_id as i64);
info!(
sl!(),
"set resources for device major: {} minor: {}\n", major_id, minor_id
);
}
}
}
}
Ok(())
} else {
Err(anyhow!(
"Should have found a matching device {} in the spec",
device.vm_path
))
}
Ok(())
}
// device.Id should be the predicted device name (vda, vdb, ...)
@@ -274,12 +278,13 @@ fn virtiommio_blk_device_handler(
device: &Device,
spec: &mut Spec,
_sandbox: &Arc<Mutex<Sandbox>>,
devidx: &DevIndex,
) -> Result<()> {
if device.vm_path == "" {
return Err(anyhow!("Invalid path for virtio mmio blk device"));
}
update_spec_device_list(device, spec)
update_spec_device_list(device, spec, devidx)
}
// device.Id should be the PCI address in the format "bridgeAddr/deviceAddr".
@@ -289,6 +294,7 @@ fn virtio_blk_device_handler(
device: &Device,
spec: &mut Spec,
sandbox: &Arc<Mutex<Sandbox>>,
devidx: &DevIndex,
) -> Result<()> {
let mut dev = device.clone();
@@ -298,7 +304,7 @@ fn virtio_blk_device_handler(
dev.vm_path = get_pci_device_name(sandbox, &device.id)?;
}
update_spec_device_list(&dev, spec)
update_spec_device_list(&dev, spec, devidx)
}
// device.Id should be the SCSI address of the disk in the format "scsiID:lunID"
@@ -306,22 +312,49 @@ fn virtio_scsi_device_handler(
device: &Device,
spec: &mut Spec,
sandbox: &Arc<Mutex<Sandbox>>,
devidx: &DevIndex,
) -> Result<()> {
let mut dev = device.clone();
dev.vm_path = get_scsi_device_name(sandbox, &device.id)?;
update_spec_device_list(&dev, spec)
update_spec_device_list(&dev, spec, devidx)
}
fn virtio_nvdimm_device_handler(
device: &Device,
spec: &mut Spec,
_sandbox: &Arc<Mutex<Sandbox>>,
devidx: &DevIndex,
) -> Result<()> {
if device.vm_path == "" {
return Err(anyhow!("Invalid path for nvdimm device"));
}
update_spec_device_list(device, spec)
update_spec_device_list(device, spec, devidx)
}
impl DevIndex {
fn new(spec: &Spec) -> DevIndex {
let mut map = HashMap::new();
for linux in spec.linux.as_ref() {
for (i, d) in linux.devices.iter().enumerate() {
let mut residx = Vec::new();
for linuxres in linux.resources.as_ref() {
for (j, r) in linuxres.devices.iter().enumerate() {
if r.r#type == d.r#type
&& r.major == Some(d.major)
&& r.minor == Some(d.minor)
{
residx.push(j);
}
}
}
map.insert(d.path.clone(), DevIndexEntry { idx: i, residx });
}
}
DevIndex(map)
}
}
pub fn add_devices(
@@ -329,14 +362,21 @@ pub fn add_devices(
spec: &mut Spec,
sandbox: &Arc<Mutex<Sandbox>>,
) -> Result<()> {
let devidx = DevIndex::new(spec);
for device in devices.iter() {
add_device(device, spec, sandbox)?;
add_device(device, spec, sandbox, &devidx)?;
}
Ok(())
}
fn add_device(device: &Device, spec: &mut Spec, sandbox: &Arc<Mutex<Sandbox>>) -> Result<()> {
fn add_device(
device: &Device,
spec: &mut Spec,
sandbox: &Arc<Mutex<Sandbox>>,
devidx: &DevIndex,
) -> Result<()> {
// log before validation to help with debugging gRPC protocol version differences.
info!(sl!(), "device-id: {}, device-type: {}, device-vm-path: {}, device-container-path: {}, device-options: {:?}",
device.id, device.field_type, device.vm_path, device.container_path, device.options);
@@ -355,7 +395,7 @@ fn add_device(device: &Device, spec: &mut Spec, sandbox: &Arc<Mutex<Sandbox>>) -
match DEVICEHANDLERLIST.get(device.field_type.as_str()) {
None => Err(anyhow!("Unknown device type {}", device.field_type)),
Some(dev_handler) => dev_handler(device, spec, sandbox),
Some(dev_handler) => dev_handler(device, spec, sandbox, devidx),
}
}
@@ -368,10 +408,10 @@ pub fn update_device_cgroup(spec: &mut Spec) -> Result<()> {
let major = stat::major(rdev) as i64;
let minor = stat::minor(rdev) as i64;
let linux = match spec.linux.as_mut() {
None => return Err(anyhow!("Spec didn't container linux field")),
Some(l) => l,
};
let linux = spec
.linux
.as_mut()
.ok_or_else(|| anyhow!("Spec didn't container linux field"))?;
if linux.resources.is_none() {
linux.resources = Some(LinuxResources::default());
@@ -413,4 +453,263 @@ mod tests {
assert_eq!(devices[0].major, Some(major));
assert_eq!(devices[0].minor, Some(minor));
}
#[test]
fn test_update_spec_device_list() {
let (major, minor) = (7, 2);
let mut device = Device::default();
let mut spec = Spec::default();
// container_path empty
let devidx = DevIndex::new(&spec);
let res = update_spec_device_list(&device, &mut spec, &devidx);
assert!(res.is_err());
device.container_path = "/dev/null".to_string();
// linux is empty
let devidx = DevIndex::new(&spec);
let res = update_spec_device_list(&device, &mut spec, &devidx);
assert!(res.is_err());
spec.linux = Some(Linux::default());
// linux.devices is empty
let devidx = DevIndex::new(&spec);
let res = update_spec_device_list(&device, &mut spec, &devidx);
assert!(res.is_err());
spec.linux.as_mut().unwrap().devices = vec![oci::LinuxDevice {
path: "/dev/null2".to_string(),
major,
minor,
..oci::LinuxDevice::default()
}];
// vm_path empty
let devidx = DevIndex::new(&spec);
let res = update_spec_device_list(&device, &mut spec, &devidx);
assert!(res.is_err());
device.vm_path = "/dev/null".to_string();
// guest and host path are not the same
let devidx = DevIndex::new(&spec);
let res = update_spec_device_list(&device, &mut spec, &devidx);
assert!(res.is_err(), "device={:?} spec={:?}", device, spec);
spec.linux.as_mut().unwrap().devices[0].path = device.container_path.clone();
// spec.linux.resources is empty
let devidx = DevIndex::new(&spec);
let res = update_spec_device_list(&device, &mut spec, &devidx);
assert!(res.is_ok());
// update both devices and cgroup lists
spec.linux.as_mut().unwrap().devices = vec![oci::LinuxDevice {
path: device.container_path.clone(),
major,
minor,
..oci::LinuxDevice::default()
}];
spec.linux.as_mut().unwrap().resources = Some(oci::LinuxResources {
devices: vec![oci::LinuxDeviceCgroup {
major: Some(major),
minor: Some(minor),
..oci::LinuxDeviceCgroup::default()
}],
..oci::LinuxResources::default()
});
let devidx = DevIndex::new(&spec);
let res = update_spec_device_list(&device, &mut spec, &devidx);
assert!(res.is_ok());
}
#[test]
fn test_update_spec_device_list_guest_host_conflict() {
let null_rdev = fs::metadata("/dev/null").unwrap().rdev();
let zero_rdev = fs::metadata("/dev/zero").unwrap().rdev();
let full_rdev = fs::metadata("/dev/full").unwrap().rdev();
let host_major_a = stat::major(null_rdev) as i64;
let host_minor_a = stat::minor(null_rdev) as i64;
let host_major_b = stat::major(zero_rdev) as i64;
let host_minor_b = stat::minor(zero_rdev) as i64;
let mut spec = Spec {
linux: Some(Linux {
devices: vec![
oci::LinuxDevice {
path: "/dev/a".to_string(),
r#type: "c".to_string(),
major: host_major_a,
minor: host_minor_a,
..oci::LinuxDevice::default()
},
oci::LinuxDevice {
path: "/dev/b".to_string(),
r#type: "c".to_string(),
major: host_major_b,
minor: host_minor_b,
..oci::LinuxDevice::default()
},
],
resources: Some(LinuxResources {
devices: vec![
oci::LinuxDeviceCgroup {
r#type: "c".to_string(),
major: Some(host_major_a),
minor: Some(host_minor_a),
..oci::LinuxDeviceCgroup::default()
},
oci::LinuxDeviceCgroup {
r#type: "c".to_string(),
major: Some(host_major_b),
minor: Some(host_minor_b),
..oci::LinuxDeviceCgroup::default()
},
],
..LinuxResources::default()
}),
..Linux::default()
}),
..Spec::default()
};
let devidx = DevIndex::new(&spec);
let dev_a = Device {
container_path: "/dev/a".to_string(),
vm_path: "/dev/zero".to_string(),
..Device::default()
};
let guest_major_a = stat::major(zero_rdev) as i64;
let guest_minor_a = stat::minor(zero_rdev) as i64;
let dev_b = Device {
container_path: "/dev/b".to_string(),
vm_path: "/dev/full".to_string(),
..Device::default()
};
let guest_major_b = stat::major(full_rdev) as i64;
let guest_minor_b = stat::minor(full_rdev) as i64;
let specdevices = &spec.linux.as_ref().unwrap().devices;
assert_eq!(host_major_a, specdevices[0].major);
assert_eq!(host_minor_a, specdevices[0].minor);
assert_eq!(host_major_b, specdevices[1].major);
assert_eq!(host_minor_b, specdevices[1].minor);
let specresources = spec.linux.as_ref().unwrap().resources.as_ref().unwrap();
assert_eq!(Some(host_major_a), specresources.devices[0].major);
assert_eq!(Some(host_minor_a), specresources.devices[0].minor);
assert_eq!(Some(host_major_b), specresources.devices[1].major);
assert_eq!(Some(host_minor_b), specresources.devices[1].minor);
let res = update_spec_device_list(&dev_a, &mut spec, &devidx);
assert!(res.is_ok());
let specdevices = &spec.linux.as_ref().unwrap().devices;
assert_eq!(guest_major_a, specdevices[0].major);
assert_eq!(guest_minor_a, specdevices[0].minor);
assert_eq!(host_major_b, specdevices[1].major);
assert_eq!(host_minor_b, specdevices[1].minor);
let specresources = spec.linux.as_ref().unwrap().resources.as_ref().unwrap();
assert_eq!(Some(guest_major_a), specresources.devices[0].major);
assert_eq!(Some(guest_minor_a), specresources.devices[0].minor);
assert_eq!(Some(host_major_b), specresources.devices[1].major);
assert_eq!(Some(host_minor_b), specresources.devices[1].minor);
let res = update_spec_device_list(&dev_b, &mut spec, &devidx);
assert!(res.is_ok());
let specdevices = &spec.linux.as_ref().unwrap().devices;
assert_eq!(guest_major_a, specdevices[0].major);
assert_eq!(guest_minor_a, specdevices[0].minor);
assert_eq!(guest_major_b, specdevices[1].major);
assert_eq!(guest_minor_b, specdevices[1].minor);
let specresources = spec.linux.as_ref().unwrap().resources.as_ref().unwrap();
assert_eq!(Some(guest_major_a), specresources.devices[0].major);
assert_eq!(Some(guest_minor_a), specresources.devices[0].minor);
assert_eq!(Some(guest_major_b), specresources.devices[1].major);
assert_eq!(Some(guest_minor_b), specresources.devices[1].minor);
}
#[test]
fn test_update_spec_device_list_char_block_conflict() {
let null_rdev = fs::metadata("/dev/null").unwrap().rdev();
let guest_major = stat::major(null_rdev) as i64;
let guest_minor = stat::minor(null_rdev) as i64;
let host_major: i64 = 99;
let host_minor: i64 = 99;
let mut spec = Spec {
linux: Some(Linux {
devices: vec![
oci::LinuxDevice {
path: "/dev/char".to_string(),
r#type: "c".to_string(),
major: host_major,
minor: host_minor,
..oci::LinuxDevice::default()
},
oci::LinuxDevice {
path: "/dev/block".to_string(),
r#type: "b".to_string(),
major: host_major,
minor: host_minor,
..oci::LinuxDevice::default()
},
],
resources: Some(LinuxResources {
devices: vec![
LinuxDeviceCgroup {
r#type: "c".to_string(),
major: Some(host_major),
minor: Some(host_minor),
..LinuxDeviceCgroup::default()
},
LinuxDeviceCgroup {
r#type: "b".to_string(),
major: Some(host_major),
minor: Some(host_minor),
..LinuxDeviceCgroup::default()
},
],
..LinuxResources::default()
}),
..Linux::default()
}),
..Spec::default()
};
let devidx = DevIndex::new(&spec);
let dev = Device {
container_path: "/dev/char".to_string(),
vm_path: "/dev/null".to_string(),
..Device::default()
};
let specresources = spec.linux.as_ref().unwrap().resources.as_ref().unwrap();
assert_eq!(Some(host_major), specresources.devices[0].major);
assert_eq!(Some(host_minor), specresources.devices[0].minor);
assert_eq!(Some(host_major), specresources.devices[1].major);
assert_eq!(Some(host_minor), specresources.devices[1].minor);
let res = update_spec_device_list(&dev, &mut spec, &devidx);
assert!(res.is_ok());
// Only the char device, not the block device should be updated
let specresources = spec.linux.as_ref().unwrap().resources.as_ref().unwrap();
assert_eq!(Some(guest_major), specresources.devices[0].major);
assert_eq!(Some(guest_minor), specresources.devices[0].minor);
assert_eq!(Some(host_major), specresources.devices[1].major);
assert_eq!(Some(host_minor), specresources.devices[1].minor);
}
}

View File

@@ -25,22 +25,25 @@ extern crate scopeguard;
#[macro_use]
extern crate slog;
#[macro_use]
extern crate netlink;
use crate::netlink::{RtnlHandle, NETLINK_ROUTE};
use anyhow::{anyhow, Context, Result};
use nix::fcntl::{self, OFlag};
use nix::fcntl::{FcntlArg, FdFlag};
use nix::libc::{STDERR_FILENO, STDIN_FILENO, STDOUT_FILENO};
use nix::pty;
use nix::sys::select::{select, FdSet};
use nix::sys::socket::{self, AddressFamily, SockAddr, SockFlag, SockType};
use nix::sys::wait::{self, WaitStatus};
use nix::unistd;
use nix::unistd::dup;
use nix::unistd::{self, close, dup, dup2, fork, setsid, ForkResult};
use prctl::set_child_subreaper;
use signal_hook::{iterator::Signals, SIGCHLD};
use std::collections::HashMap;
use std::env;
use std::ffi::OsStr;
use std::ffi::{CStr, CString, OsStr};
use std::fs::{self, File};
use std::io::{Read, Write};
use std::os::unix::ffi::OsStrExt;
use std::os::unix::fs as unixfs;
use std::os::unix::io::AsRawFd;
@@ -75,6 +78,8 @@ const NAME: &str = "kata-agent";
const KERNEL_CMDLINE_FILE: &str = "/proc/cmdline";
const CONSOLE_PATH: &str = "/dev/console";
const DEFAULT_BUF_SIZE: usize = 8 * 1024;
lazy_static! {
static ref GLOBAL_DEVICE_WATCHER: Arc<Mutex<HashMap<String, Sender<String>>>> =
Arc::new(Mutex::new(HashMap::new()));
@@ -123,18 +128,8 @@ fn main() -> Result<()> {
// support vsock log
let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC)?;
let writer = unsafe { File::from_raw_fd(wfd) };
let agentConfig = AGENT_CONFIG.clone();
// once parsed cmdline and set the config, release the write lock
// as soon as possible in case other thread would get read lock on
// it.
{
let mut config = agentConfig.write().unwrap();
config.parse_cmdline(KERNEL_CMDLINE_FILE)?;
}
let config = agentConfig.read().unwrap();
let init_mode = unistd::getpid() == Pid::from_raw(1);
if init_mode {
@@ -148,8 +143,25 @@ fn main() -> Result<()> {
// since before do the base mount, it wouldn't access "/proc/cmdline"
// to get the customzied debug level.
let logger = logging::create_logger(NAME, "agent", slog::Level::Debug, writer);
// Must mount proc fs before parsing kernel command line
general_mount(&logger).map_err(|e| {
error!(logger, "fail general mount: {}", e);
e
})?;
let mut config = agentConfig.write().unwrap();
config.parse_cmdline(KERNEL_CMDLINE_FILE)?;
init_agent_as_init(&logger, config.unified_cgroup_hierarchy)?;
} else {
// once parsed cmdline and set the config, release the write lock
// as soon as possible in case other thread would get read lock on
// it.
let mut config = agentConfig.write().unwrap();
config.parse_cmdline(KERNEL_CMDLINE_FILE)?;
}
let config = agentConfig.read().unwrap();
let log_vport = config.log_vport as u32;
let log_handle = thread::spawn(move || -> Result<()> {
@@ -205,7 +217,7 @@ fn start_sandbox(logger: &Logger, config: &agentConfig, init_mode: bool) -> Resu
let handle = builder.spawn(move || {
let shells = shells.lock().unwrap();
let result = setup_debug_console(shells.to_vec(), debug_console_vport);
let result = setup_debug_console(&thread_logger, shells.to_vec(), debug_console_vport);
if result.is_err() {
// Report error, but don't fail
warn!(thread_logger, "failed to setup debug console";
@@ -335,8 +347,13 @@ fn setup_signal_handler(logger: &Logger, sandbox: Arc<Mutex<Sandbox>>) -> Result
// init_agent_as_init will do the initializations such as setting up the rootfs
// when this agent has been run as the init process.
fn init_agent_as_init(logger: &Logger, unified_cgroup_hierarchy: bool) -> Result<()> {
general_mount(logger)?;
cgroups_mount(logger, unified_cgroup_hierarchy)?;
cgroups_mount(logger, unified_cgroup_hierarchy).map_err(|e| {
error!(
logger,
"fail cgroups mount, unified_cgroup_hierarchy {}: {}", unified_cgroup_hierarchy, e
);
e
})?;
fs::remove_file(Path::new("/dev/ptmx"))?;
unixfs::symlink(Path::new("/dev/pts/ptmx"), Path::new("/dev/ptmx"))?;
@@ -393,9 +410,9 @@ use crate::config::agentConfig;
use nix::sys::stat::Mode;
use std::os::unix::io::{FromRawFd, RawFd};
use std::path::PathBuf;
use std::process::{exit, Command, Stdio};
use std::process::exit;
fn setup_debug_console(shells: Vec<String>, port: u32) -> Result<()> {
fn setup_debug_console(logger: &Logger, shells: Vec<String>, port: u32) -> Result<()> {
let mut shell: &str = "";
for sh in shells.iter() {
let binary = PathBuf::from(sh);
@@ -409,7 +426,7 @@ fn setup_debug_console(shells: Vec<String>, port: u32) -> Result<()> {
return Err(anyhow!("no shell found to launch debug console"));
}
let f: RawFd = if port > 0 {
if port > 0 {
let listenfd = socket::socket(
AddressFamily::Vsock,
SockType::Stream,
@@ -419,36 +436,204 @@ fn setup_debug_console(shells: Vec<String>, port: u32) -> Result<()> {
let addr = SockAddr::new_vsock(libc::VMADDR_CID_ANY, port);
socket::bind(listenfd, &addr)?;
socket::listen(listenfd, 1)?;
socket::accept4(listenfd, SockFlag::SOCK_CLOEXEC)?
loop {
let f: RawFd = socket::accept4(listenfd, SockFlag::SOCK_CLOEXEC)?;
match run_debug_console_shell(logger, shell, f) {
Ok(_) => {
info!(logger, "run_debug_console_shell session finished");
}
Err(err) => {
error!(logger, "run_debug_console_shell failed: {:?}", err);
}
}
}
} else {
let mut flags = OFlag::empty();
flags.insert(OFlag::O_RDWR);
flags.insert(OFlag::O_CLOEXEC);
fcntl::open(CONSOLE_PATH, flags, Mode::empty())?
loop {
let f: RawFd = fcntl::open(CONSOLE_PATH, flags, Mode::empty())?;
match run_debug_console_shell(logger, shell, f) {
Ok(_) => {
info!(logger, "run_debug_console_shell session finished");
}
Err(err) => {
error!(logger, "run_debug_console_shell failed: {:?}", err);
}
}
}
};
}
fn io_copy<R: ?Sized, W: ?Sized>(reader: &mut R, writer: &mut W) -> io::Result<u64>
where
R: Read,
W: Write,
{
let mut buf = [0; DEFAULT_BUF_SIZE];
let buf_len;
match reader.read(&mut buf) {
Ok(0) => return Ok(0),
Ok(len) => buf_len = len,
Err(err) => return Err(err),
};
let cmd = Command::new(shell)
.arg("-i")
.stdin(unsafe { Stdio::from_raw_fd(f) })
.stdout(unsafe { Stdio::from_raw_fd(f) })
.stderr(unsafe { Stdio::from_raw_fd(f) })
.spawn();
// write and return
match writer.write_all(&buf[..buf_len]) {
Ok(_) => return Ok(buf_len as u64),
Err(err) => return Err(err),
}
}
let mut cmd = match cmd {
Ok(c) => c,
Err(_) => return Err(anyhow!("failed to spawn shell")),
};
fn run_debug_console_shell(logger: &Logger, shell: &str, socket_fd: RawFd) -> Result<()> {
let pseduo = pty::openpty(None, None)?;
let _ = fcntl::fcntl(pseduo.master, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC));
let _ = fcntl::fcntl(pseduo.slave, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC));
cmd.wait()?;
let slave_fd = pseduo.slave;
return Ok(());
match fork() {
Ok(ForkResult::Child) => {
// create new session with child as session leader
setsid()?;
// dup stdin, stdout, stderr to let child act as a terminal
dup2(slave_fd, STDIN_FILENO)?;
dup2(slave_fd, STDOUT_FILENO)?;
dup2(slave_fd, STDERR_FILENO)?;
// set tty
unsafe {
libc::ioctl(0, libc::TIOCSCTTY);
}
let cmd = CString::new(shell).unwrap();
let args: Vec<&CStr> = vec![];
// run shell
let _ = unistd::execvp(cmd.as_c_str(), args.as_slice()).map_err(|e| match e {
nix::Error::Sys(errno) => {
std::process::exit(errno as i32);
}
_ => std::process::exit(-2),
});
}
Ok(ForkResult::Parent { child: child_pid }) => {
info!(logger, "get debug shell pid {:?}", child_pid);
let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC)?;
let master_fd = pseduo.master;
let debug_shell_logger = logger.clone();
// channel that used to sync between thread and main process
let (tx, rx) = mpsc::channel::<i32>();
// start a thread to do IO copy between socket and pseduo.master
thread::spawn(move || {
let mut master_reader = unsafe { File::from_raw_fd(master_fd) };
let mut master_writer = unsafe { File::from_raw_fd(master_fd) };
let mut socket_reader = unsafe { File::from_raw_fd(socket_fd) };
let mut socket_writer = unsafe { File::from_raw_fd(socket_fd) };
loop {
let mut fd_set = FdSet::new();
fd_set.insert(rfd);
fd_set.insert(master_fd);
fd_set.insert(socket_fd);
match select(
Some(fd_set.highest().unwrap() + 1),
&mut fd_set,
None,
None,
None,
) {
Ok(_) => (),
Err(e) => {
if e == nix::Error::from(nix::errno::Errno::EINTR) {
continue;
} else {
error!(debug_shell_logger, "select error {:?}", e);
tx.send(1).unwrap();
break;
}
}
}
if fd_set.contains(rfd) {
info!(
debug_shell_logger,
"debug shell process {} exited", child_pid
);
tx.send(1).unwrap();
break;
}
if fd_set.contains(master_fd) {
match io_copy(&mut master_reader, &mut socket_writer) {
Ok(0) => {
debug!(debug_shell_logger, "master fd closed");
tx.send(1).unwrap();
break;
}
Ok(_) => {}
Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
Err(e) => {
error!(debug_shell_logger, "read master fd error {:?}", e);
tx.send(1).unwrap();
break;
}
}
}
if fd_set.contains(socket_fd) {
match io_copy(&mut socket_reader, &mut master_writer) {
Ok(0) => {
debug!(debug_shell_logger, "socket fd closed");
tx.send(1).unwrap();
break;
}
Ok(_) => {}
Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
Err(e) => {
error!(debug_shell_logger, "read socket fd error {:?}", e);
tx.send(1).unwrap();
break;
}
}
}
}
});
let wait_status = wait::waitpid(child_pid, None);
info!(logger, "debug console process exit code: {:?}", wait_status);
info!(logger, "notify debug monitor thread to exit");
// close pipe to exit select loop
let _ = close(wfd);
// wait for thread exit.
let _ = rx.recv().unwrap();
info!(logger, "debug monitor thread has exited");
// close files
let _ = close(rfd);
let _ = close(master_fd);
let _ = close(slave_fd);
}
Err(err) => {
return Err(anyhow!("fork error: {:?}", err));
}
}
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use std::fs::File;
use std::io::Write;
use tempfile::tempdir;
#[test]
@@ -459,8 +644,9 @@ mod tests {
let shells_ref = SHELLS.clone();
let mut shells = shells_ref.lock().unwrap();
shells.clear();
let logger = slog_scope::logger();
let result = setup_debug_console(shells.to_vec(), 0);
let result = setup_debug_console(&logger, shells.to_vec(), 0);
assert!(result.is_err());
assert_eq!(
@@ -485,8 +671,9 @@ mod tests {
.to_string();
shells.push(shell);
let logger = slog_scope::logger();
let result = setup_debug_console(shells.to_vec(), 0);
let result = setup_debug_console(&logger, shells.to_vec(), 0);
assert!(result.is_err());
assert_eq!(

View File

@@ -125,7 +125,7 @@ lazy_static! {
// type of storage driver.
type StorageHandler = fn(&Logger, &Storage, Arc<Mutex<Sandbox>>) -> Result<String>;
// StorageHandlerList lists the supported drivers.
// STORAGEHANDLERLIST lists the supported drivers.
#[cfg_attr(rustfmt, rustfmt_skip)]
lazy_static! {
pub static ref STORAGEHANDLERLIST: HashMap<&'static str, StorageHandler> = {
@@ -251,10 +251,7 @@ fn ephemeral_storage_handler(
return Ok("".to_string());
}
if let Err(err) = fs::create_dir_all(Path::new(&storage.mount_point)) {
return Err(err.into());
}
fs::create_dir_all(Path::new(&storage.mount_point))?;
common_storage_handler(logger, storage)?;
Ok("".to_string())
@@ -449,21 +446,17 @@ pub fn add_storages(
"subsystem" => "storage",
"storage-type" => handler_name.to_owned()));
let handler = match STORAGEHANDLERLIST.get(&handler_name.as_str()) {
None => {
return Err(anyhow!(
let handler = STORAGEHANDLERLIST
.get(&handler_name.as_str())
.ok_or_else(|| {
anyhow!(
"Failed to find the storage handler {}",
storage.driver.to_owned()
));
}
Some(f) => f,
};
)
})?;
let mount_point = match handler(&logger, &storage, sandbox.clone()) {
// Todo need to rollback the mounted storage if err met.
Err(e) => return Err(e),
Ok(m) => m,
};
// Todo need to rollback the mounted storage if err met.
let mount_point = handler(&logger, &storage, sandbox.clone())?;
if mount_point.len() > 0 {
mount_list.push(mount_point);
@@ -482,15 +475,18 @@ fn mount_to_rootfs(logger: &Logger, m: &INIT_MOUNT) -> Result<()> {
fs::create_dir_all(Path::new(m.dest)).context("could not create directory")?;
if let Err(err) = bare_mount.mount() {
bare_mount.mount().or_else(|e| {
if m.src != "dev" {
return Err(err.into());
return Err(e);
}
error!(
logger,
"Could not mount filesystem from {} to {}", m.src, m.dest
);
}
Ok(())
})?;
Ok(())
}
@@ -510,7 +506,7 @@ pub fn get_mount_fs_type(mount_point: &str) -> Result<String> {
get_mount_fs_type_from_file(PROC_MOUNTSTATS, mount_point)
}
// get_mount_fs_type returns the FS type corresponding to the passed mount point and
// get_mount_fs_type_from_file returns the FS type corresponding to the passed mount point and
// any error ecountered.
pub fn get_mount_fs_type_from_file(mount_file: &str, mount_point: &str) -> Result<String> {
if mount_point == "" {
@@ -643,7 +639,7 @@ pub fn cgroups_mount(logger: &Logger, unified_cgroup_hierarchy: bool) -> Result<
// Enable memory hierarchical account.
// For more information see https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt
online_device("/sys/fs/cgroup/memory//memory.use_hierarchy")?;
online_device("/sys/fs/cgroup/memory/memory.use_hierarchy")?;
Ok(())
}
@@ -654,15 +650,14 @@ pub fn remove_mounts(mounts: &Vec<String>) -> Result<()> {
Ok(())
}
// ensureDestinationExists will recursively create a given mountpoint. If directories
// are created, their permissions are initialized to mountPerm
// ensure_destination_exists will recursively create a given mountpoint. If directories
// are created, their permissions are initialized to mountPerm(0755)
fn ensure_destination_exists(destination: &str, fs_type: &str) -> Result<()> {
let d = Path::new(destination);
if !d.exists() {
let dir = match d.parent() {
Some(d) => d,
None => return Err(anyhow!("mount destination {} doesn't exist", destination)),
};
let dir = d
.parent()
.ok_or_else(|| anyhow!("mount destination {} doesn't exist", destination))?;
if !dir.exists() {
fs::create_dir_all(dir).context(format!("create dir all failed on {:?}", dir))?;
}
@@ -1088,7 +1083,7 @@ mod tests {
#[test]
fn test_get_cgroup_v2_mounts() {
let dir = tempdir().expect("failed to create tmpdir");
let _ = tempdir().expect("failed to create tmpdir");
let drain = slog::Discard;
let logger = slog::Logger::root(drain, o!());
let result = get_cgroup_mounts(&logger, "", true);

View File

@@ -3,15 +3,15 @@
// SPDX-License-Identifier: Apache-2.0
//
use anyhow::{anyhow, Result};
use nix::mount::MsFlags;
use nix::sched::{unshare, CloneFlags};
use nix::unistd::{getpid, gettid};
use std::fmt;
use std::fs;
use std::fs::File;
use std::os::unix::io::AsRawFd;
use std::path::{Path, PathBuf};
use std::thread;
use std::thread::{self};
use crate::mount::{BareMount, FLAGS};
use slog::Logger;
@@ -75,12 +75,10 @@ impl Namespace {
self
}
// setup_persistent_ns creates persistent namespace without switching to it.
// setup creates persistent namespace without switching to it.
// Note, pid namespaces cannot be persisted.
pub fn setup(mut self) -> Result<Self, String> {
if let Err(err) = fs::create_dir_all(&self.persistent_ns_dir) {
return Err(err.to_string());
}
pub fn setup(mut self) -> Result<Self> {
fs::create_dir_all(&self.persistent_ns_dir)?;
let ns_path = PathBuf::from(&self.persistent_ns_dir);
let ns_type = self.ns_type.clone();
@@ -88,33 +86,23 @@ impl Namespace {
let new_ns_path = ns_path.join(&ns_type.get());
if let Err(err) = File::create(new_ns_path.as_path()) {
return Err(err.to_string());
}
File::create(new_ns_path.as_path())?;
self.path = new_ns_path.clone().into_os_string().into_string().unwrap();
let hostname = self.hostname.clone();
let new_thread = thread::spawn(move || {
let new_thread = thread::spawn(move || -> Result<()> {
let origin_ns_path = get_current_thread_ns_path(&ns_type.get());
let _origin_ns_fd = match File::open(Path::new(&origin_ns_path)) {
Err(err) => return Err(err.to_string()),
Ok(file) => file.as_raw_fd(),
};
File::open(Path::new(&origin_ns_path))?;
// Create a new netns on the current thread.
let cf = ns_type.get_flags().clone();
if let Err(err) = unshare(cf) {
return Err(err.to_string());
}
unshare(cf)?;
if ns_type == NamespaceType::UTS && hostname.is_some() {
match nix::unistd::sethostname(hostname.unwrap()) {
Err(err) => return Err(err.to_string()),
Ok(_) => (),
}
nix::unistd::sethostname(hostname.unwrap())?;
}
// Bind mount the new namespace from the current thread onto the mount point to persist it.
let source: &str = origin_ns_path.as_str();
@@ -131,23 +119,21 @@ impl Namespace {
};
let bare_mount = BareMount::new(source, destination, "none", flags, "", &logger);
if let Err(err) = bare_mount.mount() {
return Err(format!(
bare_mount.mount().map_err(|e| {
anyhow!(
"Failed to mount {} to {} with err:{:?}",
source, destination, err
));
}
source,
destination,
e
)
})?;
Ok(())
});
match new_thread.join() {
Ok(t) => match t {
Err(err) => return Err(err),
Ok(()) => (),
},
Err(err) => return Err(format!("Failed to join thread {:?}!", err)),
}
new_thread
.join()
.map_err(|e| anyhow!("Failed to join thread {:?}!", e))??;
Ok(self)
}

View File

@@ -3,15 +3,13 @@
// SPDX-License-Identifier: Apache-2.0
//
use anyhow::{anyhow, Context, Result};
use nix::mount::{self, MntFlags, MsFlags};
use anyhow::{anyhow, Result};
use nix::mount::{self, MsFlags};
use protocols::types::{Interface, Route};
use slog::Logger;
use std::collections::HashMap;
use std::fs;
use crate::Sandbox;
const KATA_GUEST_SANDBOX_DNS_FILE: &str = "/run/kata-containers/sandbox/resolv.conf";
const GUEST_DNS_FILE: &str = "/etc/resolv.conf";

File diff suppressed because it is too large Load Diff

View File

@@ -7,10 +7,8 @@
use crate::linux_abi::*;
use crate::mount::{get_mount_fs_type, remove_mounts, TYPEROOTFS};
use crate::namespace::Namespace;
use crate::namespace::NSTYPEPID;
use crate::network::Network;
use anyhow::{anyhow, Context, Result};
use cgroups;
use libc::pid_t;
use netlink::{RtnlHandle, NETLINK_ROUTE};
use oci::{Hook, Hooks};
@@ -145,16 +143,10 @@ impl Sandbox {
// It's assumed that caller is calling this method after
// acquiring a lock on sandbox.
pub fn unset_and_remove_sandbox_storage(&mut self, path: &str) -> Result<()> {
match self.unset_sandbox_storage(path) {
Ok(res) => {
if res {
return self.remove_sandbox_storage(path);
}
}
Err(err) => {
return Err(err);
}
if self.unset_sandbox_storage(path)? {
return self.remove_sandbox_storage(path);
}
Ok(())
}
@@ -168,23 +160,17 @@ impl Sandbox {
pub fn setup_shared_namespaces(&mut self) -> Result<bool> {
// Set up shared IPC namespace
self.shared_ipcns = match Namespace::new(&self.logger).as_ipc().setup() {
Ok(ns) => ns,
Err(err) => {
return Err(anyhow!(err).context("Failed to setup persistent IPC namespace"));
}
};
self.shared_ipcns = Namespace::new(&self.logger)
.as_ipc()
.setup()
.context("Failed to setup persistent IPC namespace")?;
// // Set up shared UTS namespace
self.shared_utsns = match Namespace::new(&self.logger)
self.shared_utsns = Namespace::new(&self.logger)
.as_uts(self.hostname.as_str())
.setup()
{
Ok(ns) => ns,
Err(err) => {
return Err(anyhow!(err).context("Failed to setup persistent UTS namespace"));
}
};
.context("Failed to setup persistent UTS namespace")?;
Ok(true)
}
@@ -318,10 +304,9 @@ impl Sandbox {
thread::spawn(move || {
for event in rx {
info!(logger, "got an OOM event {:?}", event);
match tx.send(container_id.clone()) {
Err(err) => error!(logger, "failed to send message: {:?}", err),
Ok(_) => {}
}
let _ = tx
.send(container_id.clone())
.map_err(|e| error!(logger, "failed to send message: {:?}", e));
}
});
}

View File

@@ -99,14 +99,14 @@ impl Uevent {
let online_path = format!("{}/{}/online", SYSFS_DIR, &self.devpath);
// It's a memory hot-add event.
if online_path.starts_with(SYSFS_MEMORY_ONLINE_PATH) {
if let Err(e) = online_device(online_path.as_ref()) {
let _ = online_device(online_path.as_ref()).map_err(|e| {
error!(
*logger,
"failed to online device";
"device" => &self.devpath,
"error" => format!("{}", e),
);
}
)
});
return;
}
}

View File

@@ -1,5 +0,0 @@
# Contributing
## This repo is part of [Kata Containers](https://katacontainers.io)
For details on how to contribute to the Kata Containers project, please see the main [contributing document](https://github.com/kata-containers/community/blob/master/CONTRIBUTING.md).

View File

@@ -43,7 +43,8 @@ include $(ARCH_FILE)
PROJECT_TYPE = kata
PROJECT_NAME = Kata Containers
PROJECT_TAG = kata-containers
PROJECT_URL = https://github.com/kata-containers
PROJECT_ORG = $(PROJECT_TAG)
PROJECT_URL = https://github.com/$(PROJECT_ORG)
PROJECT_BUG_URL = $(PROJECT_URL)/kata-containers/issues/new
# list of scripts to install
@@ -82,7 +83,6 @@ QEMUBINDIR := $(PREFIXDEPS)/bin
CLHBINDIR := $(PREFIXDEPS)/bin
FCBINDIR := $(PREFIXDEPS)/bin
ACRNBINDIR := $(PREFIXDEPS)/bin
VIRTIOFSDBINDIR := $(PREFIXDEPS)/bin
SYSCONFDIR := /etc
LOCALSTATEDIR := /var
@@ -94,6 +94,13 @@ COLLECT_SCRIPT = data/kata-collect-data.sh
COLLECT_SCRIPT_SRC = $(COLLECT_SCRIPT).in
GENERATED_FILES += $(COLLECT_SCRIPT)
GENERATED_VARS = \
VERSION \
CONFIG_ACRN_IN \
CONFIG_QEMU_IN \
CONFIG_CLH_IN \
CONFIG_FC_IN \
$(USER_VARS)
SCRIPTS += $(COLLECT_SCRIPT)
SCRIPTS_DIR := $(BINDIR)
@@ -119,25 +126,30 @@ HYPERVISOR_FC = firecracker
JAILER_FC = jailer
HYPERVISOR_QEMU = qemu
HYPERVISOR_CLH = cloud-hypervisor
HYPERVISOR_QEMU_VIRTIOFS = qemu-virtiofs
# Determines which hypervisor is specified in $(CONFIG_FILE).
DEFAULT_HYPERVISOR ?= $(HYPERVISOR_QEMU)
# List of hypervisors this build system can generate configuration for.
HYPERVISORS := $(HYPERVISOR_ACRN) $(HYPERVISOR_FC) $(HYPERVISOR_QEMU) $(HYPERVISOR_QEMU_VIRTIOFS) $(HYPERVISOR_CLH)
HYPERVISORS := $(HYPERVISOR_ACRN) $(HYPERVISOR_FC) $(HYPERVISOR_QEMU) $(HYPERVISOR_CLH)
QEMUPATH := $(QEMUBINDIR)/$(QEMUCMD)
QEMUVALIDHYPERVISORPATHS := [\"$(QEMUPATH)\"]
QEMUVIRTIOFSPATH := $(QEMUBINDIR)/$(QEMUVIRTIOFSCMD)
QEMUVALIDVIRTIOFSPATHS := $(QEMUBINDIR)/$(QEMUVIRTIOFSCMD)
CLHPATH := $(CLHBINDIR)/$(CLHCMD)
CLHVALIDHYPERVISORPATHS := [\"$(CLHBINDIR)/$(CLHCMD)\"]
FCPATH = $(FCBINDIR)/$(FCCMD)
FCVALIDPATHS = [\"$(FCPATH)\"]
FCJAILERPATH = $(FCBINDIR)/$(FCJAILERCMD)
FCVALIDJAILERPATHS = [\"$(FCJAILERPATH)\"]
ACRNPATH := $(ACRNBINDIR)/$(ACRNCMD)
ACRNVALIDHYPERVISORPATHS := [\"$(ACRNPATH)\"]
ACRNCTLPATH := $(ACRNBINDIR)/$(ACRNCTLCMD)
ACRNVALIDCTLPATHS := [\"$(ACRNCTLPATH)\"]
SHIMCMD := $(BIN_PREFIX)-shim
SHIMPATH := $(PKGLIBEXECDIR)/$(SHIMCMD)
@@ -160,6 +172,7 @@ DEFMEMSZ := 2048
DEFMEMSLOTS := 10
#Default number of bridges
DEFBRIDGES := 1
DEFENABLEANNOTATIONS := []
DEFDISABLEGUESTSECCOMP := true
#Default experimental features enabled
DEFAULTEXPFEATURES := []
@@ -170,21 +183,26 @@ DEFENTROPYSOURCE := /dev/urandom
DEFDISABLEBLOCK := false
DEFSHAREDFS := virtio-9p
DEFSHAREDFS_QEMU_VIRTIOFS := virtio-fs
DEFVIRTIOFSDAEMON := $(VIRTIOFSDBINDIR)/virtiofsd
DEFVIRTIOFSDAEMON := $(LIBEXECDIR)/kata-qemu/virtiofsd
DEFVALIDVIRTIOFSDAEMONPATHS := [\"$(DEFVIRTIOFSDAEMON)\"]
# Default DAX mapping cache size in MiB
DEFVIRTIOFSCACHESIZE := 1024
#if value is 0, DAX is not enabled
DEFVIRTIOFSCACHESIZE := 0
DEFVIRTIOFSCACHE ?= auto
# Format example:
# [\"-o\", \"arg1=xxx,arg2\", \"-o\", \"hello world\", \"--arg3=yyy\"]
#
# see `virtiofsd -h` for possible options.
# Make sure you quote args.
DEFVIRTIOFSEXTRAARGS ?= []
DEFVIRTIOFSEXTRAARGS ?= [\"--thread-pool-size=1\"]
DEFENABLEIOTHREADS := false
DEFENABLEMEMPREALLOC := false
DEFENABLEHUGEPAGES := false
DEFENABLEVHOSTUSERSTORE := false
DEFVHOSTUSERSTOREPATH := $(PKGRUNDIR)/vhost-user
DEFVALIDVHOSTUSERSTOREPATHS := [\"$(DEFVHOSTUSERSTOREPATH)\"]
DEFFILEMEMBACKEND := ""
DEFVALIDFILEMEMBACKENDS := [\"$(DEFFILEMEMBACKEND)\"]
DEFENABLESWAP := false
DEFENABLEDEBUG := false
DEFDISABLENESTINGCHECKS := false
@@ -244,28 +262,6 @@ ifneq (,$(QEMUCMD))
KERNELPATH = $(KERNELDIR)/$(KERNELNAME)
endif
ifneq (,$(QEMUVIRTIOFSCMD))
KNOWN_HYPERVISORS += $(HYPERVISOR_QEMU_VIRTIOFS)
CONFIG_FILE_QEMU_VIRTIOFS = configuration-qemu-virtiofs.toml
CONFIG_QEMU_VIRTIOFS = $(CLI_DIR)/config/$(CONFIG_FILE_QEMU_VIRTIOFS)
CONFIG_QEMU_VIRTIOFS_IN = $(CONFIG_QEMU_VIRTIOFS).in
CONFIG_PATH_QEMU_VIRTIOFS = $(abspath $(CONFDIR)/$(CONFIG_FILE_QEMU_VIRTIOFS))
CONFIG_PATHS += $(CONFIG_PATH_QEMU_VIRTIOFS)
SYSCONFIG_QEMU_VIRTIOFS = $(abspath $(SYSCONFDIR)/$(CONFIG_FILE_QEMU_VIRTIOFS))
SYSCONFIG_PATHS += $(SYSCONFIG_QEMU_VIRTIOFS)
CONFIGS += $(CONFIG_QEMU_VIRTIOFS)
# qemu-specific options (all should be suffixed by "_QEMU")
DEFBLOCKSTORAGEDRIVER_QEMU_VIRTIOFS := virtio-fs
DEFNETWORKMODEL_QEMU := tcfilter
KERNELNAMEVIRTIOFS = $(call MAKE_KERNEL_VIRTIOFS_NAME,$(KERNELTYPE))
KERNELVIRTIOFSPATH = $(KERNELDIR)/$(KERNELNAMEVIRTIOFS)
endif
ifneq (,$(CLHCMD))
KNOWN_HYPERVISORS += $(HYPERVISOR_CLH)
@@ -383,16 +379,28 @@ SHAREDIR := $(SHAREDIR)
# list of variables the user may wish to override
USER_VARS += ARCH
USER_VARS += BINDIR
USER_VARS += CONFIG_ACRN_IN
USER_VARS += CONFIG_CLH_IN
USER_VARS += CONFIG_FC_IN
USER_VARS += CONFIG_PATH
USER_VARS += CONFIG_QEMU_IN
USER_VARS += DESTDIR
USER_VARS += DEFAULT_HYPERVISOR
USER_VARS += DEFENABLEMSWAP
USER_VARS += ACRNCMD
USER_VARS += ACRNCTLCMD
USER_VARS += ACRNPATH
USER_VARS += ACRNVALIDHYPERVISORPATHS
USER_VARS += ACRNCTLPATH
USER_VARS += ACRNVALIDCTLPATHS
USER_VARS += CLHPATH
USER_VARS += CLHVALIDHYPERVISORPATHS
USER_VARS += FIRMWAREPATH_CLH
USER_VARS += FCCMD
USER_VARS += FCPATH
USER_VARS += FCVALIDHYPERVISORPATHS
USER_VARS += FCJAILERPATH
USER_VARS += FCVALIDJAILERPATHS
USER_VARS += SYSCONFIG
USER_VARS += IMAGENAME
USER_VARS += IMAGEPATH
@@ -404,6 +412,11 @@ USER_VARS += KERNELTYPE
USER_VARS += KERNELTYPE_FC
USER_VARS += KERNELTYPE_ACRN
USER_VARS += KERNELTYPE_CLH
USER_VARS += KERNELPATH_ACRN
USER_VARS += KERNELPATH
USER_VARS += KERNELPATH_CLH
USER_VARS += KERNELPATH_FC
USER_VARS += KERNELVIRTIOFSPATH
USER_VARS += FIRMWAREPATH
USER_VARS += MACHINEACCELERATORS
USER_VARS += CPUFEATURES
@@ -416,15 +429,22 @@ USER_VARS += PKGLIBDIR
USER_VARS += PKGLIBEXECDIR
USER_VARS += PKGRUNDIR
USER_VARS += PREFIX
USER_VARS += PROJECT_BUG_URL
USER_VARS += PROJECT_NAME
USER_VARS += PROJECT_ORG
USER_VARS += PROJECT_PREFIX
USER_VARS += PROJECT_TAG
USER_VARS += PROJECT_TYPE
USER_VARS += PROJECT_URL
USER_VARS += NETMONPATH
USER_VARS += QEMUBINDIR
USER_VARS += QEMUCMD
USER_VARS += QEMUPATH
USER_VARS += QEMUVALIDHYPERVISORPATHS
USER_VARS += QEMUVIRTIOFSCMD
USER_VARS += QEMUVIRTIOFSPATH
USER_VARS += QEMUVALIDVIRTIOFSPATHS
USER_VARS += RUNTIME_NAME
USER_VARS += SHAREDIR
USER_VARS += SHIMPATH
USER_VARS += SYSCONFDIR
@@ -435,6 +455,7 @@ USER_VARS += DEFMEMSZ
USER_VARS += DEFMEMSLOTS
USER_VARS += DEFBRIDGES
USER_VARS += DEFNETWORKMODEL_ACRN
USER_VARS += DEFNETWORKMODEL_CLH
USER_VARS += DEFNETWORKMODEL_FC
USER_VARS += DEFNETWORKMODEL_QEMU
USER_VARS += DEFDISABLEGUESTSECCOMP
@@ -443,18 +464,22 @@ USER_VARS += DEFDISABLEBLOCK
USER_VARS += DEFBLOCKSTORAGEDRIVER_ACRN
USER_VARS += DEFBLOCKSTORAGEDRIVER_FC
USER_VARS += DEFBLOCKSTORAGEDRIVER_QEMU
USER_VARS += DEFBLOCKSTORAGEDRIVER_QEMU_VIRTIOFS
USER_VARS += DEFSHAREDFS
USER_VARS += DEFSHAREDFS_QEMU_VIRTIOFS
USER_VARS += DEFVIRTIOFSDAEMON
USER_VARS += DEFVALIDVIRTIOFSDAEMONPATHS
USER_VARS += DEFVIRTIOFSCACHESIZE
USER_VARS += DEFVIRTIOFSCACHE
USER_VARS += DEFVIRTIOFSEXTRAARGS
USER_VARS += DEFENABLEANNOTATIONS
USER_VARS += DEFENABLEIOTHREADS
USER_VARS += DEFENABLEMEMPREALLOC
USER_VARS += DEFENABLEHUGEPAGES
USER_VARS += DEFENABLEVHOSTUSERSTORE
USER_VARS += DEFVHOSTUSERSTOREPATH
USER_VARS += DEFVALIDVHOSTUSERSTOREPATHS
USER_VARS += DEFFILEMEMBACKEND
USER_VARS += DEFVALIDFILEMEMBACKENDS
USER_VARS += DEFENABLESWAP
USER_VARS += DEFENABLEDEBUG
USER_VARS += DEFDISABLENESTINGCHECKS
@@ -581,7 +606,6 @@ $(MONITOR_OUTPUT): $(SOURCES) $(GENERATED_FILES) $(MAKEFILE_LIST)
.PHONY: \
check \
check-go-static \
check-go-test \
coverage \
default \
install \
@@ -597,83 +621,7 @@ GENERATED_FILES += $(CONFIGS)
$(GENERATED_FILES): %: %.in $(MAKEFILE_LIST) VERSION .git-commit
$(QUIET_GENERATE)$(SED) \
-e "s|@COMMIT@|$(shell cat .git-commit)|g" \
-e "s|@VERSION@|$(VERSION)|g" \
-e "s|@CONFIG_ACRN_IN@|$(CONFIG_ACRN_IN)|g" \
-e "s|@CONFIG_QEMU_IN@|$(CONFIG_QEMU_IN)|g" \
-e "s|@CONFIG_QEMU_VIRTIOFS_IN@|$(CONFIG_QEMU_VIRTIOFS_IN)|g" \
-e "s|@CONFIG_CLH_IN@|$(CONFIG_CLH_IN)|g" \
-e "s|@CONFIG_FC_IN@|$(CONFIG_FC_IN)|g" \
-e "s|@CONFIG_PATH@|$(CONFIG_PATH)|g" \
-e "s|@FCPATH@|$(FCPATH)|g" \
-e "s|@FCJAILERPATH@|$(FCJAILERPATH)|g" \
-e "s|@ACRNPATH@|$(ACRNPATH)|g" \
-e "s|@ACRNCTLPATH@|$(ACRNCTLPATH)|g" \
-e "s|@CLHPATH@|$(CLHPATH)|g" \
-e "s|@SYSCONFIG@|$(SYSCONFIG)|g" \
-e "s|@IMAGEPATH@|$(IMAGEPATH)|g" \
-e "s|@KERNELPATH_ACRN@|$(KERNELPATH_ACRN)|g" \
-e "s|@KERNELPATH_FC@|$(KERNELPATH_FC)|g" \
-e "s|@KERNELPATH_CLH@|$(KERNELPATH_CLH)|g" \
-e "s|@KERNELPATH@|$(KERNELPATH)|g" \
-e "s|@KERNELVIRTIOFSPATH@|$(KERNELVIRTIOFSPATH)|g" \
-e "s|@INITRDPATH@|$(INITRDPATH)|g" \
-e "s|@FIRMWAREPATH@|$(FIRMWAREPATH)|g" \
-e "s|@MACHINEACCELERATORS@|$(MACHINEACCELERATORS)|g" \
-e "s|@CPUFEATURES@|$(CPUFEATURES)|g" \
-e "s|@FIRMWAREPATH_CLH@|$(FIRMWAREPATH_CLH)|g" \
-e "s|@DEFMACHINETYPE_CLH@|$(DEFMACHINETYPE_CLH)|g" \
-e "s|@KERNELPARAMS@|$(KERNELPARAMS)|g" \
-e "s|@LOCALSTATEDIR@|$(LOCALSTATEDIR)|g" \
-e "s|@PKGLIBEXECDIR@|$(PKGLIBEXECDIR)|g" \
-e "s|@PKGRUNDIR@|$(PKGRUNDIR)|g" \
-e "s|@NETMONPATH@|$(NETMONPATH)|g" \
-e "s|@PROJECT_BUG_URL@|$(PROJECT_BUG_URL)|g" \
-e "s|@PROJECT_URL@|$(PROJECT_URL)|g" \
-e "s|@PROJECT_NAME@|$(PROJECT_NAME)|g" \
-e "s|@PROJECT_TAG@|$(PROJECT_TAG)|g" \
-e "s|@PROJECT_TYPE@|$(PROJECT_TYPE)|g" \
-e "s|@QEMUPATH@|$(QEMUPATH)|g" \
-e "s|@QEMUVIRTIOFSPATH@|$(QEMUVIRTIOFSPATH)|g" \
-e "s|@RUNTIME_NAME@|$(TARGET)|g" \
-e "s|@MACHINETYPE@|$(MACHINETYPE)|g" \
-e "s|@SHIMPATH@|$(SHIMPATH)|g" \
-e "s|@DEFVCPUS@|$(DEFVCPUS)|g" \
-e "s|@DEFMAXVCPUS@|$(DEFMAXVCPUS)|g" \
-e "s|@DEFMAXVCPUS_ACRN@|$(DEFMAXVCPUS_ACRN)|g" \
-e "s|@DEFMEMSZ@|$(DEFMEMSZ)|g" \
-e "s|@DEFMEMSLOTS@|$(DEFMEMSLOTS)|g" \
-e "s|@DEFBRIDGES@|$(DEFBRIDGES)|g" \
-e "s|@DEFNETWORKMODEL_ACRN@|$(DEFNETWORKMODEL_ACRN)|g" \
-e "s|@DEFNETWORKMODEL_CLH@|$(DEFNETWORKMODEL_CLH)|g" \
-e "s|@DEFNETWORKMODEL_FC@|$(DEFNETWORKMODEL_FC)|g" \
-e "s|@DEFNETWORKMODEL_QEMU@|$(DEFNETWORKMODEL_QEMU)|g" \
-e "s|@DEFDISABLEGUESTSECCOMP@|$(DEFDISABLEGUESTSECCOMP)|g" \
-e "s|@DEFAULTEXPFEATURES@|$(DEFAULTEXPFEATURES)|g" \
-e "s|@DEFDISABLEBLOCK@|$(DEFDISABLEBLOCK)|g" \
-e "s|@DEFBLOCKSTORAGEDRIVER_ACRN@|$(DEFBLOCKSTORAGEDRIVER_ACRN)|g" \
-e "s|@DEFBLOCKSTORAGEDRIVER_FC@|$(DEFBLOCKSTORAGEDRIVER_FC)|g" \
-e "s|@DEFBLOCKSTORAGEDRIVER_QEMU@|$(DEFBLOCKSTORAGEDRIVER_QEMU)|g" \
-e "s|@DEFBLOCKSTORAGEDRIVER_QEMU_VIRTIOFS@|$(DEFBLOCKSTORAGEDRIVER_QEMU_VIRTIOFS)|g" \
-e "s|@DEFSHAREDFS@|$(DEFSHAREDFS)|g" \
-e "s|@DEFSHAREDFS_QEMU_VIRTIOFS@|$(DEFSHAREDFS_QEMU_VIRTIOFS)|g" \
-e "s|@DEFVIRTIOFSDAEMON@|$(DEFVIRTIOFSDAEMON)|g" \
-e "s|@DEFVIRTIOFSCACHESIZE@|$(DEFVIRTIOFSCACHESIZE)|g" \
-e "s|@DEFVIRTIOFSCACHE@|$(DEFVIRTIOFSCACHE)|g" \
-e "s|@DEFVIRTIOFSEXTRAARGS@|$(DEFVIRTIOFSEXTRAARGS)|g" \
-e "s|@DEFENABLEIOTHREADS@|$(DEFENABLEIOTHREADS)|g" \
-e "s|@DEFENABLEMEMPREALLOC@|$(DEFENABLEMEMPREALLOC)|g" \
-e "s|@DEFENABLEHUGEPAGES@|$(DEFENABLEHUGEPAGES)|g" \
-e "s|@DEFENABLEVHOSTUSERSTORE@|$(DEFENABLEVHOSTUSERSTORE)|g" \
-e "s|@DEFVHOSTUSERSTOREPATH@|$(DEFVHOSTUSERSTOREPATH)|g" \
-e "s|@DEFENABLEMSWAP@|$(DEFENABLESWAP)|g" \
-e "s|@DEFENABLEDEBUG@|$(DEFENABLEDEBUG)|g" \
-e "s|@DEFDISABLENESTINGCHECKS@|$(DEFDISABLENESTINGCHECKS)|g" \
-e "s|@DEFMSIZE9P@|$(DEFMSIZE9P)|g" \
-e "s|@DEFHOTPLUGVFIOONROOTBUS@|$(DEFHOTPLUGVFIOONROOTBUS)|g" \
-e "s|@DEFPCIEROOTPORT@|$(DEFPCIEROOTPORT)|g" \
-e "s|@DEFENTROPYSOURCE@|$(DEFENTROPYSOURCE)|g" \
-e "s|@DEFSANDBOXCGROUPONLY@|$(DEFSANDBOXCGROUPONLY)|g" \
-e "s|@FEATURE_SELINUX@|$(FEATURE_SELINUX)|g" \
$(foreach v,$(GENERATED_VARS),-e "s|@$v@|$($v)|g") \
$< > $@
generate-config: $(CONFIGS)
@@ -686,9 +634,9 @@ go-test: $(GENERATED_FILES)
go test -v -mod=vendor ./...
check-go-static:
$(QUIET_CHECK).ci/static-checks.sh
$(QUIET_CHECK).ci/go-no-os-exit.sh ./cli
$(QUIET_CHECK).ci/go-no-os-exit.sh ./virtcontainers
$(QUIET_CHECK)../../ci/static-checks.sh
$(QUIET_CHECK)../../ci/go-no-os-exit.sh ./cli
$(QUIET_CHECK)../../ci/go-no-os-exit.sh ./virtcontainers
coverage:
go test -v -mod=vendor -covermode=atomic -coverprofile=coverage.txt ./...

View File

@@ -25,6 +25,9 @@ const projectPrefix = "@PROJECT_TYPE@"
// original URL for this project
const projectURL = "@PROJECT_URL@"
// Project URL's organisation name
const projectORG = "@PROJECT_ORG@"
const defaultRootDirectory = "@PKGRUNDIR@"
// commit is the git commit the runtime is compiled from.

View File

@@ -16,6 +16,22 @@ ctlpath = "@ACRNCTLPATH@"
kernel = "@KERNELPATH_ACRN@"
image = "@IMAGEPATH@"
# List of valid annotation names for the hypervisor
# Each member of the list is a regular expression, which is the base name
# of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path"
enable_annotations = @DEFENABLEANNOTATIONS@
# List of valid annotations values for the hypervisor
# Each member of the list is a path pattern as described by glob(3).
# The default if not set is empty (all annotations rejected.)
# Your distribution recommends: @ACRNVALIDHYPERVISORPATHS@
valid_hypervisor_paths = @ACRNVALIDHYPERVISORPATHS@
# List of valid annotations values for ctlpath
# The default if not set is empty (all annotations rejected.)
# Your distribution recommends: @ACRNVALIDCTLPATHS@
valid_ctlpaths = @ACRNVALIDCTLPATHS@
# Optional space-separated list of options to pass to the guest kernel.
# For example, use `kernel_params = "vsyscall=emulate"` if you are having
# trouble running pre-2.15 glibc.
@@ -127,6 +143,13 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_ACRN@"
#trace_mode = "dynamic"
#trace_type = "isolated"
# Enable debug console.
# If enabled, user can connect guest OS running inside hypervisor
# through "kata-runtime exec <sandbox-id>" command
#debug_console_enabled = true
[netmon]
# If enabled, the network monitoring process gets started when the
# sandbox is created. This allows for the detection of some additional

View File

@@ -15,6 +15,17 @@ path = "@CLHPATH@"
kernel = "@KERNELPATH_CLH@"
image = "@IMAGEPATH@"
# List of valid annotation names for the hypervisor
# Each member of the list is a regular expression, which is the base name
# of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path"
enable_annotations = @DEFENABLEANNOTATIONS@
# List of valid annotations values for the hypervisor
# Each member of the list is a path pattern as described by glob(3).
# The default if not set is empty (all annotations rejected.)
# Your distribution recommends: @CLHVALIDHYPERVISORPATHS@
valid_hypervisor_paths = @CLHVALIDHYPERVISORPATHS@
# Optional space-separated list of options to pass to the guest kernel.
# For example, use `kernel_params = "vsyscall=emulate"` if you are having
# trouble running pre-2.15 glibc.
@@ -62,6 +73,11 @@ default_memory = @DEFMEMSZ@
# Path to vhost-user-fs daemon.
virtio_fs_daemon = "@DEFVIRTIOFSDAEMON@"
# List of valid annotations values for the virtiofs daemon
# The default if not set is empty (all annotations rejected.)
# Your distribution recommends: @DEFVALIDVIRTIOFSDAEMONPATHS@
valid_virtio_fs_daemon_paths = @DEFVALIDVIRTIOFSDAEMONPATHS@
# Default size of DAX cache in MiB
virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@
@@ -125,6 +141,12 @@ block_device_driver = "virtio-blk"
#trace_mode = "dynamic"
#trace_type = "isolated"
# Enable debug console.
# If enabled, user can connect guest OS running inside hypervisor
# through "kata-runtime exec <sandbox-id>" command
#debug_console_enabled = true
[netmon]
# If enabled, the network monitoring process gets started when the

View File

@@ -12,6 +12,20 @@
[hypervisor.firecracker]
path = "@FCPATH@"
kernel = "@KERNELPATH_FC@"
image = "@IMAGEPATH@"
# List of valid annotation names for the hypervisor
# Each member of the list is a regular expression, which is the base name
# of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path"
enable_annotations = @DEFENABLEANNOTATIONS@
# List of valid annotations values for the hypervisor
# Each member of the list is a path pattern as described by glob(3).
# The default if not set is empty (all annotations rejected.)
# Your distribution recommends: @FCVALIDHYPERVISORPATHS@
valid_hypervisor_paths = @FCVALIDHYPERVISORPATHS@
# Path for the jailer specific to firecracker
# If the jailer path is not set kata will launch firecracker
# without a jail. If the jailer is set firecracker will be
@@ -19,8 +33,13 @@ path = "@FCPATH@"
# This is disabled by default as additional setup is required
# for this feature today.
#jailer_path = "@FCJAILERPATH@"
kernel = "@KERNELPATH_FC@"
image = "@IMAGEPATH@"
# List of valid jailer path values for the hypervisor
# Each member of the list can be a regular expression
# The default if not set is empty (all annotations rejected.)
# Your distribution recommends: @FCVALIDJAILERPATHS@
valid_jailer_paths = @FCVALIDJAILERPATHS@
# Optional space-separated list of options to pass to the guest kernel.
# For example, use `kernel_params = "vsyscall=emulate"` if you are having
@@ -87,10 +106,10 @@ default_memory = @DEFMEMSZ@
#memory_offset = 0
# Disable block device from being used for a container's rootfs.
# In case of a storage driver like devicemapper where a container's
# In case of a storage driver like devicemapper where a container's
# root file system is backed by a block device, the block device is passed
# directly to the hypervisor for performance reasons.
# This flag prevents the block device from being passed to the hypervisor,
# directly to the hypervisor for performance reasons.
# This flag prevents the block device from being passed to the hypervisor,
# 9pfs is used instead to pass the rootfs.
disable_block_device_use = @DEFDISABLEBLOCK@
@@ -126,7 +145,7 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_FC@"
# Enabling this will result in the VM memory
# being allocated using huge pages.
# This is useful when you want to use vhost-user network
# stacks within the container. This will automatically
# stacks within the container. This will automatically
# result in memory pre allocation
#enable_hugepages = true
@@ -256,6 +275,13 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_FC@"
#
kernel_modules=[]
# Enable debug console.
# If enabled, user can connect guest OS running inside hypervisor
# through "kata-runtime exec <sandbox-id>" command
#debug_console_enabled = true
[netmon]
# If enabled, the network monitoring process gets started when the
# sandbox is created. This allows for the detection of some additional

View File

@@ -1,440 +0,0 @@
# Copyright (c) 2017-2019 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
# XXX: WARNING: this file is auto-generated.
# XXX:
# XXX: Source file: "@CONFIG_QEMU_VIRTIOFS_IN@"
# XXX: Project:
# XXX: Name: @PROJECT_NAME@
# XXX: Type: @PROJECT_TYPE@
[hypervisor.qemu]
path = "@QEMUVIRTIOFSPATH@"
kernel = "@KERNELVIRTIOFSPATH@"
image = "@IMAGEPATH@"
machine_type = "@MACHINETYPE@"
# Optional space-separated list of options to pass to the guest kernel.
# For example, use `kernel_params = "vsyscall=emulate"` if you are having
# trouble running pre-2.15 glibc.
#
# WARNING: - any parameter specified here will take priority over the default
# parameter value of the same name used to start the virtual machine.
# Do not set values here unless you understand the impact of doing so as you
# may stop the virtual machine from booting.
# To see the list of default parameters, enable hypervisor debug, create a
# container and look for 'default-kernel-parameters' log entries.
kernel_params = "@KERNELPARAMS@"
# Path to the firmware.
# If you want that qemu uses the default firmware leave this option empty
firmware = "@FIRMWAREPATH@"
# Machine accelerators
# comma-separated list of machine accelerators to pass to the hypervisor.
# For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"`
machine_accelerators="@MACHINEACCELERATORS@"
# CPU features
# comma-separated list of cpu features to pass to the cpu
# For example, `cpu_features = "pmu=off,vmx=off"
cpu_features="@CPUFEATURES@"
# Default number of vCPUs per SB/VM:
# unspecified or 0 --> will be set to @DEFVCPUS@
# < 0 --> will be set to the actual number of physical cores
# > 0 <= number of physical cores --> will be set to the specified number
# > number of physical cores --> will be set to the actual number of physical cores
default_vcpus = 1
# Default maximum number of vCPUs per SB/VM:
# unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number
# of vCPUs supported by KVM if that number is exceeded
# > 0 <= number of physical cores --> will be set to the specified number
# > number of physical cores --> will be set to the actual number of physical cores or to the maximum number
# of vCPUs supported by KVM if that number is exceeded
# WARNING: Depending of the architecture, the maximum number of vCPUs supported by KVM is used when
# the actual number of physical cores is greater than it.
# WARNING: Be aware that this value impacts the virtual machine's memory footprint and CPU
# the hotplug functionality. For example, `default_maxvcpus = 240` specifies that until 240 vCPUs
# can be added to a SB/VM, but the memory footprint will be big. Another example, with
# `default_maxvcpus = 8` the memory footprint will be small, but 8 will be the maximum number of
# vCPUs supported by the SB/VM. In general, we recommend that you do not edit this variable,
# unless you know what are you doing.
# NOTICE: on arm platform with gicv2 interrupt controller, set it to 8.
default_maxvcpus = @DEFMAXVCPUS@
# Bridges can be used to hot plug devices.
# Limitations:
# * Currently only pci bridges are supported
# * Until 30 devices per bridge can be hot plugged.
# * Until 5 PCI bridges can be cold plugged per VM.
# This limitation could be a bug in qemu or in the kernel
# Default number of bridges per SB/VM:
# unspecified or 0 --> will be set to @DEFBRIDGES@
# > 1 <= 5 --> will be set to the specified number
# > 5 --> will be set to 5
default_bridges = @DEFBRIDGES@
# Default memory size in MiB for SB/VM.
# If unspecified then it will be set @DEFMEMSZ@ MiB.
default_memory = @DEFMEMSZ@
#
# Default memory slots per SB/VM.
# If unspecified then it will be set @DEFMEMSLOTS@.
# This is will determine the times that memory will be hotadded to sandbox/VM.
#memory_slots = @DEFMEMSLOTS@
# The size in MiB will be plused to max memory of hypervisor.
# It is the memory address space for the NVDIMM devie.
# If set block storage driver (block_device_driver) to "nvdimm",
# should set memory_offset to the size of block device.
# Default 0
#memory_offset = 0
# Disable block device from being used for a container's rootfs.
# In case of a storage driver like devicemapper where a container's
# root file system is backed by a block device, the block device is passed
# directly to the hypervisor for performance reasons.
# This flag prevents the block device from being passed to the hypervisor,
# 9pfs is used instead to pass the rootfs.
disable_block_device_use = @DEFDISABLEBLOCK@
# Shared file system type:
# - virtio-fs (default)
# - virtio-9p
shared_fs = "@DEFSHAREDFS_QEMU_VIRTIOFS@"
# Path to vhost-user-fs daemon.
virtio_fs_daemon = "@DEFVIRTIOFSDAEMON@"
# Default size of DAX cache in MiB
virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@
# Extra args for virtiofsd daemon
#
# Format example:
# ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"]
#
# see `virtiofsd -h` for possible options.
virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@
# Cache mode:
#
# - none
# Metadata, data, and pathname lookup are not cached in guest. They are
# always fetched from host and any changes are immediately pushed to host.
#
# - auto
# Metadata and pathname lookup cache expires after a configured amount of
# time (default is 1 second). Data is cached while the file is open (close
# to open consistency).
#
# - always
# Metadata, data, and pathname lookup are cached in guest and never expire.
virtio_fs_cache = "@DEFVIRTIOFSCACHE@"
# Block storage driver to be used for the hypervisor in case the container
# rootfs is backed by a block device. This is virtio-scsi, virtio-blk
# or nvdimm.
block_device_driver = "@DEFBLOCKSTORAGEDRIVER_QEMU@"
# Specifies cache-related options will be set to block devices or not.
# Default false
#block_device_cache_set = true
# Specifies cache-related options for block devices.
# Denotes whether use of O_DIRECT (bypass the host page cache) is enabled.
# Default false
#block_device_cache_direct = true
# Specifies cache-related options for block devices.
# Denotes whether flush requests for the device are ignored.
# Default false
#block_device_cache_noflush = true
# Enable iothreads (data-plane) to be used. This causes IO to be
# handled in a separate IO thread. This is currently only implemented
# for SCSI.
#
enable_iothreads = @DEFENABLEIOTHREADS@
# Enable pre allocation of VM RAM, default false
# Enabling this will result in lower container density
# as all of the memory will be allocated and locked
# This is useful when you want to reserve all the memory
# upfront or in the cases where you want memory latencies
# to be very predictable
# Default false
#enable_mem_prealloc = true
# Enable huge pages for VM RAM, default false
# Enabling this will result in the VM memory
# being allocated using huge pages.
# This is useful when you want to use vhost-user network
# stacks within the container. This will automatically
# result in memory pre allocation
#enable_hugepages = true
# Enable vhost-user storage device, default false
# Enabling this will result in some Linux reserved block type
# major range 240-254 being chosen to represent vhost-user devices.
enable_vhost_user_store = @DEFENABLEVHOSTUSERSTORE@
# The base directory specifically used for vhost-user devices.
# Its sub-path "block" is used for block devices; "block/sockets" is
# where we expect vhost-user sockets to live; "block/devices" is where
# simulated block device nodes for vhost-user devices to live.
vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@"
# Enable vIOMMU, default false
# Enabling this will result in the VM having a vIOMMU device
# This will also add the following options to the kernel's
# command line: intel_iommu=on,iommu=pt
#enable_iommu = true
# Enable IOMMU_PLATFORM, default false
# Enabling this will result in the VM device having iommu_platform=on set
#enable_iommu_platform = true
# Enable file based guest memory support. The default is an empty string which
# will disable this feature. In the case of virtio-fs, this is enabled
# automatically and '/dev/shm' is used as the backing folder.
# This option will be ignored if VM templating is enabled.
#file_mem_backend = ""
# Enable swap of vm memory. Default false.
# The behaviour is undefined if mem_prealloc is also set to true
#enable_swap = true
# This option changes the default hypervisor and kernel parameters
# to enable debug output where available.
#
# Default false
#enable_debug = true
# Disable the customizations done in the runtime when it detects
# that it is running on top a VMM. This will result in the runtime
# behaving as it would when running on bare metal.
#
#disable_nesting_checks = true
# This is the msize used for 9p shares. It is the number of bytes
# used for 9p packet payload.
#msize_9p = @DEFMSIZE9P@
# If false and nvdimm is supported, use nvdimm device to plug guest image.
# Otherwise virtio-block device is used.
# Default false
#disable_image_nvdimm = true
# VFIO devices are hotplugged on a bridge by default.
# Enable hotplugging on root bus. This may be required for devices with
# a large PCI bar, as this is a current limitation with hotplugging on
# a bridge. This value is valid for "pc" machine type.
# Default false
#hotplug_vfio_on_root_bus = true
# If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off
# security (vhost-net runs ring0) for network I/O performance.
#disable_vhost_net = true
#
# Default entropy source.
# The path to a host source of entropy (including a real hardware RNG)
# /dev/urandom and /dev/random are two main options.
# Be aware that /dev/random is a blocking source of entropy. If the host
# runs out of entropy, the VMs boot time will increase leading to get startup
# timeouts.
# The source of entropy /dev/urandom is non-blocking and provides a
# generally acceptable source of entropy. It should work well for pretty much
# all practical purposes.
#entropy_source= "@DEFENTROPYSOURCE@"
# Path to OCI hook binaries in the *guest rootfs*.
# This does not affect host-side hooks which must instead be added to
# the OCI spec passed to the runtime.
#
# You can create a rootfs with hooks by customizing the osbuilder scripts:
# https://github.com/kata-containers/osbuilder
#
# Hooks must be stored in a subdirectory of guest_hook_path according to their
# hook type, i.e. "guest_hook_path/{prestart,postart,poststop}".
# The agent will scan these directories for executable files and add them, in
# lexicographical order, to the lifecycle of the guest container.
# Hooks are executed in the runtime namespace of the guest. See the official documentation:
# https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
# Warnings will be logged if any error is encountered will scanning for hooks,
# but it will not abort container execution.
#guest_hook_path = "/usr/share/oci/hooks"
[factory]
# VM templating support. Once enabled, new VMs are created from template
# using vm cloning. They will share the same initial kernel, initramfs and
# agent memory by mapping it readonly. It helps speeding up new container
# creation and saves a lot of memory if there are many kata containers running
# on the same host.
#
# When disabled, new VMs are created from scratch.
#
# Note: Requires "initrd=" to be set ("image=" is not supported).
#
# Default false
#enable_template = true
# Specifies the path of template.
#
# Default "/run/vc/vm/template"
#template_path = "/run/vc/vm/template"
# The number of caches of VMCache:
# unspecified or == 0 --> VMCache is disabled
# > 0 --> will be set to the specified number
#
# VMCache is a function that creates VMs as caches before using it.
# It helps speed up new container creation.
# The function consists of a server and some clients communicating
# through Unix socket. The protocol is gRPC in protocols/cache/cache.proto.
# The VMCache server will create some VMs and cache them by factory cache.
# It will convert the VM to gRPC format and transport it when gets
# requestion from clients.
# Factory grpccache is the VMCache client. It will request gRPC format
# VM and convert it back to a VM. If VMCache function is enabled,
# kata-runtime will request VM from factory grpccache when it creates
# a new sandbox.
#
# Default 0
#vm_cache_number = 0
# Specify the address of the Unix socket that is used by VMCache.
#
# Default /var/run/kata-containers/cache.sock
#vm_cache_endpoint = "/var/run/kata-containers/cache.sock"
[agent.@PROJECT_TYPE@]
# If enabled, make the agent display debug-level messages.
# (default: disabled)
#enable_debug = true
# Enable agent tracing.
#
# If enabled, the default trace mode is "dynamic" and the
# default trace type is "isolated". The trace mode and type are set
# explicity with the `trace_type=` and `trace_mode=` options.
#
# Notes:
#
# - Tracing is ONLY enabled when `enable_tracing` is set: explicitly
# setting `trace_mode=` and/or `trace_type=` without setting `enable_tracing`
# will NOT activate agent tracing.
#
# - See https://github.com/kata-containers/agent/blob/master/TRACING.md for
# full details.
#
# (default: disabled)
#enable_tracing = true
#
#trace_mode = "dynamic"
#trace_type = "isolated"
# Comma separated list of kernel modules and their parameters.
# These modules will be loaded in the guest kernel using modprobe(8).
# The following example can be used to load two kernel modules with parameters
# - kernel_modules=["e1000e InterruptThrottleRate=3000,3000,3000 EEE=1", "i915 enable_ppgtt=0"]
# The first word is considered as the module name and the rest as its parameters.
# Container will not be started when:
# * A kernel module is specified and the modprobe command is not installed in the guest
# or it fails loading the module.
# * The module is not available in the guest or it doesn't met the guest kernel
# requirements, like architecture and version.
#
kernel_modules=[]
[netmon]
# If enabled, the network monitoring process gets started when the
# sandbox is created. This allows for the detection of some additional
# network being added to the existing network namespace, after the
# sandbox has been created.
# (default: disabled)
#enable_netmon = true
# Specify the path to the netmon binary.
path = "@NETMONPATH@"
# If enabled, netmon messages will be sent to the system log
# (default: disabled)
#enable_debug = true
[runtime]
# If enabled, the runtime will log additional debug messages to the
# system log
# (default: disabled)
#enable_debug = true
#
# Internetworking model
# Determines how the VM should be connected to the
# the container network interface
# Options:
#
# - bridged (Deprecated)
# Uses a linux bridge to interconnect the container interface to
# the VM. Works for most cases except macvlan and ipvlan.
# ***NOTE: This feature has been deprecated with plans to remove this
# feature in the future. Please use other network models listed below.
#
# - macvtap
# Used when the Container network interface can be bridged using
# macvtap.
#
# - none
# Used when customize network. Only creates a tap device. No veth pair.
#
# - tcfilter
# Uses tc filter rules to redirect traffic from the network interface
# provided by plugin to a tap interface connected to the VM.
#
internetworking_model="@DEFNETWORKMODEL_QEMU@"
# disable guest seccomp
# Determines whether container seccomp profiles are passed to the virtual
# machine and applied by the kata agent. If set to true, seccomp is not applied
# within the guest
# (default: true)
disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
# If enabled, the runtime will create opentracing.io traces and spans.
# (See https://www.jaegertracing.io/docs/getting-started).
# (default: disabled)
#enable_tracing = true
# If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
# This option may have some potential impacts to your host. It should only be used when you know what you're doing.
# `disable_new_netns` conflicts with `enable_netmon`
# `disable_new_netns` conflicts with `internetworking_model=bridged` and `internetworking_model=macvtap`. It works only
# with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
# (like OVS) directly.
# If you are using docker, `disable_new_netns` only works with `docker run --net=none`
# (default: false)
#disable_new_netns = true
# if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
# The container cgroups in the host are not created, just one single cgroup per sandbox.
# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox.
# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
# The sandbox cgroup is constrained if there is no container type annotation.
# See: https://godoc.org/github.com/kata-containers/runtime/virtcontainers#ContainerType
sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
# Enabled experimental feature list, format: ["a", "b"].
# Experimental features are features not stable enough for production,
# they may break compatibility, and are prepared for a big version bump.
# Supported experimental features:
# (default: [])
experimental=@DEFAULTEXPFEATURES@
# If enabled, user can run pprof tools with shim v2 process through kata-monitor.
# (default: false)
# EnablePprof = true

View File

@@ -16,6 +16,17 @@ kernel = "@KERNELPATH@"
image = "@IMAGEPATH@"
machine_type = "@MACHINETYPE@"
# List of valid annotation names for the hypervisor
# Each member of the list is a regular expression, which is the base name
# of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path"
enable_annotations = @DEFENABLEANNOTATIONS@
# List of valid annotations values for the hypervisor
# Each member of the list is a path pattern as described by glob(3).
# The default if not set is empty (all annotations rejected.)
# Your distribution recommends: @QEMUVALIDHYPERVISORPATHS@
valid_hypervisor_paths = @QEMUVALIDHYPERVISORPATHS@
# Optional space-separated list of options to pass to the guest kernel.
# For example, use `kernel_params = "vsyscall=emulate"` if you are having
# trouble running pre-2.15 glibc.
@@ -101,21 +112,26 @@ default_memory = @DEFMEMSZ@
#enable_virtio_mem = true
# Disable block device from being used for a container's rootfs.
# In case of a storage driver like devicemapper where a container's
# In case of a storage driver like devicemapper where a container's
# root file system is backed by a block device, the block device is passed
# directly to the hypervisor for performance reasons.
# This flag prevents the block device from being passed to the hypervisor,
# directly to the hypervisor for performance reasons.
# This flag prevents the block device from being passed to the hypervisor,
# 9pfs is used instead to pass the rootfs.
disable_block_device_use = @DEFDISABLEBLOCK@
# Shared file system type:
# - virtio-9p (default)
# - virtio-fs
shared_fs = "@DEFSHAREDFS@"
shared_fs = "@DEFSHAREDFS_QEMU_VIRTIOFS@"
# Path to vhost-user-fs daemon.
virtio_fs_daemon = "@DEFVIRTIOFSDAEMON@"
# List of valid annotations values for the virtiofs daemon
# The default if not set is empty (all annotations rejected.)
# Your distribution recommends: @DEFVALIDVIRTIOFSDAEMONPATHS@
valid_virtio_fs_daemon_paths = @DEFVALIDVIRTIOFSDAEMONPATHS@
# Default size of DAX cache in MiB
virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@
@@ -180,7 +196,7 @@ enable_iothreads = @DEFENABLEIOTHREADS@
# Enabling this will result in the VM memory
# being allocated using huge pages.
# This is useful when you want to use vhost-user network
# stacks within the container. This will automatically
# stacks within the container. This will automatically
# result in memory pre allocation
#enable_hugepages = true
@@ -205,11 +221,21 @@ vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@"
# Enabling this will result in the VM device having iommu_platform=on set
#enable_iommu_platform = true
# List of valid annotations values for the vhost user store path
# The default if not set is empty (all annotations rejected.)
# Your distribution recommends: @DEFVALIDVHOSTUSERSTOREPATHS@
valid_vhost_user_store_paths = @DEFVALIDVHOSTUSERSTOREPATHS@
# Enable file based guest memory support. The default is an empty string which
# will disable this feature. In the case of virtio-fs, this is enabled
# automatically and '/dev/shm' is used as the backing folder.
# This option will be ignored if VM templating is enabled.
#file_mem_backend = ""
#file_mem_backend = "@DEFFILEMEMBACKEND@"
# List of valid annotations values for the file_mem_backend annotation
# The default if not set is empty (all annotations rejected.)
# Your distribution recommends: @DEFVALIDFILEMEMBACKENDS@
valid_file_mem_backends = @DEFVALIDFILEMEMBACKENDS@
# Enable swap of vm memory. Default false.
# The behaviour is undefined if mem_prealloc is also set to true
@@ -217,17 +243,17 @@ vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@"
# This option changes the default hypervisor and kernel parameters
# to enable debug output where available.
#
#
# Default false
#enable_debug = true
# Disable the customizations done in the runtime when it detects
# that it is running on top a VMM. This will result in the runtime
# behaving as it would when running on bare metal.
#
#
#disable_nesting_checks = true
# This is the msize used for 9p shares. It is the number of bytes
# This is the msize used for 9p shares. It is the number of bytes
# used for 9p packet payload.
#msize_9p = @DEFMSIZE9P@
@@ -236,9 +262,9 @@ vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@"
# Default is false
#disable_image_nvdimm = true
# VFIO devices are hotplugged on a bridge by default.
# VFIO devices are hotplugged on a bridge by default.
# Enable hotplugging on root bus. This may be required for devices with
# a large PCI bar, as this is a current limitation with hotplugging on
# a large PCI bar, as this is a current limitation with hotplugging on
# a bridge. This value is valid for "pc" machine type.
# Default false
#hotplug_vfio_on_root_bus = true
@@ -251,7 +277,7 @@ vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@"
#pcie_root_port = 2
# If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off
# security (vhost-net runs ring0) for network I/O performance.
# security (vhost-net runs ring0) for network I/O performance.
#disable_vhost_net = true
#
@@ -375,6 +401,12 @@ vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@"
#
kernel_modules=[]
# Enable debug console.
# If enabled, user can connect guest OS running inside hypervisor
# through "kata-runtime exec <sandbox-id>" command
#debug_console_enabled = true
[netmon]
# If enabled, the network monitoring process gets started when the

View File

@@ -10,6 +10,7 @@ import (
"os"
"testing"
ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils"
"github.com/stretchr/testify/assert"
)
@@ -22,6 +23,9 @@ func TestConsoleFromFile(t *testing.T) {
}
func TestNewConsole(t *testing.T) {
if tc.NotValid(ktu.NeedRoot()) {
t.Skip(testDisabledAsNonRoot)
}
assert := assert.New(t)
console, err := newConsole()
@@ -34,6 +38,9 @@ func TestNewConsole(t *testing.T) {
}
func TestIsTerminal(t *testing.T) {
if tc.NotValid(ktu.NeedRoot()) {
t.Skip(testDisabledAsNonRoot)
}
assert := assert.New(t)
var fd uintptr = 4

View File

@@ -22,9 +22,9 @@ func shimConfig(config *shim.Config) {
func main() {
if len(os.Args) == 2 && os.Args[1] == "--version" {
fmt.Printf("%s containerd shim: id: %q, version: %s, commit: %v\n", project, types.KataRuntimeName, version, commit)
fmt.Printf("%s containerd shim: id: %q, version: %s, commit: %v\n", project, types.DefaultKataRuntimeName, version, commit)
os.Exit(0)
}
shim.Run(types.KataRuntimeName, containerdshim.New, shimConfig)
shim.Run(types.DefaultKataRuntimeName, containerdshim.New, shimConfig)
}

Some files were not shown because too many files have changed in this diff Show More