Compare commits

...

279 Commits

Author SHA1 Message Date
snir911
a57515bdae Merge pull request #4384 from snir911/2.5.0-alpha2-branch-bump
# Kata Containers 2.5.0-alpha2
2022-06-08 19:32:57 +03:00
Eric Ernst
4ebf9d38b9 Merge pull request #4310 from egernst/core-sched
shim: add support for core scheduling
2022-06-08 17:42:45 +02:00
Snir Sheriber
eb24e97150 release: Kata Containers 2.5.0-alpha2
- docs: Update storage documentation link
- rustjail: get home dir using nix crate
- runk: Support `list` sub-command
- docs: Update vGPU use-case
- runtime: ignore ESRCH error from stop container
- docs: Update configuration reference for snap documentation
- workflows: add workflow_dispatch triggering to test-kata-deploy
- snap: Use helper script and cleanup
- feature: add ability to interact with IPTables within the guest
- agent: return mount file content if parse mountinfo failed
- docs: Update Intel QAT documentation links
- osbuilder: add iptables package
- runk: Return error when tty is used without console socket
- runk: Add Podman guide in README
- agent: Pass standard I/O to container launched by runk
- agent, runk: Enable test for the agent built with standard-oci-runtime feature
- runk: Handle rootfs path in config.json properly
- Update containerd docs
- clh: Update to v24.0
- snap: Build and package rust version of virtiofsd
- runk: merge oci-kata-agent into runk
- virtiofsd: static build virtiofsd from rust code for non-x86
- Fix issues with direct-volume stats feature
- runtime: fix incorrect Action function for direct-volume stats
- runtime: Adding the correct detection of mediated PCIe devices
- runtime: remove duplicate 'types' import
- runtime: sync docstrings with function names
- qemu: allow using legacy serial device for the console
- docs: Remove clear containers reference in README
- runtime: do not check for EOF error in console watcher
- kernel: Remove nemu.conf from packaging
- tools: delete unused param from get_from_kata_deps callers
- agent: Fix is_signal_handled failing parsing str to u64
- Improve Go unit test script
- packaging: Add kernel config option for SGX in Gramine
- ci: Don't run Docs URL Alive Check workflow on forks
- tools: Add QEMU patches for SGX numa support
- docs: Update runc containerd runtime
- Build and distribute the rust version of virtiofsd
- doc: Update log parser link
- Move the kata-log-parser from the tests repo
- versions: Upgrade to Cloud Hypervisor v23.1
- agent: Add a macro to skip a loop easier
- runk: use custom Kill command to support --all option
- agent: add test coverage for functions find_process and online_resources

fe3c1d9cd docs: Update storage documentation link
9d27c1fce agent: ignore ESRCH error when destroying containers
9726f56fd runtime: force stop container after the container process exits
168f325c4 docs: Update configuration reference for snap documentation
38a318820 runk: Support `list` sub-command
b9fc24ff3 docs: update release process github token instructions
c1476a174 docs: update release process with latest workflow triggering
002f2cd10 snap: Use helper script and cleanup
2e04833fb docs: Update Intel QAT documentation links
8b57bf97a workflows: add workflow_dispatch triggering to test-kata-deploy
6d0ff901a docs: Update vGPU use-case
9b108d993 docs: Improve snap formatting
894f661cc docs: Add warning to snap build
d759f6c3e snap: Fix CH architecture check
590381574 agent: Pass standard I/O to container launched by runk
af2ef3f7a agent-ctl: introduce handle for iptables get/set
65f0cef16 kata-runtime: add iptables CLI to test http endpoint
3201ad083 shim-client: ensure we check resp status for Put/Post
0706fb28a kata-runtime: shmgmt: make url usage consistent
2a09378dd shim-client: add support for DoPut
640173cfc shim-mgmt: Add endpoint handler for interacting with iptables
0136be22c virtcontainers: plumb iptable set/get from sandbox to agent
bd50d463b agent: iptables: get/set handling for iptables
7c4049aab osbuilder: add iptables package
03176a9e0 proto: update generated code based on proto update
38ebbc705 proto: update to add set/get iptables
78d45b434 agent: return mount file content if parse mountinfo failed
c7b3941c9 runk: Enable test for the agent built with standard-oci-runtime feature
6dbce7c3d agent: Remove unused import in console test
6ecea84bc rustjail: get home dir using nix crate
648b8d0ae runk: Return error when tty is used without console socket
5205efd9b runk: Add Podman guide in README
d862ca059 runk: Handle rootfs path in config.json properly
56591804b docs: Improve snap build instructions
cb2b30970 snap: Build using destructive mode
60823abb9 docs: Move snap README
fff832874 clh: Update to v24.0
49361749e snap: Build and package rust version of virtiofsd
27d903b76 snap: Put the yq binary in the staging bin directory
d7b4ce049 snap: Remove unused variable
43de5440e snap: Fix unbound variable error
c9b291509 snap: Fix whitespace
122a85e22 agent: remove bin oci-kata-agent
35619b45a runk: merge oci-kata-agent into runk
10c13d719 qemu: remove virtiofsd option in qemu config
d20bc5a4d virtiofsd: build rust based virtiofsd from source for non-x86_64
c95ba63c0 docs: Remove information related to Kata 1.x
34b80382b docs: Get rid of note related to networking.
dfad5728a docs: Mention --cni flag while invoking ctr
8e7c5975c agent: fix direct-assigned volume stats
4428ceae1 runtime: direct-volume stats use correct name
ffdc065b4 runtime: direct-volume stats update to use GET parameter
f29595318 runtime: fix incorrect Action function for direct-volume stats
7a5ccd126 runtime: sync docstrings with function names
ce2e521a0 runtime: remove duplicate 'types' import
834f93ce8 docs: fix annotations example
f4994e486 runtime: allow annotation configuration to use_legacy_serial
24a2b0f6a docs: Remove clear containers reference in README
abad33eba kernel: Remove nemu.conf from packaging
e87eb13c4 tools: delete unused param from get_from_kata_deps callers
8052fe62f runtime: do not check for EOF error in console watcher
c67b9d297 qemu: allow using legacy serial device for the console
44814dce1 qemu: treat console kernel params within appendConsole
4f586d2a9 packaging: Add kernel config option for SGX in Gramine
4b437d91f agent: Fix is_signal_handled failing parsing str to u64
88fb9b72e docs: Update runc containerd runtime
d1f2852d8 tools: Stop building virtiofsd with qemu (for x86_64)
c39852e83 runtime: Use ${LIBEXEC}/virtiofsd as the default virtiofsd path
b4b9068cb tools: Add QEMU patches for SGX numa support
a475956ab workflows: Add support for building virtiofsd
71f59f3a7 local-build: Add support for building virtiofsd
c7ac55b6d dockerbuild: Install unzip
8e2042d05 tools: add script to pull virtiofsd
dbedea508 versions: Add virtiofsd entry
e73b70baf runtime: Don't run unit tests verbose by default
f24a6e761 runtime: Consolidate flags setting in unit tests script
cf465feb0 runtime: Don't change test behaviour based on $CI or $KATA_DEV_MODE
34c4ac599 runtime: Remove redundant subcommands from go-test.sh
0aff5aaa3 runtime: Simplify package listing in go-test.sh
557c4cfd0 runtime: Don't chmod coverage files in Go tests
04c8b52e0 runtime: Remove HTML coverage option from go-test.sh
7f7691442 runtime: Add coverage.txt.tmp to gitignore
13c257700 runtime: Move go testing script locally
421064680 doc: Update log parser link
271933fec log-parser: fix some of the documentation
c7dacb121 log-parser: move the kata-log-parser from the tests repo
82ea01828 versions: Upgrade to Cloud Hypervisor v23.1
2a1d39414 runtime: Adding the correct detection of mediated PCIe devices
7bc4ab68c ci: Don't run Docs URL Alive Check workflow on forks
475e3bf38 agent: add test coverage for functions find_process and online_resources
383be2203 agent: Add a macro to skip a loop easier
97d7b1845 runk: use custom Kill command to support --all option

Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2022-06-08 11:56:30 +03:00
GabyCT
5bd81ba232 Merge pull request #4399 from GabyCT/topic/updatestoragedoc
docs: Update storage documentation link
2022-06-07 09:13:45 -05:00
Gabriela Cervantes
fe3c1d9cdd docs: Update storage documentation link
This PR updates the storage documentation link for the devicemapper
snapshotter.

Fixes #4398

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2022-06-06 14:48:34 +00:00
Bin Liu
a238d8c6bd Merge pull request #4300 from justxuewei/fix/rustjail/home-env
rustjail: get home dir using nix crate
2022-06-06 11:03:46 +08:00
Bin Liu
f981190621 Merge pull request #4383 from cyyzero/runk-list
runk: Support `list` sub-command
2022-06-06 10:25:33 +08:00
Bin Liu
f7b22eb777 Merge pull request #4344 from zvonkok/vgpu-documentation
docs: Update vGPU use-case
2022-06-06 10:25:05 +08:00
Eric Ernst
430da47215 Merge pull request #4360 from fengwang666/shim-leak
runtime: ignore ESRCH error from stop container
2022-06-02 12:42:19 -07:00
GabyCT
9c9e5984ba Merge pull request #4342 from GabyCT/topic/updatesnapdoc
docs: Update configuration reference for snap documentation
2022-06-02 14:00:22 -05:00
Feng Wang
9d27c1fced agent: ignore ESRCH error when destroying containers
The destroy() method should ignore the ESRCH error from signal::kill
and continue the operation, as ESRCH is generally harmless here.

Fixes: #4359

Signed-off-by: Feng Wang <feng.wang@databricks.com>
2022-06-02 08:19:48 -07:00
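The fix itself is in the Rust agent (`signal::kill` from the nix crate); as a minimal Go sketch of the same "ESRCH is harmless" pattern, assuming a hypothetical helper that signals a container process:

```go
package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

// signalProcess sends sig to pid, treating ESRCH ("no such process")
// as success: if the process is already gone, destroy can proceed.
func signalProcess(pid int, sig unix.Signal) error {
	err := unix.Kill(pid, sig)
	if err == unix.ESRCH {
		// Process already exited; nothing left to kill.
		return nil
	}
	if err != nil {
		return fmt.Errorf("kill(%d, %v): %w", pid, sig, err)
	}
	return nil
}

func main() {
	// Hypothetical pid, for illustration only.
	fmt.Println(signalProcess(12345, unix.SIGKILL))
}
```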
Feng Wang
9726f56fdc runtime: force stop container after the container process exits
Set the stop container force flag to true so that the container state is always set to
"StateStopped" after the container wait goroutine is finished. This is necessary for
the following delete container step to succeed.

Fixes: #4359

Signed-off-by: Feng Wang <feng.wang@databricks.com>
2022-06-02 08:17:08 -07:00
Gabriela Cervantes
168f325c43 docs: Update configuration reference for snap documentation
This PR updates the url link for the kata containers configuration
for the general snap documentation.

Fixes #4341

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2022-06-02 14:55:06 +00:00
Chen Yiyang
38a3188206 runk: Support list sub-command
Support the list sub-command. It traverses the root directory, parses
status files, and prints basic information about containers. The behavior
and print format are consistent with runc. To handle races with runk delete
or modifications by system users, the loop continues traversing when errors
are encountered.

Fixes: #4362

Signed-off-by: Chen Yiyang <cyyzero@qq.com>
2022-06-02 18:24:51 +08:00
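runk itself is written in Rust; purely as an illustration of that traversal strategy (tolerate races by skipping entries that fail to load), here is a hedged Go sketch with a hypothetical root directory and status-file layout:

```go
// Sketch: list containers by scanning a root directory, tolerating
// races with a concurrent "delete" by skipping unreadable entries.
package main

import (
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
)

// Status is a hypothetical subset of runk's status file fields.
type Status struct {
	ID     string `json:"id"`
	Pid    int    `json:"pid"`
	Status string `json:"status"`
	Bundle string `json:"bundle"`
}

func listContainers(root string) []Status {
	entries, err := os.ReadDir(root)
	if err != nil {
		return nil
	}
	var out []Status
	for _, e := range entries {
		data, err := os.ReadFile(filepath.Join(root, e.Name(), "status.json"))
		if err != nil {
			continue // container deleted mid-walk: keep going
		}
		var s Status
		if err := json.Unmarshal(data, &s); err != nil {
			continue // malformed status file: keep going
		}
		out = append(out, s)
	}
	return out
}

func main() {
	// "/run/runk" is a hypothetical root directory.
	for _, s := range listContainers("/run/runk") {
		fmt.Printf("%s\t%d\t%s\t%s\n", s.ID, s.Pid, s.Status, s.Bundle)
	}
}
```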
snir911
a0805742d6 Merge pull request #4350 from snir911/fix_workflow
workflows: add workflow_dispatch triggering to test-kata-deploy
2022-06-02 13:19:13 +03:00
Fabiano Fidêncio
24182d72d9 Merge pull request #4322 from jodh-intel/snap-cleanup
snap: Use helper script and cleanup
2022-06-02 11:47:02 +02:00
Peng Tao
295a01f9b1 Merge pull request #4159 from egernst/topic/iptables
feature: add ability to interact with IPTables within the guest
2022-06-02 11:19:41 +08:00
Tim Zhang
b8e98b175c Merge pull request #4355 from liubin/fix/add-debug-info-for-parse-mount-error
agent: return mount file content if parse mountinfo failed
2022-06-02 10:31:46 +08:00
GabyCT
e8d0be364f Merge pull request #4375 from GabyCT/topic/updateqat
docs: Update Intel QAT documentation links
2022-06-01 15:52:02 -05:00
Chelsea Mafrica
25b1317ead Merge pull request #4357 from egernst/iptables-pkg
osbuilder: add iptables package
2022-06-01 09:28:38 -07:00
Snir Sheriber
b9fc24ff3a docs: update release process github token instructions
and fix the GPG key generation URL

Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2022-06-01 19:08:41 +03:00
Snir Sheriber
c1476a174b docs: update release process with latest workflow triggering
instructions

Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2022-06-01 19:08:25 +03:00
James O. D. Hunt
002f2cd109 snap: Use helper script and cleanup
Move the common shell code to a helper script that is sourced by all
parts.

Add extra quoting to some variables in the snap config file
and simplify.

Fixes: #4304.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-06-01 16:09:29 +01:00
Gabriela Cervantes
2e04833fb9 docs: Update Intel QAT documentation links
This PR updates some Intel QAT documentation url links.

Fixes #4374

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2022-06-01 14:41:00 +00:00
Snir Sheriber
8b57bf97ab workflows: add workflow_dispatch triggering to test-kata-deploy
This allows triggering the test-kata-deploy workflow manually from
any branch instead of always using the one defined on main.

See: https://github.blog/changelog/2020-07-06-github-actions-manual-triggers-with-workflow_dispatch/

Fixes: #4349
Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2022-06-01 16:21:01 +03:00
Zvonko Kaiser
6d0ff901ab docs: Update vGPU use-case
Now that #4213 is merged, we need updated documentation for time-sliced and MIG-backed vGPUs.

Fixes: #4343

Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
2022-06-01 05:58:46 -07:00
James O. D. Hunt
9b108d9937 docs: Improve snap formatting
Improve the snap docs by using more consistent formatting and proper
shell code in the shell example.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-06-01 12:00:40 +01:00
James O. D. Hunt
894f661cc4 docs: Add warning to snap build
Since we must build with `--destructive-mode`, add a warning that the
host environment could change the behaviour of the build, depending on
the packages installed on the system.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-06-01 12:00:40 +01:00
James O. D. Hunt
d759f6c3e5 snap: Fix CH architecture check
Correct the `cloud-hypervisor` part architecture check to use `x86_64`, not
`x64_64`.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-06-01 12:00:38 +01:00
Bin Liu
3e2817f7b5 Merge pull request #4325 from ManaSugi/runk/error-terminal
runk: Return error when tty is used without console socket
2022-06-01 13:58:38 +08:00
Bin Liu
a9a3074828 Merge pull request #4339 from ManaSugi/runk/add-podman-instruction
runk: Add Podman guide in README
2022-06-01 11:05:42 +08:00
Bin Liu
9f81c2dbf0 Merge pull request #4328 from ManaSugi/runk/output-stdout
agent: Pass standard I/O to container launched by runk
2022-06-01 11:00:26 +08:00
Manabu Sugimoto
5903815746 agent: Pass standard I/O to container launched by runk
The `kata-agent` passes its standard I/O file descriptors
through to the container process that will be launched
by `runk` without manipulation or modification, in order
to allow the container process to handle its own I/O operations.

Fixes: #4327

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2022-06-01 10:19:57 +09:00
Bin Liu
9658c6218e Merge pull request #4353 from ManaSugi/runk/enable-agent-unit-tests
agent, runk: Enable test for the agent built with standard-oci-runtime feature
2022-06-01 07:39:01 +08:00
Eric Ernst
d2df1209a5 docs: describe kata handling for core-scheduling
Add initial documentation for core-scheduling.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2022-05-31 16:17:00 -07:00
Michael Crosby
22b6a94a84 shim: add support for core scheduling
In Linux 5.14 (and hopefully some backports), core scheduling allows processes to
be co-scheduled within the same domain on SMT-enabled systems.

The containerd implementation sets the core scheduling domain when launching a
shim. This gives each shim (container/pod) a clean way to be in its own domain,
with any additional containers (v2 pods) launched in the same domain, as well as
any exec'd process added to the container.

kernel docs: https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/core-scheduling.html

For Kata specifically, we look for the SCHED_CORE environment variable
to be set, indicating we should create a new core scheduling domain.

This is equivalent to the containerd shim's PR: e48bbe8394

Fixes: #4309

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
Signed-off-by: Michael Crosby <michael@thepasture.io>
2022-05-31 10:10:40 -07:00
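A minimal sketch, in Go, of what creating such a domain looks like via prctl(2), gated on the SCHED_CORE environment variable mentioned above (constant names from golang.org/x/sys/unix; the shim's actual wiring may differ):

```go
package main

import (
	"fmt"
	"os"

	"golang.org/x/sys/unix"
)

// maybeCreateCoreSchedDomain puts the current process group into a new
// core scheduling domain when SCHED_CORE is set, so SMT siblings are
// only shared with trusted tasks (Linux >= 5.14).
func maybeCreateCoreSchedDomain() error {
	if os.Getenv("SCHED_CORE") == "" {
		return nil // feature not requested
	}
	// prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, pid=0 (self), scope, 0)
	return unix.Prctl(unix.PR_SCHED_CORE, unix.PR_SCHED_CORE_CREATE,
		0, unix.PR_SCHED_CORE_SCOPE_PROCESS_GROUP, 0)
}

func main() {
	if err := maybeCreateCoreSchedDomain(); err != nil {
		fmt.Fprintln(os.Stderr, "core sched:", err)
	}
}
```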
Eric Ernst
af2ef3f7a5 agent-ctl: introduce handle for iptables get/set
Add support for the updated agent API for iptables

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2022-05-31 09:27:58 -07:00
Eric Ernst
65f0cef16c kata-runtime: add iptables CLI to test http endpoint
While end users can connect directly to the shim, let's provide a way to
easily get/set iptables from kata-runtime itself.

Fixes: #4080
Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2022-05-31 09:27:58 -07:00
Eric Ernst
3201ad0830 shim-client: ensure we check resp status for Put/Post
Without this, potential errors are silently dropped. Let's ensure we
return the error code as well as potential data from the response.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2022-05-31 09:27:58 -07:00
Eric Ernst
0706fb28ac kata-runtime: shmgmt: make url usage consistent
Before, we had a mix of slash usage, etc. Unfortunately, when cleaning URL
paths, serve mux seems to mangle the request method, resulting in each
request arriving as a GET (instead of PUT or POST).

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2022-05-31 09:27:58 -07:00
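A small, self-contained Go illustration of the pitfall (the endpoint path here is made up): http.ServeMux answers un-clean paths with a 301 redirect, and Go's default HTTP client replays 301-redirected PUT/POST requests as GET, so client and server must agree on one canonical path:

```go
package main

import (
	"fmt"
	"net/http"
)

func main() {
	mux := http.NewServeMux()
	// Register one canonical form and make every client use exactly
	// this form. A request for "//iptables" (double slash) gets a 301
	// to "/iptables", and Go's http.Client re-issues the redirected
	// request as a GET -- silently downgrading a PUT.
	mux.HandleFunc("/iptables", func(w http.ResponseWriter, r *http.Request) {
		fmt.Fprintf(w, "method=%s\n", r.Method)
	})
	http.ListenAndServe("127.0.0.1:8090", mux)
}
```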
Eric Ernst
2a09378dd9 shim-client: add support for DoPut
While we're at it, make sure we check for nil in DoPost.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2022-05-31 09:27:58 -07:00
Eric Ernst
640173cfc2 shim-mgmt: Add endpoint handler for interacting with iptables
Add two endpoints: ip6tables, iptables.

Each URL handler supports GET and PUT operations. PUT expects the
request's data to be []byte containing iptables information in a
format consumable by iptables-restore.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2022-05-31 09:27:58 -07:00
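A hedged sketch of that handler shape using Go's net/http (the bridge functions are hypothetical; in Kata the shim forwards these calls to the guest agent rather than running iptables itself):

```go
package main

import (
	"io"
	"net/http"
)

// Hypothetical guest-agent bridge functions, stubbed for illustration.
var (
	getIPTables = func(ipv6 bool) ([]byte, error) { return []byte("*filter\nCOMMIT\n"), nil }
	setIPTables = func(ipv6 bool, data []byte) error { return nil }
)

func iptablesHandler(ipv6 bool) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		switch r.Method {
		case http.MethodGet:
			// GET returns the current table dump.
			data, err := getIPTables(ipv6)
			if err != nil {
				http.Error(w, err.Error(), http.StatusInternalServerError)
				return
			}
			w.Write(data)
		case http.MethodPut:
			// PUT body is expected in iptables-save format,
			// ready for iptables-restore inside the guest.
			data, err := io.ReadAll(r.Body)
			if err != nil {
				http.Error(w, err.Error(), http.StatusBadRequest)
				return
			}
			if err := setIPTables(ipv6, data); err != nil {
				http.Error(w, err.Error(), http.StatusInternalServerError)
				return
			}
		default:
			http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
		}
	}
}

func main() {
	http.Handle("/iptables", iptablesHandler(false))
	http.Handle("/ip6tables", iptablesHandler(true))
	http.ListenAndServe("127.0.0.1:8090", nil)
}
```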
Eric Ernst
0136be22ca virtcontainers: plumb iptable set/get from sandbox to agent
Introduce get/set iptables handling. We add a sandbox API for getting and
setting iptables within the guest. This routes the request from the sandbox
interface, through kata-agent, ultimately making requests to the guest
agent.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2022-05-31 09:27:58 -07:00
Eric Ernst
bd50d463b2 agent: iptables: get/set handling for iptables
Initial support for getting and setting iptables in the guest.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2022-05-31 09:27:58 -07:00
Eric Ernst
7c4049aabb osbuilder: add iptables package
Since we are introducing an agent API for interacting with guest
iptables, let's ensure that our example rootfs images have
iptables-save/restore installed.

Fixes: #4356

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2022-05-31 09:21:02 -07:00
Eric Ernst
03176a9e09 proto: update generated code based on proto update
Update the generated agent.pb.go code based on proto update.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2022-05-31 08:45:59 -07:00
Eric Ernst
38ebbc705b proto: update to add set/get iptables
Update the agent protocol definition to introduce support for setting
and getting iptables from the guest.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2022-05-31 08:45:59 -07:00
Bin Liu
78d45b434f agent: return mount file content if parse mountinfo failed
Include the mount file content in the error message when parsing
mountinfo fails, to aid debugging.

Fixes: #4246, #4103

Signed-off-by: Bin Liu <bin@hyper.sh>
2022-05-31 23:36:14 +08:00
Manabu Sugimoto
c7b3941c96 runk: Enable test for the agent built with standard-oci-runtime feature
This enables CI tests for the kata-agent for runk built
with the standard-oci-runtime feature.

Fixes: #4351

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2022-05-31 21:54:28 +09:00
Manabu Sugimoto
6dbce7c3de agent: Remove unused import in console test
Remove some unused imports in the console test module
used by runk's tests.

Fixes: #4351

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2022-05-31 21:54:02 +09:00
Xuewei Niu
6ecea84bc5 rustjail: get home dir using nix crate
Get the user's home dir using the `nix::unistd` crate instead of the
`utils` crate, and remove useless code from the agent.

Fixes: #4209

Signed-off-by: Xuewei Niu <justxuewei@apache.org>
2022-05-31 15:04:33 +08:00
Manabu Sugimoto
648b8d0aec runk: Return error when tty is used without console socket
runk always launches containers in detached mode, so users
have to use a console socket with the run or create operation
when a terminal is used.
If users set `terminal` to `true` in `config.json` and
try to launch a container without specifying a console
socket, runk now returns an error early with a clear message.

Fixes: #4324

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2022-05-31 09:55:39 +09:00
James O. D. Hunt
96c8df40b5 Merge pull request #4335 from ManaSugi/runk/fix-invalid-rootfs
runk: Handle rootfs path in config.json properly
2022-05-30 14:03:58 +01:00
Manabu Sugimoto
5205efd9b4 runk: Add Podman guide in README
runk can launch containers using Podman, so add a guide
to the README.

Fixes: #4338

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2022-05-30 19:06:46 +09:00
James O. D. Hunt
d157f9b71e Merge pull request #3871 from amshinde/update-containerd-docs
Update containerd docs
2022-05-30 08:38:07 +01:00
Manabu Sugimoto
d862ca0590 runk: Handle rootfs path in config.json properly
This commit enables runk to handle `root.path` in `config.json`
properly even if the path is specified by a relative path that
includes the single (`.`) or the double (`..`) dots.
For example, with a bundle at `/to/bundle` and a rootfs directly
under `/to/bundle` such as `/to/bundle/{bin,dev,etc,home,...}`,
the `root.path` value can be either `/to/bundle` or just `.`.
This behavior conforms to OCI runtime spec.
Accordingly, a bundle path managed by runk's status file
(`status.json`) always is statically stored as a canonical path.
Previously, a bundle path has been got by `oci_state()` of rustjail's
API that returns the path as the parent directory path of a rootfs
(`root.path`). In case of the kata-agent, this works properly because
the kata containers assume that the rootfs path is always
`/to/bundle/rootfs`. However in case of standard OCI runtimes,
a rootfs can be placed anywhere under a bundle, so the rootfs path
doesn't always have to be at a `/to/bundle/rootfs`.

Fixes: #4334

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2022-05-30 14:41:26 +09:00
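runk is written in Rust; as a minimal Go sketch of the canonicalization described above (resolve root.path against the bundle directory, then store an absolute, symlink-free path), with hypothetical inputs:

```go
package main

import (
	"fmt"
	"path/filepath"
)

// canonicalRootfs resolves an OCI root.path (".", "..", relative or
// absolute) against the bundle directory into one canonical form.
func canonicalRootfs(bundle, rootPath string) (string, error) {
	if !filepath.IsAbs(rootPath) {
		// Join also Cleans "." and ".." components.
		rootPath = filepath.Join(bundle, rootPath)
	}
	// EvalSymlinks yields the canonical path (it requires the path
	// to exist), suitable for storing in a status file.
	return filepath.EvalSymlinks(rootPath)
}

func main() {
	p, err := canonicalRootfs("/to/bundle", ".")
	fmt.Println(p, err) // "/to/bundle" if that directory exists
}
```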
snir911
d50937435d Merge pull request #4318 from fidencio/topic/update-clh-to-v24.0
clh: Update to v24.0
2022-05-29 15:06:17 +03:00
James O. D. Hunt
56591804b3 docs: Improve snap build instructions
Make it clearer how to build the snap package manually.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-05-26 15:56:36 +01:00
James O. D. Hunt
cb2b30970d snap: Build using destructive mode
Destructive mode is required to build the Kata Containers snap. See:

```
.github/workflows/snap-release.yaml
.github/workflows/snap.yaml
```

Hence, update the last file that we forgot to update with
`--destructive-mode`.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-05-26 15:56:36 +01:00
James O. D. Hunt
60823abb9c docs: Move snap README
Move the snap README to a subdirectory to resolve the warning given by
`snapcraft` (folded and reformatted slightly for clarity):

```
The 'snap' directory is meant specifically for snapcraft,
but it contains the following non-snapcraft-related paths,
which is unsupported and will cause unexpected behavior:

- README.md

If you must store these files within the 'snap' directory,
move them to 'snap/local', which is ignored by snapcraft.
```

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-05-26 15:56:36 +01:00
James O. D. Hunt
4134beee39 Merge pull request #4301 from jodh-intel/snap-package-rust-virtiofsd
snap: Build and package rust version of virtiofsd
2022-05-26 15:55:06 +01:00
Fabiano Fidêncio
fff832874e clh: Update to v24.0
This release has been tracked through the v24.0 project.

The virtio-iommu specification describes how a device can be attached by default
to a bypass domain. This feature is particularly helpful for booting a VM with
guest software which doesn't support virtio-iommu but still needs to access
the device. Now that Cloud Hypervisor supports this feature, it can boot a VM
with Rust Hypervisor Firmware or OVMF even if the virtio-block device exposing
the disk image is placed behind a virtual IOMMU.

Multiple checks have been added to the code to prevent devices with identical
identifiers from being created, and therefore avoid unexpected behaviors at boot
or whenever a device is hotplugged into the VM.

Sparse mmap support has been added to both VFIO and vfio-user devices. This
allows device regions that are not fully mappable to be partially mapped.
The more of a device region that can be mapped into the guest address space,
the fewer VM exits are generated when the device is accessed, which directly
improves the performance of that device.

A new serial_number option has been added to --platform, allowing a user to
set a specific serial number for the platform. This number is exposed to the
guest through the SMBIOS.

* Fix loading RAW firmware (#4072)
* Reject compressed QCOW images (#4055)
* Reject virtio-mem resize if device is not activated (#4003)
* Fix potential mmap leaks from VFIO/vfio-user MMIO regions (#4069)
* Fix algorithm finding HOB memory resources (#3983)

* Refactor interrupt handling (#4083)
* Load kernel asynchronously (#4022)
* Only create ACPI memory manager DSDT when resizable (#4013)

Deprecated features will be removed in a subsequent release; users should
plan to use alternatives:

* The mergeable option from the virtio-pmem support has been deprecated
(#3968)
* The dax option from the virtio-fs support has been deprecated (#3889)

Fixes: #4317

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-05-26 08:51:18 +00:00
James O. D. Hunt
49361749ed snap: Build and package rust version of virtiofsd
Update the snap config file to build the rust version of `virtiofsd` for
x86_64, but build QEMU's C version for other platforms.

Fixes: #4261.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-05-25 17:04:05 +01:00
James O. D. Hunt
27d903b76a snap: Put the yq binary in the staging bin directory
Rather than putting the `yq` binary in the staging directory itself,
put it in the `bin/` sub-directory.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-05-25 09:40:09 +01:00
James O. D. Hunt
d7b4ce049e snap: Remove unused variable
Remove the unused `kata_url` variable and use the value in the `website`
YAML metadata instead.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-05-25 09:40:09 +01:00
James O. D. Hunt
43de5440e5 snap: Fix unbound variable error
Don't assume `GITHUB_REF` is set.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-05-25 09:40:09 +01:00
James O. D. Hunt
c9b291509d snap: Fix whitespace
Remove trailing space.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-05-25 09:40:09 +01:00
Fupan Li
62d1ed0651 Merge pull request #4290 from Tim-Zhang/remove-oci-kata-agent
runk: merge oci-kata-agent into runk
2022-05-25 11:31:25 +08:00
Fabiano Fidêncio
8a2b82ff51 Merge pull request #4276 from jongwu/build_rust_virtiofsd
virtiofsd: static build virtiofsd from rust code for non-x86
2022-05-24 14:57:21 +02:00
Eric Ernst
6d00701ec9 Merge pull request #4298 from yibozhuang/fix-direct-volume
Fix issues with direct-volume stats feature
2022-05-23 15:23:51 -07:00
Tim Zhang
122a85e222 agent: remove bin oci-kata-agent
Fixes: #4291

Signed-off-by: Tim Zhang <tim@hyper.sh>
2022-05-23 16:55:16 +08:00
Tim Zhang
35619b45aa runk: merge oci-kata-agent into runk
Merge two bins into one.

Fixes: #4291

Signed-off-by: Tim Zhang <tim@hyper.sh>
2022-05-23 16:54:09 +08:00
Fabiano Fidêncio
b9315af092 Merge pull request #4294 from yibozhuang/direct-volume-stats
runtime: fix incorrect Action function for direct-volume stats
2022-05-23 10:22:29 +02:00
Jianyong Wu
10c13d719a qemu: remove virtiofsd option in qemu config
As virtiofsd will be built base on rust, "virtiofsd" option is no longer
needed in qemu.

Fixes: #4258
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2022-05-23 12:57:59 +08:00
Jianyong Wu
d20bc5a4d2 virtiofsd: build rust based virtiofsd from source for non-x86_64
Based on @fidencio's opoinon,
On Arm: static build virtiofsd using musl lib;
on ppc64 & s390: static build virtiofsd using gnu lib;

Fixes: #4258
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2022-05-23 12:57:59 +08:00
Archana Shinde
c95ba63c0c docs: Remove information related to Kata 1.x
Since Kata 2.x does not support the runtime CLI, remove information
related to it. Update the configuration snippet accordingly.

Fixes #3870

Signed-off-by: Archana Shinde <archana.m.shinde@intel.com>
2022-05-21 07:19:28 +05:30
Archana Shinde
34b80382b6 docs: Get rid of note related to networking.
One may want to use standalone containerd without k8s
and still have networking enabled for the container.
Get rid of the note due to its inaccuracy.

Signed-off-by: Archana Shinde <archana.m.shinde@intel.com>
2022-05-21 07:19:28 +05:30
Archana Shinde
dfad5728a7 docs: Mention --cni flag while invoking ctr
Specify that the `--cni` flag needs to be passed to the `ctr` tool
while starting a container in order to have networking enabled for the
container. This flag allows containerd to call into the configured
network plugin which in turn creates a network interface for the
container.

Signed-off-by: Archana Shinde <archana.m.shinde@intel.com>
2022-05-21 07:19:28 +05:30
Yibo Zhuang
8e7c5975c6 agent: fix direct-assigned volume stats
The current implementation of walking the
disks to match with the requested volume path
in agent doesn't work because the volume path
provided by the shim to the agent is the mount
path within the guest and not the device name.
The current logic is trying to match the
device name to the volume path which will never
match.

This change will simplify the
get_volume_capacity_stats and
get_volume_inode_stats to just call statfs and
get the bytes and inodes usage of the volume
path directly.

Fixes: #4297

Signed-off-by: Yibo Zhuang <yibzhuang@gmail.com>
2022-05-20 18:43:27 -07:00
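The agent change is in Rust; the core idea, in a hedged Go sketch, is to call statfs(2) on the guest mount path directly instead of walking block devices:

```go
package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

// volumeStats returns used/total bytes and inodes for the filesystem
// mounted at path, straight from statfs(2) on the mount point.
func volumeStats(path string) (usedBytes, totalBytes, usedInodes, totalInodes uint64, err error) {
	var st unix.Statfs_t
	if err = unix.Statfs(path, &st); err != nil {
		return
	}
	totalBytes = st.Blocks * uint64(st.Bsize)
	usedBytes = (st.Blocks - st.Bfree) * uint64(st.Bsize)
	totalInodes = st.Files
	usedInodes = st.Files - st.Ffree
	return
}

func main() {
	used, total, ui, ti, err := volumeStats("/")
	fmt.Println(used, total, ui, ti, err)
}
```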
Yibo Zhuang
4428ceae16 runtime: direct-volume stats use correct name
Today the shim does a translation when doing
direct-volume stats, where it takes the source and
returns the mount path within the guest.

The source for a direct-assigned volume is actually
the device path on the host, not the publish
volume path.

This change performs a lookup of the mount info
during direct-volume stats to ensure that the
device path is provided to the shim for querying
the volume stats.

Fixes: #4297

Signed-off-by: Yibo Zhuang <yibzhuang@gmail.com>
2022-05-20 18:42:47 -07:00
Yibo Zhuang
ffdc065b4c runtime: direct-volume stats update to use GET parameter
The default Go HTTP mux, AFAIK, doesn't support pattern
routing, so right now the client pads the direct-volume
stats URL with the volume path as a subpath, which always
results in the shim returning 404 Not Found.

This change updates the shim to take the volume
path as a GET query parameter instead of a subpath.
If the parameter is missing or empty, 400 Bad Request
is returned to the client.

Fixes: #4297

Signed-off-by: Yibo Zhuang <yibzhuang@gmail.com>
2022-05-20 18:41:51 -07:00
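A minimal Go sketch of the handler shape this describes, reading the volume path from a query parameter and rejecting empty values (the route and parameter names here are illustrative):

```go
package main

import (
	"fmt"
	"net/http"
)

func statsHandler(w http.ResponseWriter, r *http.Request) {
	// The volume path arrives as a GET query parameter, since the
	// default mux has no pattern routing for path suffixes.
	volPath := r.URL.Query().Get("path") // parameter name illustrative
	if volPath == "" {
		http.Error(w, "missing volume path", http.StatusBadRequest)
		return
	}
	fmt.Fprintf(w, "stats for %s\n", volPath)
}

func main() {
	http.HandleFunc("/direct-volume/stats", statsHandler)
	http.ListenAndServe("127.0.0.1:8090", nil)
}
```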
Yibo Zhuang
f295953183 runtime: fix incorrect Action function for direct-volume stats
The Action field expects a function that returns error,
but the current direct-volume stats Action returns
(string, error), which is invalid.

This change fixes the signature and prints the stats from
the command instead.

Fixes: #4293

Signed-off-by: Yibo Zhuang <yibzhuang@gmail.com>
2022-05-20 14:55:00 -07:00
Peng Tao
2c238c8504 Merge pull request #4213 from zvonkok/vfio
runtime: Adding the correct detection of mediated PCIe devices
2022-05-20 15:00:23 +08:00
Fabiano Fidêncio
811ac6a8ce Merge pull request #4282 from r4f4/runtime-dedup-types-import
runtime: remove duplicate 'types' import
2022-05-19 22:15:36 +02:00
Chelsea Mafrica
d8be0f8e9f Merge pull request #4281 from r4f4/runtime-qemu-comments
runtime: sync docstrings with function names
2022-05-19 09:17:38 -07:00
Rafael Fonseca
7a5ccd1264 runtime: sync docstrings with function names
The functions were renamed but their docstrings were not.

Fixes #4006

Signed-off-by: Rafael Fonseca <r4f4rfs@gmail.com>
2022-05-19 14:31:47 +02:00
Greg Kurz
fa61bd43ee Merge pull request #4238 from snir911/wip/legacy_console
qemu: allow using legacy serial device for the console
2022-05-19 14:30:59 +02:00
Rafael Fonseca
ce2e521a0f runtime: remove duplicate 'types' import
Fallout of 09f7962ff

Fixes #4285

Signed-off-by: Rafael Fonseca <r4f4rfs@gmail.com>
2022-05-19 13:49:47 +02:00
Snir Sheriber
834f93ce8a docs: fix annotations example
An annotation value should always be quoted, regardless of its type.

Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2022-05-19 09:52:30 +03:00
GabyCT
d7aded7238 Merge pull request #4279 from GabyCT/topic/updateosbuilderreadme
docs: Remove clear containers reference in README
2022-05-18 14:26:56 -05:00
Snir Sheriber
f4994e486b runtime: allow annotation configuration to use_legacy_serial
and update the docs and test

Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2022-05-18 18:58:21 +03:00
Gabriela Cervantes
24a2b0f6a2 docs: Remove clear containers reference in README
This PR removes the Clear Containers reference from the rootfs builder
README, as it is no longer used and is deprecated.

Fixes #4278

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2022-05-18 14:53:17 +00:00
Fabiano Fidêncio
c88a48be21 Merge pull request #4271 from r4f4/runtime-err-check-fix
runtime: do not check for EOF error in console watcher
2022-05-18 09:49:48 +02:00
GabyCT
9458cc0053 Merge pull request #4273 from GabyCT/topic/removenemuconf
kernel: Remove nemu.conf from packaging
2022-05-17 16:06:45 -05:00
Greg Kurz
42c64b3d2c Merge pull request #4269 from r4f4/remove-unused-param-get_kata_deps
tools: delete unused param from get_from_kata_deps callers
2022-05-17 18:54:47 +02:00
Gabriela Cervantes
abad33eba0 kernel: Remove nemu.conf from packaging
This PR removes nemu.conf from the kernel configurations, as we are
no longer using NEMU.

Fixes #4272

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2022-05-17 16:23:17 +00:00
Chelsea Mafrica
04bd8f16f0 Merge pull request #4252 from Champ-Goblem/patch/fix-is-signal-handled
agent: Fix is_signal_handled failing parsing str to u64
2022-05-17 08:31:48 -07:00
GabyCT
12f0ab120a Merge pull request #4191 from dgibson/go-test-script
Improve Go unit test script
2022-05-17 10:27:04 -05:00
Rafael Fonseca
e87eb13c4f tools: delete unused param from get_from_kata_deps callers
The param was deleted by a09e58fa80, so
update the callers not to use it.

Fixes #4245

Signed-off-by: Rafael Fonseca <r4f4rfs@gmail.com>
2022-05-17 15:18:41 +02:00
Rafael Fonseca
8052fe62fa runtime: do not check for EOF error in console watcher
The documentation of the bufio package explicitly says

"Err returns the first non-EOF error that was encountered by the
Scanner."

When io.EOF happens, `Err()` will return `nil` and `Scan()` will return
`false`.

Fixes #4079

Signed-off-by: Rafael Fonseca <r4f4rfs@gmail.com>
2022-05-17 15:14:33 +02:00
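The behavior being relied on, shown in a self-contained Go example: Scan() returns false at EOF while Err() stays nil, so EOF needs no special-casing in the console watcher loop:

```go
package main

import (
	"bufio"
	"fmt"
	"strings"
)

func main() {
	sc := bufio.NewScanner(strings.NewReader("line1\nline2\n"))
	for sc.Scan() {
		fmt.Println("console:", sc.Text())
	}
	// At EOF, Scan() has returned false and Err() is nil; only real
	// errors (e.g. a closed pty) surface here.
	if err := sc.Err(); err != nil {
		fmt.Println("console watcher error:", err)
	}
}
```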
Fabiano Fidêncio
5d43718494 Merge pull request #4267 from cmaf/packaging-config-add-numa
packaging: Add kernel config option for SGX in Gramine
2022-05-17 13:10:24 +02:00
Snir Sheriber
c67b9d2975 qemu: allow using legacy serial device for the console
This allows getting guest early boot logs, which are usually
missed when virtconsole is used.
- It utilizes previous work on the govmm side:
https://github.com/kata-containers/govmm/pull/203
- A unit test is added

Fixes: #4237
Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2022-05-17 12:06:11 +03:00
Snir Sheriber
44814dce19 qemu: treat console kernel params within appendConsole
as it is tightly coupled with the appended console device.
Additionally, have it tested.

Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2022-05-17 12:05:31 +03:00
Fupan Li
856c8e81f1 Merge pull request #4220 from liubin/fix/4219
ci: Don't run Docs URL Alive Check workflow on forks
2022-05-17 12:19:55 +08:00
Chelsea Mafrica
4f586d2a91 packaging: Add kernel config option for SGX in Gramine
For the Gramine Shielded Containers guest kernel, CONFIG_NUMA must be
enabled.

Fixes #4266

Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2022-05-16 16:58:26 -07:00
Champ-Goblem
4b437d91f0 agent: Fix is_signal_handled failing parsing str to u64
In the is_signal_handled function, when parsing the hex string returned
from `/proc/<pid>/status`, the space/tab character after the colon
is not removed.

This patch trims the result of SigCgt so that
all whitespace characters are removed. It also extends the existing
test cases to check for this scenario.

Fixes: #4250
Signed-off-by: Champ-Goblem <cameron@northflank.com>
2022-05-16 20:34:26 +02:00
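The agent is Rust; the essence of the fix (trim whitespace before parsing the hex mask) in a hedged Go sketch:

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// sigCgtMask parses a "SigCgt:\t0000000180014002" line from
// /proc/<pid>/status into a signal bitmask. The value must be
// trimmed first: the separator after the colon is a tab/space,
// and an untrimmed string fails hex parsing.
func sigCgtMask(line string) (uint64, error) {
	_, value, ok := strings.Cut(line, ":")
	if !ok {
		return 0, fmt.Errorf("malformed status line: %q", line)
	}
	return strconv.ParseUint(strings.TrimSpace(value), 16, 64)
}

func isSignalHandled(mask uint64, sig uint) bool {
	return mask&(1<<(sig-1)) != 0
}

func main() {
	mask, err := sigCgtMask("SigCgt:\t0000000180014002")
	fmt.Println(mask, err, isSignalHandled(mask, 15)) // 15 = SIGTERM
}
```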
Fabiano Fidêncio
6ffdebd202 Merge pull request #4255 from cmaf/tools-patch-qemu-sgx-numa
tools: Add QEMU patches for SGX numa support
2022-05-16 18:10:41 +02:00
Chelsea Mafrica
ee9ee77388 Merge pull request #4264 from GabyCT/topic/updatecontainerdrunt
docs: Update runc containerd runtime
2022-05-16 08:56:26 -07:00
Gabriela Cervantes
88fb9b72e2 docs: Update runc containerd runtime
As we are using a containerd version > 1.4, we need to update
the runc containerd runtime.

Fixes #4263

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2022-05-16 14:33:48 +00:00
Fabiano Fidêncio
d1f2852d8b tools: Stop building virtiofsd with qemu (for x86_64)
As we can finally move to using the Rust virtiofs daemon, let's stop
building and packaging the C version of virtiofsd for x86_64.

Fixes: #4249
Depends-on: github.com/kata-containers/tests#4785

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-05-16 09:30:24 +02:00
Fabiano Fidêncio
c39852e83f runtime: Use ${LIBEXEC}/virtiofsd as the default virtiofsd path
As we now build and ship the Rust version of virtiofsd, which is not
tied to QEMU, we need to update its default location to match where
we're installing this binary.

Fixes: #4249

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-05-16 09:30:24 +02:00
Chelsea Mafrica
b4b9068cb7 tools: Add QEMU patches for SGX numa support
There are a few patches for SGX numa support in QEMU added after the
6.2.0 release. Add them for SGX support in Kata.

Fixes #4254

Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2022-05-13 16:34:57 -07:00
Fabiano Fidêncio
b780be99d7 Merge pull request #4233 from fidencio/topic/virtiofsd-switch-to-the-rust-version
Build and distribute the rust version of virtiofsd
2022-05-13 19:38:01 +02:00
Fabiano Fidêncio
a475956abd workflows: Add support for building virtiofsd
As already done for the other assets we rely on, let's build (well, pull,
in this very specific case) the virtiofsd binary, as we're relying on
its standalone Rust version from now on.

Fixes: #4234

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-05-13 11:37:36 +02:00
Fabiano Fidêncio
71f59f3a7b local-build: Add support for building virtiofsd
As done for the other binaries we release, let's add support for
"building" (or pulling down) the static binary we ship as part of the
kata-containers static tarball (the same one used by kata-deploy).

Right now the virtiofsd is installed in /opt/kata/libexec/virtiofsd, a
different path than the virtiofsd that comes with QEMU.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-05-13 11:37:36 +02:00
Fabiano Fidêncio
c7ac55b6d7 dockerbuild: Install unzip
As virtiofsd comes in `zip` format, let's install unzip in the
containers so we can access the virtiofsd binary.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-05-13 11:37:36 +02:00
Fabiano Fidêncio
8e2042d055 tools: add script to pull virtiofsd
Right now this is very much x86_64 specific, but I'd like to count on
the maintainers of the other architectures to expand it.

Also, the name as it stands may be misleading, as we're actually only
pulling the binary that's statically built using `musl` and released as
part of the official virtiofsd releases.  But we'll need to build it for
the other architectures, thus I'm following the naming of the scripts
used by the other components.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-05-13 11:37:21 +02:00
Fabiano Fidêncio
dbedea5086 versions: Add virtiofsd entry
As we're switching to using the rust version of the virtiofsd, let's
give it its own entry in the versions.yaml file, as it's no longer part
of QEMU.

It's important to mention that GitLab doesn't provide a well-formed URL
for the releases. Instead, it adds a hash there, leading us to have to
add the specific link for the tarball.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-05-13 11:23:39 +02:00
David Gibson
e73b70baff runtime: Don't run unit tests verbose by default
go-test.sh by default adds the -v option to 'go test' meaning that output
will be printed from all the passing tests as well as any failing ones.
This results in a lot of output in which it's often difficult to locate the
failing tests you're interested in.

So, remove -v from the default flags.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-05-13 13:22:31 +10:00
David Gibson
f24a6e761f runtime: Consolidate flags setting in unit tests script
One of the responsibilities of the go-test.sh script is setting up the
default flags for 'go test'.  This is constructed across several different
places in the script using several unneeded intermediate variables though.

Consolidate all the flag construction into one place.

fixes #4190

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-05-13 13:22:29 +10:00
David Gibson
cf465feb02 runtime: Don't change test behaviour based on $CI or $KATA_DEV_MODE
go-test.sh changes behaviour based on both the $CI and $KATA_DEV_MODE
variables, but not in a way that makes a lot of sense.

If either one is set it uses the test_coverage path, instead of the
test_local path.  That collects coverage information, as the name
suggests, but it also means it runs the tests twice as root and
non-root, which is very non-obvious.

It's not clear what use case the test_local path is for at all.
Developer local builds will typically have $KATA_DEV_MODE set and CI
builds will have $CI set.  There's essentially no downside to running
coverage all the time - it has little impact on the test runtime.

In addition, if *both* $CI and $KATA_DEV_MODE are set, the script
refuses to run things as root, considering it "unsafe".  While having
both set might be unwise in a general sense, there's not really any
way running sudo can be any more unsafe than it is with either one
set.

So, simplify everything by just always running the test_coverage path.
This leaves the test_local path unused, so we can remove it entirely.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-05-13 13:14:37 +10:00
David Gibson
34c4ac599c runtime: Remove redundant subcommands from go-test.sh
go-test.sh accepts subcommands, however invoking it in the usual way via
the Makefile doesn't use them.  In fact the only remaining subcommand is
"help" and we already have another way of getting the usage information
(-h or --help).  We don't need a second way, so just drop subcommand
handling.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-05-13 13:14:37 +10:00
David Gibson
0aff5aaa39 runtime: Simplify package listing in go-test.sh
go-test.sh defaults to testing all the packages listed by go list, except
for a number filtered out.  It turns out that none of those filters are
necessary any more:
  * We've long required a Go newer than 1.9 which means the vendor filter
    isn't needed
  * The agent filter doesn't do anything now that we've moved to the Kata
    2.x unified repo
  * The tests filters don't hit anything on the list of modules in
    src/runtime (which is the only user of the script)

But since we don't need to filter anything out any more, we don't even need
to iterate through a list ourselves.  We can simply pass "./..." directly
to go test and it will iterate through all the sub-packages itself.

Interestingly, this more than doubles the speed of "make test" for me - I
suspect because go test's internal parallelism works better over a larger
pool of tests.

This also lets us remove handling of non-existent coverage files from
test_go_package(), since with default options we no longer test packages
without tests. If the user explicitly requests testing of a package with
no tests, then failing makes sense.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-05-13 13:14:37 +10:00
David Gibson
557c4cfd00 runtime: Don't chmod coverage files in Go tests
The go-test.sh script has an explicit chmod command, run as root, to
set the mode of the temporary coverage files to 0644.  AFAICT the
point of this is specifically the 004 bit allowing world read access,
so that we can then merge the temporary coverage file into the main
coverage file.

That's a convoluted way of doing things.  Instead we can just run the tail
command which reads the temporary file as the same user that generated it.

In addition, go-test.sh became root to remove that temporary coverage
file.  This is not necessary, since deleting a regular file just requires
write access to the directory, not the file itself.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-05-13 13:14:37 +10:00
David Gibson
04c8b52e04 runtime: Remove HTML coverage option from go-test.sh
The html-coverage option to this script doesn't really alter behaviour
it just does the same thing as normal coverage, then converts the
report to HTML.  That conversion is a single command, plus a chmod to
make the final output mode 0644.  That overrides any umask the user
has set, which doesn't seem like a policy decision this script should
be making.

Nothing in the kata-containers or tests repository uses this, so it doesn't
really make sense to keep this logic inside this script.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-05-13 13:14:37 +10:00
David Gibson
7f76914422 runtime: Add coverage.txt.tmp to gitignore
In addition to coverage.txt, the go-test.sh script creates
coverage.txt.tmp files while running.  These are temporary and
certainly shouldn't be committed, so add them to the gitignore file.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-05-13 13:14:37 +10:00
David Gibson
13c2577004 runtime: Move go testing script locally
The go unit tests for the runtime are invoked by the helper script
ci/go-test.sh, which calls the run_go_test() function in ci/lib.sh, which
calls into .ci/go-test.sh from the tests repository.

But the runtime is the only user of this script, and stuff for unit tests
(rather than functional or integration tests) generally lives in the main
repository, not the tests repository.

So, just move the actual script into src/runtime.  A change to remove it
from the tests repo will follow.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-05-13 13:14:37 +10:00
Wainer Moschetta
97425a7fe6 Merge pull request #4240 from stevenhorsman/dev-guide-broken-link
doc: Update log parser link
2022-05-12 11:51:51 -03:00
stevenhorsman
4210646802 doc: Update log parser link
- Update log-parser link to reflect new location
- Also update the link to be relative

Fixes: #4239
Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2022-05-12 14:23:13 +01:00
snir911
51fa4ab671 Merge pull request #4165 from snir911/mv_parser
Move the kata-log-parser from the tests repo
2022-05-11 10:33:36 +03:00
Bo Chen
79fb4fc5cb Merge pull request #4223 from likebreath/0509/clh_v23.1
versions: Upgrade to Cloud Hypervisor v23.1
2022-05-10 10:40:22 -07:00
Snir Sheriber
271933fec0 log-parser: fix some of the documentation
minor fixes of links and text

Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2022-05-10 13:23:25 +03:00
Snir Sheriber
c7dacb1211 log-parser: move the kata-log-parser from the tests repo
to the kata-containers repo under the src/tools/log-parser folder
and vendor the modules

Fixes: #4100
Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2022-05-10 13:23:25 +03:00
GabyCT
61a167139c Merge pull request #4186 from liubin/fix/4185-skip-loop-by-user
agent: Add a macro to skip a loop easier
2022-05-09 16:58:29 -05:00
Bo Chen
82ea018281 versions: Upgrade to Cloud Hypervisor v23.1
The following issues have been addressed in the latest bug-fix release
v23.1 of Cloud Hypervisor: 1) Add some missing seccomp rules; 2) Remove
virtio-fs filesystem entries from config on removal; 3) Do not delete the
API socket on API server start; 4) Reject virtio-mem resize if the guest
hasn't activated the device; 5) Fix OpenAPI naming of I/O throttling
knobs.

Fixes: #4222

Signed-off-by: Bo Chen <chen.bo@intel.com>
2022-05-09 14:15:12 -07:00
Fupan Li
8aad2c59c5 Merge pull request #4184 from liubin/fix/4182-runk-kill-all
runk: use custom Kill command to support --all option
2022-05-09 17:56:10 +08:00
Zvonko Kaiser
2a1d394147 runtime: Adding the correct detection of mediated PCIe devices
Fixes #4212

Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
2022-05-09 00:57:06 -07:00
Bin Liu
7bc4ab68c3 ci: Don't run Docs URL Alive Check workflow on forks
This workflow is a scheduled job that runs at 23:00
every Sunday. It should only run on the main repo,
not on forks.

Fixes: #4219

Signed-off-by: Bin Liu <bin@hyper.sh>
2022-05-09 11:54:25 +08:00
James O. D. Hunt
79d93f1fe7 Merge pull request #4137 from Shensd/sandbox-tests-online_resources
agent: add test coverage for functions find_process and online_resources
2022-05-06 09:20:57 +01:00
Chelsea Mafrica
e2f68c6093 Merge pull request #4187 from fidencio/test-hook-grpc-to-oci
rustjail: Add tests for hook_grpc_to_oci
2022-05-04 09:25:45 -07:00
Fabiano Fidêncio
d16097a805 Merge pull request #4203 from fidencio/2.5.0-alpha1-branch-bump
# Kata Containers 2.5.0-alpha1
2022-05-04 17:53:48 +02:00
Fabiano Fidêncio
9b863b0e01 release: Kata Containers 2.5.0-alpha1
- agent watchers: ensure uid/gid is preserved on copy/mkdir
- clh: Rely on Cloud Hypervisor for generating the device ID
- agent: add tests for create_logger_task function
- runk: set BinaryName for runk for containerd
- tools: Add a Rust-based standard OCI container runtime based on Kata agent
- rustjail: add tests for parse_mount_table
- Virtcontainers: Enable hot plugging vhost-user-blk device on ARM
- docs: repropose direct-assigned volume
- versions: change qemu tdx url and tag
- doc: Update for NVIDIA GPUs
- agent-ctl: Fix abstract socket connections
- Implement network and disk rate limiter for Cloud Hypervisor
- kata-deploy: Add support to RKE2
- docs: Update containerd link to installation guide
- docs: remove pc machine type supports
- Agent: Unit tests for random.rs
- rustjail: Add tests for mount_grpc_to_oci
- packaging: Fix broken path in `build-static-clh.sh`
- Fix Go unit tests to clean up /tmp after themselves
- rustjail: add tests for mount_from function
- rustjail: Add tests for hooks_grpc_to_oci
- agent: modify the type of swappiness to u64
- libs/safe-path: add crate to safely resolve fs paths
- agent: move assert_result macro to test_utils file
- rustjail: Add tests for root_grpc_to_oci
- agent: add tests for mount_to_rootfs function
- agent: add tests for update_container_namespaces
- agent: add tests for is_signal_handled function
- Upgrade to Cloud Hypervisor v23.0
- agent: best-effort removing mount point
- test: Fix golangci-lint error for s390x
- fsGroup support for direct-assigned volume
- kata-monitor: add the README file
- kata-monitor: update the hrefs in the debug/pprof index page
- runtime: Base64 encode the direct volume mountInfo path
- runtime: no need to write virtiofsd error to log
- kata-monitor: add some links when generating pages for browsers
- agent: Avoid agent panic when reading empty stats
- docs: Update link to contributions guide
- agent: add tests for mount_storage
- agent: add test coverage for parse_mount_flags_and_options function
- agent: add tests for do_write_stream function
- runtime: delete debug option in virtiofsd
- rustjail: add test coverage for process_grpc_to_oci function
- agent: Allow the agent to be rebuilt with the change of Cargo features
- protocols: add src/csi.rs to .gitignore
- kata-runtime enable hugepage support
- docs: Add a firecracker installation guide
- runtime: Allow and require no initrd for SE
- test: use `T.TempDir` to create temporary test directory
- clh: Expose service offload configuration

33a8b705 clh: Rely on Cloud Hypervisor for generating the device ID
70eda2fa agent: watchers: ensure uid/gid is preserved on copy/mkdir
7772f7dd runk: set BinaryName for runk for containerd
7ffe5a16 docs: Direct-assigned volume design
081f6de8 versions: change qemu tdx url and tag
666aee54 docs: Add VSOCK localhost example for agent-ctl
86d348e0 docs: Use VM term in agent-ctl doc
4b9b62bb agent-ctl: Fix abstract socket connections
b6467ddd clh: Expose disk rate limiter config
7580bb5a clh: Expose net rate limiter config
a88adaba clh: Cloud Hypervisor has a built-in Rate Limiter
63c4da03 clh: Implement the Disk RateLimiter logic
511f7f82 config: Add DiskRateLimiter* to Cloud Hypervisor
5b18575d hypervisor: Add disk bandwidth and operations rate limiters
1cf94692 clh: Implement the Network RateLimiter logic
00a5b1bd utils: Define DefaultRateLimiterRefillTimeMilliSecs
be1bb7e3 utils: Move FC's function to revert bytes to utils
c9f6496d config: Add NetRateLimiter* to Cloud Hypervisor
2d35e606 hypervisor: Add network bandwidth and operations rate limiters
b0e439cb rustjail: add tests for parse_mount_table
ccb01839 kata-deploy: Add support to RKE2
9d39362e kata-deploy: Reestructure the installing section
18d27f79 kata-deploy: Add a missing `$` prefix in the README
6948b4b3 docs: Update containerd link to installation guide
b221a259 tools: Add runk
2c218a07 agent: Modify Kata agent for runk
dd4bd7f4 doc: Added initial doc update for NV GPUs
832c33d5 docs: remove pc machine type supports
b658dccc tools: fix typo in clh directory name
afbd60da packaging: Fix clh build from source fall-back
4b9e78b8 rustjail: Add tests for mount_grpc_to_oci
81f6b486 agent: add tests for create_logger_task function
96bc3ec2 rustjail: Add tests for hooks_grpc_to_oci
02395027 agent: modify the type of swappiness to u64
1b931f42 runtime: Allow mockfs storage to be placed in any directory
ef6d54a7 runtime: Let MockFSInit create a mock fs driver at any path
5d8438e9 runtime: Move mockfs control global into mockfs.go
963d03ea runtime: Export StoragePathSuffix
1719a8b4 runtime: Don't abuse MockStorageRootPath() for factory tests
bec59f9e runtime: Make bind mount tests better clean up after themselves
f7ba21c8 runtime: Clean up mock hook logs in tests
90b2f5b7 runtime: Make SetupOCIConfigFile clean up after itself
2eeb5dc2 runtime: Don't use fixed /tmp/mountPoint path
0ad89ebd safe-path: add more unit test cases
b63774ec libs/safe-path: add crate to safely resolve fs paths
f385b21b rustjail: add tests for mount_from function
0e7f1a5e agent: move assert_result macro to test_utils file
2256bcb6 rustjail: Add tests for root_grpc_to_oci
7b2ff026 kata-monitor: add a README file
29e569aa virtcontainers: clh: Re-generate the client code
6012c197 versions: Upgrade to Cloud Hypervisor v23.0
aabcebbf agent: best-effort removing mount point
d136c9c2 test: Fix golangci-lint error for s390x
86977ff7 kata-monitor: update the hrefs in the debug/pprof index page
78f30c33 agent: Avoid agent panic when reading empty stats
6e79042a runtime: no need to write virtiofsd error to log
9b6f24b2 agent: add tests for mount_to_rootfs function
c3776b17 agent: add tests for is_signal_handled function
9c22d955 agent: add tests for update_container_namespaces
92c00c7e agent: fsGroup support for direct-assigned volume
6e9e4e8c docs: Update link to contributions guide
532d5397 runtime: fsGroup support for direct-assigned volume
6a47b82c proto: fsGroup support for direct-assigned volume
9d5e7ee0 agent: add tests for mount_storage
f8cc5d1a kata-monitor: add some links when generating pages for browsers
c31cd0e8 rustjail: add test coverage for process_grpc_to_oci function
1118a3d2 agent: add test coverage for parse_mount_flags_and_options function
9d5b03a1 runtime: delete debug option in virtiofsd
eff7c7e0 agent: Allow the agent to be rebuilt with the change of Cargo features
b975f2e8 Virtcontainers: Enable hot plugging vhost-user-blk device on ARM
962d05ec protocols: add src/csi.rs to .gitignore
354cd3b9 runtime: Base64 encode the direct volume mountInfo path
485aeabb agent: add tests for do_write_stream function
4405b188 docs: Add a firecracker installation guide
98750d79 clh: Expose service offload configuration
59c7165e test: use `T.TempDir` to create temporary test directory
ff17c756 runtime: Allow and require no initrd for SE
1cad3a46 agent/random: Ensure data.len > 0
33c953ac agent: Add test_ressed_rng_not_root
39a35b69 agent: Add test to random::reseed_rng()
d8f39fb2 agent/random: Rename RNDRESEEDRNG to RNDRESEEDCRNG
a2f5c176 runtime/virtcontainers: Pass the hugepages resources to agent

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-05-04 16:16:53 +02:00
Fabiano Fidêncio
bd5da4a7d9 Merge pull request #4189 from yibozhuang/watchable-mount-permission
agent watchers: ensure uid/gid is preserved on copy/mkdir
2022-05-04 12:29:24 +02:00
Fabiano Fidêncio
ec250c10e9 Merge pull request #4197 from fidencio/topic/workaround-race-condition-on-removing-and-adding-device-with-clh
clh: Rely on Cloud Hypervisor for generating the device ID
2022-05-04 11:50:14 +02:00
Fabiano Fidêncio
33a8b70558 clh: Rely on Cloud Hypervisor for generating the device ID
We're currently hitting a race condition on the Cloud Hypervisor's
driver code when quickly removing and adding a block device.

This happens because the device removal is an asynchronous operation,
and we currently do *not* monitor events coming from Cloud Hypervisor to
know when the device was actually removed.  On top of this, the
sandbox code is unaware of the pending removal, and when a new device is
attached it may quickly assign the very same ID to the new device,
leading to the Cloud Hypervisor driver trying to hotplug a device with
the same ID as the device that has not yet been removed.

This is, in a nutshell, why the tests with Cloud Hypervisor and
devmapper have been failing every now and then.

The workaround taken to solve the issue is basically *not* passing down
the device ID to Cloud Hypervisor and simply letting Cloud Hypervisor
itself generate those, as Cloud Hypervisor does it in a manner that
avoids such conflicts.  With this addition we have then to keep a map of
the device ID and the Cloud Hypervisor's generated ID, so we can
properly remove the device.

This workaround will probably stay for a while, at least till someone
has enough cycles to implement a way to watch the device removal event
and then properly act on that.  Spoiler alert, this will be a complex
change that may not even be worth it considering the race can be avoided
with this commit.

Fixes: #4176

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-05-04 09:04:03 +02:00
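
The bookkeeping described above can be pictured as a small map keyed by the runtime's own device ID; the following is a minimal sketch with hypothetical names, not the actual driver code:

```go
// Illustrative sketch only: track the mapping between the ID the runtime
// uses for a block device and the ID Cloud Hypervisor generated for it.
package clh

import "sync"

type deviceIDMap struct {
	mu  sync.Mutex
	ids map[string]string // runtime device ID -> CLH-generated device ID
}

func newDeviceIDMap() *deviceIDMap {
	return &deviceIDMap{ids: make(map[string]string)}
}

// recordHotplug remembers the ID Cloud Hypervisor generated when a device
// was hotplugged without passing our own ID down.
func (m *deviceIDMap) recordHotplug(runtimeID, clhID string) {
	m.mu.Lock()
	defer m.mu.Unlock()
	m.ids[runtimeID] = clhID
}

// lookupForRemoval returns the CLH-generated ID to use with the
// remove-device API, since Cloud Hypervisor never saw our runtime ID.
func (m *deviceIDMap) lookupForRemoval(runtimeID string) (string, bool) {
	m.mu.Lock()
	defer m.mu.Unlock()
	id, ok := m.ids[runtimeID]
	return id, ok
}
```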
Jack Hance
475e3bf38f agent: add test coverage for functions find_process and online_resources
Add test coverage for the functions find_process and online_resources in src/sandbox.rs.

Fixes #4085
Fixes #4136

Signed-off-by: Jack Hance <jack.hance@ndsu.edu>
2022-05-03 16:00:24 -05:00
Yibo Zhuang
70eda2fa6c agent: watchers: ensure uid/gid is preserved on copy/mkdir
Today in agent watchers, when we copy files/symlinks
or create directories, the ownership of the source path
is not preserved which can lead to permission issues.

In copy, ensure that we chown the destination file/symlink
to the source path's uid/gid after copying, so that
ownership matches the source ownership.
fs::copy() takes care of setting the permissions.

For directory creation, ensure that we set the
permissions of the created directory to the source
directory's permissions and also perform a chown to the
source path's uid/gid, so that directory ownership
and permissions match the source.

Fixes: #4188

Signed-off-by: Yibo Zhuang <yibzhuang@gmail.com>
2022-05-03 09:57:31 -07:00
Garrett Mahin
4a1e13bd8f rustjail: Add tests for hook_grpc_to_oci
Add test coverage for hook_grpc_to_oci in rustjail/src/lib.rs

Fixes: #4125

Signed-off-by: Garrett Mahin <garrett.mahin@gmail.com>
2022-05-02 23:59:33 +02:00
Bin Liu
383be2203a agent: Add a macro to skip a loop easier
Add a macro to make skipping a loop iteration easier,
without using an if {} else {} condition check.

Fixes: #4185

Signed-off-by: Bin Liu <bin@hyper.sh>
2022-04-30 20:45:41 +08:00
Bin Liu
c633780ba7 Merge pull request #4119 from bradenrayhorn/test-create-logger-task
agent: add tests for create_logger_task function
2022-04-30 19:48:07 +08:00
Bin Liu
97d7b1845b runk: use custom Kill command to support --all option
runk uses the liboci-cli crate to parse command line options,
but liboci-cli does not support the --all option for the kill
command, even though this is runtime spec behavior.

However, crictl issues a kill --all command when stopping containers,
so as a workaround we use a custom kill command instead of the one
provided by liboci-cli.

Fixes: #4182

Signed-off-by: Bin Liu <bin@hyper.sh>
2022-04-30 19:34:18 +08:00
Fabiano Fidêncio
1dd6f85a17 Merge pull request #4178 from liubin/4177
runk: set BinaryName for runk for containerd
2022-04-29 21:17:37 +02:00
Bin Liu
7772f7dd99 runk: set BinaryName for runk for containerd
The default runtime for io.containerd.runc.v2 is runc.
To use runk, the containerd configuration should either set the
default runtime to runk or add a BinaryName option for the
runtime.

Fixes: #4177

Signed-off-by: Bin Liu <bin@hyper.sh>
2022-04-29 22:26:32 +08:00
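
For illustration, a containerd configuration entry along these lines sets the BinaryName option for a runk runtime class (the install path below is an assumption):

```toml
# Fragment of /etc/containerd/config.toml; the runk binary path is an
# assumption, adjust it to wherever runk is installed.
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runk]
  runtime_type = "io.containerd.runc.v2"
  [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runk.options]
    BinaryName = "/usr/local/bin/runk"
```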
James O. D. Hunt
cc839772d3 Merge pull request #2785 from ManaSugi/standard-container-runtime
tools: Add a Rust-based standard OCI container runtime based on Kata agent
2022-04-29 13:20:59 +01:00
James O. D. Hunt
2d5f11501c Merge pull request #4083 from bradenrayhorn/test-parse-mount-table
rustjail: add tests for parse_mount_table
2022-04-29 11:34:22 +01:00
Jianyong Wu
982c32358a Merge pull request #4031 from Jaylyn-Ren/kata-spdk
Virtcontainers: Enable hot plugging vhost-user-blk device on ARM
2022-04-29 12:16:38 +08:00
Feng Wang
da11c21b4a Merge pull request #3248 from fengwang666/direct-blk-design
docs: repropose direct-assigned volume
2022-04-28 16:55:50 -07:00
Feng Wang
7ffe5a16f2 docs: Direct-assigned volume design
Detail design description on direct-assigned volume

Fixes: #1468

Signed-off-by: Feng Wang <feng.wang@databricks.com>
2022-04-28 14:47:36 -07:00
Julio Montes
ea857bb1b8 Merge pull request #4172 from devimc/2022-04-28/fixQEMU
versions: change qemu tdx url and tag
2022-04-28 15:31:52 -05:00
Archana Shinde
9fdc88101f Merge pull request #3907 from zvonkok/nvidia
doc: Update for NVIDIA GPUs
2022-04-28 12:42:44 -07:00
Julio Montes
081f6de874 versions: change qemu tdx url and tag
https://github.com/intel/qemu-dcp is the new repo that supports
qemu with Intel TDX

fixes #4171

Signed-off-by: Julio Montes <julio.montes@intel.com>
2022-04-28 13:46:11 -05:00
Chelsea Mafrica
3f069c7acb Merge pull request #4166 from jodh-intel/agent-ctl-fix-abstract
agent-ctl: Fix abstract socket connections
2022-04-28 10:17:28 -07:00
James O. D. Hunt
666aee54d2 docs: Add VSOCK localhost example for agent-ctl
Update the `agent-ctl` docs to show how to use a VSOCK local address
when running the agent and the tool in the same environment. This is an
alternative to using a Unix socket.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-04-28 13:33:23 +01:00
James O. D. Hunt
86d348e065 docs: Use VM term in agent-ctl doc
Use the standard "VM" acronym to mean Virtual Machine.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-04-28 13:33:19 +01:00
James O. D. Hunt
4b9b62bb3e agent-ctl: Fix abstract socket connections
Unbreak the `agent-ctl` tool connecting to the agent with a Unix domain
socket.

It appears that [1] changed the behaviour of connecting to the agent
using a local Unix socket (which is not used by Kata under normal
operation).

The change can be seen by reverting to commit
72b8144b56 (the one before [1]) and
running the agent manually as:

```bash
$ sudo KATA_AGENT_SERVER_ADDR=unix:///tmp/foo.socket target/x86_64-unknown-linux-musl/release/kata-agent
```

Before [1], in another terminal we see this:

```bash
$ sudo lsof -U 2>/dev/null |grep foo|awk '{print $9}'
@/tmp/foo.socket@
```

But now, we see the following:

```bash
$ sudo lsof -U 2>/dev/null |grep foo|awk '{print $9}'
@/tmp/foo.socket
```

Note the last byte which represents a nul (`\0`) value.

The `agent-ctl` tool used to add that trailing nul, but it no longer seems
to be needed, so this change removes it, restoring functionality. No
external changes are necessary, so the `agent-ctl` tool can connect to
the agent like this:

```bash
$ cargo run -- -l debug connect --server-address "unix://@/tmp/foo.socket" --bundle-dir "$bundle_dir" -c Check -c GetGuestDetails
```

[1] - https://github.com/kata-containers/kata-containers/issues/3124

Fixes: #4164.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-04-28 13:33:09 +01:00
Fabiano Fidêncio
c4dd029566 Merge pull request #4135 from fidencio/topic/clh-net-rate-limitting
Implement network and disk rate limiter for Cloud Hypervisor
2022-04-28 13:33:10 +02:00
Fabiano Fidêncio
9fb9c80fd3 Merge pull request #4161 from fidencio/topic/kata-deploy-plus-rke2
kata-deploy: Add support to RKE2
2022-04-28 11:35:11 +02:00
Fabiano Fidêncio
b6467ddd73 clh: Expose disk rate limiter config
With everything implemented, let's now expose the disk rate limiter
configuration options in the Cloud Hypervisor configuration file.

Fixes: #4139

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-04-28 10:28:29 +02:00
Fabiano Fidêncio
7580bb5a78 clh: Expose net rate limiter config
With everything implemented, let's now expose the net rate limiter
configuration options in the Cloud Hypervisor configuration file.

Fixes: #4017

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-04-28 10:28:13 +02:00
Fabiano Fidêncio
a88adabaae clh: Cloud Hypervisor has a built-in Rate Limiter
The notion of "built-in rate limiter" was added as part of
bd8658e362, and that commit considered
that only Firecracker had a built-in rate limiter, which I think was the
case when that was introduced (mid 2020).

Nowadays, however, Cloud Hypervisor takes advantage of the very same crate
used by Firecracker to do I/O throttling.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-04-28 10:27:56 +02:00
Fabiano Fidêncio
63c4da03a9 clh: Implement the Disk RateLimiter logic
Let's take advantage of the newly added DiskRateLimiter* options and
apply those to the disk device configuration.

The logic here is identical to the one already present in the Network
part of Cloud Hypervisor's driver.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-04-28 10:27:53 +02:00
Fabiano Fidêncio
511f7f822d config: Add DiskRateLimiter* to Cloud Hypervisor
Let's add the new disk rate limiter configurations to the Cloud
Hypervisor's hypervisor configuration.

Right now those are not used anywhere, and there's absolutely no way the
users can set those up.  That's coming later in this very same series.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-04-28 10:27:15 +02:00
Fabiano Fidêncio
5b18575dfe hypervisor: Add disk bandwidth and operations rate limiters
This is the disk counterpart of what was introduced for the network
as part of the previous commits in this series.

The newly added fields are:
* DiskRateLimiterBwMaxRate, defined in bits per second, which is used to
  control the disk I/O bandwidth at the VM level.
* DiskRateLimiterBwOneTimeBurst, also defined in bits per second, which
  is used to define an *initial* max rate, which doesn't replenish.
* DiskRateLimiterOpsMaxRate, the operations per second equivalent of the
  DiskRateLimiterBwMaxRate.
* DiskRateLimiterOpsOneTimeBurst, the operations per second equivalent of
  the DiskRateLimiterBwOneTimeBurst.

For now those extra fields have only been added to the hypervisor's
configuration and they'll be used in the coming patches of this very
same series.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-04-28 10:27:11 +02:00
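
Sketched in Go, using the field names from the message above (the struct shape is illustrative, not the exact source):

```go
// Illustrative fragment of a hypervisor configuration carrying the four
// disk rate limiter knobs described above.
package hypervisor

type HypervisorConfig struct {
	// DiskRateLimiterBwMaxRate caps disk bandwidth at the VM level, in bits/s.
	DiskRateLimiterBwMaxRate int64
	// DiskRateLimiterBwOneTimeBurst is an initial, non-replenishing burst, in bits/s.
	DiskRateLimiterBwOneTimeBurst int64
	// DiskRateLimiterOpsMaxRate caps disk I/O at the VM level, in operations/s.
	DiskRateLimiterOpsMaxRate int64
	// DiskRateLimiterOpsOneTimeBurst is an initial, non-replenishing burst, in ops/s.
	DiskRateLimiterOpsOneTimeBurst int64
}
```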
Fabiano Fidêncio
1cf9469297 clh: Implement the Network RateLimiter logic
Let's take advantage of the newly added NetRateLimiter* options and
apply those to the network device configuration.

The logic here is quite similar to the one already present in the
Firecracker's driver, with the main difference being the single Inbound
/ Outbound MaxRate and the presence of both Bandwidth and Operations
rate limiter.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-04-28 10:26:38 +02:00
Fabiano Fidêncio
00a5b1bda9 utils: Define DefaultRateLimiterRefillTimeMilliSecs
Firecracker's driver doesn't expose the RefillTime option of the rate
limiter to the user.  Instead, it uses a constant value of 1000
milliseconds (1 second).

As we're following Firecracker's driver implementation, let's
create a new constant, use it as part of the Firecracker driver, and
later on re-use it as part of the Cloud Hypervisor driver.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-04-28 10:22:42 +02:00
Fabiano Fidêncio
be1bb7e39f utils: Move FC's function to revert bytes to utils
Firecracker's revertBytes function, now called "RevertBytes", can be
exposed as part of the virtcontainers' utils file, as this function will
be reused by Cloud Hypervisor, when adding the rate limiter logic there.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-04-28 10:22:42 +02:00
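
Put together, the two utilities above amount to something like the following sketch; the RevertBytes body is my reading of the decimal-to-binary conversion, so treat it as illustrative rather than the exact source:

```go
package utils

// DefaultRateLimiterRefillTimeMilliSecs mirrors the fixed 1000 ms refill
// window mentioned above, shared by the Firecracker and CLH drivers.
const DefaultRateLimiterRefillTimeMilliSecs = uint64(1000)

// RevertBytes converts a decimal (power-of-1000) quantity into its
// binary (power-of-1024) equivalent, e.g. 1000000 -> 1048576.
func RevertBytes(num uint64) uint64 {
	a := num / 1000
	b := num % 1000
	if a == 0 {
		return num
	}
	return 1024*RevertBytes(a) + b
}
```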
Fabiano Fidêncio
c9f6496d6d config: Add NetRateLimiter* to Cloud Hypervisor
Let's add the new network rate limiter configurations to the
Cloud Hypervisor's hypervisor configuration.

Right now those are not used anywhere, and there's absolutely no way the
users can set those up.  That's coming later in this very same series.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-04-28 10:22:42 +02:00
Fabiano Fidêncio
2d35e6066d hypervisor: Add network bandwidth and operations rate limiters
In a similar way to what's already exposed as RxRateLimiterMaxRate and
TxRateLimiterMaxRate, let's add four new fields to the Hypervisor's
configuration.

The values added are related to bandwidth and operations rate limiters,
which have to be added so we can expose I/O throttling configurations to
users using Cloud Hypervisor as their preferred VMM.

The reason we cannot simply re-use {Rx,Tx}RateLimiterMaxRate is because
Cloud Hypervisor exposes a single MaxRate to be used for both inbound
and outbound queues.

The newly added fields are:
* NetRateLimiterBwMaxRate, defined in bits per second, which is used to
  control the network I/O bandwidth at the VM level.
* NetRateLimiterBwOneTimeBurst, also defined in bits per second, which
  is used to define an *initial* max rate, which doesn't replenish.
* NetRateLimiterOpsMaxRate, the operations per second equivalent of the
  NetRateLimiterBwMaxRate.
* NetRateLimiterOpsOneTimeBurst, the operations per second equivalent of
  the NetRateLimiterBwOneTimeBurst.

For now those extra fields have only been added to the hypervisor's
configuration and they'll be used in the coming patches of this very
same series.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-04-28 10:22:42 +02:00
Braden Rayhorn
b0e439cb66 rustjail: add tests for parse_mount_table
Add tests for parse_mount_table function in rustjail/src/mount.rs.
Includes some minor refactoring to improve the testability of the
function and improve its error values.

Fixes: #4082

Signed-off-by: Braden Rayhorn <bradenrayhorn@fastmail.com>
2022-04-27 20:06:01 -05:00
Chelsea Mafrica
ab067cf074 Merge pull request #4163 from GabyCT/topic/fixdoccontainerd
docs: Update containerd link to installation guide
2022-04-27 16:18:57 -07:00
Fabiano Fidêncio
ccb0183934 kata-deploy: Add support to RKE2
"RKE2 - Rancher's Next Generation Kuberentes Distribution" can easily be
supported by kata-deploy with some simple adjustments to what we've been
relying on for "k3s".

The main differences between k3s and RKE2 are, basically:
1. The location where the containerd configuration is stored
   - k3s: /var/lib/rancher/k3s/agent/etc/containerd/
   - rke2: /var/lib/rancher/rke2/agent/etc/containerd/
2. The name of the systemd services used:
   - k3s: k3s.service or k3s-agent.service
   - rke2: rke2-server.service or rke2-agent.service

Knowing this, let's add a new overlay for RKE2, adapt the kata-deploy
and the kata-cleanup scripts, and that's it.

Fixes: #4160

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-04-27 19:05:36 +02:00
Fabiano Fidêncio
9d39362e30 kata-deploy: Restructure the installing section
Let's move the specific installation instructions, such as those for k3s,
higher up in the document.

This makes it easier to read (or skip) sections according to what the user
is looking for.

Signed-off-by: Fabiano Fidêncio <fabiano@fidencio.org>
2022-04-27 19:05:36 +02:00
Fabiano Fidêncio
18d27f7949 kata-deploy: Add a missing $ prefix in the README
Commit short-log says it all.

Signed-off-by: Fabiano Fidêncio <fabiano@fidencio.org>
2022-04-27 19:05:36 +02:00
Gabriela Cervantes
6948b4b360 docs: Update containerd link to installation guide
This PR updates the containerd url link for the installation guide

Fixes #4162

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2022-04-27 16:52:53 +00:00
Manabu Sugimoto
b221a2590f tools: Add runk
Add a Rust-based standard OCI container runtime based on
Kata agent.

You can build and install runk as follows:

```sh
$ cd src/tools/runk
$ make
$ sudo make install
$ runk --help
```

Fixes: #2784

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2022-04-28 00:48:57 +09:00
Manabu Sugimoto
2c218a07b9 agent: Modify Kata agent for runk
Generate an oci-kata-agent which is a customized agent to be
called from runk which is a Rust-based standard OCI container
runtime based on Kata agent.

Fixes: #2784

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2022-04-28 00:48:57 +09:00
Zvonko Kaiser
dd4bd7f471 doc: Added initial doc update for NV GPUs
Fixed rpm vs deb references
Update to the shell portion

Fixes #3379

Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
2022-04-27 16:38:35 +02:00
James O. D. Hunt
d02db3a268 Merge pull request #4156 from Kvasscn/kata_dev_fix_docs_pc_machine
docs: remove pc machine type supports
2022-04-27 11:55:58 +01:00
James O. D. Hunt
0a6e7d443e Merge pull request #3910 from etrunko/agent_random
Agent: Unit tests for random.rs
2022-04-27 09:41:02 +01:00
James O. D. Hunt
7b20707197 Merge pull request #4107 from garrettmahin/test-mount-grpc-to-oci
rustjail: Add tests for mount_grpc_to_oci
2022-04-27 08:50:24 +01:00
Fabiano Fidêncio
411053e2bd Merge pull request #4152 from gkurz/fix-clh-build
packaging: Fix broken path in `build-static-clh.sh`
2022-04-27 08:59:43 +02:00
Jason Zhang
832c33d5b5 docs: remove pc machine type support
The 'pc' machine type is no longer supported in the kata configuration,
so remove it from the design docs.

Fixes: #4155

Signed-off-by: Jason Zhang <zhanghj.lc@inspur.com>
2022-04-27 11:28:03 +08:00
Greg Kurz
b658dccc5f tools: fix typo in clh directory name
This allows released binaries to be fetched again.

Fixes: #4151

Signed-off-by: Greg Kurz <groug@kaod.org>
2022-04-26 17:57:32 +02:00
Greg Kurz
afbd60da27 packaging: Fix clh build from source fall-back
If we fail to download the clh binary, we fall-back to build from source.
Unfortunately, `pull_clh_released_binary()` leaves a `cloud_hypervisor`
directory behind, which causes `build_clh_from_source()` not to clone
the git repo:

    [ -d "${repo_dir}" ] || git clone "${cloud_hypervisor_repo}"

When building from a kata-containers git repo, the subsequent calls
to `git` in this function thus apply to the kata-containers repo and
eventually fail, e.g.:

+ git checkout v23.0
error: pathspec 'v23.0' did not match any file(s) known to git

It doesn't quite make sense to keep an existing directory whose
content is arbitrary when we want it to contain a specific
version of clh. Just remove it instead.

Fixes: #4151

Signed-off-by: Greg Kurz <groug@kaod.org>
2022-04-26 17:57:32 +02:00
Peng Tao
5b6e45ed6c Merge pull request #4141 from dgibson/cleanup-tmp
Fix Go unit tests to clean up /tmp after themselves
2022-04-26 15:43:34 +08:00
Garrett Mahin
4b9e78b837 rustjail: Add tests for mount_grpc_to_oci
Add test coverage for mount_grpc_to_oci in rustjail/src/lib.rs

Fixes: #4106

Signed-off-by: Garrett Mahin <garrett.mahin@gmail.com>
2022-04-25 08:37:17 -05:00
James O. D. Hunt
bc919cc54c Merge pull request #4122 from bradenrayhorn/test-mount-from
rustjail: add tests for mount_from function
2022-04-25 11:55:21 +01:00
James O. D. Hunt
cb8dd0f4fc Merge pull request #4143 from garrettmahin/test-hooks-grpc-to-oci
rustjail: Add tests for hooks_grpc_to_oci
2022-04-25 10:50:52 +01:00
Braden Rayhorn
81f6b48626 agent: add tests for create_logger_task function
Add tests for create_logger_task function in src/main.rs.

Fixes: #4113

Signed-off-by: Braden Rayhorn <bradenrayhorn@fastmail.com>
2022-04-24 21:38:32 -05:00
Bin Liu
2629c9fc7b Merge pull request #4114 from yangfeiyu20102011/main
agent: modify the type of swappiness to u64
2022-04-24 13:35:18 +08:00
Garrett Mahin
96bc3ec2e9 rustjail: Add tests for hooks_grpc_to_oci
Add test coverage for hooks_grpc_to_oci in rustjail/src/lib.rs

Fixes: #4142

Signed-off-by: Garrett Mahin <garrett.mahin@gmail.com>
2022-04-22 19:20:04 -05:00
holyfei
0239502781 agent: modify the type of swappiness to u64
The type of MemorySwappiness in the runtime is uint64, while the type of swappiness in the
agent is int64. If we set the maximum uint64 value in the runtime and pass it to the agent,
the value will be read as -1. We should modify the type of swappiness to u64.

Fixes: #4123

Signed-off-by: holyfei <yangfeiyu20092010@163.com>
2022-04-22 16:55:37 +08:00
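
The overflow is easy to reproduce; in Go (the runtime's side of the boundary), reinterpreting the maximum uint64 as a signed 64-bit value yields -1:

```go
package main

import (
	"fmt"
	"math"
)

func main() {
	swappiness := uint64(math.MaxUint64)
	// All 64 bits set: read back as a signed int64 this is -1, which is
	// what the agent saw before swappiness was switched to u64.
	fmt.Println(int64(swappiness)) // prints -1
}
```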
David Gibson
1b931f4203 runtime: Allow mockfs storage to be placed in any directory
Currently EnableMockTesting() takes no arguments and will always place the
mock storage in the fixed location /tmp/vc/mockfs.  This means that one
test run can interfere with the next one if anything isn't cleaned up
(and there are other bugs which mean that happens).  Even if those were
fixed, this would still allow developers testing on the same machine to
interfere with each other.

So, allow the mockfs to be placed at an arbitrary place given as a
parameter to EnableMockTesting().  In TestMain() we place it under our
existing temporary directory, so we don't need any additional cleanup just
for the mockfs.

fixes #4140

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-04-22 14:47:59 +10:00
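
A minimal sketch of the TestMain wiring this describes, assuming EnableMockTesting now takes the storage path (the surrounding helper code is hypothetical):

```go
package vc_test

import (
	"os"
	"path/filepath"
	"testing"
)

func TestMain(m *testing.M) {
	// Hypothetical wiring: keep the mockfs inside this run's own temporary
	// directory instead of the shared, fixed /tmp/vc/mockfs location.
	dir, err := os.MkdirTemp("", "vc-tests-")
	if err != nil {
		panic(err)
	}
	EnableMockTesting(filepath.Join(dir, "mockfs"))

	ret := m.Run()
	os.RemoveAll(dir) // cannot rely on defer: os.Exit skips deferred calls
	os.Exit(ret)
}
```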
David Gibson
ef6d54a781 runtime: Let MockFSInit create a mock fs driver at any path
Currently MockFSInit always creates the mockfs at the fixed path
/tmp/vc/mockfs.  This change allows it to be initialized at any path
given as a parameter.  This allows the tests in fs_test.go to be
simplified: by using a temporary directory from
t.TempDir(), which is automatically cleaned up, we don't need to
manually trigger initTestDir() (which is misnamed; it's actually a
cleanup function).

For now we still use the fixed path when auto-creating the mockfs in
MockAutoInit(), but we'll change that later.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-04-22 14:23:36 +10:00
David Gibson
5d8438e939 runtime: Move mockfs control global into mockfs.go
virtcontainers/persist/fs/mockfs.go defines a mock filesystem type for
testing.  A global variable in virtcontainers/persist/manager.go is used to
force use of the mock fs rather than a normal one.

This patch moves the global, and the EnableMockTesting() function which
sets it, into mockfs.go.  This is slightly cleaner to begin with, and will
allow some further enhancements.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-04-22 14:23:36 +10:00
David Gibson
963d03ea8a runtime: Export StoragePathSuffix
storagePathSuffix defines the file path suffix - "vc" - used for
Kata's persistent storage information, as a private constant.  We
duplicate this information in fc.go which also needs it.

Export it from fs.go instead, so it can be used in fc.go.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-04-22 14:23:36 +10:00
David Gibson
1719a8b491 runtime: Don't abuse MockStorageRootPath() for factory tests
A number of unit tests under virtcontainers/factory use
MockStorageRootPath() as a general purpose temporary directory.  This
doesn't make sense: the mockfs driver isn't even in use here since we only
call EnableMockTesting for the pase virtcontainers package, not the
subpackages.

Instead use t.TempDir() which is for exactly this purpose.  As a bonus it
also handles the cleanup, so we don't need MockStorageDestroy any more.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-04-22 14:23:36 +10:00
David Gibson
bec59f9e39 runtime: Make bind mount tests better clean up after themselves
There are several tests in mount_test.go which perform a sample bind
mount.  These need a corresponding unmount to clean up afterwards or
attempting to delete the temporary files will fail due to the existing
mountpoint.  Most of them had such an unmount, but
TestBindMountInvalidPgtypes was missing one.

In addition, the existing unmounts were done inconsistently - one was
simply inline (so wouldn't be executed if the test fails too early) and one
was a defer.  Change them all to use the t.Cleanup mechanism.

For the dummy mountpoint files, rather than cleaning them up after the
test, the tests were removing them at the beginning of the test.  That
stops the test being messed up by a previous run, but messily.  Since
these are created in a private temporary directory anyway, if there's
something already there, that indicates a problem we shouldn't ignore.
In fact we don't need to explicitly remove these at all - they'll be
removed along with the rest of the private temporary directory.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-04-22 14:20:35 +10:00
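
The t.Cleanup pattern the commit converges on looks roughly like this generic skeleton (not the actual test code):

```go
package mount_test

import (
	"syscall"
	"testing"
)

func TestBindMountCleanupSketch(t *testing.T) {
	src := t.TempDir()
	dest := t.TempDir()

	if err := syscall.Mount(src, dest, "bind", syscall.MS_BIND, ""); err != nil {
		t.Fatalf("bind mount failed: %v", err)
	}
	// Registered immediately after the mount succeeds, so the unmount runs
	// even if a later assertion fails (unlike a plain inline unmount).
	t.Cleanup(func() {
		if err := syscall.Unmount(dest, 0); err != nil {
			t.Logf("unmount failed: %v", err)
		}
	})

	// ... assertions about the mount would go here ...
}
```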
David Gibson
f7ba21c86f runtime: Clean up mock hook logs in tests
The tests in hook_test.go run a mock hook binary, which does some debug
logging to /tmp/mock_hook.log.  Currently we don't clean up those logs
when the tests are done.  Use a test cleanup function to do this.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-04-22 14:14:52 +10:00
David Gibson
90b2f5b776 runtime: Make SetupOCIConfigFile clean up after itself
SetupOCIConfigFile creates a temporary directory with os.MkdirTemp().  This
means the callers need to register a deferred function to remove it again.
At least one of these was commented out, meaning that a /temp/katatest-
directory was left over after the unit tests ran.

Change to using t.TempDir(), which, as well as better matching other parts of
the tests, means the testing framework will handle cleaning it up.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-04-22 14:14:52 +10:00
David Gibson
2eeb5dc223 runtime: Don't use fixed /tmp/mountPoint path
Several tests in kata_agent_test.go create /tmp/mountPoint as a dummy
directory to mount.  This is not cleaned up after the test.  Although it
is in /tmp, that's still a little messy and can be confusing to a user.
In addition, because it uses the same name every time, it allows for one
run of the test to interfere with the next.

Use the built in t.TempDir() to use an automatically named and deleted
temporary directory instead.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-04-22 14:14:52 +10:00
Jiang Liu
83979ece18 Merge pull request #3462 from jiangliu/safe-path
libs/safe-path: add crate to safely resolve fs paths
2022-04-21 11:17:49 +08:00
Liu Jiang
0ad89ebd7c safe-path: add more unit test cases
Add more unit test cases to improve code coverage.

Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
2022-04-21 10:01:23 +08:00
Liu Jiang
b63774ec61 libs/safe-path: add crate to safely resolve fs paths
There are always path (symlink) based attacks, so the `safe-path` crate
tries to provide some mechanisms to harden path-resolution-related code.

Fixes: #3451

Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
2022-04-21 10:01:21 +08:00
Braden Rayhorn
f385b21b05 rustjail: add tests for mount_from function
Add tests for the mount_from function in rustjail mount.rs file.

Fixes: #4121

Signed-off-by: Braden Rayhorn <bradenrayhorn@fastmail.com>
2022-04-20 20:04:57 -05:00
Fabiano Fidêncio
baa67d8cc5 Merge pull request #4104 from bradenrayhorn/share-assert-result
agent: move assert_result macro to test_utils file
2022-04-20 17:51:12 +02:00
Braden Rayhorn
0e7f1a5e3a agent: move assert_result macro to test_utils file
Move the assert_result macro to the shared test_utils file
so that it is not duplicated in individual files.

Fixes: #4093

Signed-off-by: Braden Rayhorn <bradenrayhorn@fastmail.com>
2022-04-19 18:57:16 -05:00
Fabiano Fidêncio
604a795073 Merge pull request #4096 from garrettmahin/test-root-grpc-to-oci
rustjail: Add tests for root_grpc_to_oci
2022-04-19 21:38:58 +02:00
Fabiano Fidêncio
f619c65b6a Merge pull request #4074 from bradenrayhorn/test-mount-to-rootfs
agent: add tests for mount_to_rootfs function
2022-04-19 21:36:11 +02:00
Fabiano Fidêncio
7ec42951f2 Merge pull request #4035 from bradenrayhorn/test-update-container-namespaces
agent: add tests for update_container_namespaces
2022-04-19 21:36:02 +02:00
Fabiano Fidêncio
e6bc912439 Merge pull request #3940 from bradenrayhorn/test-is-signal-handled
agent: add tests for is_signal_handled function
2022-04-19 21:35:48 +02:00
Archana Shinde
33e244f284 Merge pull request #4102 from likebreath/0414/clh_v23.0
Upgrade to Cloud Hypervisor v23.0
2022-04-19 06:01:04 -07:00
Fabiano Fidêncio
dbb0c67523 Merge pull request #4072 from fengwang666/dv-bug
agent: best-effort removing mount point
2022-04-19 10:08:40 +02:00
Chelsea Mafrica
0af13b469d Merge pull request #4086 from BbolroC/s390x-fix
test: Fix golangci-lint error for s390x
2022-04-15 21:07:09 -07:00
Bin Liu
b19bfac7cd Merge pull request #4042 from yibozhuang/direct-assign-fsgroup
fsGroup support for direct-assigned volume
2022-04-16 10:23:15 +08:00
Bin Liu
4ec1967542 Merge pull request #4094 from fgiudici/kata-monitor_readme
kata-monitor: add the README file
2022-04-16 08:27:22 +08:00
Bin Liu
362201605e Merge pull request #4055 from fgiudici/kata-monitor_pprof
kata-monitor: update the hrefs in the debug/pprof index page
2022-04-16 08:12:18 +08:00
Garrett Mahin
2256bcb6ab rustjail: Add tests for root_grpc_to_oci
Add test coverage for root_grpc_to_oci in rustjail/src/lib.rs

Fixes: #4095

Signed-off-by: Garrett Mahin <garrett.mahin@gmail.com>
2022-04-15 11:09:18 -05:00
Francesco Giudici
7b2ff02647 kata-monitor: add a README file
Fixes: #3704

Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2022-04-15 18:03:23 +02:00
Bo Chen
29e569aa92 virtcontainers: clh: Re-generate the client code
This patch re-generates the client code for Cloud Hypervisor v23.0.
Note: The client code of cloud-hypervisor's (CLH) OpenAPI is
automatically generated by openapi-generator [1-2].

[1] https://github.com/OpenAPITools/openapi-generator
[2] https://github.com/kata-containers/kata-containers/blob/main/src/runtime/virtcontainers/pkg/cloud-hypervisor/README.md

Signed-off-by: Bo Chen <chen.bo@intel.com>
2022-04-14 12:56:01 -07:00
Bo Chen
6012c19707 versions: Upgrade to Cloud Hypervisor v23.0
Highlights from the Cloud Hypervisor release v23.0: 1) vDPA Support; 2)
Updated OS Support list (Jammy 22.04 added with EOLed versions removed);
3) AArch64 Memory Map Improvements; 4) AMX Support; 5) Bug Fixes;

Details can be found: https://github.com/cloud-hypervisor/cloud-hypervisor/releases/tag/v23.0

Fixes: #4101

Signed-off-by: Bo Chen <chen.bo@intel.com>
2022-04-14 12:52:35 -07:00
Feng Wang
aabcebbf58 agent: best-effort removing mount point
During container exit, the agent tries to remove all the mount point directories,
which can fail if it's a readonly filesystem (e.g. device mapper). This commit ignores
the removal failure and logs a warning message.

Fixes: #4043

Signed-off-by: Feng Wang <feng.wang@databricks.com>
2022-04-13 22:40:23 -07:00
Chelsea Mafrica
32f92e75cc Merge pull request #4021 from fengwang666/direct-volume-bug
runtime: Base64 encode the direct volume mountInfo path
2022-04-13 13:15:38 -07:00
Greg Kurz
4443bb68a4 Merge pull request #4064 from tiezhuoyu/4063/no-need-to-write-error-of-virtiofsd-to-kata-log
runtime: no need to write virtiofsd error to log
2022-04-13 11:59:19 +02:00
Hyounggyu Choi
d136c9c240 test: Fix golangci-lint error for s390x
This is to fix a test failure for the
kata-containers-2.0-ubuntu-20.04-s390x-main-baseline jenkins job

Fixes: #4088

Signed-off-by: Hyounggyu Choi <Hyounggyu.Choi@ibm.com>
2022-04-13 09:20:51 +02:00
Fupan Li
66aa07649b Merge pull request #4062 from liubin/fix/4061-add-links-for-kata-monitor
kata-monitor: add some links when generating pages for browsers
2022-04-13 11:30:21 +08:00
Peng Tao
8d8c0388fa Merge pull request #4078 from fidencio/wip/agent-avoid-panic-when-getting-empty-stats
agent: Avoid agent panic when reading empty stats
2022-04-12 23:07:17 +08:00
Francesco Giudici
86977ff780 kata-monitor: update the hrefs in the debug/pprof index page
kata-monitor allows getting data profiles from the kata shim
instances running on the same node by acting as a proxy
(e.g., http://$NODE_ADDRESS:8090/debug/pprof/?sandbox=$MYSANDBOXID).
In order to proxy the requests and the responses to the right shim,
kata-monitor requires the sandbox id to be passed via a query string
in the url.

The profiling index page proxied by kata-monitor contains the links to all
the available data profiles. None of the links, however, contain the
sandbox id included in the request: the links are therefore broken when
accessed through kata-monitor.
This happens because the profiling index page comes from the kata shim,
which will not include the query string provided in the http request.

Let's add on-the-fly the sandbox id in each href tag returned by the kata
shim index page before providing the proxied page.

Fixes: #4054

Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2022-04-12 15:53:59 +02:00
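
A minimal sketch of the on-the-fly rewrite described above (regex-based; the real proxy code may differ):

```go
package main

import (
	"fmt"
	"regexp"
)

// addSandboxID appends the sandbox id as a query string to every href in
// the proxied pprof index page. Illustrative only.
func addSandboxID(page, sandboxID string) string {
	re := regexp.MustCompile(`href="([^"?]+)"`)
	return re.ReplaceAllString(page, fmt.Sprintf(`href="${1}?sandbox=%s"`, sandboxID))
}

func main() {
	fmt.Println(addSandboxID(`<a href="goroutine">goroutine</a>`, "mysandboxid"))
	// Output: <a href="goroutine?sandbox=mysandboxid">goroutine</a>
}
```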
Fabiano Fidêncio
78f30c33c6 agent: Avoid agent panic when reading empty stats
This was seen in an issue report, where we'd try to unwrap a None value,
leading to a panic.

Fixes: #4077
Related: #4043

Full backtrace:
```
"thread 'tokio-runtime-worker' panicked at 'called `Option::unwrap()` on a `None` value', rustjail/src/cgroups/fs/mod.rs:593:31"
"stack backtrace:"
"   0:     0x7f0390edcc3a - std::backtrace_rs::backtrace::libunwind::trace::hd5eff4de16dbdd15"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/std/src/../../backtrace/src/backtrace/libunwind.rs:93:5"
"   1:     0x7f0390edcc3a - std::backtrace_rs::backtrace::trace_unsynchronized::h04a775b4c6ab90d6"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/std/src/../../backtrace/src/backtrace/mod.rs:66:5"
"   2:     0x7f0390edcc3a - std::sys_common::backtrace::_print_fmt::h3253c3db9f17d826"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/std/src/sys_common/backtrace.rs:67:5"
"   3:     0x7f0390edcc3a - <std::sys_common::backtrace::_print::DisplayBacktrace as core::fmt::Display>::fmt::h02bfc712fc868664"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/std/src/sys_common/backtrace.rs:46:22"
"   4:     0x7f0390a91fbc - core::fmt::write::hfd5090d1132106d8"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/core/src/fmt/mod.rs:1149:17"
"   5:     0x7f0390edb804 - std::io::Write::write_fmt::h34acb699c6d6f5a9"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/std/src/io/mod.rs:1697:15"
"   6:     0x7f0390edbee0 - std::sys_common::backtrace::_print::hfca761479e3d91ed"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/std/src/sys_common/backtrace.rs:49:5"
"   7:     0x7f0390edbee0 - std::sys_common::backtrace::print::hf666af0b87d2b5ba"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/std/src/sys_common/backtrace.rs:36:9"
"   8:     0x7f0390edbee0 - std::panicking::default_hook::{{closure}}::hb4617bd1d4a09097"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/std/src/panicking.rs:211:50"
"   9:     0x7f0390edb2da - std::panicking::default_hook::h84f684d9eff1eede"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/std/src/panicking.rs:228:9"
"  10:     0x7f0390edb2da - std::panicking::rust_panic_with_hook::h8e784f5c39f46346"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/std/src/panicking.rs:606:17"
"  11:     0x7f0390f0c416 - std::panicking::begin_panic_handler::{{closure}}::hef496869aa926670"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/std/src/panicking.rs:500:13"
"  12:     0x7f0390f0c3b6 - std::sys_common::backtrace::__rust_end_short_backtrace::h8e9b039b8ed3e70f"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/std/src/sys_common/backtrace.rs:139:18"
"  13:     0x7f0390f0c372 - rust_begin_unwind"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/std/src/panicking.rs:498:5"
"  14:     0x7f03909062c0 - core::panicking::panic_fmt::h568976b83a33ae59"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/core/src/panicking.rs:107:14"
"  15:     0x7f039090641c - core::panicking::panic::he2e71cfa6548cc2c"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/core/src/panicking.rs:48:5"
"  16:     0x7f0390eb443f - <rustjail::cgroups::fs::Manager as rustjail::cgroups::Manager>::get_stats::h85031fc1c59c53d9"
"  17:     0x7f03909c0138 - <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll::hfa6e6cd7516f8d11"
"  18:     0x7f0390d697e5 - <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll::hffbaa534cfa97d44"
"  19:     0x7f039099c0b3 - <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll::hae3ab083a06d0b4b"
"  20:     0x7f0390af9e1e - std::panic::catch_unwind::h1fdd25c8ebba32e1"
"  21:     0x7f0390b7c4e6 - tokio::runtime::task::raw::poll::hd3ebbd0717dac808"
"  22:     0x7f0390f49f3f - tokio::runtime::thread_pool::worker::Context::run_task::hfdd63cd1e0b17abf"
"  23:     0x7f0390f3a599 - tokio::runtime::task::raw::poll::h62954f6369b1d210"
"  24:     0x7f0390f37863 - std::sys_common::backtrace::__rust_begin_short_backtrace::h1c58f232c078bfe9"
"  25:     0x7f0390f4f3dd - core::ops::function::FnOnce::call_once{{vtable.shim}}::h2d329a84c0feed57"
"  26:     0x7f0390f0e535 - <alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once::h137e5243c6233a3b"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/alloc/src/boxed.rs:1694:9"
"  27:     0x7f0390f0e535 - <alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once::h7331c46863d912b7"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/alloc/src/boxed.rs:1694:9"
"  28:     0x7f0390f0e535 - std::sys::unix::thread::Thread::new::thread_start::h1fb20b966cb927ab"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/std/src/sys/unix/thread.rs:106:17"
```

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-04-12 11:19:08 +02:00
Zhuoyu Tie
6e79042aa0 runtime: no need to write virtiofsd error to log
The scanner reads nothing from the virtiofsd stderr pipe, because the
'--syslog' param redirects stderr to syslog. So there is no need to write
scanner.Text() to the kata log.

Fixes: #4063

Signed-off-by: Zhuoyu Tie <tiezhuoyu@outlook.com>
2022-04-12 15:59:57 +08:00
Braden Rayhorn
9b6f24b2ee agent: add tests for mount_to_rootfs function
Add test coverage for mount_to_rootfs function in src/mount.rs.
Includes minor refactoring to make function more easily testable.

Fixes #4073

Signed-off-by: Braden Rayhorn <bradenrayhorn@fastmail.com>
2022-04-11 21:42:38 -05:00
Braden Rayhorn
c3776b1792 agent: add tests for is_signal_handled function
Add test coverage for is_signal_handled function in rpc.rs. Includes
refactors to make the function testable and handle additional cases.

Fixes #3939

Signed-off-by: Braden Rayhorn <bradenrayhorn@fastmail.com>
2022-04-11 21:23:55 -05:00
Braden Rayhorn
9c22d9554e agent: add tests for update_container_namespaces
Add test coverage for update_container_namespaces function
in src/rpc.rs. Includes minor refactor to make function easier
to test.

Fixes #4034

Signed-off-by: Braden Rayhorn <bradenrayhorn@fastmail.com>
2022-04-11 18:27:30 -05:00
Fabiano Fidêncio
c108bb7a2a Merge pull request #4071 from GabyCT/topic/updatelimidoc
docs: Update link to contributions guide
2022-04-11 18:37:31 +02:00
Chelsea Mafrica
bf98c99f14 Merge pull request #4069 from bradenrayhorn/test-mount-storage
agent: add tests for mount_storage
2022-04-11 09:14:05 -07:00
Yibo Zhuang
92c00c7e84 agent: fsGroup support for direct-assigned volume
Adding two functions set_ownership and
recursive_ownership_change to support changing group id
ownership for a mounted volume.

The set_ownership will be called in common_storage_handler
after mount_storage performs the mount for the volume.
set_ownership will be a noop if the FSGroup field in the
Storage struct is not set which indicates no chown will be
performed. If FSGroup field is specified, then it will
perform the recursive walk of the mounted volume path to
change ownership of all files and directories to the
desired group id. It will also configure the SetGid bit
so that files created in the directory will have a group
following the parent directory's group.

If the fsGroupChangePolicy is on root mismatch,
then the group ownership change will be skipped if the root
directory's group id already matches the desired group
id and if the SetGid bit is also set on the root directory.

This is the same behavior as what
Kubelet does today when performing the recursive walk
to change ownership.

Fixes #4018

Signed-off-by: Yibo Zhuang <yibzhuang@gmail.com>
2022-04-11 08:57:13 -07:00
Gabriela Cervantes
6e9e4e8ce5 docs: Update link to contributions guide
This PR updates the url link to the contributions guide
at the Limitations document.

Fixes #4070

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2022-04-11 15:49:57 +00:00
Yibo Zhuang
532d53977e runtime: fsGroup support for direct-assigned volume
The fsGroup will be specified by the fsGroup key in
the direct-assign mountinfo metadata field.
This will be set when invoking the kata-runtime
binary and providing the key, value pair in the metadata
field. Similarly, the fsGroupChangePolicy will also
be provided in the mountinfo metadata field.

Adding two extra fields, FSGroup and FSGroupChangePolicy,
in the Mount construct for container mounts, which will
be populated when creating block devices by parsing
out the mountInfo.json.

And in handleDeviceBlockVolume of the kata-agent client,
it checks if the mount FSGroup is not nil, which
indicates that fsGroup change is required in the guest,
and will provide the FSGroup field in the protobuf to
pass the value to the agent.

Fixes #4018

Signed-off-by: Yibo Zhuang <yibzhuang@gmail.com>
2022-04-11 08:41:13 -07:00
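
In Go terms, the Mount additions could look roughly like this (the shape and policy values are illustrative; the names come from the commit message and Kubernetes' fsGroupChangePolicy):

```go
package virtcontainers

// Illustrative fragment only: a container Mount carrying the direct-volume
// fsGroup metadata. A *uint32 distinguishes "not requested" from gid 0.
type Mount struct {
	Source      string
	Destination string
	// FSGroup, when non-nil, asks the agent to chown the mounted volume
	// to this group id once it is mounted in the guest.
	FSGroup *uint32
	// FSGroupChangePolicy mirrors Kubernetes' policy: "Always", or
	// "OnRootMismatch" to skip the walk when the root already matches.
	FSGroupChangePolicy string
}
```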
Yibo Zhuang
6a47b82c81 proto: fsGroup support for direct-assigned volume
This change adds two fields to the Storage pb

FSGroup which is a group id that the runtime
specifies to indicate to the agent to perform a
chown of the mounted volume to the specified
group id after mounting is complete in the guest.

FSGroupChangePolicy which is a policy to indicate
whether to always perform the group id ownership
change or only if the root directory group id
does not match with the desired group id.

These two fields will allow CSI plugins to indicate
to Kata that after the block device is mounted in
the guest, group id ownership change should be performed
on that volume.

Fixes #4018

Signed-off-by: Yibo Zhuang <yibzhuang@gmail.com>
2022-04-11 08:41:13 -07:00
Braden Rayhorn
9d5e7ee0d4 agent: add tests for mount_storage
Add test coverage for mount_storage function in src/mount.rs.

Fixes: #4068

Signed-off-by: Braden Rayhorn <bradenrayhorn@fastmail.com>
2022-04-10 21:42:20 -05:00
bin
f8cc5d1ad8 kata-monitor: add some links when generating pages for browsers
Add some links to the rendered web pages for a better user experience,
so users can jump between pages just by clicking links in their browsers.

Fixes: #4061

Signed-off-by: bin <bin@hyper.sh>
2022-04-11 09:29:56 +08:00
Fabiano Fidêncio
698e45f403 Merge pull request #4057 from bradenrayhorn/test-parse-mount-flags-and-options
agent: add test coverage for parse_mount_flags_and_options function
2022-04-08 14:42:18 +02:00
Fabiano Fidêncio
761e8313de Merge pull request #3985 from bradenrayhorn/test-do-write-stream
agent: add tests for do_write_stream function
2022-04-08 14:34:57 +02:00
Peng Tao
4f551e3428 Merge pull request #4048 from liubin/fix/3303-delete-virtiofsd-debug-option
runtime: delete debug option in virtiofsd
2022-04-08 15:42:38 +08:00
Peng Tao
a83a16e32c Merge pull request #4059 from garrettmahin/test-process-grpc-to-oci
rustjail: add test coverage for process_grpc_to_oci function
2022-04-08 15:39:28 +08:00
Peng Tao
95e45fab38 Merge pull request #4053 from ManaSugi/fix-makefile-for-features
agent: Allow the agent to be rebuilt with the change of Cargo features
2022-04-08 15:38:25 +08:00
garrettmahin
c31cd0e81a rustjail: add test coverage for process_grpc_to_oci function
Add test coverage for the process_grpc_to_oci function in src/rustjail/lib.rs

Fixes #4058

Signed-off-by: Garrett Mahin <garrett.mahin@gmail.com>
2022-04-07 20:50:48 -05:00
Bin Liu
9c1c219a3f Merge pull request #4007 from liubin/fix/3959-add-csi-rs-to-gitignore
protocols: add src/csi.rs to .gitignore
2022-04-08 09:33:04 +08:00
Braden Rayhorn
1118a3d2da agent: add test coverage for parse_mount_flags_and_options function
Add test coverage for the parse_mount_flags_and_options function
in src/mount.rs.

Fixes #4056

Signed-off-by: Braden Rayhorn <bradenrayhorn@fastmail.com>
2022-04-07 17:46:35 -05:00
bin
9d5b03a1b7 runtime: delete debug option in virtiofsd
virtiofsd's debug is enabled whenever the hypervisor's debug has been
enabled, which generates too many noisy logs from virtiofsd.

Unbind the log level of virtiofsd from that of the
hypervisor; if users want to see virtiofsd's debug log,
they can set it with:

  virtio_fs_extra_args = ["-o", "log_level=debug"]

Fixes: #3303

Signed-off-by: bin <bin@hyper.sh>
2022-04-07 19:55:22 +08:00
Manabu Sugimoto
eff7c7e0ff agent: Allow the agent to be rebuilt with the change of Cargo features
This allows the kata-agent to be rebuilt when the Cargo "features" set is
changed. The Makefile for the agent does not need to specify the
sources as prerequisites, since Cargo itself checks for source
changes.

Fixes: #4052

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2022-04-07 20:09:20 +09:00
Greg Kurz
d0d3787233 Merge pull request #3696 from shippomx/main
kata-runtime enable hugepage support
2022-04-06 16:47:04 +02:00
Fabiano Fidêncio
465d3a5506 Merge pull request #4012 from nubificus/how-to-fc-guide
docs: Add a firecracker installation guide
2022-04-06 12:59:55 +02:00
Jaylyn Ren
b975f2e8d2 Virtcontainers: Enable hot plugging vhost-user-blk device on ARM
A vhost-user-blk device can be hotplugged on the PCI bridge successfully on
x86, but this fails on Arm. However, hotplugging it on a Root Port as a PCIe
device works well on ARM.
Enabling "pcie_root_port" in configuration.toml is needed.

Fixes: #4019

Signed-off-by: Jaylyn Ren <jaylyn.ren@arm.com>
2022-04-06 17:37:51 +08:00
bin
962d05ec86 protocols: add src/csi.rs to .gitignore
After running make in src/agent, the git working area will be changed:

Untracked files:
  (use "git add <file>..." to include in what will be committed)
	src/libs/protocols/src/csi.rs

The file generated by `build.rs` should be ignored by git.

Fixes: #3959

Signed-off-by: bin <bin@hyper.sh>
2022-04-06 09:55:38 +08:00
Fabiano Fidêncio
b39caf43f1 Merge pull request #3923 from Jakob-Naucke/no-initrd-se
runtime: Allow and require no initrd for SE
2022-04-05 09:26:07 +02:00
Feng Wang
354cd3b9b6 runtime: Base64 encode the direct volume mountInfo path
This is to avoid accidentally deleting multiple volumes.

Fixes #4020

Signed-off-by: Feng Wang <feng.wang@databricks.com>
2022-04-04 19:56:46 -07:00
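
A hedged illustration of why encoding helps (the exact base64 variant and the direct-volumes root below are assumptions): the encoded path contains no '/', so each volume maps to exactly one directory entry and removing it cannot touch a sibling volume's tree:

```go
package main

import (
	"encoding/base64"
	"fmt"
	"path/filepath"
)

func main() {
	// Hypothetical values: the mountInfo root and volume path are examples.
	const directVolumesRoot = "/run/kata-containers/shared/direct-volumes"
	volumePath := "/var/lib/kubelet/pods/uid/volumes/vol-1"

	// URL-safe base64 emits no '/', so the whole volume path collapses into
	// a single path component rather than a nested directory tree.
	encoded := base64.URLEncoding.EncodeToString([]byte(volumePath))
	fmt.Println(filepath.Join(directVolumesRoot, encoded))
}
```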
Braden Rayhorn
485aeabb6b agent: add tests for do_write_stream function
Add test coverage for do_write_stream function of AgentService
in src/rpc.rs. Includes minor refactoring to make function more
easily testable.

Fixes #3984

Signed-off-by: Braden Rayhorn <bradenrayhorn@fastmail.com>
2022-04-04 08:21:01 -05:00
George Ntoutsos
4405b188e8 docs: Add a firecracker installation guide
Add info on setting up kata with firecracker.

Fixes: #3555

Signed-off-by: George Ntoutsos <gntouts@nubificus.co.uk>
Signed-off-by: Anastassios Nanos <ananos@nubificus.co.uk>
2022-04-04 14:59:41 +03:00
Archana Shinde
e62bc8e7f3 Merge pull request #3915 from Juneezee/test/t.TempDir
test: use `T.TempDir` to create temporary test directory
2022-04-04 01:34:46 -07:00
Fabiano Fidêncio
8980d04e25 Merge pull request #4023 from fidencio/wip/expose-service-offload-option-to-clh
clh: Expose service offload configuration
2022-04-01 14:10:33 +02:00
Fabiano Fidêncio
98750d792b clh: Expose service offload configuration
This configuration option is valid for all the hypervisors that are going
to be used with the confidential containers effort, so let's expose the
configuration option for Cloud Hypervisor as well.

Fixes: #4022

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-04-01 11:15:55 +02:00
Eng Zer Jun
59c7165ee1 test: use T.TempDir to create temporary test directory
The directory created by `T.TempDir` is automatically removed when the
test and all its subtests complete.

This commit also updates the unit test advice to use `T.TempDir` to
create temporary directory in tests.

Fixes: #3924

Reference: https://pkg.go.dev/testing#T.TempDir
Signed-off-by: Eng Zer Jun <engzerjun@gmail.com>
2022-03-31 09:31:36 +08:00
Jakob Naucke
ff17c756d2 runtime: Allow and require no initrd for SE
Previously, it was not permitted to have neither an initrd nor an image.
However, this is the exact config to use for Secure Execution, where the
initrd is part of the image to be specified as `-kernel`. Require the
configuration of no initrd for Secure Execution.

Also
- remove redundant code for image/initrd checking -- no need to check in
  `newQemuHypervisorConfig` (calling) when it is also checked in
  `getInitrdAndImage` (called)
- use `QemuCCWVirtio` constant when possible

Fixes: #3922
Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2022-03-25 18:36:12 +01:00
Eduardo Lima (Etrunko)
1cad3a4696 agent/random: Ensure data.len > 0
Also adds a test to cover this scenario

Signed-off-by: Eduardo Lima (Etrunko) <etrunko@redhat.com>
2022-03-18 15:13:51 -03:00
Eduardo Lima (Etrunko)
33c953ace4 agent: Add test_ressed_rng_not_root
Same as previous test, but does not skip if it is not running as root.

Signed-off-by: Eduardo Lima (Etrunko) <etrunko@redhat.com>
2022-03-18 15:13:51 -03:00
Wainer dos Santos Moschetta
39a35b693a agent: Add test to random::reseed_rng()
Introduced a unit test for the random::reseed_rng() function.

Fixes #291
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2022-03-18 10:23:22 -03:00
Eduardo Lima (Etrunko)
d8f39fb269 agent/random: Rename RNDRESEEDRNG to RNDRESEEDCRNG
Make this definition match the one in kernel:

5bfc75d92e/include/uapi/linux/random.h (L38-L39)

Signed-off-by: Eduardo Lima (Etrunko) <etrunko@redhat.com>
2022-03-18 10:23:22 -03:00
Miao Xia
a2f5c1768e runtime/virtcontainers: Pass the hugepages resources to agent
The hugepages resources claimed by containers should be limited
by cgroup in the guest OS.

Fixes: #3695

Signed-off-by: Miao Xia <xia.miao1@zte.com.cn>
2022-03-15 18:46:08 +08:00
755 changed files with 312684 additions and 2687 deletions

View File

@@ -14,31 +14,31 @@ jobs:
 target_branch: ${{ github.base_ref }}
 steps:
 - name: Install Go
-if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
+if: github.repository_owner == 'kata-containers'
 uses: actions/setup-go@v2
 with:
 go-version: ${{ matrix.go-version }}
 env:
 GOPATH: ${{ runner.workspace }}/kata-containers
 - name: Set env
-if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
+if: github.repository_owner == 'kata-containers'
 run: |
 echo "GOPATH=${{ github.workspace }}" >> $GITHUB_ENV
 echo "${{ github.workspace }}/bin" >> $GITHUB_PATH
 - name: Checkout code
-if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
+if: github.repository_owner == 'kata-containers'
 uses: actions/checkout@v2
 with:
 fetch-depth: 0
 path: ./src/github.com/${{ github.repository }}
 - name: Setup
-if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
+if: github.repository_owner == 'kata-containers'
 run: |
 cd ${GOPATH}/src/github.com/${{ github.repository }} && ./ci/setup.sh
 env:
 GOPATH: ${{ runner.workspace }}/kata-containers
 # docs url alive check
 - name: Docs URL Alive Check
-if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
+if: github.repository_owner == 'kata-containers'
 run: |
 cd ${GOPATH}/src/github.com/${{ github.repository }} && make docs-url-alive-check

View File

@@ -24,6 +24,7 @@ jobs:
 - firecracker
 - rootfs-image
 - rootfs-initrd
+- virtiofsd
 steps:
 - uses: actions/checkout@v2
 - name: Install docker

View File

@@ -1,4 +1,5 @@
 on:
+workflow_dispatch: # this is used to trigger the workflow on non-main branches
 issue_comment:
 types: [created, edited]
@@ -47,6 +48,7 @@ jobs:
 - rootfs-image
 - rootfs-initrd
 - shim-v2
+- virtiofsd
 steps:
 - name: get-PR-ref
 id: get-PR-ref

View File

@@ -17,6 +17,7 @@ jobs:
 - rootfs-image
 - rootfs-initrd
 - shim-v2
+- virtiofsd
 steps:
 - uses: actions/checkout@v2
 - name: Install docker

.gitignore vendored
View File

@@ -10,4 +10,5 @@ src/agent/kata-agent.service
 src/agent/protocols/src/*.rs
 !src/agent/protocols/src/lib.rs
 build
+src/tools/log-parser/kata-log-parser

View File

@@ -14,6 +14,8 @@ TOOLS =
 TOOLS += agent-ctl
 TOOLS += trace-forwarder
+TOOLS += runk
+TOOLS += log-parser
 STANDARD_TARGETS = build check clean install test vendor

View File

@@ -118,6 +118,7 @@ The table below lists the core parts of the project:
 | [runtime](src/runtime) | core | Main component run by a container manager and providing a containerd shimv2 runtime implementation. |
 | [agent](src/agent) | core | Management process running inside the virtual machine / POD that sets up the container environment. |
 | [documentation](docs) | documentation | Documentation common to all components (such as design and install documentation). |
+| [libraries](src/libs) | core | Library crates shared by multiple Kata Container components or published to [`crates.io`](https://crates.io/index.html) |
 | [tests](https://github.com/kata-containers/tests) | tests | Excludes unit tests which live with the main code. |
 ### Additional components
@@ -131,6 +132,7 @@ The table below lists the remaining parts of the project:
 | [osbuilder](tools/osbuilder) | infrastructure | Tool to create "mini O/S" rootfs and initrd images and kernel for the hypervisor. |
 | [`agent-ctl`](src/tools/agent-ctl) | utility | Tool that provides low-level access for testing the agent. |
 | [`trace-forwarder`](src/tools/trace-forwarder) | utility | Agent tracing helper. |
+| [`runk`](src/tools/runk) | utility | Standard OCI container runtime based on the agent. |
 | [`ci`](https://github.com/kata-containers/ci) | CI | Continuous Integration configuration files and scripts. |
 | [`katacontainers.io`](https://github.com/kata-containers/www.katacontainers.io) | Source for the [`katacontainers.io`](https://www.katacontainers.io) site. |
@@ -138,7 +140,7 @@ The table below lists the remaining parts of the project:
 Kata Containers is now
 [available natively for most distributions](docs/install/README.md#packaged-installation-methods).
-However, packaging scripts and metadata are still used to generate snap and GitHub releases. See
+However, packaging scripts and metadata are still used to generate [snap](snap/local) and GitHub releases. See
 the [components](#components) section for further details.
## Glossary of Terms

View File

@@ -1 +1 @@
-2.5.0-alpha0
+2.5.0-alpha2

View File

@@ -1,12 +0,0 @@
-#!/usr/bin/env bash
-#
-# Copyright (c) 2020 Intel Corporation
-#
-# SPDX-License-Identifier: Apache-2.0
-
-set -e
-
-cidir=$(dirname "$0")
-source "${cidir}/lib.sh"
-
-run_go_test

View File

@@ -39,12 +39,6 @@ run_static_checks()
 bash "$tests_repo_dir/.ci/static-checks.sh" "$@"
 }
-run_go_test()
-{
-clone_tests_repo
-bash "$tests_repo_dir/.ci/go-test.sh"
-}
-
 run_docs_url_alive_check()
 {
 clone_tests_repo

View File

@@ -465,7 +465,7 @@ script and paste its output directly into a
> [runtime](../src/runtime) repository.
To perform analysis on Kata logs, use the
[`kata-log-parser`](https://github.com/kata-containers/tests/tree/main/cmd/log-parser)
[`kata-log-parser`](../src/tools/log-parser)
tool, which can convert the logs into other formats (e.g. JSON, TOML, XML, and YAML).
See [Set up a debug console](#set-up-a-debug-console).
@@ -700,11 +700,11 @@ options to have the kernel boot messages logged into the system journal.
For generic information on enabling debug in the configuration file, see the
[Enable full debug](#enable-full-debug) section.
The kernel boot messages will appear in the `containerd` or `CRI-O` log appropriately,
The kernel boot messages will appear in the `kata` logs (and in the `containerd` or `CRI-O` log appropriately).
such as:
```bash
$ sudo journalctl -t containerd
$ sudo journalctl -t kata
-- Logs begin at Thu 2020-02-13 16:20:40 UTC, end at Thu 2020-02-13 16:30:23 UTC. --
...
time="2020-09-15T14:56:23.095113803+08:00" level=debug msg="reading guest console" console-protocol=unix console-url=/run/vc/vm/ab9f633385d4987828d342e47554fc6442445b32039023eeddaa971c1bb56791/console.sock pid=107642 sandbox=ab9f633385d4987828d342e47554fc6442445b32039023eeddaa971c1bb56791 source=virtcontainers subsystem=sandbox vmconsole="[ 0.395399] brd: module loaded"
@@ -714,3 +714,4 @@ time="2020-09-15T14:56:23.105268162+08:00" level=debug msg="reading guest consol
time="2020-09-15T14:56:23.121121598+08:00" level=debug msg="reading guest console" console-protocol=unix console-url=/run/vc/vm/ab9f633385d4987828d342e47554fc6442445b32039023eeddaa971c1bb56791/console.sock pid=107642 sandbox=ab9f633385d4987828d342e47554fc6442445b32039023eeddaa971c1bb56791 source=virtcontainers subsystem=sandbox vmconsole="[ 0.421324] memmap_init_zone_device initialised 32768 pages in 12ms"
...
```
Refer to the [kata-log-parser documentation](../src/tools/log-parser/README.md), which is useful for processing these entries.
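As a rough sketch (the exact parser flags are documented in its README; the capture command reuses the `journalctl` tag shown above):
```bash
# Capture the raw kata log entries from the systemd journal into a file.
$ sudo journalctl -q -o cat -a -t kata > kata.log
# Feed the captured file to the parser; consult the kata-log-parser README
# for the supported output formats (e.g. JSON, TOML, XML, YAML).
$ kata-log-parser kata.log
```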

View File

@@ -46,7 +46,7 @@ The following link shows the latest list of limitations:
# Contributing
If you would like to work on resolving a limitation, please refer to the
[contributors guide](https://github.com/kata-containers/community/blob/master/CONTRIBUTING.md).
[contributors guide](https://github.com/kata-containers/community/blob/main/CONTRIBUTING.md).
If you wish to raise an issue for a new limitation, either
[raise an issue directly on the runtime](https://github.com/kata-containers/kata-containers/issues/new)
or see the

View File

@@ -4,11 +4,11 @@
## Requirements
- [hub](https://github.com/github/hub)
* Using an [application token](https://github.com/settings/tokens) is required for hub.
* Using an [application token](https://github.com/settings/tokens) is required for hub (set it in the `GITHUB_TOKEN` environment variable).
- GitHub permissions to push tags and create releases in Kata repositories.
- GPG configured to sign git tags. https://help.github.com/articles/generating-a-new-gpg-key/
- GPG configured to sign git tags. https://docs.github.com/en/authentication/managing-commit-signature-verification/generating-a-new-gpg-key
- You should configure your GitHub to use your ssh keys (to push to branches). See https://help.github.com/articles/adding-a-new-ssh-key-to-your-github-account/.
* As an alternative, configure hub to push and fork with HTTPS: `git config --global hub.protocol https` (not tested yet).
@@ -48,7 +48,7 @@
### Merge all bump version Pull requests
- The above step will create a GitHub pull request in the Kata projects. Trigger the CI using the `/test` command on each bump pull request.
- Trigger the test-kata-deploy workflow on the kata-containers repository bump Pull request using `/test_kata_deploy` (monitor under the "action" tab).
- Trigger the `test-kata-deploy` workflow, which is under the `Actions` tab on the repository GitHub page (make sure to select the correct branch and validate that it passes).
- Check any failures and fix if needed.
- Work with the Kata approvers to verify that the CI works and the pull requests are merged.

View File

@@ -277,7 +277,9 @@ mod tests {
## Temporary files
Always delete temporary files on success.
Use `t.TempDir()` to create a temporary directory. The directory created by
`t.TempDir()` is automatically removed when the test and all its subtests
complete.
### Golang temporary files
@@ -286,11 +288,7 @@ func TestSomething(t *testing.T) {
assert := assert.New(t)
// Create a temporary directory
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
// Delete it at the end of the test
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
// Add test logic that will use the tmpdir here...
}

View File

@@ -11,7 +11,8 @@ Kata Containers design documents:
- [`Inotify` support](inotify.md)
- [Metrics(Kata 2.0)](kata-2-0-metrics.md)
- [Design for Kata Containers `Lazyload` ability with `nydus`](kata-nydus-design.md)
- [Design for direct-assigned volume](direct-blk-device-assignment.md)
- [Design for core-scheduling](core-scheduling.md)
---
- [Design proposals](proposals)

View File

@@ -20,7 +20,7 @@ For virtio-fs, the [runtime](README.md#runtime) starts one `virtiofsd` daemon
## Devicemapper
The
[devicemapper `snapshotter`](https://github.com/containerd/containerd/tree/master/snapshots/devmapper)
[devicemapper `snapshotter`](https://github.com/containerd/containerd/tree/main/snapshots/devmapper)
is a special case. The `snapshotter` uses dedicated block devices
rather than formatted filesystems, and operates at the block level
rather than the file level. This knowledge is used to directly use the

View File

@@ -0,0 +1,12 @@
# Core scheduling
Core scheduling is a Linux kernel feature that allows only trusted tasks to run concurrently on
CPUs sharing compute resources (for example, hyper-threads on a core).
Containerd versions >= 1.6.4 leverage this to treat all of the processes associated with a
given pod or container as a single group of trusted tasks. To indicate this should be carried
out, containerd sets the `SCHED_CORE` environment variable for each shim it spawns. When this is
set, the Kata Containers shim implementation uses the `prctl` syscall to create a new core scheduling
domain for the shim process itself as well as future VMM processes it will start.
For more details on the core scheduling feature, see the [Linux documentation](https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/core-scheduling.html).
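As a quick sanity check (a sketch, assuming your distribution ships the kernel build config under `/boot`), you can verify that the host kernel was built with core scheduling support:
```bash
# Core scheduling is gated behind the upstream CONFIG_SCHED_CORE kernel
# option (available since Linux 5.14). The config file path below is an
# assumption; some systems expose /proc/config.gz instead.
$ grep CONFIG_SCHED_CORE= "/boot/config-$(uname -r)"
CONFIG_SCHED_CORE=y
```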

View File

@@ -0,0 +1,253 @@
# Motivation
Today, there exist a few gaps between Container Storage Interface (CSI) and virtual machine (VM) based runtimes such as Kata Containers
that prevent them from working together smoothly.
First, it's cumbersome to use a persistent volume (PV) with Kata Containers. Today, for a PV with Filesystem volume mode, Virtio-fs
is the only way to surface it inside a Kata Container guest VM. But often mounting the filesystem (FS) within the guest operating system (OS) is
desired due to performance benefits, availability of native FS features and security benefits over the Virtio-fs mechanism.
Second, it's difficult, if not impossible, to resize a PV online with Kata Containers. While a PV can be expanded on the host OS,
the updated metadata needs to be propagated to the guest OS in order for the application container to use the expanded volume.
Currently, there is not a way to propagate the PV metadata from the host OS to the guest OS without restarting the Pod sandbox.
# Proposed Solution
Because of the OS boundary, these features cannot be implemented in the CSI node driver plugin running on the host OS
as is normally done in the runc container. Instead, they can be done by the Kata Containers agent inside the guest OS,
but it requires the CSI driver to pass the relevant information to the Kata Containers runtime.
An ideal long term solution would be to have the `kubelet` coordinating the communication between the CSI driver and
the container runtime, as described in [KEP-2857](https://github.com/kubernetes/enhancements/pull/2893/files).
However, as the KEP is still under review, we would like to propose a short/medium term solution to unblock our use case.
The proposed solution is built on top of a previous [proposal](https://github.com/egernst/kata-containers/blob/da-proposal/docs/design/direct-assign-volume.md)
described by Eric Ernst. The previous proposal has two gaps:
1. Writing a `csiPlugin.json` file to the volume root path introduced a security risk. A malicious user can gain unauthorized
access to a block device by writing their own `csiPlugin.json` to the above location through an ephemeral CSI plugin.
2. The proposal didn't describe how to establish a mapping between a volume and a kata sandbox, which is needed for
implementing CSI volume resize and volume stat collection APIs.
This document particularly focuses on how to address these two gaps.
## Assumptions and Limitations
1. The proposal assumes that a block device volume will only be used by one Pod on a node at a time, which we believe
is the most common pattern in Kata Containers use cases. It's also unsafe to have the same block device attached to more than
one Kata pod. In the context of Kubernetes, the `PersistentVolumeClaim` (PVC) needs to have the `accessMode` as `ReadWriteOncePod`.
2. More advanced Kubernetes volume features such as, `fsGroup`, `fsGroupChangePolicy`, and `subPath` are not supported.
## End User Interface
1. The user specifies a PV as a direct-assigned volume. How a PV is specified as a direct-assigned volume is left for each CSI implementation to decide.
There are a few options for reference:
1. A storage class parameter specifies whether it's a direct-assigned volume. This avoids any lookups of PVC
or Pod information from the CSI plugin (as the external provisioner takes care of these). However, all PVs in the storage class with the parameter set
will have host mounts skipped.
2. Use a PVC annotation. This approach requires that the CSI plugins have `--extra-create-metadata` [set](https://kubernetes-csi.github.io/docs/external-provisioner.html#persistentvolumeclaim-and-persistentvolume-parameters)
to be able to look up the PVC annotations from the API server. Pro: the API server lookup of annotations is only required during creation of the PV.
Con: the CSI plugin will always skip host mounting of the PV.
3. The CSI plugin can also look up the pod's `runtimeclass` during `NodePublish`. This approach can be found in the [ALIBABA CSI plugin](https://github.com/kubernetes-sigs/alibaba-cloud-csi-driver/blob/master/pkg/disk/nodeserver.go#L248).
2. The CSI node driver delegates the direct-assigned volume to the Kata Containers runtime. The CSI node driver APIs need to
be modified to pass volume mount information to, and collect volume information from, the Kata Containers runtime by invoking `kata-runtime` command line commands.
* **NodePublishVolume** -- It invokes `kata-runtime direct-volume add --volume-path [volumePath] --mount-info [mountInfo]`
to propagate the volume mount information to the Kata Containers runtime for it to carry out the filesystem mount operation.
The `volumePath` is the [target_path](https://github.com/container-storage-interface/spec/blob/master/csi.proto#L1364) in the CSI `NodePublishVolumeRequest`.
The `mountInfo` is a serialized JSON string.
* **NodeGetVolumeStats** -- It invokes `kata-runtime direct-volume stats --volume-path [volumePath]` to retrieve the filesystem stats of a direct-assigned volume.
* **NodeExpandVolume** -- It invokes `kata-runtime direct-volume resize --volume-path [volumePath] --size [size]` to send a resize request to the Kata Containers runtime to
resize the direct-assigned volume.
* **NodeStageVolume/NodeUnstageVolume** -- It invokes `kata-runtime direct-volume remove --volume-path [volumePath]` to remove the persisted metadata of a direct-assigned volume.
The `mountInfo` object is defined as follows:
```Golang
type MountInfo struct {
// The type of the volume (i.e. block)
VolumeType string `json:"volume-type"`
// The device backing the volume.
Device string `json:"device"`
// The filesystem type to be mounted on the volume.
FsType string `json:"fstype"`
// Additional metadata to pass to the agent regarding this volume.
Metadata map[string]string `json:"metadata,omitempty"`
// Additional mount options.
Options []string `json:"options,omitempty"`
}
```
Note: given that the `mountInfo` is persisted to disk by the Kata runtime, it shouldn't contain any secrets (such as an SMB mount password).
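As a hedged illustration, the serialized form follows the JSON tags of the struct above; the device and paths below are hypothetical values, not part of the proposal:
```bash
# Hypothetical example of handing a serialized MountInfo to the runtime.
# Field names follow the struct's JSON tags; values are illustrative only.
$ mount_info='{"volume-type":"block","device":"/dev/sdf","fstype":"ext4","options":["rw"]}'
$ kata-runtime direct-volume add --volume-path "/kubelet/a/b/c/d/sdf" --mount-info "$mount_info"
```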
## Implementation Details
### Kata runtime
Instead of the CSI node driver writing the mount info into a `csiPlugin.json` file under the volume root,
as described in the original proposal, here we propose that the CSI node driver passes the mount information to
the Kata Containers runtime through a new `kata-runtime` commandline command. The `kata-runtime` then writes the mount
information to a `mountInfo.json` file in a predefined location (`/run/kata-containers/shared/direct-volumes/[volume_path]/`).
When the Kata Containers runtime starts a container, it verifies whether a volume mount is a direct-assigned volume by checking
whether there is a `mountInfo.json` file under the computed Kata `direct-volumes` directory. If there is, the runtime parses the file
and updates the mount spec with its data. The updated mount spec is then passed to the Kata agent in the guest VM together
with other mounts. The Kata Containers runtime also creates a file named by the sandbox id under the `direct-volumes/[volume_path]/`
directory. The reason for adding a sandbox id file is to establish a mapping between the volume and the sandbox using it.
Later, when the Kata Containers runtime handles the `get-stats` and `resize` commands, it uses the sandbox id to identify
the endpoint of the corresponding `containerd-shim-kata-v2`.
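Under these conventions, the per-volume metadata directory might look like the following (a sketch; the sandbox id file name is hypothetical):
```bash
# Illustrative listing of a direct-assigned volume's metadata directory:
# the persisted mount information plus one file named after the sandbox id.
$ ls /run/kata-containers/shared/direct-volumes/kubelet/a/b/c/d/sdf
mountInfo.json  f083eec2f1b4
```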
### containerd-shim-kata-v2 changes
`containerd-shim-kata-v2` provides an API for sandbox management through a Unix domain socket. Two new handlers are proposed: `/direct-volume/stats` and `/direct-volume/resize`:
Example:
```bash
$ curl --unix-socket "$shim_socket_path" -I -X GET 'http://localhost/direct-volume/stats/[urlSafeVolumePath]'
$ curl --unix-socket "$shim_socket_path" -I -X POST 'http://localhost/direct-volume/resize' -d '{ "volumePath"": [volumePath], "Size": "123123" }'
```
The shim then forwards the corresponding request to the `kata-agent` to carry out the operations inside the guest VM. For the `resize` operation,
the Kata runtime also needs to notify the hypervisor to resize the block device (e.g. call `block_resize` in QEMU).
### Kata agent changes
The mount spec of a direct-assigned volume is passed to `kata-agent` through the existing `Storage` GRPC object.
Two new APIs and three new GRPC objects are added to the GRPC protocol between the shim and agent for resizing and getting volume stats:
```protobuf
rpc GetVolumeStats(VolumeStatsRequest) returns (VolumeStatsResponse);
rpc ResizeVolume(ResizeVolumeRequest) returns (google.protobuf.Empty);
message VolumeStatsRequest {
// The volume path on the guest outside the container
string volume_guest_path = 1;
}
message ResizeVolumeRequest {
// Full VM guest path of the volume (outside the container)
string volume_guest_path = 1;
uint64 size = 2;
}
// This should be kept in sync with CSI NodeGetVolumeStatsResponse (https://github.com/container-storage-interface/spec/blob/v1.5.0/csi.proto)
message VolumeStatsResponse {
// This field is OPTIONAL.
repeated VolumeUsage usage = 1;
// Information about the current condition of the volume.
// This field is OPTIONAL.
// This field MUST be specified if the VOLUME_CONDITION node
// capability is supported.
VolumeCondition volume_condition = 2;
}
message VolumeUsage {
enum Unit {
UNKNOWN = 0;
BYTES = 1;
INODES = 2;
}
// The available capacity in specified Unit. This field is OPTIONAL.
// The value of this field MUST NOT be negative.
uint64 available = 1;
// The total capacity in specified Unit. This field is REQUIRED.
// The value of this field MUST NOT be negative.
uint64 total = 2;
// The used capacity in specified Unit. This field is OPTIONAL.
// The value of this field MUST NOT be negative.
uint64 used = 3;
// Units by which values are measured. This field is REQUIRED.
Unit unit = 4;
}
// VolumeCondition represents the current condition of a volume.
message VolumeCondition {
// Normal volumes are available for use and operating optimally.
// An abnormal volume does not meet these criteria.
// This field is REQUIRED.
bool abnormal = 1;
// The message describing the condition of the volume.
// This field is REQUIRED.
string message = 2;
}
```
### Step by step walk-through
Given the following definition:
```YAML
---
apiVersion: v1
kind: Pod
metadata:
name: app
spec:
runtimeClassName: kata-qemu
containers:
- name: app
image: centos
command: ["/bin/sh"]
args: ["-c", "while true; do echo $(date -u) >> /data/out.txt; sleep 5; done"]
volumeMounts:
- name: persistent-storage
mountPath: /data
volumes:
- name: persistent-storage
persistentVolumeClaim:
claimName: ebs-claim
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
annotations:
skip-hostmount: "true"
name: ebs-claim
spec:
accessModes:
- ReadWriteOncePod
volumeMode: Filesystem
storageClassName: ebs-sc
resources:
requests:
storage: 4Gi
---
kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
name: ebs-sc
provisioner: ebs.csi.aws.com
volumeBindingMode: WaitForFirstConsumer
parameters:
csi.storage.k8s.io/fstype: ext4
```
Let's assume that changes have been made in the `aws-ebs-csi-driver` node driver.
**Node publish volume**
1. In the node CSI driver, the `NodePublishVolume` API invokes: `kata-runtime direct-volume add --volume-path "/kubelet/a/b/c/d/sdf" --mount-info "{\"Device\": \"/dev/sdf\", \"fstype\": \"ext4\"}"`.
2. The Kata runtime writes the mount-info JSON to a file called `mountInfo.json` under `/run/kata-containers/shared/direct-volumes/kubelet/a/b/c/d/sdf`.
**Node unstage volume**
1. In the node CSI driver, the `NodeUnstageVolume` API invokes: `kata-runtime direct-volume remove --volume-path "/kubelet/a/b/c/d/sdf"`.
2. The Kata runtime deletes the directory `/run/kata-containers/shared/direct-volumes/kubelet/a/b/c/d/sdf`.
**Use the volume in sandbox**
1. Upon the request to start a container, the `containerd-shim-kata-v2` examines the container spec,
and iterates through the mounts. For each mount, if there is a `mountInfo.json` file under `/run/kata-containers/shared/direct-volumes/[mount source path]`,
it generates a `storage` GRPC object after overwriting the mount spec with the information in `mountInfo.json`.
2. The shim sends the storage objects to kata-agent through TTRPC.
3. The shim writes a file with the sandbox id as the name under `/run/kata-containers/shared/direct-volumes/[mount source path]`.
4. The kata-agent mounts the storage objects for the container.
**Node expand volume**
1. In the node CSI driver, the `NodeExpandVolume` API invokes: `kata-runtime direct-volume resize --volume-path "/kubelet/a/b/c/d/sdf" --size 8Gi`.
2. The Kata runtime checks whether there is a sandbox id file under the directory `/run/kata-containers/shared/direct-volumes/kubelet/a/b/c/d/sdf`.
3. The Kata runtime identifies the shim instance through the sandbox id, and sends a GRPC request to resize the volume.
4. The shim handles the request, asks the hypervisor to resize the block device and sends a GRPC request to Kata agent to resize the filesystem.
5. Kata agent receives the request and resizes the filesystem.
**Node get volume stats**
1. In the node CSI driver, the `NodeGetVolumeStats` API invokes: `kata-runtime direct-volume stats --volume-path "/kubelet/a/b/c/d/sdf"`.
2. The Kata runtime checks whether there is a sandbox id file under the directory `/run/kata-containers/shared/direct-volumes/kubelet/a/b/c/d/sdf`.
3. The Kata runtime identifies the shim instance through the sandbox id, and sends a GRPC request to get the volume stats.
4. The shim handles the request and forwards it to the Kata agent.
5. Kata agent receives the request and returns the filesystem stats.

View File

@@ -51,6 +51,7 @@ The `kata-monitor` management agent should be started on each node where the Kat
> **Note**: a *node* running Kata containers will be either a single host system or a worker node belonging to a K8s cluster capable of running Kata pods.
- Aggregate the metrics of the sandboxes running on the node, adding the `sandbox_id` label to them.
- Attach the additional `cri_uid`, `cri_name` and `cri_namespace` labels to the sandbox metrics, tracking the `uid`, `name` and `namespace` Kubernetes pod metadata.
- Expose a new Prometheus target, allowing all node metrics coming from the Kata shim to be collected by Prometheus indirectly. This reduces the number of targets in Prometheus and avoids exposing the shim's metrics by `ip:port`.
Only one `kata-monitor` process runs on each node.
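For example, assuming `kata-monitor`'s default listen address of `127.0.0.1:8090` (adjustable on its command line), the aggregated metrics can be fetched like any Prometheus target:
```bash
# Scrape the aggregated node metrics exposed by kata-monitor.
# The address is an assumption based on the default; the grep is just an
# illustrative filter for Kata-prefixed metric names.
$ curl -s http://127.0.0.1:8090/metrics | grep '^kata'
```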

View File

@@ -39,7 +39,7 @@ Details of each solution and a summary are provided below.
Kata Containers with QEMU has complete compatibility with Kubernetes.
Depending on the host architecture, Kata Containers supports various machine types,
for example `pc` and `q35` on x86 systems, `virt` on ARM systems and `pseries` on IBM Power systems. The default Kata Containers
for example `q35` on x86 systems, `virt` on ARM systems and `pseries` on IBM Power systems. The default Kata Containers
machine type is `q35`. The machine type and its [`Machine accelerators`](#machine-accelerators) can
be changed by editing the runtime [`configuration`](architecture/README.md#configuration) file.
@@ -60,9 +60,8 @@ Machine accelerators are architecture specific and can be used to improve the pe
and enable specific features of the machine types. The following machine accelerators
are used in Kata Containers:
- NVDIMM: This machine accelerator is x86 specific and only supported by `pc` and
`q35` machine types. `nvdimm` is used to provide the root filesystem as a persistent
memory device to the Virtual Machine.
- NVDIMM: This machine accelerator is x86 specific and only supported by `q35` machine types.
`nvdimm` is used to provide the root filesystem as a persistent memory device to the Virtual Machine.
#### Hotplug devices

View File

@@ -15,6 +15,11 @@
- `qemu`
- `cloud-hypervisor`
- `firecracker`
In the case of `firecracker` the use of a block device `snapshotter` is needed
for the VM rootfs. Refer to the following guide for additional configuration
steps:
- [Setup Kata containers with `firecracker`](how-to-use-kata-containers-with-firecracker.md)
- `ACRN`
While `qemu`, `cloud-hypervisor` and `firecracker` work out of the box with an installation of Kata,

View File

@@ -72,7 +72,6 @@ $ command -v containerd
### Install CNI plugins
> **Note:** You do not need to install CNI plugins if you do not want to use containerd with Kubernetes.
> If you have installed Kubernetes with `kubeadm`, you might have already installed the CNI plugins.
You can manually install CNI plugins as follows:
@@ -131,74 +130,42 @@ For
The `RuntimeClass` is suggested.
The following configuration includes three runtime classes:
The following configuration includes two runtime classes:
- `plugins.cri.containerd.runtimes.runc`: the runc, and it is the default runtime.
- `plugins.cri.containerd.runtimes.kata`: The function in containerd (reference [the document here](https://github.com/containerd/containerd/tree/master/runtime/v2#binary-naming))
where the dot-connected string `io.containerd.kata.v2` is translated to `containerd-shim-kata-v2` (i.e. the
binary name of the Kata implementation of [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2)).
- `plugins.cri.containerd.runtimes.katacli`: the `containerd-shim-runc-v1` calls `kata-runtime`, which is the legacy process.
```toml
[plugins.cri.containerd]
no_pivot = false
[plugins.cri.containerd.runtimes]
[plugins.cri.containerd.runtimes.runc]
runtime_type = "io.containerd.runc.v1"
[plugins.cri.containerd.runtimes.runc.options]
NoPivotRoot = false
NoNewKeyring = false
ShimCgroup = ""
IoUid = 0
IoGid = 0
BinaryName = "runc"
Root = ""
CriuPath = ""
SystemdCgroup = false
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
privileged_without_host_devices = false
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
BinaryName = ""
CriuImagePath = ""
CriuPath = ""
CriuWorkPath = ""
IoGid = 0
[plugins.cri.containerd.runtimes.kata]
runtime_type = "io.containerd.kata.v2"
[plugins.cri.containerd.runtimes.katacli]
runtime_type = "io.containerd.runc.v1"
[plugins.cri.containerd.runtimes.katacli.options]
NoPivotRoot = false
NoNewKeyring = false
ShimCgroup = ""
IoUid = 0
IoGid = 0
BinaryName = "/usr/bin/kata-runtime"
Root = ""
CriuPath = ""
SystemdCgroup = false
```
Since containerd v1.2.4 and Kata v1.6.0, a new runtime option is supported, which allows you to specify a specific Kata configuration file as follows:
```toml
[plugins.cri.containerd.runtimes.kata]
runtime_type = "io.containerd.kata.v2"
privileged_without_host_devices = true
[plugins.cri.containerd.runtimes.kata.options]
ConfigPath = "/etc/kata-containers/config.toml"
privileged_without_host_devices = true
pod_annotations = ["io.katacontainers.*"]
container_annotations = ["io.katacontainers.*"]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.kata.options]
ConfigPath = "/opt/kata/share/defaults/kata-containers/configuration.toml"
```
`privileged_without_host_devices` tells containerd that a privileged Kata container should not have direct access to all host devices. If unset, containerd will pass all host devices to the Kata container, which may cause security issues.
`pod_annotations` is the list of pod annotations passed to both the pod sandbox as well as container through the OCI config.
`container_annotations` is the list of container annotations passed through to the OCI config of the containers.
This `ConfigPath` option is optional. If you do not specify it, shimv2 first tries to get the configuration file from the environment variable `KATA_CONF_FILE`. If neither is set, shimv2 will use the default Kata configuration file paths (`/etc/kata-containers/configuration.toml` and `/usr/share/defaults/kata-containers/configuration.toml`).
If you use containerd older than v1.2.4 or Kata older than v1.6.0 and also want to specify a configuration file, you can use the following workaround, since shimv2 accepts the environment variable `KATA_CONF_FILE` for the configuration file path: create a
shell script with the following contents:
```bash
#!/usr/bin/env bash
KATA_CONF_FILE=/etc/kata-containers/firecracker.toml containerd-shim-kata-v2 $@
```
Name it `/usr/local/bin/containerd-shim-katafc-v2` and reference it in the containerd configuration:
```toml
[plugins.cri.containerd.runtimes.kata-firecracker]
runtime_type = "io.containerd.katafc.v2"
```
#### Kata Containers as the runtime for untrusted workload
For cases without `RuntimeClass` support, we can use the legacy annotation method to support using Kata Containers
@@ -218,28 +185,8 @@ and then, run an untrusted workload with Kata Containers:
runtime_type = "io.containerd.kata.v2"
```
For the earlier versions of Kata Containers and containerd that do not support Runtime V2 (Shim API), you can use the following alternative configuration:
```toml
[plugins.cri.containerd]
# "plugins.cri.containerd.default_runtime" is the runtime to use in containerd.
[plugins.cri.containerd.default_runtime]
# runtime_type is the runtime type to use in containerd e.g. io.containerd.runtime.v1.linux
runtime_type = "io.containerd.runtime.v1.linux"
# "plugins.cri.containerd.untrusted_workload_runtime" is a runtime to run untrusted workloads on it.
[plugins.cri.containerd.untrusted_workload_runtime]
# runtime_type is the runtime type to use in containerd e.g. io.containerd.runtime.v1.linux
runtime_type = "io.containerd.runtime.v1.linux"
# runtime_engine is the name of the runtime engine used by containerd.
runtime_engine = "/usr/bin/kata-runtime"
```
You can find more information in the [containerd config documentation](https://github.com/containerd/cri/blob/master/docs/config.md).
#### Kata Containers as the default runtime
If you want to set Kata Containers as the only runtime in the deployment, you can simply configure as follows:
@@ -250,15 +197,6 @@ If you want to set Kata Containers as the only runtime in the deployment, you ca
runtime_type = "io.containerd.kata.v2"
```
Alternatively, for the earlier versions of Kata Containers and containerd that do not support Runtime V2 (Shim API), you can use the following alternative configuration:
```toml
[plugins.cri.containerd]
[plugins.cri.containerd.default_runtime]
runtime_type = "io.containerd.runtime.v1.linux"
runtime_engine = "/usr/bin/kata-runtime"
```
### Configuration for `cri-tools`
> **Note:** If you skipped the [Install `cri-tools`](#install-cri-tools) section, you can skip this section too.
@@ -312,10 +250,12 @@ To run a container with Kata Containers through the containerd command line, you
```bash
$ sudo ctr image pull docker.io/library/busybox:latest
$ sudo ctr run --runtime io.containerd.run.kata.v2 -t --rm docker.io/library/busybox:latest hello sh
$ sudo ctr run --cni --runtime io.containerd.run.kata.v2 -t --rm docker.io/library/busybox:latest hello sh
```
This launches a BusyBox container named `hello`, and it will be removed by `--rm` after it quits.
The `--cni` flag enables CNI networking for the container. Without this flag, a container with just a
loopback interface is created.
### Launch Pods with `crictl` command line

View File

@@ -91,6 +91,7 @@ There are several kinds of Kata configurations and they are listed below.
| `io.katacontainers.config.hypervisor.virtio_fs_daemon` | string | virtio-fs `vhost-user` daemon path |
| `io.katacontainers.config.hypervisor.virtio_fs_extra_args` | string | extra options passed to `virtiofs` daemon |
| `io.katacontainers.config.hypervisor.enable_guest_swap` | `boolean` | enable swap in the guest |
| `io.katacontainers.config.hypervisor.use_legacy_serial` | `boolean` | uses legacy serial device for guest's console (QEMU) |
## Container Options
| Key | Value Type | Comments |
@@ -172,7 +173,7 @@ kind: Pod
metadata:
name: pod2
annotations:
io.katacontainers.config.runtime.disable_guest_seccomp: false
io.katacontainers.config.runtime.disable_guest_seccomp: "false"
spec:
runtimeClassName: kata
containers:

View File

@@ -0,0 +1,254 @@
# Configure Kata Containers to use Firecracker
This document provides an overview on how to run Kata Containers with the AWS Firecracker hypervisor.
## Introduction
AWS Firecracker is an open source virtualization technology that is purpose-built for creating and managing secure, multi-tenant container and function-based services that provide serverless operational models. AWS Firecracker runs workloads in lightweight virtual machines, called `microVMs`, which combine the security and isolation properties provided by hardware virtualization technology with the speed and flexibility of Containers.
Please refer to AWS Firecracker [documentation](https://github.com/firecracker-microvm/firecracker/blob/main/docs/getting-started.md) for more details.
## Pre-requisites
This document requires the presence of Kata Containers on your system. Install using the instructions available through the following links:
- Kata Containers [automated installation](../install/README.md)
- Kata Containers manual installation: Automated installation does not seem to be supported for Clear Linux, so please use [manual installation](../Developer-Guide.md) steps.
> **Note:** Create rootfs image and not initrd image.
## Install AWS Firecracker
Kata Containers currently supports only AWS Firecracker v0.23.1 (newer versions are not supported [yet](https://github.com/kata-containers/kata-containers/pull/1519)).
To install Firecracker we need to get the `firecracker` and `jailer` binaries:
```bash
$ release_url="https://github.com/firecracker-microvm/firecracker/releases"
$ version="v0.23.1"
$ arch=`uname -m`
$ curl -L ${release_url}/download/${version}/firecracker-${version}-${arch} -o firecracker
$ curl -L ${release_url}/download/${version}/jailer-${version}-${arch} -o jailer
$ chmod +x jailer firecracker
```
To make the binaries available from the default system `PATH` it is recommended to move them to `/usr/local/bin` or add a symbolic link:
```bash
$ sudo ln -s $(pwd)/firecracker /usr/local/bin
$ sudo ln -s $(pwd)/jailer /usr/local/bin
```
More details can be found in [AWS Firecracker docs](https://github.com/firecracker-microvm/firecracker/blob/main/docs/getting-started.md)
In order to run Kata with AWS Firecracker, a block device as the backing store for the VM is required. To interact with `containerd` and Kata we use the `devmapper` `snapshotter`.
## Configure `devmapper`
To check support for your `containerd` installation, you can run:
```
$ ctr plugins ls |grep devmapper
```
If the output of the above command is:
```
io.containerd.snapshotter.v1 devmapper linux/amd64 ok
```
then you can skip this section and move on to `Configure Kata Containers with AWS Firecracker`.
If the output of the above command is:
```
io.containerd.snapshotter.v1 devmapper linux/amd64 error
```
then we need to set up the `devmapper` `snapshotter`. Based on a [very useful
guide](https://docs.docker.com/storage/storagedriver/device-mapper-driver/)
from Docker, we can set it up using the following scripts:
> **Note:** The following scripts assume a 100G sparse file for storing container images, a 10G sparse file for the thin-provisioning pool and 10G base image files for any sandboxed container created. This means that we will need at least 10GB free space.
```
#!/bin/bash
set -ex
DATA_DIR=/var/lib/containerd/devmapper
POOL_NAME=devpool
mkdir -p ${DATA_DIR}
# Create data file
sudo touch "${DATA_DIR}/data"
sudo truncate -s 100G "${DATA_DIR}/data"
# Create metadata file
sudo touch "${DATA_DIR}/meta"
sudo truncate -s 10G "${DATA_DIR}/meta"
# Allocate loop devices
DATA_DEV=$(sudo losetup --find --show "${DATA_DIR}/data")
META_DEV=$(sudo losetup --find --show "${DATA_DIR}/meta")
# Define thin-pool parameters.
# See https://www.kernel.org/doc/Documentation/device-mapper/thin-provisioning.txt for details.
SECTOR_SIZE=512
DATA_SIZE="$(sudo blockdev --getsize64 -q ${DATA_DEV})"
LENGTH_IN_SECTORS=$(bc <<< "${DATA_SIZE}/${SECTOR_SIZE}")
DATA_BLOCK_SIZE=128
LOW_WATER_MARK=32768
# Create a thin-pool device
sudo dmsetup create "${POOL_NAME}" \
--table "0 ${LENGTH_IN_SECTORS} thin-pool ${META_DEV} ${DATA_DEV} ${DATA_BLOCK_SIZE} ${LOW_WATER_MARK}"
cat << EOF
#
# Add this to your config.toml configuration file and restart `containerd` daemon
#
[plugins]
[plugins.devmapper]
pool_name = "${POOL_NAME}"
root_path = "${DATA_DIR}"
base_image_size = "10GB"
discard_blocks = true
EOF
```
Make it executable and run it:
```bash
$ sudo chmod +x ~/scripts/devmapper/create.sh
$ cd ~/scripts/devmapper/
$ sudo ./create.sh
```
Now, we can add the `devmapper` configuration provided by the script to `/etc/containerd/config.toml`.
> **Note:** If you are using the default `containerd` configuration (`containerd config default >> /etc/containerd/config.toml`), you may need to edit the existing `[plugins."io.containerd.snapshotter.v1.devmapper"]` configuration.
Save and restart `containerd`:
```bash
$ sudo systemctl restart containerd
```
We can use `dmsetup` to verify that the thin-pool was created successfully.
```bash
$ sudo dmsetup ls
```
We should also check that `devmapper` is registered and running:
```bash
$ sudo ctr plugins ls | grep devmapper
```
This script needs to be run only once, while setting up the `devmapper` `snapshotter` for `containerd`. Afterwards, make sure that on each reboot, the thin-pool is initialized from the same data directory. Otherwise, all the fetched containers (or the ones that you have created) will be re-initialized. A simple script that re-creates the thin-pool from the same data directory is shown below:
```
#!/bin/bash
set -ex
DATA_DIR=/var/lib/containerd/devmapper
POOL_NAME=devpool
# Allocate loop devices
DATA_DEV=$(sudo losetup --find --show "${DATA_DIR}/data")
META_DEV=$(sudo losetup --find --show "${DATA_DIR}/meta")
# Define thin-pool parameters.
# See https://www.kernel.org/doc/Documentation/device-mapper/thin-provisioning.txt for details.
SECTOR_SIZE=512
DATA_SIZE="$(sudo blockdev --getsize64 -q ${DATA_DEV})"
LENGTH_IN_SECTORS=$(bc <<< "${DATA_SIZE}/${SECTOR_SIZE}")
DATA_BLOCK_SIZE=128
LOW_WATER_MARK=32768
# Create a thin-pool device
sudo dmsetup create "${POOL_NAME}" \
--table "0 ${LENGTH_IN_SECTORS} thin-pool ${META_DEV} ${DATA_DEV} ${DATA_BLOCK_SIZE} ${LOW_WATER_MARK}"
```
We can create a systemd service to run the above script on each reboot:
```bash
$ sudo nano /lib/systemd/system/devmapper_reload.service
```
The service file:
```
[Unit]
Description=Devmapper reload script
[Service]
ExecStart=/path/to/script/reload.sh
[Install]
WantedBy=multi-user.target
```
Enable the newly created service:
```bash
$ sudo systemctl daemon-reload
$ sudo systemctl enable devmapper_reload.service
$ sudo systemctl start devmapper_reload.service
```
## Configure Kata Containers with AWS Firecracker
To configure Kata Containers with AWS Firecracker, copy the generated `configuration-fc.toml` file when building the `kata-runtime` to either `/etc/kata-containers/configuration-fc.toml` or `/usr/share/defaults/kata-containers/configuration-fc.toml`.
The following command shows full paths to the `configuration.toml` files that the runtime loads. It will use the first path that exists. (Please make sure the kernel and image paths are set correctly in the `configuration.toml` file)
```bash
$ sudo kata-runtime --show-default-config-paths
```
## Configure `containerd`
Next, we need to configure containerd. Add a file in your path (e.g. `/usr/local/bin/containerd-shim-kata-fc-v2`) with the following contents:
```
#!/bin/bash
KATA_CONF_FILE=/etc/kata-containers/configuration-fc.toml /usr/local/bin/containerd-shim-kata-v2 $@
```
> **Note:** You may need to edit the paths of the configuration file and the `containerd-shim-kata-v2` to correspond to your setup.
Make it executable:
```bash
$ sudo chmod +x /usr/local/bin/containerd-shim-kata-fc-v2
```
Add the relevant section in `containerd`'s `config.toml` file (`/etc/containerd/config.toml`):
```
[plugins.cri.containerd.runtimes]
[plugins.cri.containerd.runtimes.kata-fc]
runtime_type = "io.containerd.kata-fc.v2"
```
> **Note:** If you are using the default `containerd` configuration (`containerd config default >> /etc/containerd/config.toml`),
> the configuration should change to:
```
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.kata-fc]
runtime_type = "io.containerd.kata-fc.v2"
```
Restart `containerd`:
```bash
$ sudo systemctl restart containerd
```
## Verify the installation
We are now ready to launch a container using Kata with Firecracker to verify that everything worked:
```bash
$ sudo ctr images pull --snapshotter devmapper docker.io/library/ubuntu:latest
$ sudo ctr run --snapshotter devmapper --runtime io.containerd.run.kata-fc.v2 -t --rm docker.io/library/ubuntu:latest fc-test
```

View File

@@ -31,7 +31,7 @@ See below example config:
[plugins.cri]
[plugins.cri.containerd]
[plugins.cri.containerd.runtimes.runc]
runtime_type = "io.containerd.runc.v1"
runtime_type = "io.containerd.runc.v2"
privileged_without_host_devices = false
[plugins.cri.containerd.runtimes.kata]
runtime_type = "io.containerd.kata.v2"

View File

@@ -81,7 +81,7 @@
- Download the standard `systemd(1)` service file and install to
`/etc/systemd/system/`:
- https://raw.githubusercontent.com/containerd/containerd/master/containerd.service
- https://raw.githubusercontent.com/containerd/containerd/main/containerd.service
> **Notes:**
>

View File

@@ -3,4 +3,4 @@
Kata Containers supports passing certain GPUs from the host into the container. Select the GPU vendor for detailed information:
- [Intel](Intel-GPU-passthrough-and-Kata.md)
- [Nvidia](Nvidia-GPU-passthrough-and-Kata.md)
- [NVIDIA](NVIDIA-GPU-passthrough-and-Kata.md)

View File

@@ -0,0 +1,592 @@
# Using NVIDIA GPU device with Kata Containers
An NVIDIA GPU device can be passed to a Kata Containers container using GPU
passthrough (NVIDIA GPU pass-through mode) as well as GPU mediated passthrough
(NVIDIA `vGPU` mode).
In NVIDIA GPU pass-through mode, an entire physical GPU is directly assigned to one
VM, bypassing the NVIDIA Virtual GPU Manager. In this mode of operation, the GPU
is accessed exclusively by the NVIDIA driver running in the VM to which it is
assigned. The GPU is not shared among VMs.
NVIDIA Virtual GPU (`vGPU`) enables multiple virtual machines (VMs) to have
simultaneous, direct access to a single physical GPU, using the same NVIDIA
graphics drivers that are deployed on non-virtualized operating systems. By
doing this, NVIDIA `vGPU` provides VMs with unparalleled graphics performance,
compute performance, and application compatibility, together with the
cost-effectiveness and scalability brought about by sharing a GPU among multiple
workloads. A `vGPU` can be either time-sliced or Multi-Instance GPU (MIG)-backed
with [MIG-slices](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/).
| Technology | Description | Behavior | Detail |
| --- | --- | --- | --- |
| NVIDIA GPU pass-through mode | GPU passthrough | Physical GPU assigned to a single VM | Direct GPU assignment to VM without limitation |
| NVIDIA vGPU time-sliced | GPU time-sliced | Physical GPU time-sliced for multiple VMs | Mediated passthrough |
| NVIDIA vGPU MIG-backed | GPU with MIG-slices | Physical GPU MIG-sliced for multiple VMs | Mediated passthrough |
## Hardware Requirements
NVIDIA GPUs Recommended for Virtualization:
- NVIDIA Tesla (T4, M10, P6, V100 or newer)
- NVIDIA Quadro RTX 6000/8000
## Host BIOS Requirements
Some hardware requires a larger PCI BARs window, for example, NVIDIA Tesla P100 or
K40m:
```sh
$ lspci -s d0:00.0 -vv | grep Region
Region 0: Memory at e7000000 (32-bit, non-prefetchable) [size=16M]
Region 1: Memory at 222800000000 (64-bit, prefetchable) [size=32G] # Above 4G
Region 3: Memory at 223810000000 (64-bit, prefetchable) [size=32M]
```
For large BARs devices, MMIO mapping above 4G address space should be `enabled`
in the PCI configuration of the BIOS.
Some hardware vendors use a different name in BIOS, such as:
- Above 4G Decoding
- Memory Hole for PCI MMIO
- Memory Mapped I/O above 4GB
If you are using a GPU based on the Ampere architecture or later, SR-IOV
additionally needs to be enabled for the `vGPU` use-case.
The following steps outline the workflow for using an NVIDIA GPU with Kata.
## Host Kernel Requirements
The following configurations need to be enabled on your host kernel:
- `CONFIG_VFIO`
- `CONFIG_VFIO_IOMMU_TYPE1`
- `CONFIG_VFIO_MDEV`
- `CONFIG_VFIO_MDEV_DEVICE`
- `CONFIG_VFIO_PCI`
Your host kernel needs to be booted with `intel_iommu=on` on the kernel command
line.
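A quick way to sanity-check these prerequisites (a sketch; it assumes VFIO is built as loadable modules rather than into the kernel):
```bash
# Verify the IOMMU is enabled on the host kernel command line.
$ grep -o 'intel_iommu=on' /proc/cmdline
# Verify the VFIO modules are loaded (skip this if they are built-in).
$ lsmod | grep vfio
```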
## Install and configure Kata Containers
To use non-large BARs devices (for example, NVIDIA Tesla T4), you need Kata
version 1.3.0 or above. Follow the [Kata Containers setup
instructions](../install/README.md) to install the latest version of Kata.
To use large BARs devices (for example, NVIDIA Tesla P100), you need Kata
version 1.11.0 or above.
The following settings in the Kata `configuration.toml` file can work:
Hotplug for PCI devices with small BARs by `acpi_pcihp` (Linux's ACPI PCI
Hotplug driver):
```sh
machine_type = "q35"
hotplug_vfio_on_root_bus = false
```
Hotplug for PCIe devices with large BARs by `pciehp` (Linux's PCIe Hotplug
driver):
```sh
machine_type = "q35"
hotplug_vfio_on_root_bus = true
pcie_root_port = 1
```
## Build Kata Containers kernel with GPU support
The default guest kernel installed with Kata Containers does not provide GPU
support. To use an NVIDIA GPU with Kata Containers, you need to build a kernel
with the necessary GPU support.
The following kernel config options need to be enabled:
```sh
# Support PCI/PCIe device hotplug (Required for large BARs device)
CONFIG_HOTPLUG_PCI_PCIE=y
# Support for loading modules (Required for loading NVIDIA drivers)
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# Enable the MMIO access method for PCIe devices (Required for large BARs device)
CONFIG_PCI_MMCONFIG=y
```
The following kernel config options need to be disabled:
```sh
# Disable Open Source NVIDIA driver nouveau
# It conflicts with NVIDIA official driver
CONFIG_DRM_NOUVEAU=n
```
> **Note**: `CONFIG_DRM_NOUVEAU` is normally disabled by default.
It is worth checking that it is not enabled in your kernel configuration to
prevent any conflicts.
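Before building, you can confirm the generated `.config` reflects the options above (a sketch; the source directory name depends on the kernel version you prepared, e.g. the 5.15.23 example used later in this guide):
```bash
# Inspect the generated guest kernel config for the required options.
# The directory name is an assumption based on the 5.15.23 example below.
$ grep -E 'CONFIG_HOTPLUG_PCI_PCIE|CONFIG_MODULES|CONFIG_MODULE_UNLOAD|CONFIG_PCI_MMCONFIG|CONFIG_DRM_NOUVEAU' \
    kata-linux-5.15.23-89/.config
```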
Build the Kata Containers kernel with the previous config options, using the
instructions described in [Building Kata Containers
kernel](../../tools/packaging/kernel). For further details on building and
installing guest kernels, see [the developer
guide](../Developer-Guide.md#install-guest-kernel-images).
There is an easy way to build a guest kernel that supports NVIDIA GPU:
```sh
## Build guest kernel with ../../tools/packaging/kernel
# Prepare (download guest kernel source, generate .config)
$ ./build-kernel.sh -v 5.15.23 -g nvidia -f setup
# Build guest kernel
$ ./build-kernel.sh -v 5.15.23 -g nvidia build
# Install guest kernel
$ sudo -E ./build-kernel.sh -v 5.15.23 -g nvidia install
```
To build the NVIDIA driver in a Kata container, `linux-headers` are required.
This is one way to generate the deb packages for `linux-headers`:
> **Note**:
> Run `make rpm-pkg` to build the rpm package.
> Run `make deb-pkg` to build the deb package.
>
```sh
$ cd kata-linux-5.15.23-89
$ make deb-pkg
```
Before using the new guest kernel, please update the `kernel` parameters in
`configuration.toml`.
```sh
kernel = "/usr/share/kata-containers/vmlinuz-nvidia-gpu.container"
```
## NVIDIA GPU pass-through mode with Kata Containers
Use the following steps to pass an NVIDIA GPU device in pass-through mode with Kata:
1. Find the Bus-Device-Function (BDF) for the GPU device on the host:
```sh
$ sudo lspci -nn -D | grep -i nvidia
0000:d0:00.0 3D controller [0302]: NVIDIA Corporation Device [10de:20b9] (rev a1)
```
> PCI address `0000:d0:00.0` is assigned to the hardware GPU device.
> `10de:20b9` is the device ID of the hardware GPU device.
2. Find the IOMMU group for the GPU device:
```sh
$ BDF="0000:d0:00.0"
$ readlink -e /sys/bus/pci/devices/$BDF/iommu_group
/sys/kernel/iommu_groups/192
```
The previous output shows that the GPU belongs to IOMMU group 192. The next
step is to bind the GPU to the VFIO-PCI driver.
```sh
$ BDF="0000:d0:00.0"
$ DEV="/sys/bus/pci/devices/$BDF"
$ echo "vfio-pci" > $DEV/driver_override
$ echo $BDF > $DEV/driver/unbind
$ echo $BDF > /sys/bus/pci/drivers_probe
# To return the device to the standard driver, we simply clear the
# driver_override and reprobe the device, e.g.:
$ echo > $DEV/driver_override
$ echo $BDF > $DEV/driver/unbind
$ echo $BDF > /sys/bus/pci/drivers_probe
```
3. Check the IOMMU group number under `/dev/vfio`:
```sh
$ ls -l /dev/vfio
total 0
crw------- 1 zvonkok zvonkok 243, 0 Mar 18 03:06 192
crw-rw-rw- 1 root root 10, 196 Mar 18 02:27 vfio
```
4. Start a Kata container with the GPU device:
```sh
# You may need to `modprobe vhost-vsock` if you get
# host system doesn't support vsock: stat /dev/vhost-vsock
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch uname -r
```
5. Run `lspci` within the container to verify the GPU device is seen in the list
of the PCI devices. Note the vendor-device id of the GPU (`10de:20b9`) in the `lspci` output.
```sh
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch sh -c "lspci -nn | grep '10de:20b9'"
```
6. Additionally, you can check the PCI BARs space of the NVIDIA GPU device in the container:
```sh
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch sh -c "lspci -s 02:00.0 -vv | grep Region"
```
> **Note**: If the command lists `Region` entries for the device, the BAR space of the NVIDIA
> GPU has been successfully allocated.
## NVIDIA vGPU mode with Kata Containers
NVIDIA vGPU is a licensed product on all supported GPU boards. A software license
is required to enable all vGPU features within the guest VM. NVIDIA vGPU manager
needs to be installed on the host to configure GPUs in vGPU mode. See [NVIDIA Virtual GPU Software Documentation v14.0 through 14.1](https://docs.nvidia.com/grid/14.0/) for more details.
### NVIDIA vGPU time-sliced
In the time-sliced mode, the GPU is not partitioned and the workload uses the
whole GPU and shares access to the GPU engines. Processes are scheduled in
series. The best effort scheduler is the default and can be exchanged for
other scheduling policies; see the documentation above for how to do that.
Beware: if you had `MIG` enabled before, disable `MIG` on the GPU if you want
to use `time-sliced` `vGPU`.
```sh
$ sudo nvidia-smi -mig 0
```
Enable the virtual functions for the physical GPU in the `sysfs` file system.
```sh
$ sudo /usr/lib/nvidia/sriov-manage -e 0000:41:00.0
```
Get the `BDF` of the available virtual function on the GPU, and choose one for the
following steps.
```sh
$ cd /sys/bus/pci/devices/0000:41:00.0/
$ ls -l | grep virtfn
```
#### List all available vGPU instances
The following shell snippet walks the `sysfs` and only prints instances
that are available, i.e. that can be created.
```sh
# 00.0 is usually the PF of the device; the VFs have the function number in
# the BDF incremented by some value, so e.g. the very first VF is 0000:41:00.4
cd /sys/bus/pci/devices/0000:41:00.0/
for vf in $(ls -d virtfn*)
do
BDF=$(basename $(readlink -f $vf))
for md in $(ls -d $vf/mdev_supported_types/*)
do
AVAIL=$(cat $md/available_instances)
NAME=$(cat $md/name)
DIR=$(basename $md)
if [ $AVAIL -gt 0 ]; then
echo "| BDF | INSTANCES | NAME | DIR |"
echo "+--------------+-----------+----------------+------------+"
printf "| %12s |%10d |%15s | %10s |\n\n" "$BDF" "$AVAIL" "$NAME" "$DIR"
fi
done
done
```
If there are available instances you get something like this (for the first VF).
Beware that the output is highly dependent on the GPU you have; if there is no
output, check again whether `MIG` is really disabled.
```sh
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-4C | nvidia-692 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-8C | nvidia-693 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-10C | nvidia-694 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-16C | nvidia-695 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-20C | nvidia-696 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-40C | nvidia-697 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-80C | nvidia-698 |
```
Change to the `mdev_supported_types` directory for the virtual function on which
you want to create the `vGPU`. Taking the first output as an example:
```sh
$ cd virtfn0/mdev_supported_types/nvidia-692
$ UUIDGEN=$(uuidgen)
$ sudo bash -c "echo $UUIDGEN > create"
```
Confirm that the `vGPU` was created. You should see the `UUID` pointing to a
subdirectory of the `sysfs` space.
```sh
$ ls -l /sys/bus/mdev/devices/
```
Get the `IOMMU` group number and verify there is a `VFIO` device created to use
with Kata.
```sh
$ ls -l /sys/bus/mdev/devices/*/
$ ls -l /dev/vfio
```
Use the `VFIO` device created in the same way as in the pass-through use-case.
Beware that the guest needs the NVIDIA guest drivers, so one would need to build
a new guest `OS` image.
### NVIDIA vGPU MIG-backed
We will not go into detail about what `MIG` is; briefly, it is a technology to
partition the hardware into independent instances with guaranteed quality of
service. For more details see the [NVIDIA Multi-Instance GPU User Guide](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/).
First enable `MIG` mode for a GPU. Depending on the platform you are running,
a reboot may be necessary; some platforms support a GPU reset instead.
```sh
$ sudo nvidia-smi -mig 1
```
If the platform supports a GPU reset you can run the following; otherwise you
will get a warning to reboot the server.
```sh
$ sudo nvidia-smi --gpu-reset
```
By default, the driver provides a number of profiles that users can opt into
when configuring the MIG feature.
```sh
$ sudo nvidia-smi mig -lgip
+-----------------------------------------------------------------------------+
| GPU instance profiles: |
| GPU Name ID Instances Memory P2P SM DEC ENC |
| Free/Total GiB CE JPEG OFA |
|=============================================================================|
| 0 MIG 1g.10gb 19 7/7 9.50 No 14 0 0 |
| 1 0 0 |
+-----------------------------------------------------------------------------+
| 0 MIG 1g.10gb+me 20 1/1 9.50 No 14 1 0 |
| 1 1 1 |
+-----------------------------------------------------------------------------+
| 0 MIG 2g.20gb 14 3/3 19.50 No 28 1 0 |
| 2 0 0 |
+-----------------------------------------------------------------------------+
...
```
Create the GPU instances that correspond to the `vGPU` types of the `MIG-backed`
`vGPUs` that you will create; see [NVIDIA A100 PCIe 80GB Virtual GPU Types](https://docs.nvidia.com/grid/13.0/grid-vgpu-user-guide/index.html#vgpu-types-nvidia-a100-pcie-80gb).
```sh
# MIG 1g.10gb --> vGPU A100D-1-10C
$ sudo nvidia-smi mig -cgi 19
```
List the GPU instances and get the GPU instance id to create the compute
instance.
```sh
$ sudo nvidia-smi mig -lgi # list the created GPU instances
$ sudo nvidia-smi mig -cci -gi 9 # each GPU instance can have several compute
# instances. Instance -> Workload
```
Verify that the compute instances were created within the GPU instance:
```sh
$ nvidia-smi
... snip ...
+-----------------------------------------------------------------------------+
| MIG devices: |
+------------------+----------------------+-----------+-----------------------+
| GPU GI CI MIG | Memory-Usage | Vol| Shared |
| ID ID Dev | BAR1-Usage | SM Unc| CE ENC DEC OFA JPG|
| | | ECC| |
|==================+======================+===========+=======================|
| 0 9 0 0 | 0MiB / 9728MiB | 14 0 | 1 0 0 0 0 |
| | 0MiB / 4095MiB | | |
+------------------+----------------------+-----------+-----------------------+
... snip ...
```
We can use the [snippet](#list-all-available-vgpu-instances) from before to list
the available `vGPU` instances, this time `MIG-backed` ones.
```sh
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 |GRID A100D-1-10C | nvidia-699 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.5 | 1 |GRID A100D-1-10C | nvidia-699 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:01.6 | 1 |GRID A100D-1-10C | nvidia-699 |
... snip ...
```
Repeat the steps after the [snippet](#list-all-available-vgpu-instances) listing
to create the corresponding `mdev` device and use the guest `OS` created in the
previous section with `time-sliced` `vGPUs`.
## Install NVIDIA Driver + Toolkit in Kata Containers Guest OS
Consult the [Developer-Guide](https://github.com/kata-containers/kata-containers/blob/main/docs/Developer-Guide.md#create-a-rootfs-image) on how to create a
rootfs base image for a distribution of your choice. This is going to be used as
the base for an NVIDIA-enabled guest OS. Use the `EXTRA_PKGS` variable to install
all the packages needed to compile the drivers. Also copy the kernel development
packages from the previous `make deb-pkg` into `$ROOTFS_DIR` (see the sketch below).
```sh
export EXTRA_PKGS="gcc make curl gnupg"
```
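Copying the kernel development packages produced earlier might look like this (a sketch; the package file names depend on your `make deb-pkg` run):
```bash
# Copy the linux-headers packages built by `make deb-pkg` into the rootfs
# so they can be installed inside the chroot later. File names are
# illustrative; adjust them to match your build output.
$ sudo cp linux-headers-5.15.23-nvidia-gpu*.deb "${ROOTFS_DIR}/"
```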
Having `$ROOTFS_DIR` exported from the previous step, we can now install all the
needed parts in the guest OS. In this case, we have an Ubuntu based rootfs.
First of all, mount the special filesystems into the rootfs:
```sh
$ sudo mount -t sysfs -o ro none ${ROOTFS_DIR}/sys
$ sudo mount -t proc -o ro none ${ROOTFS_DIR}/proc
$ sudo mount -t tmpfs none ${ROOTFS_DIR}/tmp
$ sudo mount -o bind,ro /dev ${ROOTFS_DIR}/dev
$ sudo mount -t devpts none ${ROOTFS_DIR}/dev/pts
```
Now we can enter the `chroot`:
```sh
$ sudo chroot ${ROOTFS_DIR}
```
Inside the rootfs we install the drivers and the toolkit to enable easy
creation of GPU containers with Kata. The same rootfs can still be used for
any other container; it is not limited to GPU workloads.
As a prerequisite, install the kernel development packages copied earlier
```sh
$ sudo dpkg -i *.deb
```
Get the driver run file. Since we need to build the driver against a kernel
that is not running on the host, we must be able to specify the exact version
to build against: use the kernel version from building the NVIDIA kernel
(`5.15.23-nvidia-gpu`).
```sh
$ wget https://us.download.nvidia.com/XFree86/Linux-x86_64/510.54/NVIDIA-Linux-x86_64-510.54.run
$ chmod +x NVIDIA-Linux-x86_64-510.54.run
# Extract the source files so we can run the installer with arguments
$ ./NVIDIA-Linux-x86_64-510.54.run -x
$ cd NVIDIA-Linux-x86_64-510.54
$ ./nvidia-installer -k 5.15.23-nvidia-gpu
```
With the drivers installed, we install the toolkit, which takes care of
providing the right bits inside the container.
```sh
$ distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
$ curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
$ curl -s -L https://nvidia.github.io/libnvidia-container/$distribution/libnvidia-container.list | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
$ apt update
$ apt install nvidia-container-toolkit
```
Create the hook execution file for Kata:
```
# Content of $ROOTFS_DIR/usr/share/oci/hooks/prestart/nvidia-container-toolkit.sh
#!/bin/bash -x
/usr/bin/nvidia-container-toolkit -debug "$@"
```
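The OCI runtime only executes hooks that are executable, so mark the script
accordingly; a quick sketch, assuming we are still inside the `chroot`:
```sh
$ chmod +x /usr/share/oci/hooks/prestart/nvidia-container-toolkit.sh
```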
Before leaving the `chroot`, one can do some cleanup of files and package
caches. Build the rootfs and configure it for use with Kata according to the
development guide. Enable the `guest_hook_path` in Kata's `configuration.toml`:
```toml
guest_hook_path = "/usr/share/oci/hooks"
```
With an NVIDIA rootfs and kernel built, we can now run any GPU container
without installing the drivers into the container image. Check the NVIDIA
device status with `nvidia-smi`
```sh
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/nvidia/cuda:11.6.0-base-ubuntu20.04" cuda nvidia-smi
Fri Mar 18 10:36:59 2022
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.54 Driver Version: 510.54 CUDA Version: 11.6 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 NVIDIA A30X Off | 00000000:02:00.0 Off | 0 |
| N/A 38C P0 67W / 230W | 0MiB / 24576MiB | 0% Default |
| | | Disabled |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| No running processes found |
+-----------------------------------------------------------------------------+
```
As the last step, remove the additional packages and files that were added to
the `$ROOTFS_DIR` to keep it as small as possible, and unmount the special
filesystems mounted earlier, as sketched below.
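A minimal sketch of the unmounts (note that `dev/pts` must be unmounted before
`dev`):
```sh
$ sudo umount ${ROOTFS_DIR}/dev/pts ${ROOTFS_DIR}/dev \
    ${ROOTFS_DIR}/tmp ${ROOTFS_DIR}/proc ${ROOTFS_DIR}/sys
```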
## References
- [Configuring a VM for GPU Pass-Through by Using the QEMU Command Line](https://docs.nvidia.com/grid/latest/grid-vgpu-user-guide/index.html#using-gpu-pass-through-red-hat-el-qemu-cli)
- https://gitlab.com/nvidia/container-images/driver/-/tree/master
- https://github.com/NVIDIA/nvidia-docker/wiki/Driver-containers


@@ -1,293 +0,0 @@
# Using Nvidia GPU device with Kata Containers
An Nvidia GPU device can be passed to a Kata Containers container using GPU passthrough
(Nvidia GPU pass-through mode) as well as GPU mediated passthrough (Nvidia vGPU mode). 
In Nvidia GPU pass-through mode, an entire physical GPU is directly assigned to one VM,
bypassing the Nvidia Virtual GPU Manager. In this mode of operation, the GPU is accessed
exclusively by the Nvidia driver running in the VM to which it is assigned.
The GPU is not shared among VMs.
Nvidia Virtual GPU (vGPU) enables multiple virtual machines (VMs) to have simultaneous,
direct access to a single physical GPU, using the same Nvidia graphics drivers that are
deployed on non-virtualized operating systems. By doing this, Nvidia vGPU provides VMs
with unparalleled graphics performance, compute performance, and application compatibility,
together with the cost-effectiveness and scalability brought about by sharing a GPU
among multiple workloads.
| Technology | Description | Behaviour | Detail |
| --- | --- | --- | --- |
| Nvidia GPU pass-through mode | GPU passthrough | Physical GPU assigned to a single VM | Direct GPU assignment to VM without limitation |
| Nvidia vGPU mode | GPU sharing | Physical GPU shared by multiple VMs | Mediated passthrough |
## Hardware Requirements
Nvidia GPUs Recommended for Virtualization:
- Nvidia Tesla (T4, M10, P6, V100 or newer)
- Nvidia Quadro RTX 6000/8000
## Host BIOS Requirements
Some hardware requires a larger PCI BARs window, for example, Nvidia Tesla P100, K40m
```
$ lspci -s 04:00.0 -vv | grep Region
Region 0: Memory at c6000000 (32-bit, non-prefetchable) [size=16M]
Region 1: Memory at 383800000000 (64-bit, prefetchable) [size=16G] #above 4G
Region 3: Memory at 383c00000000 (64-bit, prefetchable) [size=32M]
```
For large BARs devices, MMIO mapping above 4G address space should be `enabled`
in the PCI configuration of the BIOS.
Some hardware vendors use a different name for this setting in the BIOS, such as:
- Above 4G Decoding
- Memory Hole for PCI MMIO
- Memory Mapped I/O above 4GB
The following steps outline the workflow for using an Nvidia GPU with Kata.
## Host Kernel Requirements
The following configurations need to be enabled on your host kernel:
- `CONFIG_VFIO`
- `CONFIG_VFIO_IOMMU_TYPE1`
- `CONFIG_VFIO_MDEV`
- `CONFIG_VFIO_MDEV_DEVICE`
- `CONFIG_VFIO_PCI`
Your host kernel needs to be booted with `intel_iommu=on` on the kernel command line.
## Install and configure Kata Containers
To use non-large BARs devices (for example, Nvidia Tesla T4), you need Kata version 1.3.0 or above.
Follow the [Kata Containers setup instructions](../install/README.md)
to install the latest version of Kata.
To use large BARs devices (for example, Nvidia Tesla P100), you need Kata version 1.11.0 or above.
The following settings in the Kata `configuration.toml` file are known to work:
Hotplug for PCI devices by `acpi_pcihp` (Linux's ACPI PCI Hotplug driver):
```
machine_type = "q35"
hotplug_vfio_on_root_bus = false
```
Hotplug for PCIe devices by `pciehp` (Linux's PCIe Hotplug driver):
```
machine_type = "q35"
hotplug_vfio_on_root_bus = true
pcie_root_port = 1
```
## Build Kata Containers kernel with GPU support
The default guest kernel installed with Kata Containers does not provide GPU support.
To use an Nvidia GPU with Kata Containers, you need to build a kernel with the
necessary GPU support.
The following kernel config options need to be enabled:
```
# Support PCI/PCIe device hotplug (Required for large BARs device)
CONFIG_HOTPLUG_PCI_PCIE=y
# Support for loading modules (Required for loading Nvidia drivers)
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# Enable the MMIO access method for PCIe devices (Required for large BARs device)
CONFIG_PCI_MMCONFIG=y
```
The following kernel config options need to be disabled:
```
# Disable Open Source Nvidia driver nouveau
# It conflicts with Nvidia official driver
CONFIG_DRM_NOUVEAU=n
```
> **Note**: `CONFIG_DRM_NOUVEAU` is normally disabled by default.
> It is worth checking that it is not enabled in your kernel configuration to prevent any conflicts.
Build the Kata Containers kernel with the previous config options,
using the instructions described in [Building Kata Containers kernel](../../tools/packaging/kernel).
For further details on building and installing guest kernels,
see [the developer guide](../Developer-Guide.md#install-guest-kernel-images).
There is an easy way to build a guest kernel that supports Nvidia GPU:
```
## Build guest kernel with ../../tools/packaging/kernel
# Prepare (download guest kernel source, generate .config)
$ ./build-kernel.sh -v 4.19.86 -g nvidia -f setup
# Build guest kernel
$ ./build-kernel.sh -v 4.19.86 -g nvidia build
# Install guest kernel
$ sudo -E ./build-kernel.sh -v 4.19.86 -g nvidia install
/usr/share/kata-containers/vmlinux-nvidia-gpu.container -> vmlinux-4.19.86-70-nvidia-gpu
/usr/share/kata-containers/vmlinuz-nvidia-gpu.container -> vmlinuz-4.19.86-70-nvidia-gpu
```
To build Nvidia Driver in Kata container, `kernel-devel` is required.
This is a way to generate rpm packages for `kernel-devel`:
```
$ cd kata-linux-4.19.86-68
$ make rpm-pkg
Output RPMs:
~/rpmbuild/RPMS/x86_64/kernel-devel-4.19.86_nvidia_gpu-1.x86_64.rpm
```
> **Note**:
> - `kernel-devel` should be installed in the Kata container before running the Nvidia driver installer.
> - Run `make deb-pkg` to build the deb package.
Before using the new guest kernel, please update the `kernel` parameter in `configuration.toml`.
```
kernel = "/usr/share/kata-containers/vmlinuz-nvidia-gpu.container"
```
## Nvidia GPU pass-through mode with Kata Containers
Use the following steps to pass an Nvidia GPU device in pass-through mode with Kata:
1. Find the Bus-Device-Function (BDF) for GPU device on host:
```
$ sudo lspci -nn -D | grep -i nvidia
0000:04:00.0 3D controller [0302]: NVIDIA Corporation Device [10de:15f8] (rev a1)
0000:84:00.0 3D controller [0302]: NVIDIA Corporation Device [10de:15f8] (rev a1)
```
> PCI address `0000:04:00.0` is assigned to the hardware GPU device.
> `10de:15f8` is the device ID of the hardware GPU device.
2. Find the IOMMU group for the GPU device:
```
$ BDF="0000:04:00.0"
$ readlink -e /sys/bus/pci/devices/$BDF/iommu_group
/sys/kernel/iommu_groups/45
```
The previous output shows that the GPU belongs to IOMMU group 45.
3. Check the IOMMU group number under `/dev/vfio`:
```
$ ls -l /dev/vfio
total 0
crw------- 1 root root 248, 0 Feb 28 09:57 45
crw------- 1 root root 248, 1 Feb 28 09:57 54
crw-rw-rw- 1 root root 10, 196 Feb 28 09:57 vfio
```
4. Start a Kata container with GPU device:
```
$ sudo docker run -it --runtime=kata-runtime --cap-add=ALL --device /dev/vfio/45 centos /bin/bash
```
5. Run `lspci` within the container to verify the GPU device is seen in the list
of the PCI devices. Note the vendor-device id of the GPU (`10de:15f8`) in the `lspci` output.
```
$ lspci -nn -D | grep '10de:15f8'
0000:01:01.0 3D controller [0302]: NVIDIA Corporation GP100GL [Tesla P100 PCIe 16GB] [10de:15f8] (rev a1)
```
6. Additionally, you can check the PCI BARs space of the Nvidia GPU device in the container:
```
$ lspci -s 01:01.0 -vv | grep Region
Region 0: Memory at c0000000 (32-bit, non-prefetchable) [disabled] [size=16M]
Region 1: Memory at 4400000000 (64-bit, prefetchable) [disabled] [size=16G]
Region 3: Memory at 4800000000 (64-bit, prefetchable) [disabled] [size=32M]
```
> **Note**: If you see a message similar to the above, the BAR space of the Nvidia
> GPU has been successfully allocated.
## Nvidia vGPU mode with Kata Containers
Nvidia vGPU is a licensed product on all supported GPU boards. A software license
is required to enable all vGPU features within the guest VM.
> **Note**: No suitable test environment was available, so this mode is not documented here.
## Install Nvidia Driver in Kata Containers
Download the official Nvidia driver from
[https://www.nvidia.com/Download/index.aspx](https://www.nvidia.com/Download/index.aspx),
for example `NVIDIA-Linux-x86_64-418.87.01.run`.
Install the `kernel-devel` package (generated in the previous steps) for the guest kernel:
```
$ sudo rpm -ivh kernel-devel-4.19.86_gpu-1.x86_64.rpm
```
Here is an example to extract, compile and install Nvidia driver:
```
## Extract
$ sh ./NVIDIA-Linux-x86_64-418.87.01.run -x
## Compile and install (It will take some time)
$ cd NVIDIA-Linux-x86_64-418.87.01
$ sudo ./nvidia-installer -a -q --ui=none \
--no-cc-version-check \
--no-opengl-files --no-install-libglvnd \
--kernel-source-path=/usr/src/kernels/`uname -r`
```
Or just run one command line:
```
$ sudo sh ./NVIDIA-Linux-x86_64-418.87.01.run -a -q --ui=none \
--no-cc-version-check \
--no-opengl-files --no-install-libglvnd \
--kernel-source-path=/usr/src/kernels/`uname -r`
```
To view detailed logs of the installer:
```
$ tail -f /var/log/nvidia-installer.log
```
Load Nvidia driver module manually
```
# Optional: generate modules.dep and map files for Nvidia driver
$ sudo depmod
# Load module
$ sudo modprobe nvidia-drm
# Check module
$ lsmod | grep nvidia
nvidia_drm 45056 0
nvidia_modeset 1093632 1 nvidia_drm
nvidia 18202624 1 nvidia_modeset
drm_kms_helper 159744 1 nvidia_drm
drm 364544 3 nvidia_drm,drm_kms_helper
i2c_core 65536 3 nvidia,drm_kms_helper,drm
ipmi_msghandler 49152 1 nvidia
```
Check Nvidia device status with `nvidia-smi`
```
$ nvidia-smi
Tue Mar 3 00:03:49 2020
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.87.01 Driver Version: 418.87.01 CUDA Version: 10.1 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
|===============================+======================+======================|
| 0 Tesla P100-PCIE... Off | 00000000:01:01.0 Off | 0 |
| N/A 27C P0 25W / 250W | 0MiB / 16280MiB | 0% Default |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: GPU Memory |
| GPU PID Type Process name Usage |
|=============================================================================|
| No running processes found |
+-----------------------------------------------------------------------------+
```
## References
- [Configuring a VM for GPU Pass-Through by Using the QEMU Command Line](https://docs.nvidia.com/grid/latest/grid-vgpu-user-guide/index.html#using-gpu-pass-through-red-hat-el-qemu-cli)
- https://gitlab.com/nvidia/container-images/driver/-/tree/master
- https://github.com/NVIDIA/nvidia-docker/wiki/Driver-containers


@@ -312,7 +312,7 @@ working properly with the Kata Containers VM.
### Build OpenSSL Intel® QAT engine container
Use the OpenSSL Intel® QAT [Dockerfile](https://github.com/intel/intel-device-plugins-for-kubernetes/tree/master/demo/openssl-qat-engine)
Use the OpenSSL Intel® QAT [Dockerfile](https://github.com/intel/intel-device-plugins-for-kubernetes/tree/main/demo/openssl-qat-engine)
to build a container image with an optimized OpenSSL engine for
Intel® QAT. Using `docker build` with the Kata Containers runtime can sometimes
have issues. Therefore, make sure that `runc` is the default Docker container
@@ -444,7 +444,7 @@ $ sudo docker save -o openssl-qat-engine.tar openssl-qat-engine:latest
$ sudo ctr -n=k8s.io images import openssl-qat-engine.tar
```
The [Intel® QAT Plugin](https://github.com/intel/intel-device-plugins-for-kubernetes/blob/master/cmd/qat_plugin/README.md)
The [Intel® QAT Plugin](https://github.com/intel/intel-device-plugins-for-kubernetes/blob/main/cmd/qat_plugin/README.md)
needs to be started so that the virtual functions can be discovered and
used by Kubernetes.


@@ -22,21 +22,35 @@ $ sudo snap install kata-containers --classic
## Build and install snap image
Run next command at the root directory of the packaging repository.
Run the command below which will use the packaging Makefile to build the snap image:
```sh
$ make snap
$ make -C tools/packaging snap
```
> **Warning:**
>
> By default, `snapcraft` will create a clean virtual machine
> environment to build the snap in using the `multipass` tool.
>
> However, `multipass` is silently disabled when `--destructive-mode` is
> used.
>
> Since building the Kata Containers package currently requires
> `--destructive-mode`, the snap will be built using the host
> environment. To avoid parts of the build auto-detecting additional
> features to enable (for example for QEMU), we recommend that you
> only run the snap build in a minimal host environment.
To install the resulting snap image, snap must be put in [classic mode][3] and the
security confinement must be disabled (*--classic*). Also since the resulting snap
has not been signed the verification of signature must be omitted (*--dangerous*).
security confinement must be disabled (`--classic`). Also since the resulting snap
has not been signed the verification of signature must be omitted (`--dangerous`).
```sh
$ sudo snap install --classic --dangerous kata-containers_[VERSION]_[ARCH].snap
$ sudo snap install --classic --dangerous "kata-containers_${version}_${arch}.snap"
```
Replace `VERSION` with the current version of Kata Containers and `ARCH` with
Replace `${version}` with the current version of Kata Containers and `${arch}` with
the system architecture.
## Configure Kata Containers
@@ -76,12 +90,12 @@ then a new configuration file can be [created](#configure-kata-containers)
and [configured][7].
[1]: https://docs.snapcraft.io/snaps/intro
[2]: ../docs/design/architecture/README.md#root-filesystem-image
[2]: ../../docs/design/architecture/README.md#root-filesystem-image
[3]: https://docs.snapcraft.io/reference/confinement#classic
[4]: https://github.com/kata-containers/runtime#configuration
[4]: https://github.com/kata-containers/kata-containers/tree/main/src/runtime#configuration
[5]: https://docs.docker.com/engine/reference/commandline/dockerd
[6]: ../docs/install/docker/ubuntu-docker-install.md
[7]: ../docs/Developer-Guide.md#configure-to-use-initrd-or-rootfs-image
[6]: ../../docs/install/docker/ubuntu-docker-install.md
[7]: ../../docs/Developer-Guide.md#configure-to-use-initrd-or-rootfs-image
[8]: https://snapcraft.io/kata-containers
[9]: ../docs/Developer-Guide.md#run-kata-containers-with-docker
[10]: ../docs/Developer-Guide.md#run-kata-containers-with-kubernetes
[9]: ../../docs/Developer-Guide.md#run-kata-containers-with-docker
[10]: ../../docs/Developer-Guide.md#run-kata-containers-with-kubernetes

snap/local/snap-common.sh (new file, 114 lines)

@@ -0,0 +1,114 @@
#!/usr/bin/env bash
#
# Copyright (c) 2022 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Description: Idempotent script to be sourced by all parts in a
# snapcraft config file.
set -o errexit
set -o nounset
set -o pipefail
# XXX: Bash-specific code. zsh doesn't support this option and that *does*
# matter if this script is run sourced... since it'll be using zsh! ;)
[ -n "$BASH_VERSION" ] && set -o errtrace
[ -n "${DEBUG:-}" ] && set -o xtrace
die()
{
# Print the error and abort: callers rely on die to stop a broken build
echo >&2 "ERROR: $0: $*"
exit 1
}
[ -n "${SNAPCRAFT_STAGE:-}" ] ||\
die "must be sourced from a snapcraft config file"
snap_yq_version=3.4.1
snap_common_install_yq()
{
export yq="${SNAPCRAFT_STAGE}/bin/yq"
local yq_pkg
yq_pkg="github.com/mikefarah/yq"
local yq_url
yq_url="https://${yq_pkg}/releases/download/${snap_yq_version}/yq_${goos}_${goarch}"
curl -o "${yq}" -L "${yq_url}"
chmod +x "${yq}"
}
# Function that should be called for each snap "part" in
# snapcraft.yaml.
snap_common_main()
{
# Architecture
arch="$(uname -m)"
case "${arch}" in
aarch64)
goarch="arm64"
qemu_arch="${arch}"
;;
ppc64le)
goarch="ppc64le"
qemu_arch="ppc64"
;;
s390x)
goarch="${arch}"
qemu_arch="${arch}"
;;
x86_64)
goarch="amd64"
qemu_arch="${arch}"
;;
*) die "unsupported architecture: ${arch}" ;;
esac
dpkg_arch=$(dpkg --print-architecture)
# golang
#
# We need the O/S name in golang format, but since we don't
# know if the godeps part has run, we don't know if golang is
# available yet, hence fall back to a standard system command.
goos="$(go env GOOS &>/dev/null || true)"
[ -z "$goos" ] && goos=$(uname -s|tr '[A-Z]' '[a-z]')
export GOROOT="${SNAPCRAFT_STAGE}"
export GOPATH="${GOROOT}/gopath"
export GO111MODULE="auto"
mkdir -p "${GOPATH}/bin"
export PATH="${GOPATH}/bin:${PATH}"
# Proxy
export http_proxy="${http_proxy:-}"
export https_proxy="${https_proxy:-}"
# Binaries
mkdir -p "${SNAPCRAFT_STAGE}/bin"
export PATH="$PATH:${SNAPCRAFT_STAGE}/bin"
# YAML query tool
export yq="${SNAPCRAFT_STAGE}/bin/yq"
# Kata paths
export kata_dir=$(printf "%s/src/github.com/%s/%s" \
"${GOPATH}" \
"${SNAPCRAFT_PROJECT_NAME}" \
"${SNAPCRAFT_PROJECT_NAME}")
export versions_file="${kata_dir}/versions.yaml"
[ -n "${yq:-}" ] && [ -x "${yq:-}" ] || snap_common_install_yq
}
snap_common_main


@@ -1,4 +1,5 @@
name: kata-containers
website: https://github.com/kata-containers/kata-containers
summary: Build lightweight VMs that seamlessly plug into the containers ecosystem
description: |
Kata Containers is an open source project and community working to build a
@@ -18,20 +19,18 @@ parts:
- git
- git-extras
override-pull: |
version="9999"
kata_url="https://github.com/kata-containers/kata-containers"
source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh"
if echo "${GITHUB_REF}" | grep -q -E "^refs/tags"; then
version=$(echo ${GITHUB_REF} | cut -d/ -f3)
version="9999"
if echo "${GITHUB_REF:-}" | grep -q -E "^refs/tags"; then
version=$(echo ${GITHUB_REF:-} | cut -d/ -f3)
git checkout ${version}
fi
snapcraftctl set-grade "stable"
snapcraftctl set-version "${version}"
# setup GOPATH - this repo dir should be there
export GOPATH=${SNAPCRAFT_STAGE}/gopath
kata_dir=${GOPATH}/src/github.com/${SNAPCRAFT_PROJECT_NAME}/${SNAPCRAFT_PROJECT_NAME}
mkdir -p $(dirname ${kata_dir})
ln -sf $(realpath "${SNAPCRAFT_STAGE}/..") ${kata_dir}
@@ -43,27 +42,12 @@ parts:
build-packages:
- curl
override-build: |
source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh"
# put everything in stage
cd ${SNAPCRAFT_STAGE}
cd "${SNAPCRAFT_STAGE}"
yq_path="./yq"
yq_pkg="github.com/mikefarah/yq"
goos="linux"
case "$(uname -m)" in
aarch64) goarch="arm64";;
ppc64le) goarch="ppc64le";;
x86_64) goarch="amd64";;
s390x) goarch="s390x";;
*) echo "unsupported architecture: $(uname -m)"; exit 1;;
esac
yq_version=3.4.1
yq_url="https://${yq_pkg}/releases/download/${yq_version}/yq_${goos}_${goarch}"
curl -o "${yq_path}" -L "${yq_url}"
chmod +x "${yq_path}"
kata_dir=gopath/src/github.com/${SNAPCRAFT_PROJECT_NAME}/${SNAPCRAFT_PROJECT_NAME}
version="$(${yq_path} r ${kata_dir}/versions.yaml languages.golang.meta.newest-version)"
version="$(${yq} r ${kata_dir}/versions.yaml languages.golang.meta.newest-version)"
tarfile="go${version}.${goos}-${goarch}.tar.gz"
curl -LO https://golang.org/dl/${tarfile}
tar -xf ${tarfile} --strip-components=1
@@ -80,28 +64,17 @@ parts:
- uidmap
- gnupg2
override-build: |
[ "$(uname -m)" = "ppc64le" ] || [ "$(uname -m)" = "s390x" ] && sudo apt-get --no-install-recommends install -y protobuf-compiler
source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh"
yq=${SNAPCRAFT_STAGE}/yq
[ "${arch}" = "ppc64le" ] || [ "${arch}" = "s390x" ] && sudo apt-get --no-install-recommends install -y protobuf-compiler
# set GOPATH
export GOPATH=${SNAPCRAFT_STAGE}/gopath
kata_dir=${GOPATH}/src/github.com/${SNAPCRAFT_PROJECT_NAME}/${SNAPCRAFT_PROJECT_NAME}
export GOROOT=${SNAPCRAFT_STAGE}
export PATH="${GOROOT}/bin:${PATH}"
export GO111MODULE="auto"
http_proxy=${http_proxy:-""}
https_proxy=${https_proxy:-""}
if [ -n "$http_proxy" ]; then
echo "Setting proxy $http_proxy"
sudo -E systemctl set-environment http_proxy=$http_proxy || true
sudo -E systemctl set-environment https_proxy=$https_proxy || true
sudo -E systemctl set-environment http_proxy="$http_proxy" || true
sudo -E systemctl set-environment https_proxy="$https_proxy" || true
fi
# Copy yq binary. It's used in the container
mkdir -p "${GOPATH}/bin/"
cp -a "${yq}" "${GOPATH}/bin/"
echo "Unmasking docker service"
@@ -112,63 +85,54 @@ parts:
echo "Starting docker"
sudo -E systemctl start docker || true
cd ${kata_dir}/tools/osbuilder
cd "${kata_dir}/tools/osbuilder"
# build image
export AGENT_INIT=yes
export USE_DOCKER=1
export DEBUG=1
arch="$(uname -m)"
initrd_distro=$(${yq} r -X ${kata_dir}/versions.yaml assets.initrd.architecture.${arch}.name)
image_distro=$(${yq} r -X ${kata_dir}/versions.yaml assets.image.architecture.${arch}.name)
case "$arch" in
x86_64)
# In some build systems it's impossible to build a rootfs image, try with the initrd image
sudo -E PATH=$PATH make image DISTRO=${image_distro} || sudo -E PATH=$PATH make initrd DISTRO=${initrd_distro}
sudo -E PATH=$PATH make image DISTRO="${image_distro}" || sudo -E PATH="$PATH" make initrd DISTRO="${initrd_distro}"
;;
aarch64|ppc64le|s390x)
sudo -E PATH=$PATH make initrd DISTRO=${initrd_distro}
sudo -E PATH="$PATH" make initrd DISTRO="${initrd_distro}"
;;
*) echo "unsupported architecture: $(uname -m)"; exit 1;;
*) die "unsupported architecture: ${arch}" ;;
esac
# Install image
kata_image_dir=${SNAPCRAFT_PART_INSTALL}/usr/share/kata-containers
mkdir -p ${kata_image_dir}
cp kata-containers*.img ${kata_image_dir}
kata_image_dir="${SNAPCRAFT_PART_INSTALL}/usr/share/kata-containers"
mkdir -p "${kata_image_dir}"
cp kata-containers*.img "${kata_image_dir}"
runtime:
after: [godeps, image, cloud-hypervisor]
plugin: nil
build-attributes: [no-patchelf]
override-build: |
# set GOPATH
export GOPATH=${SNAPCRAFT_STAGE}/gopath
export GOROOT=${SNAPCRAFT_STAGE}
export PATH="${GOROOT}/bin:${PATH}"
export GO111MODULE="auto"
kata_dir=${GOPATH}/src/github.com/${SNAPCRAFT_PROJECT_NAME}/${SNAPCRAFT_PROJECT_NAME}
source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh"
cd ${kata_dir}/src/runtime
cd "${kata_dir}/src/runtime"
# setup arch
arch=$(uname -m)
if [ ${arch} = "ppc64le" ]; then
arch="ppc64"
fi
qemu_cmd="qemu-system-${qemu_arch}"
# build and install runtime
make \
PREFIX=/snap/${SNAPCRAFT_PROJECT_NAME}/current/usr \
PREFIX="/snap/${SNAPCRAFT_PROJECT_NAME}/current/usr" \
SKIP_GO_VERSION_CHECK=1 \
QEMUCMD=qemu-system-$arch
QEMUCMD="${qemu_cmd}"
make install \
PREFIX=/usr \
DESTDIR=${SNAPCRAFT_PART_INSTALL} \
DESTDIR="${SNAPCRAFT_PART_INSTALL}" \
SKIP_GO_VERSION_CHECK=1 \
QEMUCMD=qemu-system-$arch
QEMUCMD="${qemu_cmd}"
if [ ! -f ${SNAPCRAFT_PART_INSTALL}/../../image/install/usr/share/kata-containers/kata-containers.img ]; then
sed -i -e "s|^image =.*|initrd = \"/snap/${SNAPCRAFT_PROJECT_NAME}/current/usr/share/kata-containers/kata-containers-initrd.img\"|" \
@@ -185,44 +149,37 @@ parts:
- bison
- flex
override-build: |
yq=${SNAPCRAFT_STAGE}/yq
export PATH="${PATH}:${SNAPCRAFT_STAGE}"
export GOPATH=${SNAPCRAFT_STAGE}/gopath
kata_dir=${GOPATH}/src/github.com/${SNAPCRAFT_PROJECT_NAME}/${SNAPCRAFT_PROJECT_NAME}
versions_file="${kata_dir}/versions.yaml"
source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh"
kernel_version="$(${yq} r $versions_file assets.kernel.version)"
#Remove extra 'v'
kernel_version=${kernel_version#v}
kernel_version="${kernel_version#v}"
[ "$(uname -m)" = "s390x" ] && sudo apt-get --no-install-recommends install -y libssl-dev
[ "${arch}" = "s390x" ] && sudo apt-get --no-install-recommends install -y libssl-dev
export GOPATH=${SNAPCRAFT_STAGE}/gopath
export GO111MODULE="auto"
kata_dir=${GOPATH}/src/github.com/${SNAPCRAFT_PROJECT_NAME}/${SNAPCRAFT_PROJECT_NAME}
cd ${kata_dir}/tools/packaging/kernel
cd "${kata_dir}/tools/packaging/kernel"
kernel_dir_prefix="kata-linux-"
# Setup and build kernel
./build-kernel.sh -v ${kernel_version} -d setup
./build-kernel.sh -v "${kernel_version}" -d setup
cd ${kernel_dir_prefix}*
make -j $(($(nproc)-1)) EXTRAVERSION=".container"
kernel_suffix=${kernel_version}.container
kata_kernel_dir=${SNAPCRAFT_PART_INSTALL}/usr/share/kata-containers
mkdir -p ${kata_kernel_dir}
kernel_suffix="${kernel_version}.container"
kata_kernel_dir="${SNAPCRAFT_PART_INSTALL}/usr/share/kata-containers"
mkdir -p "${kata_kernel_dir}"
# Install bz kernel
make install INSTALL_PATH=${kata_kernel_dir} EXTRAVERSION=".container" || true
vmlinuz_name=vmlinuz-${kernel_suffix}
ln -sf ${vmlinuz_name} ${kata_kernel_dir}/vmlinuz.container
make install INSTALL_PATH="${kata_kernel_dir}" EXTRAVERSION=".container" || true
vmlinuz_name="vmlinuz-${kernel_suffix}"
ln -sf "${vmlinuz_name}" "${kata_kernel_dir}/vmlinuz.container"
# Install raw kernel
vmlinux_path=vmlinux
[ "$(uname -m)" = "s390x" ] && vmlinux_path=arch/s390/boot/compressed/vmlinux
vmlinux_name=vmlinux-${kernel_suffix}
cp ${vmlinux_path} ${kata_kernel_dir}/${vmlinux_name}
ln -sf ${vmlinux_name} ${kata_kernel_dir}/vmlinux.container
vmlinux_path="vmlinux"
[ "${arch}" = "s390x" ] && vmlinux_path="arch/s390/boot/compressed/vmlinux"
vmlinux_name="vmlinux-${kernel_suffix}"
cp "${vmlinux_path}" "${kata_kernel_dir}/${vmlinux_name}"
ln -sf "${vmlinux_name}" "${kata_kernel_dir}/vmlinux.container"
qemu:
plugin: make
@@ -249,12 +206,8 @@ parts:
- libselinux1-dev
- ninja-build
override-build: |
yq=${SNAPCRAFT_STAGE}/yq
export GOPATH=${SNAPCRAFT_STAGE}/gopath
export GO111MODULE="auto"
kata_dir=${GOPATH}/src/github.com/${SNAPCRAFT_PROJECT_NAME}/${SNAPCRAFT_PROJECT_NAME}
source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh"
versions_file="${kata_dir}/versions.yaml"
branch="$(${yq} r ${versions_file} assets.hypervisor.qemu.version)"
url="$(${yq} r ${versions_file} assets.hypervisor.qemu.url)"
commit=""
@@ -262,11 +215,11 @@ parts:
patches_version_dir="${kata_dir}/tools/packaging/qemu/patches/tag_patches/${branch}"
# download source
qemu_dir=${SNAPCRAFT_STAGE}/qemu
qemu_dir="${SNAPCRAFT_STAGE}/qemu"
rm -rf "${qemu_dir}"
git clone --depth 1 --branch ${branch} --single-branch ${url} "${qemu_dir}"
cd ${qemu_dir}
[ -z "${commit}" ] || git checkout ${commit}
cd "${qemu_dir}"
[ -z "${commit}" ] || git checkout "${commit}"
[ -n "$(ls -A ui/keycodemapdb)" ] || git clone --depth 1 https://github.com/qemu/keycodemapdb ui/keycodemapdb/
[ -n "$(ls -A capstone)" ] || git clone --depth 1 https://github.com/qemu/capstone capstone
@@ -277,10 +230,10 @@ parts:
${kata_dir}/tools/packaging/scripts/apply_patches.sh "${patches_version_dir}"
# Only x86_64 supports libpmem
[ "$(uname -m)" = "x86_64" ] && sudo apt-get --no-install-recommends install -y apt-utils ca-certificates libpmem-dev
[ "${arch}" = "x86_64" ] && sudo apt-get --no-install-recommends install -y apt-utils ca-certificates libpmem-dev
configure_hypervisor=${kata_dir}/tools/packaging/scripts/configure-hypervisor.sh
chmod +x ${configure_hypervisor}
configure_hypervisor="${kata_dir}/tools/packaging/scripts/configure-hypervisor.sh"
chmod +x "${configure_hypervisor}"
# static build. The --prefix, --libdir, --libexecdir, --datadir arguments are
# based on PREFIX and set by configure-hypervisor.sh
echo "$(PREFIX=/snap/${SNAPCRAFT_PROJECT_NAME}/current/usr ${configure_hypervisor} -s kata-qemu) \
@@ -290,17 +243,17 @@ parts:
# Copy QEMU configurations (Kconfigs)
case "${branch}" in
"v5.1.0")
cp -a ${kata_dir}/tools/packaging/qemu/default-configs/* default-configs
cp -a "${kata_dir}"/tools/packaging/qemu/default-configs/* default-configs
;;
*)
cp -a ${kata_dir}/tools/packaging/qemu/default-configs/* configs/devices/
cp -a "${kata_dir}"/tools/packaging/qemu/default-configs/* configs/devices/
;;
esac
# build and install
make -j $(($(nproc)-1))
make install DESTDIR=${SNAPCRAFT_PART_INSTALL}
make install DESTDIR="${SNAPCRAFT_PART_INSTALL}"
prime:
- -snap/
- -usr/bin/qemu-ga
@@ -316,26 +269,66 @@ parts:
# Hack: move qemu to /
"snap/kata-containers/current/": "./"
virtiofsd:
plugin: nil
after: [godeps]
override-build: |
source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh"
# Currently, only one platform uses the new rust virtiofsd. The
# others make use of QEMU's C implementation.
#
# See "tools/packaging/scripts/configure-hypervisor.sh".
if [ "${arch}" = 'x86_64' ]
then
echo "INFO: Building rust version of virtiofsd"
else
echo "INFO: Building QEMU's C version of virtiofsd"
# Handled by the 'qemu' part, so nothing more to do here.
exit 0
fi
cd "${kata_dir}"
# Download the rust implementation of virtiofsd
tools/packaging/static-build/virtiofsd/build-static-virtiofsd.sh
sudo install \
--owner='root' \
--group='root' \
--mode=0755 \
-D \
--target-directory="${SNAPCRAFT_PART_INSTALL}/usr/libexec/" \
virtiofsd/virtiofsd
cloud-hypervisor:
plugin: nil
after: [godeps]
override-build: |
arch=$(uname -m)
if [ "{$arch}" == "aarch64" ] || [ "${arch}" == "x64_64" ]; then
source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh"
if [ "${arch}" == "aarch64" ] || [ "${arch}" == "x86_64" ]; then
sudo apt-get -y update
sudo apt-get -y install ca-certificates curl gnupg lsb-release
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --batch --yes --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
curl -fsSL https://download.docker.com/linux/ubuntu/gpg |\
sudo gpg --batch --yes --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
distro_codename=$(lsb_release -cs)
echo "deb [arch=${dpkg_arch} signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu ${distro_codename} stable" |\
sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
sudo apt-get -y update
sudo apt-get -y install docker-ce docker-ce-cli containerd.io
sudo systemctl start docker.socket
export GOPATH=${SNAPCRAFT_STAGE}/gopath
kata_dir=${GOPATH}/src/github.com/${SNAPCRAFT_PROJECT_NAME}/${SNAPCRAFT_PROJECT_NAME}
cd ${kata_dir}
cd "${SNAPCRAFT_PROJECT_DIR}"
sudo -E NO_TTY=true make cloud-hypervisor-tarball
tar xvJpf build/kata-static-cloud-hypervisor.tar.xz -C /tmp/
install -D /tmp/opt/kata/bin/cloud-hypervisor ${SNAPCRAFT_PART_INSTALL}/usr/bin/cloud-hypervisor
tarfile="${SNAPCRAFT_PROJECT_DIR}/tools/packaging/kata-deploy/local-build/build/kata-static-cloud-hypervisor.tar.xz"
tmpdir=$(mktemp -d)
tar -xvJpf "${tarfile}" -C "${tmpdir}"
install -D "${tmpdir}/opt/kata/bin/cloud-hypervisor" "${SNAPCRAFT_PART_INSTALL}/usr/bin/cloud-hypervisor"
rm -rf "${tmpdir}"
fi
apps:

src/agent/Cargo.lock (generated)

@@ -1370,6 +1370,7 @@ dependencies = [
"async-trait",
"capctl",
"caps",
"cfg-if 0.1.10",
"cgroups-rs",
"futures",
"inotify",


@@ -76,3 +76,8 @@ lto = true
[features]
seccomp = ["rustjail/seccomp"]
standard-oci-runtime = ["rustjail/standard-oci-runtime"]
[[bin]]
name = "kata-agent"
path = "src/main.rs"


@@ -14,10 +14,6 @@ PROJECT_COMPONENT = kata-agent
TARGET = $(PROJECT_COMPONENT)
SOURCES := \
$(shell find . 2>&1 | grep -E '.*\.rs$$') \
Cargo.toml
VERSION_FILE := ./VERSION
VERSION := $(shell grep -v ^\# $(VERSION_FILE))
COMMIT_NO := $(shell git rev-parse HEAD 2>/dev/null || true)
@@ -37,8 +33,16 @@ ifeq ($(SECCOMP),yes)
override EXTRA_RUSTFEATURES += seccomp
endif
##VAR STANDARD_OCI_RUNTIME=yes|no define if agent enables standard oci runtime feature
STANDARD_OCI_RUNTIME := no
# Enable standard oci runtime feature of rust build
ifeq ($(STANDARD_OCI_RUNTIME),yes)
override EXTRA_RUSTFEATURES += standard-oci-runtime
endif
ifneq ($(EXTRA_RUSTFEATURES),)
override EXTRA_RUSTFEATURES := --features $(EXTRA_RUSTFEATURES)
override EXTRA_RUSTFEATURES := --features "$(EXTRA_RUSTFEATURES)"
endif
include ../../utils.mk
@@ -108,14 +112,14 @@ $(TARGET): $(GENERATED_CODE) logging-crate-tests $(TARGET_PATH)
logging-crate-tests:
make -C $(CWD)/../libs/logging
$(TARGET_PATH): $(SOURCES) | show-summary
$(TARGET_PATH): show-summary
@RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE) $(EXTRA_RUSTFEATURES)
$(GENERATED_FILES): %: %.in
@sed $(foreach r,$(GENERATED_REPLACEMENTS),-e 's|@$r@|$($r)|g') "$<" > "$@"
##TARGET optimize: optimized build
optimize: $(SOURCES) | show-summary show-header
optimize: show-summary show-header
@RUSTFLAGS="-C link-arg=-s $(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE) $(EXTRA_RUSTFEATURES)
##TARGET install: install agent


@@ -25,6 +25,7 @@ path-absolutize = "1.2.0"
anyhow = "1.0.32"
cgroups = { package = "cgroups-rs", version = "0.2.8" }
rlimit = "0.5.3"
cfg-if = "0.1.0"
tokio = { version = "1.2.0", features = ["sync", "io-util", "process", "time", "macros"] }
futures = "0.3.17"
@@ -38,3 +39,4 @@ tempfile = "3.1.0"
[features]
seccomp = ["libseccomp"]
standard-oci-runtime = []


@@ -391,7 +391,7 @@ fn set_memory_resources(cg: &cgroups::Cgroup, memory: &LinuxMemory, update: bool
if let Some(swappiness) = memory.swappiness {
if (0..=100).contains(&swappiness) {
mem_controller.set_swappiness(swappiness as u64)?;
mem_controller.set_swappiness(swappiness)?;
} else {
return Err(anyhow!(
"invalid value:{}. valid memory swappiness range is 0-100",
@@ -590,9 +590,9 @@ fn get_cpuacct_stats(cg: &cgroups::Cgroup) -> SingularPtrField<CpuUsage> {
let h = lines_to_map(&cpuacct.stat);
let usage_in_usermode =
(((*h.get("user").unwrap() * NANO_PER_SECOND) as f64) / *CLOCK_TICKS) as u64;
(((*h.get("user").unwrap_or(&0) * NANO_PER_SECOND) as f64) / *CLOCK_TICKS) as u64;
let usage_in_kernelmode =
(((*h.get("system").unwrap() * NANO_PER_SECOND) as f64) / *CLOCK_TICKS) as u64;
(((*h.get("system").unwrap_or(&0) * NANO_PER_SECOND) as f64) / *CLOCK_TICKS) as u64;
let total_usage = cpuacct.usage;
@@ -623,9 +623,9 @@ fn get_cpuacct_stats(cg: &cgroups::Cgroup) -> SingularPtrField<CpuUsage> {
let cpu_controller: &CpuController = get_controller_or_return_singular_none!(cg);
let stat = cpu_controller.cpu().stat;
let h = lines_to_map(&stat);
let usage_in_usermode = *h.get("user_usec").unwrap();
let usage_in_kernelmode = *h.get("system_usec").unwrap();
let total_usage = *h.get("usage_usec").unwrap();
let usage_in_usermode = *h.get("user_usec").unwrap_or(&0);
let usage_in_kernelmode = *h.get("system_usec").unwrap_or(&0);
let total_usage = *h.get("usage_usec").unwrap_or(&0);
let percpu_usage = vec![];
SingularPtrField::some(CpuUsage {


@@ -0,0 +1,79 @@
// SPDX-License-Identifier: Apache-2.0
//
// Copyright 2021 Sony Group Corporation
//
use anyhow::{anyhow, Result};
use nix::errno::Errno;
use nix::pty;
use nix::sys::{socket, uio};
use nix::unistd::{self, dup2};
use std::os::unix::io::{AsRawFd, RawFd};
use std::path::Path;
pub fn setup_console_socket(csocket_path: &str) -> Result<Option<RawFd>> {
if csocket_path.is_empty() {
return Ok(None);
}
let socket_fd = socket::socket(
socket::AddressFamily::Unix,
socket::SockType::Stream,
socket::SockFlag::empty(),
None,
)?;
match socket::connect(
socket_fd,
&socket::SockAddr::Unix(socket::UnixAddr::new(Path::new(csocket_path))?),
) {
Ok(()) => Ok(Some(socket_fd)),
Err(errno) => Err(anyhow!("failed to open console fd: {}", errno)),
}
}
pub fn setup_master_console(socket_fd: RawFd) -> Result<()> {
let pseudo = pty::openpty(None, None)?;
let pty_name: &[u8] = b"/dev/ptmx";
let iov = [uio::IoVec::from_slice(pty_name)];
let fds = [pseudo.master];
let cmsg = socket::ControlMessage::ScmRights(&fds);
socket::sendmsg(socket_fd, &iov, &[cmsg], socket::MsgFlags::empty(), None)?;
unistd::setsid()?;
let ret = unsafe { libc::ioctl(pseudo.slave, libc::TIOCSCTTY) };
Errno::result(ret).map_err(|e| anyhow!(e).context("ioctl TIOCSCTTY"))?;
dup2(pseudo.slave, std::io::stdin().as_raw_fd())?;
dup2(pseudo.slave, std::io::stdout().as_raw_fd())?;
dup2(pseudo.slave, std::io::stderr().as_raw_fd())?;
unistd::close(socket_fd)?;
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use std::os::unix::net::UnixListener;
use tempfile::{self, tempdir};
const CONSOLE_SOCKET: &str = "console-socket";
#[test]
fn test_setup_console_socket() {
let dir = tempdir()
.map_err(|e| anyhow!(e).context("tempdir failed"))
.unwrap();
let socket_path = dir.path().join(CONSOLE_SOCKET);
let _listener = UnixListener::bind(&socket_path).unwrap();
let ret = setup_console_socket(socket_path.to_str().unwrap());
assert!(ret.is_ok());
}
}


@@ -23,6 +23,8 @@ use crate::cgroups::fs::Manager as FsManager;
#[cfg(test)]
use crate::cgroups::mock::Manager as FsManager;
use crate::cgroups::Manager;
#[cfg(feature = "standard-oci-runtime")]
use crate::console;
use crate::log_child;
use crate::process::Process;
#[cfg(feature = "seccomp")]
@@ -40,7 +42,7 @@ use nix::pty;
use nix::sched::{self, CloneFlags};
use nix::sys::signal::{self, Signal};
use nix::sys::stat::{self, Mode};
use nix::unistd::{self, fork, ForkResult, Gid, Pid, Uid};
use nix::unistd::{self, fork, ForkResult, Gid, Pid, Uid, User};
use std::os::unix::fs::MetadataExt;
use std::os::unix::io::AsRawFd;
@@ -62,9 +64,7 @@ use rlimit::{setrlimit, Resource, Rlim};
use tokio::io::AsyncBufReadExt;
use tokio::sync::Mutex;
use crate::utils;
const EXEC_FIFO_FILENAME: &str = "exec.fifo";
pub const EXEC_FIFO_FILENAME: &str = "exec.fifo";
const INIT: &str = "INIT";
const NO_PIVOT: &str = "NO_PIVOT";
@@ -74,6 +74,7 @@ const CLOG_FD: &str = "CLOG_FD";
const FIFO_FD: &str = "FIFO_FD";
const HOME_ENV_KEY: &str = "HOME";
const PIDNS_FD: &str = "PIDNS_FD";
const CONSOLE_SOCKET_FD: &str = "CONSOLE_SOCKET_FD";
#[derive(Debug)]
pub struct ContainerStatus {
@@ -82,7 +83,7 @@ pub struct ContainerStatus {
}
impl ContainerStatus {
fn new() -> Self {
pub fn new() -> Self {
ContainerStatus {
pre_status: ContainerState::Created,
cur_status: ContainerState::Created,
@@ -99,6 +100,12 @@ impl ContainerStatus {
}
}
impl Default for ContainerStatus {
fn default() -> Self {
Self::new()
}
}
pub type Config = CreateOpts;
type NamespaceType = String;
@@ -106,7 +113,7 @@ lazy_static! {
// This locker ensures the child exit signal will be received by the right receiver.
pub static ref WAIT_PID_LOCKER: Arc<Mutex<bool>> = Arc::new(Mutex::new(false));
static ref NAMESPACES: HashMap<&'static str, CloneFlags> = {
pub static ref NAMESPACES: HashMap<&'static str, CloneFlags> = {
let mut m = HashMap::new();
m.insert("user", CloneFlags::CLONE_NEWUSER);
m.insert("ipc", CloneFlags::CLONE_NEWIPC);
@@ -119,7 +126,7 @@ lazy_static! {
};
// type to name hashmap, better to be in NAMESPACES
static ref TYPETONAME: HashMap<&'static str, &'static str> = {
pub static ref TYPETONAME: HashMap<&'static str, &'static str> = {
let mut m = HashMap::new();
m.insert("ipc", "ipc");
m.insert("user", "user");
@@ -236,6 +243,8 @@ pub struct LinuxContainer {
pub status: ContainerStatus,
pub created: SystemTime,
pub logger: Logger,
#[cfg(feature = "standard-oci-runtime")]
pub console_socket: PathBuf,
}
#[derive(Serialize, Deserialize, Debug)]
@@ -359,7 +368,6 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
)));
}
}
log_child!(cfd_log, "child process start run");
let buf = read_sync(crfd)?;
let spec_str = std::str::from_utf8(&buf)?;
@@ -379,6 +387,9 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
let cm: FsManager = serde_json::from_str(cm_str)?;
#[cfg(feature = "standard-oci-runtime")]
let csocket_fd = console::setup_console_socket(&std::env::var(CONSOLE_SOCKET_FD)?)?;
let p = if spec.process.is_some() {
spec.process.as_ref().unwrap()
} else {
@@ -649,12 +660,17 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
}
}
// set the "HOME" env getting from "/etc/passwd", if
// there's no uid entry in /etc/passwd, set "/" as the
// home env.
if env::var_os(HOME_ENV_KEY).is_none() {
let home_dir = utils::home_dir(guser.uid).unwrap_or_else(|_| String::from("/"));
env::set_var(HOME_ENV_KEY, home_dir);
// try to set "HOME" env by uid
if let Ok(Some(user)) = User::from_uid(Uid::from_raw(guser.uid)) {
if let Ok(user_home_dir) = user.dir.into_os_string().into_string() {
env::set_var(HOME_ENV_KEY, user_home_dir);
}
}
// set default home dir as "/" if "HOME" env is still empty
if env::var_os(HOME_ENV_KEY).is_none() {
env::set_var(HOME_ENV_KEY, String::from("/"));
}
}
let exec_file = Path::new(&args[0]);
@@ -670,10 +686,19 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
let _ = unistd::close(crfd);
let _ = unistd::close(cwfd);
unistd::setsid().context("create a new session")?;
if oci_process.terminal {
unsafe {
libc::ioctl(0, libc::TIOCSCTTY);
cfg_if::cfg_if! {
if #[cfg(feature = "standard-oci-runtime")] {
if let Some(csocket_fd) = csocket_fd {
console::setup_master_console(csocket_fd)?;
} else {
return Err(anyhow!("failed to get console master socket fd"));
}
}
else {
unistd::setsid().context("create a new session")?;
unsafe { libc::ioctl(0, libc::TIOCSCTTY) };
}
}
}
@@ -928,6 +953,14 @@ impl BaseContainer for LinuxContainer {
let exec_path = std::env::current_exe()?;
let mut child = std::process::Command::new(exec_path);
#[allow(unused_mut)]
let mut console_name = PathBuf::from("");
#[cfg(feature = "standard-oci-runtime")]
if !self.console_socket.as_os_str().is_empty() {
console_name = self.console_socket.clone();
}
let mut child = child
.arg("init")
.stdin(child_stdin)
@@ -937,7 +970,8 @@ impl BaseContainer for LinuxContainer {
.env(NO_PIVOT, format!("{}", self.config.no_pivot_root))
.env(CRFD_FD, format!("{}", crfd))
.env(CWFD_FD, format!("{}", cwfd))
.env(CLOG_FD, format!("{}", cfd_log));
.env(CLOG_FD, format!("{}", cfd_log))
.env(CONSOLE_SOCKET_FD, console_name);
if p.init {
child = child.env(FIFO_FD, format!("{}", fifofd));
@@ -1032,7 +1066,19 @@ impl BaseContainer for LinuxContainer {
let st = self.oci_state()?;
for pid in self.processes.keys() {
signal::kill(Pid::from_raw(*pid), Some(Signal::SIGKILL))?;
match signal::kill(Pid::from_raw(*pid), Some(Signal::SIGKILL)) {
Err(Errno::ESRCH) => {
info!(
self.logger,
"kill encounters ESRCH, pid: {}, container: {}",
pid,
self.id.clone()
);
continue;
}
Err(err) => return Err(anyhow!(err)),
Ok(_) => continue,
}
}
if spec.hooks.is_some() {
@@ -1419,8 +1465,16 @@ impl LinuxContainer {
.unwrap()
.as_secs(),
logger: logger.new(o!("module" => "rustjail", "subsystem" => "container", "cid" => id)),
#[cfg(feature = "standard-oci-runtime")]
console_socket: Path::new("").to_path_buf(),
})
}
#[cfg(feature = "standard-oci-runtime")]
pub fn set_console_socket(&mut self, console_socket: &Path) -> Result<()> {
self.console_socket = console_socket.to_path_buf();
Ok(())
}
}
fn setgroups(grps: &[libc::gid_t]) -> Result<()> {
@@ -1460,7 +1514,7 @@ use std::process::Stdio;
use std::time::Duration;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
async fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> {
pub async fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> {
let logger = logger.new(o!("action" => "execute-hook"));
let binary = PathBuf::from(h.path.as_str());


@@ -30,6 +30,8 @@ extern crate regex;
pub mod capabilities;
pub mod cgroups;
#[cfg(feature = "standard-oci-runtime")]
pub mod console;
pub mod container;
pub mod mount;
pub mod pipestream;
@@ -39,7 +41,6 @@ pub mod seccomp;
pub mod specconv;
pub mod sync;
pub mod sync_with_async;
pub mod utils;
pub mod validator;
use std::collections::HashMap;
@@ -265,7 +266,7 @@ pub fn resources_grpc_to_oci(res: &grpc::LinuxResources) -> oci::LinuxResources
swap: Some(mem.Swap),
kernel: Some(mem.Kernel),
kernel_tcp: Some(mem.KernelTCP),
swappiness: Some(mem.Swappiness as i64),
swappiness: Some(mem.Swappiness),
disable_oom_killer: Some(mem.DisableOOMKiller),
})
} else {
@@ -512,6 +513,7 @@ pub fn grpc_to_oci(grpc: &grpc::Spec) -> oci::Spec {
#[cfg(test)]
mod tests {
use super::*;
#[macro_export]
macro_rules! skip_if_not_root {
() => {
@@ -521,4 +523,595 @@ mod tests {
}
};
}
// Parameters:
//
// 1: expected Result
// 2: actual Result
// 3: string used to identify the test on error
#[macro_export]
macro_rules! assert_result {
($expected_result:expr, $actual_result:expr, $msg:expr) => {
if $expected_result.is_ok() {
let expected_value = $expected_result.as_ref().unwrap();
let actual_value = $actual_result.unwrap();
assert!(*expected_value == actual_value, "{}", $msg);
} else {
assert!($actual_result.is_err(), "{}", $msg);
let expected_error = $expected_result.as_ref().unwrap_err();
let expected_error_msg = format!("{:?}", expected_error);
let actual_error_msg = format!("{:?}", $actual_result.unwrap_err());
assert!(expected_error_msg == actual_error_msg, "{}", $msg);
}
};
}
#[test]
fn test_process_grpc_to_oci() {
#[derive(Debug)]
struct TestData {
grpcproc: grpc::Process,
result: oci::Process,
}
let tests = &[
TestData {
// All fields specified
grpcproc: grpc::Process {
Terminal: true,
ConsoleSize: protobuf::SingularPtrField::<grpc::Box>::some(grpc::Box {
Height: 123,
Width: 456,
..Default::default()
}),
User: protobuf::SingularPtrField::<grpc::User>::some(grpc::User {
UID: 1234,
GID: 5678,
AdditionalGids: Vec::from([910, 1112]),
Username: String::from("username"),
..Default::default()
}),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([String::from("env")])),
Cwd: String::from("cwd"),
Capabilities: protobuf::SingularPtrField::some(grpc::LinuxCapabilities {
Bounding: protobuf::RepeatedField::from(Vec::from([String::from("bnd")])),
Effective: protobuf::RepeatedField::from(Vec::from([String::from("eff")])),
Inheritable: protobuf::RepeatedField::from(Vec::from([String::from(
"inher",
)])),
Permitted: protobuf::RepeatedField::from(Vec::from([String::from("perm")])),
Ambient: protobuf::RepeatedField::from(Vec::from([String::from("amb")])),
..Default::default()
}),
Rlimits: protobuf::RepeatedField::from(Vec::from([
grpc::POSIXRlimit {
Type: String::from("r#type"),
Hard: 123,
Soft: 456,
..Default::default()
},
grpc::POSIXRlimit {
Type: String::from("r#type2"),
Hard: 789,
Soft: 1011,
..Default::default()
},
])),
NoNewPrivileges: true,
ApparmorProfile: String::from("apparmor profile"),
OOMScoreAdj: 123456,
SelinuxLabel: String::from("Selinux Label"),
..Default::default()
},
result: oci::Process {
terminal: true,
console_size: Some(oci::Box {
height: 123,
width: 456,
}),
user: oci::User {
uid: 1234,
gid: 5678,
additional_gids: Vec::from([910, 1112]),
username: String::from("username"),
},
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env")]),
cwd: String::from("cwd"),
capabilities: Some(oci::LinuxCapabilities {
bounding: Vec::from([String::from("bnd")]),
effective: Vec::from([String::from("eff")]),
inheritable: Vec::from([String::from("inher")]),
permitted: Vec::from([String::from("perm")]),
ambient: Vec::from([String::from("amb")]),
}),
rlimits: Vec::from([
oci::PosixRlimit {
r#type: String::from("r#type"),
hard: 123,
soft: 456,
},
oci::PosixRlimit {
r#type: String::from("r#type2"),
hard: 789,
soft: 1011,
},
]),
no_new_privileges: true,
apparmor_profile: String::from("apparmor profile"),
oom_score_adj: Some(123456),
selinux_label: String::from("Selinux Label"),
},
},
TestData {
// None ConsoleSize
grpcproc: grpc::Process {
ConsoleSize: protobuf::SingularPtrField::<grpc::Box>::none(),
OOMScoreAdj: 0,
..Default::default()
},
result: oci::Process {
console_size: None,
oom_score_adj: Some(0),
..Default::default()
},
},
TestData {
// None User
grpcproc: grpc::Process {
User: protobuf::SingularPtrField::<grpc::User>::none(),
OOMScoreAdj: 0,
..Default::default()
},
result: oci::Process {
user: oci::User {
uid: 0,
gid: 0,
additional_gids: vec![],
username: String::from(""),
},
oom_score_adj: Some(0),
..Default::default()
},
},
TestData {
// None Capabilities
grpcproc: grpc::Process {
Capabilities: protobuf::SingularPtrField::none(),
OOMScoreAdj: 0,
..Default::default()
},
result: oci::Process {
capabilities: None,
oom_score_adj: Some(0),
..Default::default()
},
},
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
let result = process_grpc_to_oci(&d.grpcproc);
let msg = format!("{}, result: {:?}", msg, result);
assert_eq!(d.result, result, "{}", msg);
}
}
#[test]
fn test_root_grpc_to_oci() {
#[derive(Debug)]
struct TestData {
grpcroot: grpc::Root,
result: oci::Root,
}
let tests = &[
TestData {
// Default fields
grpcroot: grpc::Root {
..Default::default()
},
result: oci::Root {
..Default::default()
},
},
TestData {
// Specified fields, readonly false
grpcroot: grpc::Root {
Path: String::from("path"),
Readonly: false,
..Default::default()
},
result: oci::Root {
path: String::from("path"),
readonly: false,
..Default::default()
},
},
TestData {
// Specified fields, readonly true
grpcroot: grpc::Root {
Path: String::from("path"),
Readonly: true,
..Default::default()
},
result: oci::Root {
path: String::from("path"),
readonly: true,
..Default::default()
},
},
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
let result = root_grpc_to_oci(&d.grpcroot);
let msg = format!("{}, result: {:?}", msg, result);
assert_eq!(d.result, result, "{}", msg);
}
}
#[test]
fn test_hooks_grpc_to_oci() {
#[derive(Debug)]
struct TestData {
grpchooks: grpc::Hooks,
result: oci::Hooks,
}
let tests = &[
TestData {
// Default fields
grpchooks: grpc::Hooks {
..Default::default()
},
result: oci::Hooks {
..Default::default()
},
},
TestData {
// All specified
grpchooks: grpc::Hooks {
Prestart: protobuf::RepeatedField::from(Vec::from([
grpc::Hook {
Path: String::from("prestartpath"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
},
grpc::Hook {
Path: String::from("prestartpath2"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg3"),
String::from("arg4"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env3"),
String::from("env4"),
])),
Timeout: 25,
..Default::default()
},
])),
Poststart: protobuf::RepeatedField::from(Vec::from([grpc::Hook {
Path: String::from("poststartpath"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
}])),
Poststop: protobuf::RepeatedField::from(Vec::from([grpc::Hook {
Path: String::from("poststoppath"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
}])),
..Default::default()
},
result: oci::Hooks {
prestart: Vec::from([
oci::Hook {
path: String::from("prestartpath"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
},
oci::Hook {
path: String::from("prestartpath2"),
args: Vec::from([String::from("arg3"), String::from("arg4")]),
env: Vec::from([String::from("env3"), String::from("env4")]),
timeout: Some(25),
},
]),
poststart: Vec::from([oci::Hook {
path: String::from("poststartpath"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
poststop: Vec::from([oci::Hook {
path: String::from("poststoppath"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
},
},
TestData {
// Prestart empty
grpchooks: grpc::Hooks {
Prestart: protobuf::RepeatedField::from(Vec::from([])),
Poststart: protobuf::RepeatedField::from(Vec::from([grpc::Hook {
Path: String::from("poststartpath"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
}])),
Poststop: protobuf::RepeatedField::from(Vec::from([grpc::Hook {
Path: String::from("poststoppath"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
}])),
..Default::default()
},
result: oci::Hooks {
prestart: Vec::from([]),
poststart: Vec::from([oci::Hook {
path: String::from("poststartpath"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
poststop: Vec::from([oci::Hook {
path: String::from("poststoppath"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
},
},
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
let result = hooks_grpc_to_oci(&d.grpchooks);
let msg = format!("{}, result: {:?}", msg, result);
assert_eq!(d.result, result, "{}", msg);
}
}
#[test]
fn test_mount_grpc_to_oci() {
#[derive(Debug)]
struct TestData {
grpcmount: grpc::Mount,
result: oci::Mount,
}
let tests = &[
TestData {
// Default fields
grpcmount: grpc::Mount {
..Default::default()
},
result: oci::Mount {
..Default::default()
},
},
TestData {
grpcmount: grpc::Mount {
destination: String::from("destination"),
source: String::from("source"),
field_type: String::from("fieldtype"),
options: protobuf::RepeatedField::from(Vec::from([
String::from("option1"),
String::from("option2"),
])),
..Default::default()
},
result: oci::Mount {
destination: String::from("destination"),
source: String::from("source"),
r#type: String::from("fieldtype"),
options: Vec::from([String::from("option1"), String::from("option2")]),
},
},
TestData {
grpcmount: grpc::Mount {
destination: String::from("destination"),
source: String::from("source"),
field_type: String::from("fieldtype"),
options: protobuf::RepeatedField::from(Vec::new()),
..Default::default()
},
result: oci::Mount {
destination: String::from("destination"),
source: String::from("source"),
r#type: String::from("fieldtype"),
options: Vec::new(),
},
},
TestData {
grpcmount: grpc::Mount {
destination: String::new(),
source: String::from("source"),
field_type: String::from("fieldtype"),
options: protobuf::RepeatedField::from(Vec::from([String::from("option1")])),
..Default::default()
},
result: oci::Mount {
destination: String::new(),
source: String::from("source"),
r#type: String::from("fieldtype"),
options: Vec::from([String::from("option1")]),
},
},
TestData {
grpcmount: grpc::Mount {
destination: String::from("destination"),
source: String::from("source"),
field_type: String::new(),
options: protobuf::RepeatedField::from(Vec::from([String::from("option1")])),
..Default::default()
},
result: oci::Mount {
destination: String::from("destination"),
source: String::from("source"),
r#type: String::new(),
options: Vec::from([String::from("option1")]),
},
},
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
let result = mount_grpc_to_oci(&d.grpcmount);
let msg = format!("{}, result: {:?}", msg, result);
assert_eq!(d.result, result, "{}", msg);
}
}
#[test]
fn test_hook_grpc_to_oci() {
#[derive(Debug)]
struct TestData<'a> {
grpchook: &'a [grpc::Hook],
result: Vec<oci::Hook>,
}
let tests = &[
TestData {
// Default fields
grpchook: &[
grpc::Hook {
Timeout: 0,
..Default::default()
},
grpc::Hook {
Timeout: 0,
..Default::default()
},
],
result: vec![
oci::Hook {
timeout: Some(0),
..Default::default()
},
oci::Hook {
timeout: Some(0),
..Default::default()
},
],
},
TestData {
// Specified fields
grpchook: &[
grpc::Hook {
Path: String::from("path"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
},
grpc::Hook {
Path: String::from("path2"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg3"),
String::from("arg4"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env3"),
String::from("env4"),
])),
Timeout: 20,
..Default::default()
},
],
result: vec![
oci::Hook {
path: String::from("path"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
},
oci::Hook {
path: String::from("path2"),
args: Vec::from([String::from("arg3"), String::from("arg4")]),
env: Vec::from([String::from("env3"), String::from("env4")]),
timeout: Some(20),
},
],
},
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
let result = hook_grpc_to_oci(d.grpchook);
let msg = format!("{}, result: {:?}", msg, result);
assert_eq!(d.result, result, "{}", msg);
}
}
}


@@ -32,16 +32,21 @@ use crate::log_child;
// Info reveals information about a particular mounted filesystem. This
// struct is populated from the content in the /proc/<pid>/mountinfo file.
#[derive(std::fmt::Debug)]
#[derive(std::fmt::Debug, PartialEq)]
pub struct Info {
mount_point: String,
optional: String,
fstype: String,
}
const MOUNTINFOFORMAT: &str = "{d} {d} {d}:{d} {} {} {} {}";
const MOUNTINFO_FORMAT: &str = "{d} {d} {d}:{d} {} {} {} {}";
const MOUNTINFO_PATH: &str = "/proc/self/mountinfo";
const PROC_PATH: &str = "/proc";
const ERR_FAILED_PARSE_MOUNTINFO: &str = "failed to parse mountinfo file";
const ERR_FAILED_PARSE_MOUNTINFO_FINAL_FIELDS: &str =
"failed to parse final fields in mountinfo file";
// Since libc doesn't define this constant for musl, redefine it here.
#[cfg(all(target_os = "linux", target_env = "gnu", not(target_arch = "s390x")))]
const PROC_SUPER_MAGIC: libc::c_long = 0x00009fa0;
@@ -518,7 +523,7 @@ pub fn pivot_rootfs<P: ?Sized + NixPath + std::fmt::Debug>(path: &P) -> Result<(
}
fn rootfs_parent_mount_private(path: &str) -> Result<()> {
let mount_infos = parse_mount_table()?;
let mount_infos = parse_mount_table(MOUNTINFO_PATH)?;
let mut max_len = 0;
let mut mount_point = String::from("");
@@ -546,8 +551,8 @@ fn rootfs_parent_mount_private(path: &str) -> Result<()> {
// Parse /proc/self/mountinfo because comparing Dev and ino does not work from
// bind mounts
fn parse_mount_table() -> Result<Vec<Info>> {
let file = File::open("/proc/self/mountinfo")?;
fn parse_mount_table(mountinfo_path: &str) -> Result<Vec<Info>> {
let file = File::open(mountinfo_path)?;
let reader = BufReader::new(file);
let mut infos = Vec::new();
@@ -569,7 +574,7 @@ fn parse_mount_table() -> Result<Vec<Info>> {
let (_id, _parent, _major, _minor, _root, mount_point, _opts, optional) = scan_fmt!(
&line,
MOUNTINFOFORMAT,
MOUNTINFO_FORMAT,
i32,
i32,
i32,
@@ -578,12 +583,17 @@ fn parse_mount_table() -> Result<Vec<Info>> {
String,
String,
String
)?;
)
.map_err(|_| anyhow!(ERR_FAILED_PARSE_MOUNTINFO))?;
let fields: Vec<&str> = line.split(" - ").collect();
if fields.len() == 2 {
let (fstype, _source, _vfs_opts) =
scan_fmt!(fields[1], "{} {} {}", String, String, String)?;
let final_fields: Vec<&str> = fields[1].split_whitespace().collect();
if final_fields.len() != 3 {
return Err(anyhow!(ERR_FAILED_PARSE_MOUNTINFO_FINAL_FIELDS));
}
let fstype = final_fields[0].to_string();
let mut optional_new = String::new();
if optional != "-" {
@@ -598,7 +608,7 @@ fn parse_mount_table() -> Result<Vec<Info>> {
infos.push(info);
} else {
return Err(anyhow!("failed to parse mount info file".to_string()));
return Err(anyhow!(ERR_FAILED_PARSE_MOUNTINFO));
}
}
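For reference, a mountinfo line looks like "36 35 98:0 / /mnt rw,noatime shared:1 - ext4 /dev/sda1 rw": the fields before the " - " separator are positional, and exactly three fields (filesystem type, mount source, per-superblock options) follow it. A minimal sketch of the separator handling the rewritten parse_mount_table() relies on (illustrative values only):

fn main() {
    let line = "36 35 98:0 / /mnt rw,noatime shared:1 - ext4 /dev/sda1 rw";
    // Split at the " - " separator mandated by the mountinfo format.
    let fields: Vec<&str> = line.split(" - ").collect();
    assert_eq!(fields.len(), 2);
    // Exactly three fields must follow the separator.
    let final_fields: Vec<&str> = fields[1].split_whitespace().collect();
    assert_eq!(final_fields, ["ext4", "/dev/sda1", "rw"]);
}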
@@ -619,7 +629,7 @@ fn chroot<P: ?Sized + NixPath>(_path: &P) -> Result<(), nix::Error> {
pub fn ms_move_root(rootfs: &str) -> Result<bool> {
unistd::chdir(rootfs)?;
let mount_infos = parse_mount_table()?;
let mount_infos = parse_mount_table(MOUNTINFO_PATH)?;
let root_path = Path::new(rootfs);
let abs_root_buf = root_path.absolutize()?;
@@ -1046,10 +1056,12 @@ fn readonly_path(path: &str) -> Result<()> {
#[cfg(test)]
mod tests {
use super::*;
use crate::assert_result;
use crate::skip_if_not_root;
use std::fs::create_dir;
use std::fs::create_dir_all;
use std::fs::remove_dir_all;
use std::io;
use std::os::unix::fs;
use std::os::unix::io::AsRawFd;
use tempfile::tempdir;
@@ -1286,6 +1298,113 @@ mod tests {
let ret = stat::stat(path);
assert!(ret.is_ok(), "Should pass. Got: {:?}", ret);
}
#[test]
fn test_mount_from() {
#[derive(Debug)]
struct TestData<'a> {
source: &'a str,
destination: &'a str,
r#type: &'a str,
flags: MsFlags,
error_contains: &'a str,
// if true, a directory will be created at path in source
make_source_directory: bool,
// if true, a file will be created at path in source
make_source_file: bool,
}
impl Default for TestData<'_> {
fn default() -> Self {
TestData {
source: "tmp",
destination: "dest",
r#type: "tmpfs",
flags: MsFlags::empty(),
error_contains: "",
make_source_directory: true,
make_source_file: false,
}
}
}
let tests = &[
TestData {
..Default::default()
},
TestData {
flags: MsFlags::MS_BIND,
..Default::default()
},
TestData {
r#type: "bind",
..Default::default()
},
TestData {
r#type: "cgroup2",
..Default::default()
},
TestData {
r#type: "bind",
make_source_directory: false,
error_contains: &format!("{}", std::io::Error::from_raw_os_error(libc::ENOENT)),
..Default::default()
},
TestData {
r#type: "bind",
make_source_directory: false,
make_source_file: true,
..Default::default()
},
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
let tempdir = tempdir().unwrap();
let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC).unwrap();
defer!({
unistd::close(rfd).unwrap();
unistd::close(wfd).unwrap();
});
let source_path = tempdir.path().join(d.source).to_str().unwrap().to_string();
if d.make_source_directory {
std::fs::create_dir_all(&source_path).unwrap();
} else if d.make_source_file {
std::fs::write(&source_path, []).unwrap();
}
let mount = Mount {
source: source_path,
destination: d.destination.to_string(),
r#type: d.r#type.to_string(),
options: vec![],
};
let result = mount_from(
wfd,
&mount,
tempdir.path().to_str().unwrap(),
d.flags,
"",
"",
);
let msg = format!("{}: result: {:?}", msg, result);
if d.error_contains.is_empty() {
assert!(result.is_ok(), "{}", msg);
} else {
assert!(result.is_err(), "{}", msg);
let error_msg = format!("{}", result.unwrap_err());
assert!(error_msg.contains(d.error_contains), "{}", msg);
}
}
}
#[test]
fn test_check_proc_mount() {
let mount = oci::Mount {
@@ -1401,6 +1520,121 @@ mod tests {
}
}
#[test]
fn test_parse_mount_table() {
#[derive(Debug)]
struct TestData<'a> {
mountinfo_data: Option<&'a str>,
result: Result<Vec<Info>>,
}
let tests = &[
TestData {
mountinfo_data: Some(
"22 933 0:20 / /sys rw,nodev shared:2 - sysfs sysfs rw,noexec",
),
result: Ok(vec![Info {
mount_point: "/sys".to_string(),
optional: "shared:2".to_string(),
fstype: "sysfs".to_string(),
}]),
},
TestData {
mountinfo_data: Some(
r#"22 933 0:20 / /sys rw,nodev - sysfs sysfs rw,noexec
81 13 1:2 / /tmp/dir rw shared:2 - tmpfs tmpfs rw"#,
),
result: Ok(vec![
Info {
mount_point: "/sys".to_string(),
optional: "".to_string(),
fstype: "sysfs".to_string(),
},
Info {
mount_point: "/tmp/dir".to_string(),
optional: "shared:2".to_string(),
fstype: "tmpfs".to_string(),
},
]),
},
TestData {
mountinfo_data: Some(
"22 933 0:20 /foo\040-\040bar /sys rw,nodev shared:2 - sysfs sysfs rw,noexec",
),
result: Ok(vec![Info {
mount_point: "/sys".to_string(),
optional: "shared:2".to_string(),
fstype: "sysfs".to_string(),
}]),
},
TestData {
mountinfo_data: Some(""),
result: Ok(vec![]),
},
TestData {
mountinfo_data: Some("invalid line data - sysfs sysfs rw"),
result: Err(anyhow!(ERR_FAILED_PARSE_MOUNTINFO)),
},
TestData {
mountinfo_data: Some("22 96 0:21 / /sys rw,noexec - sysfs"),
result: Err(anyhow!(ERR_FAILED_PARSE_MOUNTINFO_FINAL_FIELDS)),
},
TestData {
mountinfo_data: Some("22 96 0:21 / /sys rw,noexec - sysfs sysfs rw rw"),
result: Err(anyhow!(ERR_FAILED_PARSE_MOUNTINFO_FINAL_FIELDS)),
},
TestData {
mountinfo_data: Some("22 96 0:21 / /sys rw,noexec shared:2 - x - x"),
result: Err(anyhow!(ERR_FAILED_PARSE_MOUNTINFO)),
},
TestData {
mountinfo_data: Some("-"),
result: Err(anyhow!(ERR_FAILED_PARSE_MOUNTINFO)),
},
TestData {
mountinfo_data: Some("--"),
result: Err(anyhow!(ERR_FAILED_PARSE_MOUNTINFO)),
},
TestData {
mountinfo_data: Some("- -"),
result: Err(anyhow!(ERR_FAILED_PARSE_MOUNTINFO)),
},
TestData {
mountinfo_data: Some(" - "),
result: Err(anyhow!(ERR_FAILED_PARSE_MOUNTINFO)),
},
TestData {
mountinfo_data: Some(
r#"22 933 0:20 / /sys rw,nodev - sysfs sysfs rw,noexec
invalid line
81 13 1:2 / /tmp/dir rw shared:2 - tmpfs tmpfs rw"#,
),
result: Err(anyhow!(ERR_FAILED_PARSE_MOUNTINFO)),
},
TestData {
mountinfo_data: None,
result: Err(anyhow!(io::Error::from_raw_os_error(libc::ENOENT))),
},
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
let tempdir = tempdir().unwrap();
let mountinfo_path = tempdir.path().join("mountinfo");
if let Some(mountinfo_data) = d.mountinfo_data {
std::fs::write(&mountinfo_path, mountinfo_data).unwrap();
}
let result = parse_mount_table(mountinfo_path.to_str().unwrap());
let msg = format!("{}: result: {:?}", msg, result);
assert_result!(d.result, result, msg);
}
}
#[test]
fn test_dev_rel_path() {
// Valid device paths


@@ -5,7 +5,7 @@
use libc::pid_t;
use std::fs::File;
use std::os::unix::io::RawFd;
use std::os::unix::io::{AsRawFd, RawFd};
use tokio::sync::mpsc::Sender;
use nix::errno::Errno;
@@ -137,19 +137,25 @@ impl Process {
info!(logger, "before create console socket!");
if !p.tty {
info!(logger, "created console socket!");
if cfg!(feature = "standard-oci-runtime") {
p.stdin = Some(std::io::stdin().as_raw_fd());
p.stdout = Some(std::io::stdout().as_raw_fd());
p.stderr = Some(std::io::stderr().as_raw_fd());
} else {
info!(logger, "created console socket!");
let (stdin, pstdin) = unistd::pipe2(OFlag::O_CLOEXEC)?;
p.parent_stdin = Some(pstdin);
p.stdin = Some(stdin);
let (stdin, pstdin) = unistd::pipe2(OFlag::O_CLOEXEC)?;
p.parent_stdin = Some(pstdin);
p.stdin = Some(stdin);
let (pstdout, stdout) = create_extended_pipe(OFlag::O_CLOEXEC, pipe_size)?;
p.parent_stdout = Some(pstdout);
p.stdout = Some(stdout);
let (pstdout, stdout) = create_extended_pipe(OFlag::O_CLOEXEC, pipe_size)?;
p.parent_stdout = Some(pstdout);
p.stdout = Some(stdout);
let (pstderr, stderr) = create_extended_pipe(OFlag::O_CLOEXEC, pipe_size)?;
p.parent_stderr = Some(pstderr);
p.stderr = Some(stderr);
let (pstderr, stderr) = create_extended_pipe(OFlag::O_CLOEXEC, pipe_size)?;
p.parent_stderr = Some(pstderr);
p.stderr = Some(stderr);
}
}
Ok(p)
}
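A note on the feature check above: cfg!(feature = "standard-oci-runtime") expands to a compile-time true/false constant, so both branches are always type-checked regardless of which features are enabled, whereas a #[cfg(...)] attribute would compile the non-matching item out entirely. A minimal sketch of the difference (function names are illustrative):

// cfg! is a boolean expression; both arms must compile.
fn wire_stdio_runtime_check() {
    if cfg!(feature = "standard-oci-runtime") {
        // inherit the caller's stdin/stdout/stderr
    } else {
        // create pipes, as before
    }
}

// #[cfg] removes the non-matching item before type checking.
#[cfg(feature = "standard-oci-runtime")]
fn wire_stdio() { /* inherit stdio */ }
#[cfg(not(feature = "standard-oci-runtime"))]
fn wire_stdio() { /* create pipes */ }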
@@ -284,5 +290,11 @@ mod tests {
// group of the calling process.
process.pid = 0;
assert!(process.signal(libc::SIGCONT).is_ok());
if cfg!(feature = "standard-oci-runtime") {
assert_eq!(process.stdin.unwrap(), std::io::stdin().as_raw_fd());
assert_eq!(process.stdout.unwrap(), std::io::stdout().as_raw_fd());
assert_eq!(process.stderr.unwrap(), std::io::stderr().as_raw_fd());
}
}
}


@@ -5,7 +5,7 @@
use oci::Spec;
#[derive(Debug)]
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
pub struct CreateOpts {
pub cgroup_name: String,
pub use_systemd_cgroup: bool,


@@ -1,120 +0,0 @@
// Copyright (c) 2021 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use anyhow::{anyhow, Context, Result};
use libc::gid_t;
use libc::uid_t;
use std::fs::File;
use std::io::{BufRead, BufReader};
const PASSWD_FILE: &str = "/etc/passwd";
// An entry from /etc/passwd
#[derive(Debug, PartialEq, PartialOrd)]
pub struct PasswdEntry {
// username
pub name: String,
// user password
pub passwd: String,
// user id
pub uid: uid_t,
// group id
pub gid: gid_t,
// user Information
pub gecos: String,
// home directory
pub dir: String,
// User's Shell
pub shell: String,
}
// get an entry for a given `uid` from `/etc/passwd`
fn get_entry_by_uid(uid: uid_t, path: &str) -> Result<PasswdEntry> {
let file = File::open(path).with_context(|| format!("open file {}", path))?;
let mut reader = BufReader::new(file);
let mut line = String::new();
loop {
line.clear();
match reader.read_line(&mut line) {
Ok(0) => return Err(anyhow!(format!("file {} is empty", path))),
Ok(_) => (),
Err(e) => {
return Err(anyhow!(format!(
"failed to read file {} with {:?}",
path, e
)))
}
}
if line.starts_with('#') {
continue;
}
let parts: Vec<&str> = line.split(':').map(|part| part.trim()).collect();
if parts.len() != 7 {
continue;
}
match parts[2].parse() {
Err(_e) => continue,
Ok(new_uid) => {
if uid != new_uid {
continue;
}
let entry = PasswdEntry {
name: parts[0].to_string(),
passwd: parts[1].to_string(),
uid: new_uid,
gid: parts[3].parse().unwrap_or(0),
gecos: parts[4].to_string(),
dir: parts[5].to_string(),
shell: parts[6].to_string(),
};
return Ok(entry);
}
}
}
}
pub fn home_dir(uid: uid_t) -> Result<String> {
get_entry_by_uid(uid, PASSWD_FILE).map(|entry| entry.dir)
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
use tempfile::Builder;
#[test]
fn test_get_entry_by_uid() {
let tmpdir = Builder::new().tempdir().unwrap();
let tmpdir_path = tmpdir.path().to_str().unwrap();
let temp_passwd = format!("{}/passwd", tmpdir_path);
let mut tempf = File::create(temp_passwd.as_str()).unwrap();
let passwd_entries = "root:x:0:0:root:/root0:/bin/bash
root:x:1:0:root:/root1:/bin/bash
#root:x:1:0:root:/rootx:/bin/bash
root:x:2:0:root:/root2:/bin/bash
root:x:3:0:root:/root3
root:x:3:0:root:/root3:/bin/bash";
writeln!(tempf, "{}", passwd_entries).unwrap();
let entry = get_entry_by_uid(0, temp_passwd.as_str()).unwrap();
assert_eq!(entry.dir.as_str(), "/root0");
let entry = get_entry_by_uid(1, temp_passwd.as_str()).unwrap();
assert_eq!(entry.dir.as_str(), "/root1");
let entry = get_entry_by_uid(2, temp_passwd.as_str()).unwrap();
assert_eq!(entry.dir.as_str(), "/root2");
let entry = get_entry_by_uid(3, temp_passwd.as_str()).unwrap();
assert_eq!(entry.dir.as_str(), "/root3");
}
}
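The hand-rolled /etc/passwd parser above is removed outright. A minimal sketch of an equivalent lookup via the nix crate, assuming its User::from_uid API (illustrative; the actual replacement call site is outside this diff):

use anyhow::{anyhow, Result};
use nix::unistd::{Uid, User};

// Resolve the home directory through libc's getpwuid_r rather than
// parsing /etc/passwd by hand.
pub fn home_dir(uid: u32) -> Result<String> {
    let user = User::from_uid(Uid::from_raw(uid))?
        .ok_or_else(|| anyhow!("no passwd entry for uid {}", uid))?;
    user.dir
        .to_str()
        .map(String::from)
        .ok_or_else(|| anyhow!("home directory for uid {} is not valid UTF-8", uid))
}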


@@ -432,6 +432,8 @@ fn get_container_pipe_size(param: &str) -> Result<i32> {
#[cfg(test)]
mod tests {
use crate::assert_result;
use super::*;
use anyhow::anyhow;
use std::fs::File;
@@ -439,32 +441,6 @@ mod tests {
use std::time;
use tempfile::tempdir;
// Parameters:
//
// 1: expected Result
// 2: actual Result
// 3: string used to identify the test on error
macro_rules! assert_result {
($expected_result:expr, $actual_result:expr, $msg:expr) => {
if $expected_result.is_ok() {
let expected_level = $expected_result.as_ref().unwrap();
let actual_level = $actual_result.unwrap();
assert!(*expected_level == actual_level, "{}", $msg);
} else {
let expected_error = $expected_result.as_ref().unwrap_err();
let expected_error_msg = format!("{:?}", expected_error);
if let Err(actual_error) = $actual_result {
let actual_error_msg = format!("{:?}", actual_error);
assert!(expected_error_msg == actual_error_msg, "{}", $msg);
} else {
assert!(expected_error_msg == "expected error, got OK", "{}", $msg);
}
}
};
}
#[test]
fn test_new() {
let config: AgentConfig = Default::default();


@@ -416,3 +416,59 @@ fn reset_sigpipe() {
use crate::config::AgentConfig;
use std::os::unix::io::{FromRawFd, RawFd};
#[cfg(test)]
mod tests {
use super::*;
use crate::test_utils::test_utils::TestUserType;
#[tokio::test]
async fn test_create_logger_task() {
#[derive(Debug)]
struct TestData {
vsock_port: u32,
test_user: TestUserType,
result: Result<()>,
}
let tests = &[
TestData {
// non-root user cannot use privileged vsock port
vsock_port: 1,
test_user: TestUserType::NonRootOnly,
result: Err(anyhow!(nix::errno::Errno::from_i32(libc::EACCES))),
},
TestData {
// passing vsock_port 0 causes logger task to write to stdout
vsock_port: 0,
test_user: TestUserType::Any,
result: Ok(()),
},
];
for (i, d) in tests.iter().enumerate() {
if d.test_user == TestUserType::RootOnly {
skip_if_not_root!();
} else if d.test_user == TestUserType::NonRootOnly {
skip_if_root!();
}
let msg = format!("test[{}]: {:?}", i, d);
let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC).unwrap();
defer!({
// rfd is closed by the use of PipeStream in the create_logger_task function,
// but we will attempt to close in case of a failure
let _ = unistd::close(rfd);
unistd::close(wfd).unwrap();
});
let (shutdown_tx, shutdown_rx) = channel(true);
shutdown_tx.send(true).unwrap();
let result = create_logger_task(rfd, d.vsock_port, shutdown_rx).await;
let msg = format!("{}, result: {:?}", msg, result);
assert_result!(d.result, result, msg);
}
}
}


@@ -16,7 +16,7 @@ use std::sync::Arc;
use tokio::sync::Mutex;
use nix::mount::MsFlags;
use nix::unistd::Gid;
use nix::unistd::{Gid, Uid};
use regex::Regex;
@@ -29,6 +29,7 @@ use crate::device::{
use crate::linux_abi::*;
use crate::pci;
use crate::protocols::agent::Storage;
use crate::protocols::types::FSGroupChangePolicy;
use crate::Sandbox;
#[cfg(target_arch = "s390x")]
use crate::{ccw, device::get_virtio_blk_ccw_device_name};
@@ -43,6 +44,11 @@ pub const MOUNT_GUEST_TAG: &str = "kataShared";
// Allocating an FSGroup that owns the pod's volumes
const FS_GID: &str = "fsgid";
const RW_MASK: u32 = 0o660;
const RO_MASK: u32 = 0o440;
const EXEC_MASK: u32 = 0o110;
const MODE_SETGID: u32 = 0o2000;
#[rustfmt::skip]
lazy_static! {
pub static ref FLAGS: HashMap<&'static str, (bool, MsFlags)> = {
@@ -85,11 +91,11 @@ lazy_static! {
}
#[derive(Debug, PartialEq)]
pub struct InitMount {
fstype: &'static str,
src: &'static str,
dest: &'static str,
options: Vec<&'static str>,
pub struct InitMount<'a> {
fstype: &'a str,
src: &'a str,
dest: &'a str,
options: Vec<&'a str>,
}
#[rustfmt::skip]
@@ -115,7 +121,7 @@ lazy_static!{
#[rustfmt::skip]
lazy_static! {
pub static ref INIT_ROOTFS_MOUNTS: Vec<InitMount> = vec![
pub static ref INIT_ROOTFS_MOUNTS: Vec<InitMount<'static>> = vec![
InitMount{fstype: "proc", src: "proc", dest: "/proc", options: vec!["nosuid", "nodev", "noexec"]},
InitMount{fstype: "sysfs", src: "sysfs", dest: "/sys", options: vec!["nosuid", "nodev", "noexec"]},
InitMount{fstype: "devtmpfs", src: "dev", dest: "/dev", options: vec!["nosuid"]},
@@ -222,7 +228,7 @@ async fn ephemeral_storage_handler(
let meta = fs::metadata(&storage.mount_point)?;
let mut permission = meta.permissions();
let o_mode = meta.mode() | 0o2000;
let o_mode = meta.mode() | MODE_SETGID;
permission.set_mode(o_mode);
fs::set_permissions(&storage.mount_point, permission)?;
}
@@ -272,7 +278,7 @@ async fn local_storage_handler(
if need_set_fsgid {
// set SetGid mode mask.
o_mode |= 0o2000;
o_mode |= MODE_SETGID;
}
permission.set_mode(o_mode);
@@ -489,7 +495,9 @@ fn common_storage_handler(logger: &Logger, storage: &Storage) -> Result<String>
// Mount the storage device.
let mount_point = storage.mount_point.to_string();
mount_storage(logger, storage).and(Ok(mount_point))
mount_storage(logger, storage)?;
set_ownership(logger, storage)?;
Ok(mount_point)
}
// nvdimm_storage_handler handles the storage for NVDIMM driver.
@@ -573,6 +581,91 @@ fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
)
}
#[instrument]
pub fn set_ownership(logger: &Logger, storage: &Storage) -> Result<()> {
let logger = logger.new(o!("subsystem" => "mount", "fn" => "set_ownership"));
// If fsGroup is not set, skip performing ownership change
if storage.fs_group.is_none() {
return Ok(());
}
let fs_group = storage.get_fs_group();
let mut read_only = false;
let opts_vec: Vec<String> = storage.options.to_vec();
if opts_vec.contains(&String::from("ro")) {
read_only = true;
}
let mount_path = Path::new(&storage.mount_point);
let metadata = mount_path.metadata().map_err(|err| {
error!(logger, "failed to obtain metadata for mount path";
"mount-path" => mount_path.to_str(),
"error" => err.to_string(),
);
err
})?;
if fs_group.group_change_policy == FSGroupChangePolicy::OnRootMismatch
&& metadata.gid() == fs_group.group_id
{
let mut mask = if read_only { RO_MASK } else { RW_MASK };
mask |= EXEC_MASK;
// With the fsGroup change policy set to OnRootMismatch, the ownership
// change is skipped when the root of the mount path already has the
// desired gid and the expected permission bits.
let current_mode = metadata.permissions().mode();
if (mask & current_mode == mask) && (current_mode & MODE_SETGID != 0) {
info!(logger, "skipping ownership change for volume";
"mount-path" => mount_path.to_str(),
"fs-group" => fs_group.group_id.to_string(),
);
return Ok(());
}
}
info!(logger, "performing recursive ownership change";
"mount-path" => mount_path.to_str(),
"fs-group" => fs_group.group_id.to_string(),
);
recursive_ownership_change(
mount_path,
None,
Some(Gid::from_raw(fs_group.group_id)),
read_only,
)
}
#[instrument]
pub fn recursive_ownership_change(
path: &Path,
uid: Option<Uid>,
gid: Option<Gid>,
read_only: bool,
) -> Result<()> {
let mut mask = if read_only { RO_MASK } else { RW_MASK };
if path.is_dir() {
for entry in fs::read_dir(&path)? {
recursive_ownership_change(entry?.path().as_path(), uid, gid, read_only)?;
}
mask |= EXEC_MASK;
mask |= MODE_SETGID;
}
nix::unistd::chown(path, uid, gid)?;
if gid.is_some() {
let metadata = path.metadata()?;
let mut permission = metadata.permissions();
let target_mode = metadata.mode() | mask;
permission.set_mode(target_mode);
fs::set_permissions(path, permission)?;
}
Ok(())
}
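To make the mask arithmetic concrete: a directory on a read-write volume gains RW_MASK | EXEC_MASK | MODE_SETGID, a read-only one gains RO_MASK | EXEC_MASK | MODE_SETGID, and plain files only gain RW_MASK or RO_MASK. A quick check of the values (illustrative):

fn main() {
    // Read-write directory: owner/group rw, owner/group x, setgid.
    assert_eq!(0o660 | 0o110 | 0o2000, 0o2770);
    // Read-only directory: owner/group r, owner/group x, setgid.
    assert_eq!(0o440 | 0o110 | 0o2000, 0o2550);
    // Files: just 0o660 (rw) or 0o440 (ro), OR-ed into the existing mode.
}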
/// Looks for `mount_point` entry in the /proc/mounts.
#[instrument]
pub fn is_mounted(mount_point: &str) -> Result<bool> {
@@ -766,8 +859,9 @@ pub fn get_mount_fs_type_from_file(mount_file: &str, mount_point: &str) -> Resul
}
Err(anyhow!(
"failed to find FS type for mount point {}",
mount_point
"failed to find FS type for mount point {}, mount file content: {:?}",
mount_point,
fs::read_to_string(mount_file)
))
}
@@ -776,7 +870,7 @@ pub fn get_cgroup_mounts(
logger: &Logger,
cg_path: &str,
unified_cgroup_hierarchy: bool,
) -> Result<Vec<InitMount>> {
) -> Result<Vec<InitMount<'static>>> {
// cgroup v2
// https://github.com/kata-containers/agent/blob/8c9bbadcd448c9a67690fbe11a860aaacc69813c/agent.go#L1249
if unified_cgroup_hierarchy {
@@ -924,20 +1018,16 @@ fn parse_options(option_list: Vec<String>) -> HashMap<String, String> {
#[cfg(test)]
mod tests {
use super::*;
use crate::{skip_if_not_root, skip_loop_if_not_root, skip_loop_if_root};
use crate::test_utils::test_utils::TestUserType;
use crate::{skip_if_not_root, skip_loop_by_user, skip_loop_if_not_root, skip_loop_if_root};
use protobuf::RepeatedField;
use protocols::agent::FSGroup;
use std::fs::File;
use std::fs::OpenOptions;
use std::io::Write;
use std::path::PathBuf;
use tempfile::tempdir;
#[derive(Debug, PartialEq)]
enum TestUserType {
RootOnly,
NonRootOnly,
Any,
}
#[test]
fn test_mount() {
#[derive(Debug)]
@@ -1023,11 +1113,7 @@ mod tests {
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
if d.test_user == TestUserType::RootOnly {
skip_loop_if_not_root!(msg);
} else if d.test_user == TestUserType::NonRootOnly {
skip_loop_if_root!(msg);
}
skip_loop_by_user!(msg, d.test_user);
let src: PathBuf;
let dest: PathBuf;
@@ -1497,6 +1583,226 @@ mod tests {
assert!(testfile.is_file());
}
#[test]
fn test_mount_storage() {
#[derive(Debug)]
struct TestData<'a> {
test_user: TestUserType,
storage: Storage,
error_contains: &'a str,
make_source_dir: bool,
make_mount_dir: bool,
deny_mount_permission: bool,
}
impl Default for TestData<'_> {
fn default() -> Self {
TestData {
test_user: TestUserType::Any,
storage: Storage {
mount_point: "mnt".to_string(),
source: "src".to_string(),
fstype: "tmpfs".to_string(),
..Default::default()
},
make_source_dir: true,
make_mount_dir: false,
deny_mount_permission: false,
error_contains: "",
}
}
}
let tests = &[
TestData {
test_user: TestUserType::NonRootOnly,
error_contains: "EPERM: Operation not permitted",
..Default::default()
},
TestData {
test_user: TestUserType::RootOnly,
..Default::default()
},
TestData {
storage: Storage {
mount_point: "mnt".to_string(),
source: "src".to_string(),
fstype: "bind".to_string(),
..Default::default()
},
make_source_dir: false,
make_mount_dir: true,
error_contains: "Could not create mountpoint",
..Default::default()
},
TestData {
test_user: TestUserType::NonRootOnly,
deny_mount_permission: true,
error_contains: "Could not create mountpoint",
..Default::default()
},
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
skip_loop_by_user!(msg, d.test_user);
let drain = slog::Discard;
let logger = slog::Logger::root(drain, o!());
let tempdir = tempdir().unwrap();
let source = tempdir.path().join(&d.storage.source);
let mount_point = tempdir.path().join(&d.storage.mount_point);
let storage = Storage {
source: source.to_str().unwrap().to_string(),
mount_point: mount_point.to_str().unwrap().to_string(),
..d.storage.clone()
};
if d.make_source_dir {
fs::create_dir_all(&storage.source).unwrap();
}
if d.make_mount_dir {
fs::create_dir_all(&storage.mount_point).unwrap();
}
if d.deny_mount_permission {
fs::set_permissions(
mount_point.parent().unwrap(),
fs::Permissions::from_mode(0o000),
)
.unwrap();
}
let result = mount_storage(&logger, &storage);
// restore permissions so tempdir can be cleaned up
if d.deny_mount_permission {
fs::set_permissions(
mount_point.parent().unwrap(),
fs::Permissions::from_mode(0o755),
)
.unwrap();
}
if result.is_ok() {
nix::mount::umount(&mount_point).unwrap();
}
let msg = format!("{}: result: {:?}", msg, result);
if d.error_contains.is_empty() {
assert!(result.is_ok(), "{}", msg);
} else {
assert!(result.is_err(), "{}", msg);
let error_msg = format!("{}", result.unwrap_err());
assert!(error_msg.contains(d.error_contains), "{}", msg);
}
}
}
#[test]
fn test_mount_to_rootfs() {
#[derive(Debug)]
struct TestData<'a> {
test_user: TestUserType,
src: &'a str,
options: Vec<&'a str>,
error_contains: &'a str,
deny_mount_dir_permission: bool,
// if true, src will be placed under a temporary directory
mask_src: bool,
}
impl Default for TestData<'_> {
fn default() -> Self {
TestData {
test_user: TestUserType::Any,
src: "src",
options: vec![],
error_contains: "",
deny_mount_dir_permission: false,
mask_src: true,
}
}
}
let tests = &[
TestData {
test_user: TestUserType::NonRootOnly,
error_contains: "EPERM: Operation not permitted",
..Default::default()
},
TestData {
test_user: TestUserType::NonRootOnly,
src: "dev",
mask_src: false,
..Default::default()
},
TestData {
test_user: TestUserType::RootOnly,
..Default::default()
},
TestData {
test_user: TestUserType::NonRootOnly,
deny_mount_dir_permission: true,
error_contains: "could not create directory",
..Default::default()
},
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
skip_loop_by_user!(msg, d.test_user);
let drain = slog::Discard;
let logger = slog::Logger::root(drain, o!());
let tempdir = tempdir().unwrap();
let src = if d.mask_src {
tempdir.path().join(&d.src)
} else {
Path::new(d.src).to_path_buf()
};
let dest = tempdir.path().join("mnt");
let init_mount = InitMount {
fstype: "tmpfs",
src: src.to_str().unwrap(),
dest: dest.to_str().unwrap(),
options: d.options.clone(),
};
if d.deny_mount_dir_permission {
fs::set_permissions(dest.parent().unwrap(), fs::Permissions::from_mode(0o000))
.unwrap();
}
let result = mount_to_rootfs(&logger, &init_mount);
// restore permissions so tempdir can be cleaned up
if d.deny_mount_dir_permission {
fs::set_permissions(dest.parent().unwrap(), fs::Permissions::from_mode(0o755))
.unwrap();
}
if result.is_ok() && d.mask_src {
nix::mount::umount(&dest).unwrap();
}
let msg = format!("{}: result: {:?}", msg, result);
if d.error_contains.is_empty() {
assert!(result.is_ok(), "{}", msg);
} else {
assert!(result.is_err(), "{}", msg);
let error_msg = format!("{}", result.unwrap_err());
assert!(error_msg.contains(d.error_contains), "{}", msg);
}
}
}
#[test]
fn test_get_pagesize_and_size_from_option() {
let expected_pagesize = 2048;
@@ -1552,4 +1858,263 @@ mod tests {
}
}
}
#[test]
fn test_parse_mount_flags_and_options() {
#[derive(Debug)]
struct TestData<'a> {
options_vec: Vec<&'a str>,
result: (MsFlags, &'a str),
}
let tests = &[
TestData {
options_vec: vec![],
result: (MsFlags::empty(), ""),
},
TestData {
options_vec: vec!["ro"],
result: (MsFlags::MS_RDONLY, ""),
},
TestData {
options_vec: vec!["rw"],
result: (MsFlags::empty(), ""),
},
TestData {
options_vec: vec!["ro", "rw"],
result: (MsFlags::empty(), ""),
},
TestData {
options_vec: vec!["ro", "nodev"],
result: (MsFlags::MS_RDONLY | MsFlags::MS_NODEV, ""),
},
TestData {
options_vec: vec!["option1", "nodev", "option2"],
result: (MsFlags::MS_NODEV, "option1,option2"),
},
TestData {
options_vec: vec!["rbind", "", "ro"],
result: (MsFlags::MS_BIND | MsFlags::MS_REC | MsFlags::MS_RDONLY, ""),
},
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
let result = parse_mount_flags_and_options(d.options_vec.clone());
let msg = format!("{}: result: {:?}", msg, result);
let expected_result = (d.result.0, d.result.1.to_owned());
assert_eq!(expected_result, result, "{}", msg);
}
}
#[test]
fn test_set_ownership() {
skip_if_not_root!();
let logger = slog::Logger::root(slog::Discard, o!());
#[derive(Debug)]
struct TestData<'a> {
mount_path: &'a str,
fs_group: Option<FSGroup>,
read_only: bool,
expected_group_id: u32,
expected_permission: u32,
}
let tests = &[
TestData {
mount_path: "foo",
fs_group: None,
read_only: false,
expected_group_id: 0,
expected_permission: 0,
},
TestData {
mount_path: "rw_mount",
fs_group: Some(FSGroup {
group_id: 3000,
group_change_policy: FSGroupChangePolicy::Always,
unknown_fields: Default::default(),
cached_size: Default::default(),
}),
read_only: false,
expected_group_id: 3000,
expected_permission: RW_MASK | EXEC_MASK | MODE_SETGID,
},
TestData {
mount_path: "ro_mount",
fs_group: Some(FSGroup {
group_id: 3000,
group_change_policy: FSGroupChangePolicy::OnRootMismatch,
unknown_fields: Default::default(),
cached_size: Default::default(),
}),
read_only: true,
expected_group_id: 3000,
expected_permission: RO_MASK | EXEC_MASK | MODE_SETGID,
},
];
let tempdir = tempdir().expect("failed to create tmpdir");
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
let mount_dir = tempdir.path().join(d.mount_path);
fs::create_dir(&mount_dir)
.unwrap_or_else(|_| panic!("{}: failed to create root directory", msg));
let directory_mode = mount_dir.as_path().metadata().unwrap().permissions().mode();
let mut storage_data = Storage::new();
if d.read_only {
storage_data.set_options(RepeatedField::from_slice(&[
"foo".to_string(),
"ro".to_string(),
]));
}
if let Some(fs_group) = d.fs_group.clone() {
storage_data.set_fs_group(fs_group);
}
storage_data.mount_point = mount_dir.clone().into_os_string().into_string().unwrap();
let result = set_ownership(&logger, &storage_data);
assert!(result.is_ok());
assert_eq!(
mount_dir.as_path().metadata().unwrap().gid(),
d.expected_group_id
);
assert_eq!(
mount_dir.as_path().metadata().unwrap().permissions().mode(),
(directory_mode | d.expected_permission)
);
}
}
#[test]
fn test_recursive_ownership_change() {
skip_if_not_root!();
const COUNT: usize = 5;
#[derive(Debug)]
struct TestData<'a> {
// Directory where the recursive ownership change should be performed on
path: &'a str,
// User ID for ownership change
uid: u32,
// Group ID for ownership change
gid: u32,
// Set when the permission should be read-only
read_only: bool,
// The expected permission of all directories after ownership change
expected_permission_directory: u32,
// The expected permission of all files after ownership change
expected_permission_file: u32,
}
let tests = &[
TestData {
path: "no_gid_change",
uid: 0,
gid: 0,
read_only: false,
expected_permission_directory: 0,
expected_permission_file: 0,
},
TestData {
path: "rw_gid_change",
uid: 0,
gid: 3000,
read_only: false,
expected_permission_directory: RW_MASK | EXEC_MASK | MODE_SETGID,
expected_permission_file: RW_MASK,
},
TestData {
path: "ro_gid_change",
uid: 0,
gid: 3000,
read_only: true,
expected_permission_directory: RO_MASK | EXEC_MASK | MODE_SETGID,
expected_permission_file: RO_MASK,
},
];
let tempdir = tempdir().expect("failed to create tmpdir");
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
let mount_dir = tempdir.path().join(d.path);
fs::create_dir(&mount_dir)
.unwrap_or_else(|_| panic!("{}: failed to create root directory", msg));
let directory_mode = mount_dir.as_path().metadata().unwrap().permissions().mode();
let mut file_mode: u32 = 0;
// create testing directories and files
for n in 1..COUNT {
let nest_dir = mount_dir.join(format!("nested{}", n));
fs::create_dir(&nest_dir)
.unwrap_or_else(|_| panic!("{}: failed to create nest directory", msg));
for f in 1..COUNT {
let filename = nest_dir.join(format!("file{}", f));
File::create(&filename)
.unwrap_or_else(|_| panic!("{}: failed to create file", msg));
file_mode = filename.as_path().metadata().unwrap().permissions().mode();
}
}
let uid = if d.uid > 0 {
Some(Uid::from_raw(d.uid))
} else {
None
};
let gid = if d.gid > 0 {
Some(Gid::from_raw(d.gid))
} else {
None
};
let result = recursive_ownership_change(&mount_dir, uid, gid, d.read_only);
assert!(result.is_ok());
assert_eq!(mount_dir.as_path().metadata().unwrap().gid(), d.gid);
assert_eq!(
mount_dir.as_path().metadata().unwrap().permissions().mode(),
(directory_mode | d.expected_permission_directory)
);
for n in 1..COUNT {
let nest_dir = mount_dir.join(format!("nested{}", n));
for f in 1..COUNT {
let filename = nest_dir.join(format!("file{}", f));
let file = Path::new(&filename);
assert_eq!(file.metadata().unwrap().gid(), d.gid);
assert_eq!(
file.metadata().unwrap().permissions().mode(),
(file_mode | d.expected_permission_file)
);
}
let dir = Path::new(&nest_dir);
assert_eq!(dir.metadata().unwrap().gid(), d.gid);
assert_eq!(
dir.metadata().unwrap().permissions().mode(),
(directory_mode | d.expected_permission_directory)
);
}
}
}
}


@@ -3,7 +3,7 @@
// SPDX-License-Identifier: Apache-2.0
//
use anyhow::Result;
use anyhow::{ensure, Result};
use nix::errno::Errno;
use nix::fcntl::{self, OFlag};
use nix::sys::stat::Mode;
@@ -13,7 +13,7 @@ use tracing::instrument;
pub const RNGDEV: &str = "/dev/random";
pub const RNDADDTOENTCNT: libc::c_int = 0x40045201;
pub const RNDRESEEDRNG: libc::c_int = 0x5207;
pub const RNDRESEEDCRNG: libc::c_int = 0x5207;
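// 0x5207 is the kernel's RNDRESEEDCRNG ioctl (_IO('R', 0x07)); the old
// RNDRESEEDRNG identifier was a local misnomer, hence the rename.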
// Handle the differing ioctl(2) request types for different targets
#[cfg(target_env = "musl")]
@@ -24,6 +24,9 @@ type IoctlRequestType = libc::c_ulong;
#[instrument]
pub fn reseed_rng(data: &[u8]) -> Result<()> {
let len = data.len() as libc::c_long;
ensure!(len > 0, "missing entropy data");
fs::write(RNGDEV, data)?;
let f = {
@@ -41,8 +44,52 @@ pub fn reseed_rng(data: &[u8]) -> Result<()> {
};
Errno::result(ret).map(drop)?;
let ret = unsafe { libc::ioctl(f.as_raw_fd(), RNDRESEEDRNG as IoctlRequestType, 0) };
let ret = unsafe { libc::ioctl(f.as_raw_fd(), RNDRESEEDCRNG as IoctlRequestType, 0) };
Errno::result(ret).map(drop)?;
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use crate::skip_if_not_root;
use std::fs::File;
use std::io::prelude::*;
#[test]
fn test_reseed_rng() {
skip_if_not_root!();
const POOL_SIZE: usize = 512;
let mut f = File::open("/dev/urandom").unwrap();
let mut seed = [0; POOL_SIZE];
let n = f.read(&mut seed).unwrap();
// Ensure the buffer was filled.
assert!(n == POOL_SIZE);
let ret = reseed_rng(&seed);
assert!(ret.is_ok());
}
#[test]
fn test_reseed_rng_not_root() {
const POOL_SIZE: usize = 512;
let mut f = File::open("/dev/urandom").unwrap();
let mut seed = [0; POOL_SIZE];
let n = f.read(&mut seed).unwrap();
// Ensure the buffer was filled.
assert!(n == POOL_SIZE);
let ret = reseed_rng(&seed);
if nix::unistd::Uid::effective().is_root() {
assert!(ret.is_ok());
} else {
assert!(!ret.is_ok());
}
}
#[test]
fn test_reseed_rng_zero_data() {
let seed = [];
let ret = reseed_rng(&seed);
assert!(!ret.is_ok());
}
}

File diff suppressed because it is too large


@@ -32,6 +32,8 @@ use tokio::sync::oneshot;
use tokio::sync::Mutex;
use tracing::instrument;
pub const ERR_INVALID_CONTAINER_ID: &str = "Invalid container id";
type UeventWatcher = (Box<dyn UeventMatcher>, oneshot::Sender<Uevent>);
#[derive(Debug)]
@@ -149,7 +151,12 @@ impl Sandbox {
pub fn remove_sandbox_storage(&self, path: &str) -> Result<()> {
let mounts = vec![path.to_string()];
remove_mounts(&mounts)?;
fs::remove_dir_all(path).context(format!("failed to remove dir {:?}", path))?;
// "remove_dir" will fail if the mount point is backed by a read-only filesystem.
// This is the case with the device mapper snapshotter, where the block device is mounted
// directly at the sandbox path, which sits under the read-only kataShared path provided by the host.
if let Err(err) = fs::remove_dir(path) {
warn!(self.logger, "failed to remove dir {}, {:?}", path, err);
}
Ok(())
}
@@ -232,7 +239,7 @@ impl Sandbox {
pub fn find_container_process(&mut self, cid: &str, eid: &str) -> Result<&mut Process> {
let ctr = self
.get_container(cid)
.ok_or_else(|| anyhow!("Invalid container id"))?;
.ok_or_else(|| anyhow!(ERR_INVALID_CONTAINER_ID))?;
if eid.is_empty() {
return ctr
@@ -463,7 +470,7 @@ fn online_memory(logger: &Logger) -> Result<()> {
#[cfg(test)]
mod tests {
use super::Sandbox;
use super::*;
use crate::{mount::baremount, skip_if_not_root};
use anyhow::{anyhow, Error};
use nix::mount::MsFlags;
@@ -473,6 +480,7 @@ mod tests {
use rustjail::specconv::CreateOpts;
use slog::Logger;
use std::fs::{self, File};
use std::io::prelude::*;
use std::os::unix::fs::PermissionsExt;
use std::path::Path;
use tempfile::{tempdir, Builder, TempDir};
@@ -562,19 +570,8 @@ mod tests {
.remove_sandbox_storage(invalid_dir.to_str().unwrap())
.is_err());
// Now, create a double mount as this guarantees the directory cannot
// be deleted after the first umount.
for _i in 0..2 {
assert!(bind_mount(srcdir_path, destdir_path, &logger).is_ok());
}
assert!(bind_mount(srcdir_path, destdir_path, &logger).is_ok());
assert!(
s.remove_sandbox_storage(destdir_path).is_err(),
"Expect fail as deletion cannot happen due to the second mount."
);
// This time it should work as the previous two calls have undone the double
// mount.
assert!(s.remove_sandbox_storage(destdir_path).is_ok());
}
@@ -851,4 +848,259 @@ mod tests {
let p = s.find_container_process("not-exist-cid", "");
assert!(p.is_err(), "Expecting Error, Got {:?}", p);
}
#[tokio::test]
async fn test_find_process() {
let logger = slog::Logger::root(slog::Discard, o!());
let test_pids = [std::i32::MIN, -1, 0, 1, std::i32::MAX];
for test_pid in test_pids {
let mut s = Sandbox::new(&logger).unwrap();
let (mut linux_container, _root) = create_linuxcontainer();
let mut test_process = Process::new(
&logger,
&oci::Process::default(),
"this_is_a_test_process",
true,
1,
)
.unwrap();
// processes internally only have pids when manually set
test_process.pid = test_pid;
linux_container.processes.insert(test_pid, test_process);
s.add_container(linux_container);
let find_result = s.find_process(test_pid);
// test first if it finds anything
assert!(find_result.is_some(), "Should be able to find a process");
let found_process = find_result.unwrap();
// then test if it finds the correct process
assert_eq!(
found_process.pid, test_pid,
"Should be able to find correct process"
);
}
// to test for nonexistent pids, any pid that isn't the one set
// above should work, as linuxcontainer starts with no processes
let mut s = Sandbox::new(&logger).unwrap();
let nonexistent_test_pid = 1234;
let find_result = s.find_process(nonexistent_test_pid);
assert!(
find_result.is_none(),
"Shouldn't find a process for non existent pid"
);
}
#[tokio::test]
async fn test_online_resources() {
#[derive(Debug, Default)]
struct TestFile {
name: String,
content: String,
}
#[derive(Debug, Default)]
struct TestDirectory<'a> {
name: String,
files: &'a [TestFile],
}
#[derive(Debug)]
struct TestData<'a> {
directory_autogen_name: String,
number_autogen_directories: u32,
extra_directories: &'a [TestDirectory<'a>],
pattern: String,
to_enable: i32,
result: Result<i32>,
}
impl Default for TestData<'_> {
fn default() -> Self {
TestData {
directory_autogen_name: Default::default(),
number_autogen_directories: Default::default(),
extra_directories: Default::default(),
pattern: Default::default(),
to_enable: Default::default(),
result: Ok(Default::default()),
}
}
}
let tests = &[
// 4 well formed directories, request enabled 4,
// correct result 4 enabled, should pass
TestData {
directory_autogen_name: String::from("cpu"),
number_autogen_directories: 4,
pattern: String::from(r"cpu[0-9]+"),
to_enable: 4,
result: Ok(4),
..Default::default()
},
// 0 well formed directories, request enabled 4,
// correct result 0 enabled, should pass
TestData {
number_autogen_directories: 0,
to_enable: 4,
result: Ok(0),
..Default::default()
},
// 10 well formed directories, request enabled 4,
// correct result 4 enabled, should pass
TestData {
directory_autogen_name: String::from("cpu"),
number_autogen_directories: 10,
pattern: String::from(r"cpu[0-9]+"),
to_enable: 4,
result: Ok(4),
..Default::default()
},
// 0 well formed directories, request enabled 0,
// correct result 0 enabled, should pass
TestData {
number_autogen_directories: 0,
pattern: String::from(r"cpu[0-9]+"),
to_enable: 0,
result: Ok(0),
..Default::default()
},
// 4 well formed directories, 1 malformed (no online file),
// request enable 5, correct result 4
TestData {
directory_autogen_name: String::from("cpu"),
number_autogen_directories: 4,
pattern: String::from(r"cpu[0-9]+"),
extra_directories: &[TestDirectory {
name: String::from("cpu4"),
files: &[],
}],
to_enable: 5,
result: Ok(4),
},
// 3 malformed directories (no online files),
// request enable 3, correct result 0
TestData {
pattern: String::from(r"cpu[0-9]+"),
extra_directories: &[
TestDirectory {
name: String::from("cpu0"),
files: &[],
},
TestDirectory {
name: String::from("cpu1"),
files: &[],
},
TestDirectory {
name: String::from("cpu2"),
files: &[],
},
],
to_enable: 3,
result: Ok(0),
..Default::default()
},
// 1 malformed directories (online file with content "1"),
// request enable 1, correct result 0
TestData {
pattern: String::from(r"cpu[0-9]+"),
extra_directories: &[TestDirectory {
name: String::from("cpu0"),
files: &[TestFile {
name: SYSFS_ONLINE_FILE.to_string(),
content: String::from("1"),
}],
}],
to_enable: 1,
result: Ok(0),
..Default::default()
},
// 2 well formed directories, 1 malformed (online file with content "1"),
// request enable 3, correct result 2
TestData {
directory_autogen_name: String::from("cpu"),
number_autogen_directories: 2,
pattern: String::from(r"cpu[0-9]+"),
extra_directories: &[TestDirectory {
name: String::from("cpu2"),
files: &[TestFile {
name: SYSFS_ONLINE_FILE.to_string(),
content: String::from("1"),
}],
}],
to_enable: 3,
result: Ok(2),
},
];
let logger = slog::Logger::root(slog::Discard, o!());
let tmpdir = Builder::new().tempdir().unwrap();
let tmpdir_path = tmpdir.path().to_str().unwrap();
for (i, d) in tests.iter().enumerate() {
let current_test_dir_path = format!("{}/test_{}", tmpdir_path, i);
fs::create_dir(&current_test_dir_path).unwrap();
// create numbered directories and fill using root name
for j in 0..d.number_autogen_directories {
let subdir_path = format!(
"{}/{}{}",
current_test_dir_path, d.directory_autogen_name, j
);
let subfile_path = format!("{}/{}", subdir_path, SYSFS_ONLINE_FILE);
fs::create_dir(&subdir_path).unwrap();
let mut subfile = File::create(subfile_path).unwrap();
subfile.write_all(b"0").unwrap();
}
// create extra directories and fill to specification
for j in d.extra_directories {
let subdir_path = format!("{}/{}", current_test_dir_path, j.name);
fs::create_dir(&subdir_path).unwrap();
for file in j.files {
let subfile_path = format!("{}/{}", subdir_path, file.name);
let mut subfile = File::create(&subfile_path).unwrap();
subfile.write_all(file.content.as_bytes()).unwrap();
}
}
// run created directory structure against online_resources
let result = online_resources(&logger, &current_test_dir_path, &d.pattern, d.to_enable);
let mut msg = format!(
"test[{}]: {:?}, expected {}, actual {}",
i,
d,
d.result.is_ok(),
result.is_ok()
);
assert_eq!(result.is_ok(), d.result.is_ok(), "{}", msg);
if d.result.is_ok() {
let test_result_val = *d.result.as_ref().ok().unwrap();
let result_val = result.ok().unwrap();
msg = format!(
"test[{}]: {:?}, expected {}, actual {}",
i, d, test_result_val, result_val
);
assert_eq!(test_result_val, result_val, "{}", msg);
}
}
}
}


@@ -5,7 +5,14 @@
#![allow(clippy::module_inception)]
#[cfg(test)]
mod test_utils {
pub mod test_utils {
#[derive(Debug, PartialEq)]
pub enum TestUserType {
RootOnly,
NonRootOnly,
Any,
}
#[macro_export]
macro_rules! skip_if_root {
() => {
@@ -53,4 +60,40 @@ mod test_utils {
}
};
}
// Parameters:
//
// 1: expected Result
// 2: actual Result
// 3: string used to identify the test on error
#[macro_export]
macro_rules! assert_result {
($expected_result:expr, $actual_result:expr, $msg:expr) => {
if $expected_result.is_ok() {
let expected_value = $expected_result.as_ref().unwrap();
let actual_value = $actual_result.unwrap();
assert!(*expected_value == actual_value, "{}", $msg);
} else {
assert!($actual_result.is_err(), "{}", $msg);
let expected_error = $expected_result.as_ref().unwrap_err();
let expected_error_msg = format!("{:?}", expected_error);
let actual_error_msg = format!("{:?}", $actual_result.unwrap_err());
assert!(expected_error_msg == actual_error_msg, "{}", $msg);
}
};
}
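Usage mirrors the call sites elsewhere in this changeset (a hedged sketch; values are illustrative):

let expected: anyhow::Result<i32> = Ok(1);
let actual: anyhow::Result<i32> = Ok(1);
assert_result!(expected, actual, "test[0]: values should match");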
#[macro_export]
macro_rules! skip_loop_by_user {
($msg:expr, $user:expr) => {
if $user == TestUserType::RootOnly {
skip_loop_if_not_root!($msg);
} else if $user == TestUserType::NonRootOnly {
skip_loop_if_root!($msg);
}
};
}
}


@@ -6,6 +6,7 @@
#![allow(unknown_lints)]
use std::collections::HashMap;
use std::os::unix::fs::MetadataExt;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::SystemTime;
@@ -13,6 +14,7 @@ use std::time::SystemTime;
use anyhow::{ensure, Context, Result};
use async_recursion::async_recursion;
use nix::mount::{umount, MsFlags};
use nix::unistd::{Gid, Uid};
use slog::{debug, error, info, warn, Logger};
use thiserror::Error;
use tokio::fs;
@@ -80,7 +82,8 @@ impl Drop for Storage {
}
async fn copy(from: impl AsRef<Path>, to: impl AsRef<Path>) -> Result<()> {
if fs::symlink_metadata(&from).await?.file_type().is_symlink() {
let metadata = fs::symlink_metadata(&from).await?;
if metadata.file_type().is_symlink() {
// if source is a symlink, create new symlink with same link source. If
// the symlink exists, remove and create new one:
if fs::symlink_metadata(&to).await.is_ok() {
@@ -88,8 +91,15 @@ async fn copy(from: impl AsRef<Path>, to: impl AsRef<Path>) -> Result<()> {
}
fs::symlink(fs::read_link(&from).await?, &to).await?;
} else {
fs::copy(from, to).await?;
fs::copy(&from, &to).await?;
}
// preserve the source uid and gid to the destination.
nix::unistd::chown(
to.as_ref(),
Some(Uid::from_raw(metadata.uid())),
Some(Gid::from_raw(metadata.gid())),
)?;
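// Note: chown(2) follows symlinks, so in the symlink branch above this
// applies to the link target rather than to the new link itself.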
Ok(())
}
@@ -106,14 +116,29 @@ impl Storage {
async fn update_target(&self, logger: &Logger, source_path: impl AsRef<Path>) -> Result<()> {
let source_file_path = source_path.as_ref();
let metadata = source_file_path.symlink_metadata()?;
// if we are creating a directory: just create it, nothing more to do
if source_file_path.symlink_metadata()?.file_type().is_dir() {
if metadata.file_type().is_dir() {
let dest_file_path = self.make_target_path(&source_file_path)?;
fs::create_dir_all(&dest_file_path)
.await
.with_context(|| format!("Unable to mkdir all for {}", dest_file_path.display()))?;
// set the directory permissions to match the source directory permissions
fs::set_permissions(&dest_file_path, metadata.permissions())
.await
.with_context(|| {
format!("Unable to set permissions for {}", dest_file_path.display())
})?;
// preserve the source directory uid and gid to the destination.
nix::unistd::chown(
&dest_file_path,
Some(Uid::from_raw(metadata.uid())),
Some(Gid::from_raw(metadata.gid())),
)
.with_context(|| format!("Unable to set ownership for {}", dest_file_path.display()))?;
return Ok(());
}
@@ -504,6 +529,7 @@ mod tests {
use super::*;
use crate::mount::is_mounted;
use crate::skip_if_not_root;
use nix::unistd::{Gid, Uid};
use std::fs;
use std::thread;
@@ -895,20 +921,28 @@ mod tests {
#[tokio::test]
async fn test_copy() {
skip_if_not_root!();
// prepare tmp src/destination
let source_dir = tempfile::tempdir().unwrap();
let dest_dir = tempfile::tempdir().unwrap();
let uid = Uid::from_raw(10);
let gid = Gid::from_raw(200);
// verify copy of a regular file
let src_file = source_dir.path().join("file.txt");
let dst_file = dest_dir.path().join("file.txt");
fs::write(&src_file, "foo").unwrap();
nix::unistd::chown(&src_file, Some(uid), Some(gid)).unwrap();
copy(&src_file, &dst_file).await.unwrap();
// verify destination:
assert!(!fs::symlink_metadata(dst_file)
assert!(!fs::symlink_metadata(&dst_file)
.unwrap()
.file_type()
.is_symlink());
assert_eq!(fs::metadata(&dst_file).unwrap().uid(), uid.as_raw());
assert_eq!(fs::metadata(&dst_file).unwrap().gid(), gid.as_raw());
// verify copy of a symlink
let src_symlink_file = source_dir.path().join("symlink_file.txt");
@@ -916,7 +950,7 @@ mod tests {
tokio::fs::symlink(&src_file, &src_symlink_file)
.await
.unwrap();
copy(src_symlink_file, &dst_symlink_file).await.unwrap();
copy(&src_symlink_file, &dst_symlink_file).await.unwrap();
// verify destination:
assert!(fs::symlink_metadata(&dst_symlink_file)
.unwrap()
@@ -924,6 +958,8 @@ mod tests {
.is_symlink());
assert_eq!(fs::read_link(&dst_symlink_file).unwrap(), src_file);
assert_eq!(fs::read_to_string(&dst_symlink_file).unwrap(), "foo");
assert_ne!(fs::metadata(&dst_symlink_file).unwrap().uid(), uid.as_raw());
assert_ne!(fs::metadata(&dst_symlink_file).unwrap().gid(), gid.as_raw());
}
#[tokio::test]
@@ -1069,6 +1105,8 @@ mod tests {
#[tokio::test]
async fn watch_directory() {
skip_if_not_root!();
// Prepare source directory:
// ./tmp/1.txt
// ./tmp/A/B/2.txt
@@ -1079,7 +1117,9 @@ mod tests {
// A/C is an empty directory
let empty_dir = "A/C";
fs::create_dir_all(source_dir.path().join(empty_dir)).unwrap();
let path = source_dir.path().join(empty_dir);
fs::create_dir_all(&path).unwrap();
nix::unistd::chown(&path, Some(Uid::from_raw(10)), Some(Gid::from_raw(200))).unwrap();
// delay 20 ms between writes to files in order to ensure filesystem timestamps are unique
thread::sleep(Duration::from_millis(20));
@@ -1123,7 +1163,9 @@ mod tests {
// create another empty directory A/C/D
let empty_dir = "A/C/D";
fs::create_dir_all(source_dir.path().join(empty_dir)).unwrap();
let path = source_dir.path().join(empty_dir);
fs::create_dir_all(&path).unwrap();
nix::unistd::chown(&path, Some(Uid::from_raw(10)), Some(Gid::from_raw(200))).unwrap();
assert_eq!(entry.scan(&logger).await.unwrap(), 1);
assert!(dest_dir.path().join(empty_dir).exists());
}

src/libs/Cargo.lock (generated, 897 new lines)

@@ -0,0 +1,897 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "anyhow"
version = "1.0.57"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08f9b8508dccb7687a1d6c4ce66b2b0ecef467c94667de27d8d7fe1f8d2a9cdc"
[[package]]
name = "arc-swap"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c5d78ce20460b82d3fa150275ed9d55e21064fc7951177baacf86a145c4a4b1f"
[[package]]
name = "async-trait"
version = "0.1.53"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed6aa3524a2dfcf9fe180c51eae2b58738348d819517ceadf95789c51fff7600"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "autocfg"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
[[package]]
name = "bitflags"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
[[package]]
name = "byteorder"
version = "1.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
[[package]]
name = "bytes"
version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "206fdffcfa2df7cbe15601ef46c813fce0965eb3286db6b56c583b814b51c81c"
dependencies = [
"byteorder",
"iovec",
]
[[package]]
name = "bytes"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8"
[[package]]
name = "cc"
version = "1.0.73"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73"
dependencies = [
"libc",
"num-integer",
"num-traits",
"time",
"winapi",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e54ea8bc3fb1ee042f5aace6e3c6e025d3874866da222930f70ce62aceba0bfa"
dependencies = [
"cfg-if",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfcae03edb34f947e64acdb1c33ec169824e20657e9ecb61cef6c8c74dcb8120"
dependencies = [
"cfg-if",
"lazy_static",
]
[[package]]
name = "derive-new"
version = "0.5.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3418329ca0ad70234b9735dc4ceed10af4df60eff9c8e7b06cb5e520d92c3535"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "either"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
[[package]]
name = "fastrand"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "779d043b6a0b90cc4c0ed7ee380a6504394cee7efd7db050e3774eee387324b2"
dependencies = [
"instant",
]
[[package]]
name = "fixedbitset"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d"
[[package]]
name = "futures"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f73fe65f54d1e12b726f517d3e2135ca3125a437b6d998caf1962961f7172d9e"
dependencies = [
"futures-channel",
"futures-core",
"futures-executor",
"futures-io",
"futures-sink",
"futures-task",
"futures-util",
]
[[package]]
name = "futures-channel"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010"
dependencies = [
"futures-core",
"futures-sink",
]
[[package]]
name = "futures-core"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3"
[[package]]
name = "futures-executor"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9420b90cfa29e327d0429f19be13e7ddb68fa1cccb09d65e5706b8c7a749b8a6"
dependencies = [
"futures-core",
"futures-task",
"futures-util",
]
[[package]]
name = "futures-io"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b"
[[package]]
name = "futures-macro"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33c1e13800337f4d4d7a316bf45a567dbcb6ffe087f16424852d97e97a91f512"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "futures-sink"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21163e139fa306126e6eedaf49ecdb4588f939600f0b1e770f4205ee4b7fa868"
[[package]]
name = "futures-task"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a"
[[package]]
name = "futures-util"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a"
dependencies = [
"futures-channel",
"futures-core",
"futures-io",
"futures-macro",
"futures-sink",
"futures-task",
"memchr",
"pin-project-lite",
"pin-utils",
"slab",
]
[[package]]
name = "hashbrown"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
[[package]]
name = "heck"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c"
dependencies = [
"unicode-segmentation",
]
[[package]]
name = "indexmap"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f647032dfaa1f8b6dc29bd3edb7bbef4861b8b8007ebb118d6db284fd59f6ee"
dependencies = [
"autocfg",
"hashbrown",
]
[[package]]
name = "instant"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
dependencies = [
"cfg-if",
]
[[package]]
name = "iovec"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2b3ea6ff95e175473f8ffe6a7eb7c00d054240321b84c57051175fe3c1e075e"
dependencies = [
"libc",
]
[[package]]
name = "itertools"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35"
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.124"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21a41fed9d98f27ab1c6d161da622a4fa35e8a54a8adc24bbf3ddd0ef70b0e50"
[[package]]
name = "log"
version = "0.4.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6389c490849ff5bc16be905ae24bc913a9c8892e19b2341dbc175e14c341c2b8"
dependencies = [
"cfg-if",
]
[[package]]
name = "logging"
version = "0.1.0"
dependencies = [
"serde_json",
"slog",
"slog-async",
"slog-json",
"slog-scope",
"tempfile",
]
[[package]]
name = "memchr"
version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
[[package]]
name = "memoffset"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce"
dependencies = [
"autocfg",
]
[[package]]
name = "mio"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52da4364ffb0e4fe33a9841a98a3f3014fb964045ce4f7a45a398243c8d6b0c9"
dependencies = [
"libc",
"log",
"miow",
"ntapi",
"wasi 0.11.0+wasi-snapshot-preview1",
"winapi",
]
[[package]]
name = "miow"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9f1c5b025cda876f66ef43a113f91ebc9f4ccef34843000e0adf6ebbab84e21"
dependencies = [
"winapi",
]
[[package]]
name = "multimap"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a"
[[package]]
name = "nix"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f5e06129fb611568ef4e868c14b326274959aa70ff7776e9d55323531c374945"
dependencies = [
"bitflags",
"cc",
"cfg-if",
"libc",
"memoffset",
]
[[package]]
name = "nix"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f866317acbd3a240710c63f065ffb1e4fd466259045ccb504130b7f668f35c6"
dependencies = [
"bitflags",
"cc",
"cfg-if",
"libc",
"memoffset",
]
[[package]]
name = "ntapi"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c28774a7fd2fbb4f0babd8237ce554b73af68021b5f695a3cebd6c59bac0980f"
dependencies = [
"winapi",
]
[[package]]
name = "num-integer"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db"
dependencies = [
"autocfg",
"num-traits",
]
[[package]]
name = "num-traits"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"
dependencies = [
"autocfg",
]
[[package]]
name = "once_cell"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5"
[[package]]
name = "petgraph"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "467d164a6de56270bd7c4d070df81d07beace25012d5103ced4e9ff08d6afdb7"
dependencies = [
"fixedbitset",
"indexmap",
]
[[package]]
name = "pin-project-lite"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e280fbe77cc62c91527259e9442153f4688736748d24660126286329742b4c6c"
[[package]]
name = "pin-utils"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "proc-macro2"
version = "1.0.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec757218438d5fda206afc041538b2f6d889286160d649a86a24d37e1235afd1"
dependencies = [
"unicode-xid",
]
[[package]]
name = "prost"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de5e2533f59d08fcf364fd374ebda0692a70bd6d7e66ef97f306f45c6c5d8020"
dependencies = [
"bytes 1.1.0",
"prost-derive",
]
[[package]]
name = "prost-build"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "355f634b43cdd80724ee7848f95770e7e70eefa6dcf14fea676216573b8fd603"
dependencies = [
"bytes 1.1.0",
"heck",
"itertools",
"log",
"multimap",
"petgraph",
"prost",
"prost-types",
"tempfile",
"which",
]
[[package]]
name = "prost-derive"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "600d2f334aa05acb02a755e217ef1ab6dea4d51b58b7846588b747edec04efba"
dependencies = [
"anyhow",
"itertools",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "prost-types"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "603bbd6394701d13f3f25aada59c7de9d35a6a5887cfc156181234a44002771b"
dependencies = [
"bytes 1.1.0",
"prost",
]
[[package]]
name = "protobuf"
version = "2.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e86d370532557ae7573551a1ec8235a0f8d6cb276c7c9e6aa490b511c447485"
dependencies = [
"serde",
"serde_derive",
]
[[package]]
name = "protobuf-codegen"
version = "2.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de113bba758ccf2c1ef816b127c958001b7831136c9bc3f8e9ec695ac4e82b0c"
dependencies = [
"protobuf",
]
[[package]]
name = "protobuf-codegen-pure"
version = "2.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d1a4febc73bf0cada1d77c459a0c8e5973179f1cfd5b0f1ab789d45b17b6440"
dependencies = [
"protobuf",
"protobuf-codegen",
]
[[package]]
name = "protocols"
version = "0.1.0"
dependencies = [
"async-trait",
"protobuf",
"serde",
"serde_json",
"ttrpc",
"ttrpc-codegen",
]
[[package]]
name = "quote"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1"
dependencies = [
"proc-macro2",
]
[[package]]
name = "redox_syscall"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff"
dependencies = [
"bitflags",
]
[[package]]
name = "remove_dir_all"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
dependencies = [
"winapi",
]
[[package]]
name = "ryu"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f"
[[package]]
name = "safe-path"
version = "0.1.0"
dependencies = [
"libc",
"tempfile",
]
[[package]]
name = "serde"
version = "1.0.133"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97565067517b60e2d1ea8b268e59ce036de907ac523ad83a0475da04e818989a"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.133"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed201699328568d8d08208fdd080e3ff594e6c422e438b6705905da01005d537"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.75"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c059c05b48c5c0067d4b4b2b4f0732dd65feb52daf7e0ea09cd87e7dadc1af79"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "slab"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32"
[[package]]
name = "slog"
version = "2.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8347046d4ebd943127157b94d63abb990fcf729dc4e9978927fdf4ac3c998d06"
[[package]]
name = "slog-async"
version = "2.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "766c59b252e62a34651412870ff55d8c4e6d04df19b43eecb2703e417b097ffe"
dependencies = [
"crossbeam-channel",
"slog",
"take_mut",
"thread_local",
]
[[package]]
name = "slog-json"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52e9b96fb6b5e80e371423b4aca6656eb537661ce8f82c2697e619f8ca85d043"
dependencies = [
"chrono",
"serde",
"serde_json",
"slog",
]
[[package]]
name = "slog-scope"
version = "4.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f95a4b4c3274cd2869549da82b57ccc930859bdbf5bcea0424bc5f140b3c786"
dependencies = [
"arc-swap",
"lazy_static",
"slog",
]
[[package]]
name = "socket2"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "66d72b759436ae32898a2af0a14218dbf55efde3feeb170eb623637db85ee1e0"
dependencies = [
"libc",
"winapi",
]
[[package]]
name = "syn"
version = "1.0.91"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b683b2b825c8eef438b77c36a06dc262294da3d5a5813fac20da149241dcd44d"
dependencies = [
"proc-macro2",
"quote",
"unicode-xid",
]
[[package]]
name = "take_mut"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f764005d11ee5f36500a149ace24e00e3da98b0158b3e2d53a7495660d3f4d60"
[[package]]
name = "tempfile"
version = "3.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4"
dependencies = [
"cfg-if",
"fastrand",
"libc",
"redox_syscall",
"remove_dir_all",
"winapi",
]
[[package]]
name = "thiserror"
version = "1.0.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "thread_local"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8018d24e04c95ac8790716a5987d0fec4f8b27249ffa0f7d33f1369bdfb88cbd"
dependencies = [
"once_cell",
]
[[package]]
name = "time"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255"
dependencies = [
"libc",
"wasi 0.10.0+wasi-snapshot-preview1",
"winapi",
]
[[package]]
name = "tokio"
version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2af73ac49756f3f7c01172e34a23e5d0216f6c32333757c2c61feb2bbff5a5ee"
dependencies = [
"bytes 1.1.0",
"libc",
"memchr",
"mio",
"pin-project-lite",
"socket2",
"tokio-macros",
"winapi",
]
[[package]]
name = "tokio-macros"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b557f72f448c511a979e2564e55d74e6c4432fc96ff4f6241bc6bded342643b7"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "tokio-vsock"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e0723fc001950a3b018947b05eeb45014fd2b7c6e8f292502193ab74486bdb6"
dependencies = [
"bytes 0.4.12",
"futures",
"libc",
"tokio",
"vsock",
]
[[package]]
name = "ttrpc"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "66a973ce6d5eaa20c173635b29ffb660dafbc7ef109172c0015ba44e47a23711"
dependencies = [
"async-trait",
"byteorder",
"futures",
"libc",
"log",
"nix 0.20.2",
"protobuf",
"protobuf-codegen-pure",
"thiserror",
"tokio",
"tokio-vsock",
]
[[package]]
name = "ttrpc-codegen"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "809eda4e459820237104e4b61d6b41bbe6c9e1ce6adf4057955e6e6722a90408"
dependencies = [
"protobuf",
"protobuf-codegen",
"protobuf-codegen-pure",
"ttrpc-compiler",
]
[[package]]
name = "ttrpc-compiler"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2978ed3fa047d8fd55cbeb4d4a61d461fb3021a90c9618519c73ce7e5bb66c15"
dependencies = [
"derive-new",
"prost",
"prost-build",
"prost-types",
"protobuf",
"protobuf-codegen",
"tempfile",
]
[[package]]
name = "unicode-segmentation"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99"
[[package]]
name = "unicode-xid"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
[[package]]
name = "vsock"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e32675ee2b3ce5df274c0ab52d19b28789632406277ca26bffee79a8e27dc133"
dependencies = [
"libc",
"nix 0.23.1",
]
[[package]]
name = "wasi"
version = "0.10.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "which"
version = "4.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c4fb54e6113b6a8772ee41c3404fb0301ac79604489467e0a9ce1f3e97c24ae"
dependencies = [
"either",
"lazy_static",
"libc",
]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

src/libs/Cargo.toml Normal file

@@ -0,0 +1,7 @@
[workspace]
members = [
"logging",
"safe-path",
"protocols",
]
resolver = "2"

src/libs/README.md Normal file

@@ -0,0 +1,10 @@
The `src/libs` directory hosts library crates which may be shared by multiple Kata Containers components
or published to [`crates.io`](https://crates.io/index.html).
### Library Crates
Currently it provides the following library crates:
| Library | Description |
|-|-|
| [logging](logging/) | Facilities to set up the logging subsystem, based on slog. |
| [safe-path](safe-path/) | Utilities to safely resolve filesystem paths. |


@@ -1,321 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "arc-swap"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c5d78ce20460b82d3fa150275ed9d55e21064fc7951177baacf86a145c4a4b1f"
[[package]]
name = "autocfg"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73"
dependencies = [
"libc",
"num-integer",
"num-traits",
"time",
"winapi",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4"
dependencies = [
"cfg-if",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db"
dependencies = [
"cfg-if",
"lazy_static",
]
[[package]]
name = "getrandom"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "itoa"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35"
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.112"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b03d17f364a3a042d5e5d46b053bbbf82c92c9430c592dd4c064dc6ee997125"
[[package]]
name = "logging"
version = "0.1.0"
dependencies = [
"serde_json",
"slog",
"slog-async",
"slog-json",
"slog-scope",
"tempfile",
]
[[package]]
name = "num-integer"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db"
dependencies = [
"autocfg",
"num-traits",
]
[[package]]
name = "num-traits"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"
dependencies = [
"autocfg",
]
[[package]]
name = "once_cell"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5"
[[package]]
name = "ppv-lite86"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed0cfbc8191465bed66e1718596ee0b0b35d5ee1f41c5df2189d0fe8bde535ba"
[[package]]
name = "rand"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
"rand_hc",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7"
dependencies = [
"getrandom",
]
[[package]]
name = "rand_hc"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d51e9f596de227fda2ea6c84607f5558e196eeaf43c986b724ba4fb8fdf497e7"
dependencies = [
"rand_core",
]
[[package]]
name = "redox_syscall"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff"
dependencies = [
"bitflags",
]
[[package]]
name = "remove_dir_all"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
dependencies = [
"winapi",
]
[[package]]
name = "ryu"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f"
[[package]]
name = "serde"
version = "1.0.131"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4ad69dfbd3e45369132cc64e6748c2d65cdfb001a2b1c232d128b4ad60561c1"
[[package]]
name = "serde_json"
version = "1.0.73"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bcbd0344bc6533bc7ec56df11d42fb70f1b912351c0825ccb7211b59d8af7cf5"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "slog"
version = "2.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8347046d4ebd943127157b94d63abb990fcf729dc4e9978927fdf4ac3c998d06"
[[package]]
name = "slog-async"
version = "2.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "766c59b252e62a34651412870ff55d8c4e6d04df19b43eecb2703e417b097ffe"
dependencies = [
"crossbeam-channel",
"slog",
"take_mut",
"thread_local",
]
[[package]]
name = "slog-json"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52e9b96fb6b5e80e371423b4aca6656eb537661ce8f82c2697e619f8ca85d043"
dependencies = [
"chrono",
"serde",
"serde_json",
"slog",
]
[[package]]
name = "slog-scope"
version = "4.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f95a4b4c3274cd2869549da82b57ccc930859bdbf5bcea0424bc5f140b3c786"
dependencies = [
"arc-swap",
"lazy_static",
"slog",
]
[[package]]
name = "take_mut"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f764005d11ee5f36500a149ace24e00e3da98b0158b3e2d53a7495660d3f4d60"
[[package]]
name = "tempfile"
version = "3.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22"
dependencies = [
"cfg-if",
"libc",
"rand",
"redox_syscall",
"remove_dir_all",
"winapi",
]
[[package]]
name = "thread_local"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8018d24e04c95ac8790716a5987d0fec4f8b27249ffa0f7d33f1369bdfb88cbd"
dependencies = [
"once_cell",
]
[[package]]
name = "time"
version = "0.1.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438"
dependencies = [
"libc",
"winapi",
]
[[package]]
name = "wasi"
version = "0.10.2+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"


@@ -381,7 +381,7 @@ pub struct LinuxMemory {
#[serde(default, skip_serializing_if = "Option::is_none", rename = "kernelTCP")]
pub kernel_tcp: Option<i64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
- pub swappiness: Option<i64>,
+ pub swappiness: Option<u64>,
#[serde(
default,
skip_serializing_if = "Option::is_none",


@@ -1,6 +1,7 @@
Cargo.lock
src/agent.rs
src/agent_ttrpc.rs
src/csi.rs
src/empty.rs
src/health.rs
src/health_ttrpc.rs


@@ -51,6 +51,8 @@ service AgentService {
rpc ListInterfaces(ListInterfacesRequest) returns(Interfaces);
rpc ListRoutes(ListRoutesRequest) returns (Routes);
rpc AddARPNeighbors(AddARPNeighborsRequest) returns (google.protobuf.Empty);
rpc GetIPTables(GetIPTablesRequest) returns (GetIPTablesResponse);
rpc SetIPTables(SetIPTablesRequest) returns (SetIPTablesResponse);
// observability
rpc GetMetrics(GetMetricsRequest) returns (Metrics);
@@ -328,6 +330,28 @@ message AddARPNeighborsRequest {
ARPNeighbors neighbors = 1;
}
message GetIPTablesRequest {
bool is_ipv6 = 1;
}
message GetIPTablesResponse{
// raw stdout from iptables-save or ip6tables-save
bytes data = 1;
}
message SetIPTablesRequest {
bool is_ipv6 = 1;
// iptables, in raw format expected to be passed to stdin
// of iptables-restore or ip6tables-restore
bytes data = 2;
}
message SetIPTablesResponse{
// raw stdout from iptables-restore or ip6tables-restore
bytes data = 1;
}
message OnlineCPUMemRequest {
// Wait specifies if the caller waits for the agent to online all resources.
// If true the agent returns once all resources have been connected, otherwise all
@@ -399,6 +423,17 @@ message SetGuestDateTimeRequest {
int64 Usec = 2;
}
// FSGroup consists of the group id and group ownership change policy
// that a volume should have its ownership changed to.
message FSGroup {
// GroupID is the ID that the group ownership of the
// files in the mounted volume will need to be changed to.
uint32 group_id = 2;
// GroupChangePolicy specifies the policy for applying group id
// ownership change on a mounted volume.
types.FSGroupChangePolicy group_change_policy = 3;
}
// Storage represents both the rootfs of the container, and any volume that
// could have been defined through the Mount list of the OCI specification.
message Storage {
@@ -422,11 +457,14 @@ message Storage {
// device, "9p" for shared filesystem, or "tmpfs" for shared /dev/shm.
string fstype = 4;
// Options describes the additional options that might be needed to
- // mount properly the storage filesytem.
+ // mount properly the storage filesystem.
repeated string options = 5;
// MountPoint refers to the path where the storage should be mounted
// inside the VM.
string mount_point = 6;
// FSGroup consists of the group ID and group ownership change policy
// that the mounted volume must have its group ID changed to when specified.
FSGroup fs_group = 7;
}
// Device represents only the devices that could have been defined through the

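The new RPCs map onto the standard `iptables-save`/`iptables-restore` tools: `GetIPTables` returns the raw stdout of a `-save` invocation, while `SetIPTables` feeds the request bytes to the stdin of a `-restore` invocation. A minimal sketch of that mapping (illustrative helper names only, not the actual agent handler):

```rust
use std::io::Write;
use std::process::{Command, Stdio};

// Illustrative helper: fetch the current tables, as GetIPTables does.
fn get_iptables(is_ipv6: bool) -> std::io::Result<Vec<u8>> {
    let bin = if is_ipv6 { "ip6tables-save" } else { "iptables-save" };
    // GetIPTablesResponse.data carries the raw stdout of the save tool.
    Ok(Command::new(bin).output()?.stdout)
}

// Illustrative helper: apply tables, as SetIPTables does.
fn set_iptables(is_ipv6: bool, data: &[u8]) -> std::io::Result<Vec<u8>> {
    let bin = if is_ipv6 { "ip6tables-restore" } else { "iptables-restore" };
    let mut child = Command::new(bin)
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .spawn()?;
    // SetIPTablesRequest.data is written to the restore tool's stdin.
    child.stdin.take().unwrap().write_all(data)?;
    // SetIPTablesResponse.data carries the raw stdout of the restore tool.
    Ok(child.wait_with_output()?.stdout)
}

fn main() -> std::io::Result<()> {
    // Needs the iptables tools installed (and typically root) to run.
    let saved = get_iptables(false)?;
    set_iptables(false, &saved).map(|_| ())
}
```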

@@ -16,6 +16,15 @@ enum IPFamily {
v6 = 1;
}
// FSGroupChangePolicy defines the policy for applying group id ownership change on a mounted volume.
enum FSGroupChangePolicy {
// Always indicates that the volume ownership will always be changed.
Always = 0;
// OnRootMismatch indicates that the volume ownership will be changed only
// when the ownership of the root directory does not match with the expected group id for the volume.
OnRootMismatch = 1;
}
message IPAddress {
IPFamily family = 1;
string address = 2;

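To make the two policy values concrete, here is a hypothetical sketch of the check an agent-side handler could perform before recursively changing group ownership of a mounted volume (`needs_group_change` is an illustrative helper, not part of the agent):

```rust
use std::os::unix::fs::MetadataExt;
use std::path::Path;

// Illustrative only: decide whether group ownership must be changed,
// mirroring the FSGroupChangePolicy semantics above.
fn needs_group_change(
    volume_root: &Path,
    group_id: u32,
    on_root_mismatch: bool,
) -> std::io::Result<bool> {
    if !on_root_mismatch {
        // Always: ownership is changed unconditionally.
        return Ok(true);
    }
    // OnRootMismatch: change only when the root directory's gid differs.
    Ok(std::fs::metadata(volume_root)?.gid() != group_id)
}

fn main() -> std::io::Result<()> {
    println!("{}", needs_group_change(Path::new("/tmp"), 0, true)?);
    Ok(())
}
```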

@@ -0,0 +1,18 @@
[package]
name = "safe-path"
version = "0.1.0"
description = "A library to safely handle file system paths for container runtimes"
keywords = ["kata", "container", "path", "securejoin"]
categories = ["parser-implementations", "filesystem"]
authors = ["The Kata Containers community <kata-dev@lists.katacontainers.io>"]
repository = "https://github.com/kata-containers/kata-containers.git"
homepage = "https://katacontainers.io/"
readme = "README.md"
license = "Apache-2.0"
edition = "2018"
[dependencies]
libc = "0.2.100"
[dev-dependencies]
tempfile = "3.2.0"


@@ -0,0 +1,21 @@
Safe Path
====================
A library to safely handle filesystem paths, typically for container runtimes.
Path handling is exposed to a range of attacks, such as symlink-based attacks and
TOCTTOU (time-of-check to time-of-use) attacks. The `safe-path` crate provides several
functions and utility structures to protect against path-resolution attacks.
## Support
**Operating Systems**:
- Linux
## Reference
- [`filepath-securejoin`](https://github.com/cyphar/filepath-securejoin): secure_join() written in Go.
- [CVE-2021-30465](https://github.com/advisories/GHSA-c3xm-pvg7-gh7r): symlink related TOCTOU flaw in `runC`.
## License
This code is licensed under [Apache-2.0](../../../LICENSE).

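A minimal usage sketch (assuming an existing `/mnt/rootfs` directory; the path is a placeholder):

```rust
use safe_path::scoped_join;

fn main() -> std::io::Result<()> {
    // A hostile "../../../" prefix cannot escape the rootfs directory:
    // the result is still scoped under /mnt/rootfs.
    let path = scoped_join("/mnt/rootfs", "../../../etc/passwd")?;
    assert!(path.starts_with("/mnt/rootfs"));
    Ok(())
}
```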

@@ -0,0 +1,65 @@
// Copyright (c) 2022 Alibaba Cloud
//
// SPDX-License-Identifier: Apache-2.0
//
//! A library to safely handle filesystem paths, typically for container runtimes.
//!
//! Linux [mount namespace](https://man7.org/linux/man-pages/man7/mount_namespaces.7.html)
//! provides isolation of the list of mounts seen by the processes in each
//! [namespace](https://man7.org/linux/man-pages/man7/namespaces.7.html) instance.
//! Thus, the processes in each of the mount namespace instances will see distinct single-directory
//! hierarchies.
//!
//! Containers are used to isolate workloads from the host system. Containers on Linux systems
//! depend on the mount namespace to build an isolated root filesystem for each container,
//! thus protecting the host and containers from each other. When creating containers, the container
//! runtime needs to set up filesystem mounts for container rootfs/volumes. Configuration for
//! mounts/paths may be indirectly controlled by end users through:
//! - container images
//! - Kubernetes pod specifications
//! - hook command line arguments
//!
//! This volume configuration information may be controlled by end users or malicious attackers,
//! so it must not be trusted by container runtimes. When the container runtime is preparing the
//! mount namespace for a container, it must be very careful to validate user-supplied
//! configuration information and ensure that data outside the container rootfs directory cannot
//! be affected by the container. There are several types of attacks related to the container
//! mount namespace:
//! - symlink based attack
//! - Time of check to time of use (TOCTTOU)
//!
//! This crate provides several mechanisms for container runtimes to safely handle filesystem paths
//! when preparing mount namespace for containers.
//! - [scoped_join()](crate::scoped_join()): safely join `unsafe_path` to `root`, and ensure
//! `unsafe_path` is scoped under `root`.
//! - [scoped_resolve()](crate::scoped_resolve()): resolve `unsafe_path` to a relative path,
//! rooted at and constrained by `root`.
//! - [struct PinnedPathBuf](crate::PinnedPathBuf): safe version of `PathBuf` to protect from
//! TOCTTOU style of attacks, which ensures:
//! - the value of [`PinnedPathBuf::as_path()`] never changes.
//! - the path returned by [`PinnedPathBuf::as_path()`] is always a symlink.
//! - the filesystem object referenced by the symlink [`PinnedPathBuf::as_path()`] never changes.
//! - the value of [`PinnedPathBuf::target()`] never changes.
//! - [struct ScopedDirBuilder](crate::ScopedDirBuilder): safe version of `DirBuilder` to protect
//! from symlink race and TOCTTOU style of attacks, which enhances security by:
//! - ensuring the new directories are created under a specified `root` directory.
//! - avoiding symlink race attacks during making directories.
//! - returning a [PinnedPathBuf] for the last level of directory, so it could be used for other
//! operations safely.
//!
//! The work is inspired by:
//! - [`filepath-securejoin`](https://github.com/cyphar/filepath-securejoin): secure_join() written
//! in Go.
//! - [CVE-2021-30465](https://github.com/advisories/GHSA-c3xm-pvg7-gh7r): symlink related TOCTOU
//! flaw in `runC`.
#![deny(missing_docs)]
mod pinned_path_buf;
pub use pinned_path_buf::PinnedPathBuf;
mod scoped_dir_builder;
pub use scoped_dir_builder::ScopedDirBuilder;
mod scoped_path_resolver;
pub use scoped_path_resolver::{scoped_join, scoped_resolve};

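To illustrate the difference between the two resolver entry points, a small sketch (illustrative paths; the printed outputs assume the semantics described in the module docs above):

```rust
use safe_path::{scoped_join, scoped_resolve};

fn main() -> std::io::Result<()> {
    std::fs::create_dir_all("/tmp/rootfs/etc")?;
    // scoped_join: an absolute path, guaranteed to stay under the root.
    let joined = scoped_join("/tmp/rootfs", "/etc/../../etc/hosts")?;
    println!("joined   = {}", joined.display()); // e.g. /tmp/rootfs/etc/hosts
    // scoped_resolve: the same result, expressed relative to the root.
    let resolved = scoped_resolve("/tmp/rootfs", "/etc/../../etc/hosts")?;
    println!("resolved = {}", resolved.display()); // e.g. etc/hosts
    Ok(())
}
```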

@@ -0,0 +1,444 @@
// Copyright (c) 2022 Alibaba Cloud
//
// SPDX-License-Identifier: Apache-2.0
//
use std::ffi::{CString, OsStr};
use std::fs::{self, File, Metadata, OpenOptions};
use std::io::{Error, ErrorKind, Result};
use std::ops::Deref;
use std::os::unix::ffi::OsStrExt;
use std::os::unix::fs::OpenOptionsExt;
use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
use std::path::{Component, Path, PathBuf};
use crate::scoped_join;
/// A safe version of [`PathBuf`] pinned to an underlying filesystem object to protect from
/// `TOCTTOU` style of attacks.
///
/// A [`PinnedPathBuf`] is a resolved path buffer pinned to an underlying filesystem object, which
/// guarantees:
/// - the value of [`PinnedPathBuf::as_path()`] never changes.
/// - the path returned by [`PinnedPathBuf::as_path()`] is always a symlink.
/// - the filesystem object referenced by the symlink [`PinnedPathBuf::as_path()`] never changes.
/// - the value of [`PinnedPathBuf::target()`] never changes.
///
/// Note:
/// - Though the filesystem object referenced by the symlink [`PinnedPathBuf::as_path()`] never
/// changes, the value of `fs::read_link(PinnedPathBuf::as_path())` may change due to filesystem
/// operations.
/// - The value of [`PinnedPathBuf::target()`] is a cached version of
/// `fs::read_link(PinnedPathBuf::as_path())` generated when creating the `PinnedPathBuf` object.
/// - It's a sign of possible attacks if [`PinnedPathBuf::target()`] doesn't match
/// `fs::read_link(PinnedPathBuf::as_path())`.
/// - Once the [`PinnedPathBuf`] object gets dropped, the [`Path`] returned by
/// [`PinnedPathBuf::as_path()`] becomes invalid.
///
/// With normal [`PathBuf`], there's a race window for attackers between time to validate a path and
/// time to use the path. An attacker may maliciously change filesystem object referenced by the
/// path by using symlinks to compose an attack.
///
/// The [`PinnedPathBuf`] is introduced to protect from such attacks, by using the
/// `/proc/self/fd/xxx` files on Linux. The `/proc/self/fd/xxx` file on Linux is a symlink to the
/// real target corresponding to the process's file descriptor `xxx`. And the target filesystem
/// object referenced by the symlink will be kept stable until the file descriptor has been closed.
/// Combined with `O_PATH`, a safe version of `PathBuf` could be built by:
/// - Generate a safe path from `root` and `path` by using [`crate::scoped_join()`].
/// - Open the safe path with O_PATH | O_CLOEXEC flags, say the fd number is `fd_num`.
/// - Read the symlink target of `/proc/self/fd/fd_num`.
/// - Compare the symlink target with the safe path; it's safe if the two paths are equal.
/// - Use the proc file path as a safe version of [`PathBuf`].
/// - Close the `fd_num` when dropping the [`PinnedPathBuf`] object.
#[derive(Debug)]
pub struct PinnedPathBuf {
handle: File,
path: PathBuf,
target: PathBuf,
}
impl PinnedPathBuf {
/// Create a [`PinnedPathBuf`] object from `root` and `path`.
///
/// The `path` must be a subdirectory of `root`, otherwise an error will be returned.
pub fn new<R: AsRef<Path>, U: AsRef<Path>>(root: R, path: U) -> Result<Self> {
let path = scoped_join(root, path)?;
Self::from_path(path)
}
/// Create a `PinnedPathBuf` from `path`.
///
/// If the resolved value of `path` doesn't equal `path`, an error will be returned.
pub fn from_path<P: AsRef<Path>>(orig_path: P) -> Result<Self> {
let orig_path = orig_path.as_ref();
let handle = Self::open_by_path(orig_path)?;
Self::new_from_file(handle, orig_path)
}
/// Try to clone the [`PinnedPathBuf`] object.
pub fn try_clone(&self) -> Result<Self> {
let fd = unsafe { libc::dup(self.path_fd()) };
if fd < 0 {
Err(Error::last_os_error())
} else {
Ok(Self {
handle: unsafe { File::from_raw_fd(fd) },
path: Self::get_proc_path(fd),
target: self.target.clone(),
})
}
}
/// Return the underlying file descriptor representing the pinned path.
///
/// Following operations are supported by the returned `RawFd`:
/// - fchdir
/// - fstat/fstatfs
/// - openat/linkat/fchownat/fstatat/readlinkat/mkdirat/*at
/// - fcntl(F_GETFD, F_SETFD, F_GETFL)
pub fn path_fd(&self) -> RawFd {
self.handle.as_raw_fd()
}
/// Get the symlink path referring the target filesystem object.
pub fn as_path(&self) -> &Path {
self.path.as_path()
}
/// Get the cached real path of the target filesystem object.
///
/// The target path is cached version of `fs::read_link(PinnedPathBuf::as_path())` generated
/// when creating the `PinnedPathBuf` object. On the other hand, the value of
/// `fs::read_link(PinnedPathBuf::as_path())` may change due to underlying filesystem operations.
/// So it's a sign of possible attacks if `PinnedPathBuf::target()` does not match
/// `fs::read_link(PinnedPathBuf::as_path())`.
pub fn target(&self) -> &Path {
&self.target
}
/// Get [`Metadata`] about the path handle.
pub fn metadata(&self) -> Result<Metadata> {
self.handle.metadata()
}
/// Open a direct child of the filesystem object referenced by the `PinnedPathBuf` object.
pub fn open_child(&self, path_comp: &OsStr) -> Result<Self> {
let name = Self::prepare_path_component(path_comp)?;
let oflags = libc::O_PATH | libc::O_CLOEXEC;
let res = unsafe { libc::openat(self.path_fd(), name.as_ptr(), oflags, 0) };
if res < 0 {
Err(Error::last_os_error())
} else {
let handle = unsafe { File::from_raw_fd(res) };
Self::new_from_file(handle, self.target.join(path_comp))
}
}
/// Create or open a child directory if the current object is a directory.
pub fn mkdir(&self, path_comp: &OsStr, mode: libc::mode_t) -> Result<Self> {
let path_name = Self::prepare_path_component(path_comp)?;
let res = unsafe { libc::mkdirat(self.handle.as_raw_fd(), path_name.as_ptr(), mode) };
if res < 0 {
Err(Error::last_os_error())
} else {
self.open_child(path_comp)
}
}
/// Open a directory/file by path.
///
/// Obtain a file descriptor that can be used for two purposes:
/// - indicate a location in the filesystem tree
/// - perform operations that act purely at the file descriptor level
fn open_by_path<P: AsRef<Path>>(path: P) -> Result<File> {
// When O_PATH is specified in flags, flag bits other than O_CLOEXEC, O_DIRECTORY, and
// O_NOFOLLOW are ignored.
let o_flags = libc::O_PATH | libc::O_CLOEXEC;
OpenOptions::new()
.read(true)
.custom_flags(o_flags)
.open(path.as_ref())
}
fn get_proc_path<F: AsRawFd>(file: F) -> PathBuf {
PathBuf::from(format!("/proc/self/fd/{}", file.as_raw_fd()))
}
fn new_from_file<P: AsRef<Path>>(handle: File, orig_path: P) -> Result<Self> {
let path = Self::get_proc_path(handle.as_raw_fd());
let link_path = fs::read_link(path.as_path())?;
if link_path != orig_path.as_ref() {
Err(Error::new(
ErrorKind::Other,
format!(
"Path changed from {} to {} on open, possible attack",
orig_path.as_ref().display(),
link_path.display()
),
))
} else {
Ok(PinnedPathBuf {
handle,
path,
target: link_path,
})
}
}
#[inline]
fn prepare_path_component(path_comp: &OsStr) -> Result<CString> {
let path = Path::new(path_comp);
let mut comps = path.components();
let name = comps.next();
if !matches!(name, Some(Component::Normal(_))) || comps.next().is_some() {
return Err(Error::new(
ErrorKind::Other,
format!("Path component {} is invalid", path_comp.to_string_lossy()),
));
}
let name = name.unwrap();
if name.as_os_str() != path_comp {
return Err(Error::new(
ErrorKind::Other,
format!("Path component {} is invalid", path_comp.to_string_lossy()),
));
}
CString::new(path_comp.as_bytes()).map_err(|_e| {
Error::new(
ErrorKind::Other,
format!("Path component {} is invalid", path_comp.to_string_lossy()),
)
})
}
}
impl Deref for PinnedPathBuf {
type Target = PathBuf;
fn deref(&self) -> &Self::Target {
&self.path
}
}
impl AsRef<Path> for PinnedPathBuf {
fn as_ref(&self) -> &Path {
self.path.as_path()
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::ffi::OsString;
use std::fs::DirBuilder;
use std::io::Write;
use std::os::unix::fs::{symlink, MetadataExt};
use std::sync::{Arc, Barrier};
use std::thread;
#[test]
fn test_pinned_path_buf() {
// Create a root directory, which itself contains symlinks.
let rootfs_dir = tempfile::tempdir().expect("failed to create tmpdir");
DirBuilder::new()
.create(rootfs_dir.path().join("b"))
.unwrap();
symlink(rootfs_dir.path().join("b"), rootfs_dir.path().join("a")).unwrap();
let rootfs_path = &rootfs_dir.path().join("a");
// Create a file and a symlink to it.
fs::create_dir(rootfs_path.join("symlink_dir")).unwrap();
symlink("/endpoint", rootfs_path.join("symlink_dir/endpoint")).unwrap();
fs::write(rootfs_path.join("endpoint"), "test").unwrap();
// Pin the target and validate the path/content.
let path = PinnedPathBuf::new(rootfs_path.to_path_buf(), "symlink_dir/endpoint").unwrap();
assert!(!path.is_dir());
let path_ref = path.deref();
let target = fs::read_link(path_ref).unwrap();
assert_eq!(target, rootfs_path.join("endpoint").canonicalize().unwrap());
let content = fs::read_to_string(&path).unwrap();
assert_eq!(&content, "test");
// Remove the target file and validate that we could still read data from the pinned path.
fs::remove_file(&target).unwrap();
fs::read_to_string(&target).unwrap_err();
let content = fs::read_to_string(&path).unwrap();
assert_eq!(&content, "test");
}
#[test]
fn test_pinned_path_buf_race() {
let root_dir = tempfile::tempdir().expect("failed to create tmpdir");
let root_path = root_dir.path();
let barrier = Arc::new(Barrier::new(2));
fs::write(root_path.join("a"), b"a").unwrap();
fs::write(root_path.join("b"), b"b").unwrap();
fs::write(root_path.join("c"), b"c").unwrap();
symlink("a", root_path.join("s")).unwrap();
let root_path2 = root_path.to_path_buf();
let barrier2 = barrier.clone();
let thread = thread::spawn(move || {
// step 1
barrier2.wait();
fs::remove_file(root_path2.join("a")).unwrap();
symlink("b", root_path2.join("a")).unwrap();
barrier2.wait();
// step 2
barrier2.wait();
fs::remove_file(root_path2.join("b")).unwrap();
symlink("c", root_path2.join("b")).unwrap();
barrier2.wait();
});
let path = scoped_join(&root_path, "s").unwrap();
let data = fs::read_to_string(&path).unwrap();
assert_eq!(&data, "a");
assert!(path.is_file());
barrier.wait();
barrier.wait();
// Verify the target has been redirected.
let data = fs::read_to_string(&path).unwrap();
assert_eq!(&data, "b");
PinnedPathBuf::from_path(&path).unwrap_err();
let pinned_path = PinnedPathBuf::new(&root_path, "s").unwrap();
let data = fs::read_to_string(&pinned_path).unwrap();
assert_eq!(&data, "b");
// step2
barrier.wait();
barrier.wait();
// Verify it still points to the old target.
let data = fs::read_to_string(&pinned_path).unwrap();
assert_eq!(&data, "b");
thread.join().unwrap();
}
#[test]
fn test_new_pinned_path_buf() {
let rootfs_dir = tempfile::tempdir().expect("failed to create tmpdir");
let rootfs_path = rootfs_dir.path();
let path = PinnedPathBuf::from_path(rootfs_path).unwrap();
let _ = OpenOptions::new().read(true).open(&path).unwrap();
}
#[test]
fn test_pinned_path_try_clone() {
let rootfs_dir = tempfile::tempdir().expect("failed to create tmpdir");
let rootfs_path = rootfs_dir.path();
let path = PinnedPathBuf::from_path(rootfs_path).unwrap();
let path2 = path.try_clone().unwrap();
assert_ne!(path.as_path(), path2.as_path());
}
#[test]
fn test_new_pinned_path_buf_from_nonexist_file() {
let rootfs_dir = tempfile::tempdir().expect("failed to create tmpdir");
let rootfs_path = rootfs_dir.path();
PinnedPathBuf::new(rootfs_path, "does_not_exist").unwrap_err();
}
#[test]
fn test_new_pinned_path_buf_without_read_perm() {
let rootfs_dir = tempfile::tempdir().expect("failed to create tmpdir");
let rootfs_path = rootfs_dir.path();
let path = rootfs_path.join("write_only_file");
let mut file = OpenOptions::new()
.read(false)
.write(true)
.create(true)
.mode(0o200)
.open(&path)
.unwrap();
file.write_all(&[0xa5u8]).unwrap();
let md = fs::metadata(&path).unwrap();
let umask = unsafe { libc::umask(0022) };
unsafe { libc::umask(umask) };
assert_eq!(md.mode() & 0o700, 0o200 & !umask);
PinnedPathBuf::from_path(&path).unwrap();
}
#[test]
fn test_pinned_path_buf_path_fd() {
let rootfs_dir = tempfile::tempdir().expect("failed to create tmpdir");
let rootfs_path = rootfs_dir.path();
let path = rootfs_path.join("write_only_file");
let mut file = OpenOptions::new()
.read(false)
.write(true)
.create(true)
.mode(0o200)
.open(&path)
.unwrap();
file.write_all(&[0xa5u8]).unwrap();
let handle = PinnedPathBuf::from_path(&path).unwrap();
// Check that `fstat()` etc works with the fd returned by `path_fd()`.
let fd = handle.path_fd();
let mut stat: libc::stat = unsafe { std::mem::zeroed() };
let res = unsafe { libc::fstat(fd, &mut stat as *mut _) };
assert_eq!(res, 0);
}
#[test]
fn test_pinned_path_buf_open_child() {
let rootfs_dir = tempfile::tempdir().expect("failed to create tmpdir");
let rootfs_path = rootfs_dir.path();
let path = PinnedPathBuf::from_path(rootfs_path).unwrap();
fs::write(path.join("child"), "test").unwrap();
let path = path.open_child(OsStr::new("child")).unwrap();
let content = fs::read_to_string(&path).unwrap();
assert_eq!(&content, "test");
path.open_child(&OsString::from("__does_not_exist__"))
.unwrap_err();
path.open_child(&OsString::from("test/a")).unwrap_err();
}
#[test]
fn test_prepare_path_component() {
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("")).is_err());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from(".")).is_err());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("..")).is_err());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("/")).is_err());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("//")).is_err());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("a/b")).is_err());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("./b")).is_err());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("a/.")).is_err());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("a/..")).is_err());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("a/./")).is_err());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("a/../")).is_err());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("a/./a")).is_err());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("a/../a")).is_err());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("a")).is_ok());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("a.b")).is_ok());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("a..b")).is_ok());
}
#[test]
fn test_target_fs_object_changed() {
let rootfs_dir = tempfile::tempdir().expect("failed to create tmpdir");
let rootfs_path = rootfs_dir.path();
let file = rootfs_path.join("child");
fs::write(&file, "test").unwrap();
let path = PinnedPathBuf::from_path(&file).unwrap();
let path3 = fs::read_link(path.as_path()).unwrap();
assert_eq!(&path3, path.target());
fs::rename(file, rootfs_path.join("child2")).unwrap();
let path4 = fs::read_link(path.as_path()).unwrap();
assert_ne!(&path4, path.target());
fs::remove_file(rootfs_path.join("child2")).unwrap();
let path5 = fs::read_link(path.as_path()).unwrap();
assert_ne!(&path4, &path5);
}
}

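A condensed sketch of the pinning behavior exercised by the tests above: once a path is pinned, its data remains reachable through the `/proc/self/fd/...` symlink even after the target file is unlinked (`tempfile` is the crate's own dev-dependency):

```rust
use safe_path::PinnedPathBuf;
use std::fs;

fn main() -> std::io::Result<()> {
    let dir = tempfile::tempdir()?;
    fs::write(dir.path().join("endpoint"), "test")?;
    let pinned = PinnedPathBuf::new(dir.path(), "endpoint")?;
    // Unlinking the target does not invalidate the pinned handle:
    fs::remove_file(dir.path().join("endpoint"))?;
    assert_eq!(fs::read_to_string(&pinned)?, "test");
    Ok(())
}
```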

@@ -0,0 +1,294 @@
// Copyright (c) 2022 Alibaba Cloud
//
// SPDX-License-Identifier: Apache-2.0
//
use std::io::{Error, ErrorKind, Result};
use std::path::Path;
use crate::{scoped_join, scoped_resolve, PinnedPathBuf};
const DIRECTORY_MODE_DEFAULT: u32 = 0o777;
const DIRECTORY_MODE_MASK: u32 = 0o777;
/// Safe version of `DirBuilder` to protect from TOCTOU style of attacks.
///
/// The `ScopedDirBuilder` is a counterpart for `DirBuilder`, with safety enhancements of:
/// - ensuring the new directories are created under a specified `root` directory.
/// - ensuring all created directories are still scoped under `root` even under symlink based
/// attacks.
/// - returning a [PinnedPathBuf] for the last level of directory, so it could be used for other
/// operations safely.
#[derive(Debug)]
pub struct ScopedDirBuilder {
root: PinnedPathBuf,
mode: u32,
recursive: bool,
}
impl ScopedDirBuilder {
/// Create a new instance of `ScopedDirBuilder` with default mode/security settings.
pub fn new<P: AsRef<Path>>(root: P) -> Result<Self> {
let root = root.as_ref().canonicalize()?;
let root = PinnedPathBuf::from_path(root)?;
if !root.metadata()?.is_dir() {
return Err(Error::new(
ErrorKind::Other,
format!("Invalid root path: {}", root.display()),
));
}
Ok(ScopedDirBuilder {
root,
mode: DIRECTORY_MODE_DEFAULT,
recursive: false,
})
}
/// Indicates that directories should be created recursively, creating all parent directories.
///
/// Parents that do not exist are created with the same security and permissions settings.
pub fn recursive(&mut self, recursive: bool) -> &mut Self {
self.recursive = recursive;
self
}
/// Sets the mode to create new directories with. This option defaults to 0o777.
pub fn mode(&mut self, mode: u32) -> &mut Self {
self.mode = mode & DIRECTORY_MODE_MASK;
self
}
/// Creates the specified directory with the options configured in this builder.
///
/// This is a helper to create a subdirectory from an absolute path, without stripping off
/// `self.root`. So an error will be returned if the path does not start with `self.root`.
/// It is considered an error if the directory already exists unless recursive mode is enabled.
pub fn create_with_unscoped_path<P: AsRef<Path>>(&self, path: P) -> Result<PinnedPathBuf> {
if !path.as_ref().is_absolute() {
return Err(Error::new(
ErrorKind::Other,
format!(
"Expected absolute directory path: {}",
path.as_ref().display()
),
));
}
// Partially canonicalize `path` so we can strip the `root` part.
let scoped_path = scoped_join("/", path)?;
let stripped_path = scoped_path.strip_prefix(self.root.target()).map_err(|_| {
Error::new(
ErrorKind::Other,
format!(
"Path {} is not under {}",
scoped_path.display(),
self.root.target().display()
),
)
})?;
self.do_mkdir(&stripped_path)
}
/// Creates sub-directory with the options configured in this builder.
///
/// It is considered an error if the directory already exists unless recursive mode is enabled.
pub fn create<P: AsRef<Path>>(&self, path: P) -> Result<PinnedPathBuf> {
let path = scoped_resolve(&self.root, path)?;
self.do_mkdir(&path)
}
fn do_mkdir(&self, path: &Path) -> Result<PinnedPathBuf> {
assert!(path.is_relative());
if path.file_name().is_none() {
if !self.recursive {
return Err(Error::new(
ErrorKind::AlreadyExists,
"directory already exists",
));
} else {
return self.root.try_clone();
}
}
// Safe because `path` has at least one level.
let levels = path.iter().count() - 1;
let mut dir = self.root.try_clone()?;
for (idx, comp) in path.iter().enumerate() {
match dir.open_child(comp) {
Ok(v) => {
if !v.metadata()?.is_dir() {
return Err(Error::new(
ErrorKind::Other,
format!("Path {} is not a directory", v.display()),
));
} else if !self.recursive && idx == levels {
return Err(Error::new(
ErrorKind::AlreadyExists,
"directory already exists",
));
}
dir = v;
}
Err(_e) => {
if !self.recursive && idx != levels {
return Err(Error::new(
ErrorKind::NotFound,
format!("parent directory does not exist"),
));
}
dir = dir.mkdir(comp, self.mode)?;
}
}
}
Ok(dir)
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use std::fs::DirBuilder;
use std::os::unix::fs::{symlink, MetadataExt};
use tempfile::tempdir;
#[test]
fn test_scoped_dir_builder() {
// create temporary directory to emulate container rootfs with symlink
let rootfs_dir = tempdir().expect("failed to create tmpdir");
DirBuilder::new()
.create(rootfs_dir.path().join("b"))
.unwrap();
symlink(rootfs_dir.path().join("b"), rootfs_dir.path().join("a")).unwrap();
let rootfs_path = &rootfs_dir.path().join("a");
// root directory doesn't exist
ScopedDirBuilder::new(rootfs_path.join("__does_not_exist__")).unwrap_err();
ScopedDirBuilder::new("__does_not_exist__").unwrap_err();
// root is a file
fs::write(rootfs_path.join("txt"), "test").unwrap();
ScopedDirBuilder::new(rootfs_path.join("txt")).unwrap_err();
let mut builder = ScopedDirBuilder::new(&rootfs_path).unwrap();
// file with the same name already exists.
builder
.create_with_unscoped_path(rootfs_path.join("txt"))
.unwrap_err();
// parent is a file
builder.create("/txt/a").unwrap_err();
// Not starting with root
builder.create_with_unscoped_path("/txt/a").unwrap_err();
// creating "." without recursive mode should fail
builder
.create_with_unscoped_path(rootfs_path.join("."))
.unwrap_err();
// parent doesn't exist
builder
.create_with_unscoped_path(rootfs_path.join("a/b"))
.unwrap_err();
builder.create("a/b/c").unwrap_err();
let path = builder.create("a").unwrap();
assert!(rootfs_path.join("a").is_dir());
assert_eq!(path.target(), rootfs_path.join("a").canonicalize().unwrap());
// Creating an existing directory without recursive mode should fail.
builder
.create_with_unscoped_path(rootfs_path.join("a"))
.unwrap_err();
// Creating an existing directory with recursive mode should succeed.
builder.recursive(true);
let path = builder
.create_with_unscoped_path(rootfs_path.join("a"))
.unwrap();
assert_eq!(path.target(), rootfs_path.join("a").canonicalize().unwrap());
let path = builder.create(".").unwrap();
assert_eq!(path.target(), rootfs_path.canonicalize().unwrap());
let umask = unsafe { libc::umask(0022) };
unsafe { libc::umask(umask) };
builder.mode(0o740);
let path = builder.create("a/b/c/d").unwrap();
assert_eq!(
path.target(),
rootfs_path.join("a/b/c/d").canonicalize().unwrap()
);
assert!(rootfs_path.join("a/b/c/d").is_dir());
assert_eq!(
rootfs_path.join("a").metadata().unwrap().mode() & 0o777,
DIRECTORY_MODE_DEFAULT & !umask,
);
assert_eq!(
rootfs_path.join("a/b").metadata().unwrap().mode() & 0o777,
0o740 & !umask
);
assert_eq!(
rootfs_path.join("a/b/c").metadata().unwrap().mode() & 0o777,
0o740 & !umask
);
assert_eq!(
rootfs_path.join("a/b/c/d").metadata().unwrap().mode() & 0o777,
0o740 & !umask
);
// Creation should fail if some path component is not a directory.
builder.create("txt/e/f").unwrap_err();
fs::write(rootfs_path.join("a/b/txt"), "test").unwrap();
builder.create("a/b/txt/h/i").unwrap_err();
}
#[test]
fn test_create_root() {
let mut builder = ScopedDirBuilder::new("/").unwrap();
builder.recursive(true);
builder.create("/").unwrap();
builder.create(".").unwrap();
builder.create("..").unwrap();
builder.create("../../.").unwrap();
builder.create("").unwrap();
builder.create_with_unscoped_path("/").unwrap();
builder.create_with_unscoped_path("/..").unwrap();
builder.create_with_unscoped_path("/../.").unwrap();
}
#[test]
fn test_create_with_absolute_path() {
// create temporary directory to emulate container rootfs with symlink
let rootfs_dir = tempdir().expect("failed to create tmpdir");
DirBuilder::new()
.create(rootfs_dir.path().join("b"))
.unwrap();
symlink(rootfs_dir.path().join("b"), rootfs_dir.path().join("a")).unwrap();
let rootfs_path = &rootfs_dir.path().join("a");
let mut builder = ScopedDirBuilder::new(&rootfs_path).unwrap();
builder.create_with_unscoped_path("/").unwrap_err();
builder
.create_with_unscoped_path(rootfs_path.join("../__xxxx___xxx__"))
.unwrap_err();
builder
.create_with_unscoped_path(rootfs_path.join("c/d"))
.unwrap_err();
// Returns `AlreadyExists` when recursive mode is false
builder.create_with_unscoped_path(&rootfs_path).unwrap_err();
builder
.create_with_unscoped_path(rootfs_path.join("."))
.unwrap_err();
builder.recursive(true);
builder.create_with_unscoped_path(&rootfs_path).unwrap();
builder
.create_with_unscoped_path(rootfs_path.join("."))
.unwrap();
builder
.create_with_unscoped_path(rootfs_path.join("c/d"))
.unwrap();
}
}


@@ -0,0 +1,415 @@
// Copyright (c) 2022 Alibaba Cloud
//
// SPDX-License-Identifier: Apache-2.0
//
use std::io::{Error, ErrorKind, Result};
use std::path::{Component, Path, PathBuf};
// Follow the same configuration as
// [secure_join](https://github.com/cyphar/filepath-securejoin/blob/master/join.go#L51)
const MAX_SYMLINK_DEPTH: u32 = 255;
fn do_scoped_resolve<R: AsRef<Path>, U: AsRef<Path>>(
root: R,
unsafe_path: U,
) -> Result<(PathBuf, PathBuf)> {
let root = root.as_ref().canonicalize()?;
let mut nlinks = 0u32;
let mut curr_path = unsafe_path.as_ref().to_path_buf();
'restart: loop {
let mut subpath = PathBuf::new();
let mut iter = curr_path.components();
'next_comp: while let Some(comp) = iter.next() {
match comp {
// Linux paths don't have prefixes.
Component::Prefix(_) => {
return Err(Error::new(
ErrorKind::Other,
format!("Invalid path prefix in: {}", unsafe_path.as_ref().display()),
));
}
// `RootDir` should always be the first component, and Path::components() ensures
// that.
Component::RootDir | Component::CurDir => {
continue 'next_comp;
}
Component::ParentDir => {
subpath.pop();
}
Component::Normal(n) => {
let path = root.join(&subpath).join(n);
if let Ok(v) = path.read_link() {
nlinks += 1;
if nlinks > MAX_SYMLINK_DEPTH {
return Err(Error::new(
ErrorKind::Other,
format!(
"Too many levels of symlinks: {}",
unsafe_path.as_ref().display()
),
));
}
curr_path = if v.is_absolute() {
v.join(iter.as_path())
} else {
subpath.join(v).join(iter.as_path())
};
continue 'restart;
} else {
subpath.push(n);
}
}
}
}
return Ok((root, subpath));
}
}
/// Resolve `unsafe_path` to a relative path, rooted at and constrained by `root`.
///
/// The `scoped_resolve()` function assumes `root` exists and is an absolute path. It processes
/// each path component in `unsafe_path` as follows:
/// - if the component doesn't exist yet, assume it's not a symlink and emit it as-is.
/// - if it's "/" or ".", ignore it.
/// - if it's "..", go to the parent directory, but never above `root`.
/// - if it's a symlink, recursively resolve it to the real path. All symlink resolutions are
///   constrained by `root`.
/// - otherwise, emit the path component unchanged.
///
/// # Arguments
/// - `root`: the absolute path to constrain the symlink resolution.
/// - `unsafe_path`: the path to resolve.
///
/// Note that the guarantees provided by this function only apply if the path components in the
/// returned PathBuf are not modified (in other words are not replaced with symlinks on the
/// filesystem) after this function has returned. You may use [crate::PinnedPathBuf] to protect
/// from such TOCTOU attacks.
pub fn scoped_resolve<R: AsRef<Path>, U: AsRef<Path>>(root: R, unsafe_path: U) -> Result<PathBuf> {
do_scoped_resolve(root, unsafe_path).map(|(_root, path)| path)
}
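A sketch of the contract (assuming `scoped_resolve` from this module is in scope; the root must exist since it is canonicalized, and the components under it are assumed not to be existing symlinks here):

```rust
use std::path::PathBuf;

fn main() -> std::io::Result<()> {
    // The leading "/" is ignored and ".." cannot climb above the root,
    // so the result is a root-relative path.
    let rel = scoped_resolve("/tmp", "/usr/bin/../../bin/ls")?;
    assert_eq!(rel, PathBuf::from("bin/ls"));
    Ok(())
}
```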
/// Safely join `unsafe_path` to `root`, and ensure `unsafe_path` is scoped under `root`.
///
/// The `scoped_join()` function assumes `root` exists and is an absolute path. It safely joins the
/// two given paths and ensures:
/// - The returned path is guaranteed to be scoped inside `root`.
/// - Any symbolic links in the path are evaluated with the given `root` treated as the root of the
/// filesystem, similar to a chroot.
///
/// It's modelled after [secure_join](https://github.com/cyphar/filepath-securejoin), but only
/// for Linux systems.
///
/// # Arguments
/// - `root`: the absolute path to scope the symlink evaluation.
/// - `unsafe_path`: the path to evaluate and join with `root`. It is unsafe since it may try to
///   escape from `root` by using "../" or symlinks.
///
/// # Security
/// On success return, the `scoped_join()` function guarantees that:
/// - The resulting PathBuf must be a child path of `root` and will not contain any symlink path
/// components (they will all get expanded).
/// - When expanding symlinks, all symlink path components must be resolved relative to the provided
/// `root`. In particular, this can be considered a userspace implementation of how chroot(2)
/// operates on file paths.
/// - Non-existent path components are unaffected.
///
/// Note that the guarantees provided by this function only apply if the path components in the
/// returned string are not modified (in other words are not replaced with symlinks on the
/// filesystem) after this function has returned. You may use [crate::PinnedPathBuf] to protect
/// from such TOCTOU attacks.
pub fn scoped_join<R: AsRef<Path>, U: AsRef<Path>>(root: R, unsafe_path: U) -> Result<PathBuf> {
do_scoped_resolve(root, unsafe_path).map(|(root, path)| root.join(path))
}
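And the corresponding sketch for `scoped_join` (same assumptions as above):

```rust
use std::path::PathBuf;

fn main() -> std::io::Result<()> {
    // A hostile "../.." is clamped at the root, so the result stays inside /tmp.
    let joined = scoped_join("/tmp", "../../etc/shadow")?;
    assert_eq!(joined, PathBuf::from("/tmp/etc/shadow"));
    Ok(())
}
```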
#[cfg(test)]
mod tests {
use super::*;
use std::fs::DirBuilder;
use std::os::unix::fs;
use tempfile::tempdir;
#[allow(dead_code)]
#[derive(Debug)]
struct TestData<'a> {
name: &'a str,
rootfs: &'a Path,
unsafe_path: &'a str,
result: &'a str,
}
fn exec_tests(tests: &[TestData]) {
for (i, t) in tests.iter().enumerate() {
// Create a string containing details of the test
let msg = format!("test[{}]: {:?}", i, t);
let result = scoped_resolve(t.rootfs, t.unsafe_path).unwrap();
let msg = format!("{}, result: {:?}", msg, result);
// Perform the checks
assert_eq!(&result, Path::new(t.result), "{}", msg);
}
}
#[test]
fn test_scoped_resolve() {
// create temporary directory to emulate container rootfs with symlink
let rootfs_dir = tempdir().expect("failed to create tmpdir");
DirBuilder::new()
.create(rootfs_dir.path().join("b"))
.unwrap();
fs::symlink(rootfs_dir.path().join("b"), rootfs_dir.path().join("a")).unwrap();
let rootfs_path = &rootfs_dir.path().join("a");
let tests = [
TestData {
name: "normal path",
rootfs: rootfs_path,
unsafe_path: "a/b/c",
result: "a/b/c",
},
TestData {
name: "path with .. at beginning",
rootfs: rootfs_path,
unsafe_path: "../../../a/b/c",
result: "a/b/c",
},
TestData {
name: "path with complex .. pattern",
rootfs: rootfs_path,
unsafe_path: "../../../a/../../b/../../c",
result: "c",
},
TestData {
name: "path with .. in middle",
rootfs: rootfs_path,
unsafe_path: "/usr/bin/../../bin/ls",
result: "bin/ls",
},
TestData {
name: "path with . and ..",
rootfs: rootfs_path,
unsafe_path: "/usr/./bin/../../bin/./ls",
result: "bin/ls",
},
TestData {
name: "path with . at end",
rootfs: rootfs_path,
unsafe_path: "/usr/./bin/../../bin/./ls/.",
result: "bin/ls",
},
TestData {
name: "path try to escape by ..",
rootfs: rootfs_path,
unsafe_path: "/usr/./bin/../../../../bin/./ls/../ls",
result: "bin/ls",
},
TestData {
name: "path with .. at the end",
rootfs: rootfs_path,
unsafe_path: "/usr/./bin/../../bin/./ls/..",
result: "bin",
},
TestData {
name: "path ..",
rootfs: rootfs_path,
unsafe_path: "..",
result: "",
},
TestData {
name: "path .",
rootfs: rootfs_path,
unsafe_path: ".",
result: "",
},
TestData {
name: "path /",
rootfs: rootfs_path,
unsafe_path: "/",
result: "",
},
TestData {
name: "empty path",
rootfs: rootfs_path,
unsafe_path: "",
result: "",
},
];
exec_tests(&tests);
}
#[test]
fn test_scoped_resolve_invalid() {
scoped_resolve("./root_is_not_absolute_path", ".").unwrap_err();
scoped_resolve("C:", ".").unwrap_err();
scoped_resolve(r#"\\server\test"#, ".").unwrap_err();
scoped_resolve(r#"http://localhost/test"#, ".").unwrap_err();
// Chinese Unicode characters
scoped_resolve(r#"您好"#, ".").unwrap_err();
}
#[test]
fn test_scoped_resolve_symlink() {
// create temporary directory to emulate container rootfs with symlink
let rootfs_dir = tempdir().expect("failed to create tmpdir");
let rootfs_path = &rootfs_dir.path();
std::fs::create_dir(rootfs_path.join("symlink_dir")).unwrap();
fs::symlink("../../../", rootfs_path.join("1")).unwrap();
let tests = [TestData {
name: "relative symlink beyond root",
rootfs: rootfs_path,
unsafe_path: "1",
result: "",
}];
exec_tests(&tests);
fs::symlink("/dddd", rootfs_path.join("2")).unwrap();
let tests = [TestData {
name: "abs symlink pointing to non-existent directory",
rootfs: rootfs_path,
unsafe_path: "2",
result: "dddd",
}];
exec_tests(&tests);
fs::symlink("/", rootfs_path.join("3")).unwrap();
let tests = [TestData {
name: "abs symlink pointing to /",
rootfs: rootfs_path,
unsafe_path: "3",
result: "",
}];
exec_tests(&tests);
fs::symlink("usr/bin/../bin/ls", rootfs_path.join("4")).unwrap();
let tests = [TestData {
name: "symlink with one ..",
rootfs: rootfs_path,
unsafe_path: "4",
result: "usr/bin/ls",
}];
exec_tests(&tests);
fs::symlink("usr/bin/../../bin/ls", rootfs_path.join("5")).unwrap();
let tests = [TestData {
name: "symlink with two ..",
rootfs: rootfs_path,
unsafe_path: "5",
result: "bin/ls",
}];
exec_tests(&tests);
fs::symlink(
"../usr/bin/../../../bin/ls",
rootfs_path.join("symlink_dir/6"),
)
.unwrap();
let tests = [TestData {
name: "symlink try to escape",
rootfs: rootfs_path,
unsafe_path: "symlink_dir/6",
result: "bin/ls",
}];
exec_tests(&tests);
// Detect symlink loop.
fs::symlink("/endpoint_b", rootfs_path.join("endpoint_a")).unwrap();
fs::symlink("/endpoint_a", rootfs_path.join("endpoint_b")).unwrap();
scoped_resolve(rootfs_path, "endpoint_a").unwrap_err();
}
#[test]
fn test_scoped_join() {
// create temporary directory to emulate container rootfs with symlink
let rootfs_dir = tempdir().expect("failed to create tmpdir");
let rootfs_path = &rootfs_dir.path();
assert_eq!(
scoped_join(&rootfs_path, "a").unwrap(),
rootfs_path.join("a")
);
assert_eq!(
scoped_join(&rootfs_path, "./a").unwrap(),
rootfs_path.join("a")
);
assert_eq!(
scoped_join(&rootfs_path, "././a").unwrap(),
rootfs_path.join("a")
);
assert_eq!(
scoped_join(&rootfs_path, "c/d/../../a").unwrap(),
rootfs_path.join("a")
);
assert_eq!(
scoped_join(&rootfs_path, "c/d/../../../.././a").unwrap(),
rootfs_path.join("a")
);
assert_eq!(
scoped_join(&rootfs_path, "../../a").unwrap(),
rootfs_path.join("a")
);
assert_eq!(
scoped_join(&rootfs_path, "./../a").unwrap(),
rootfs_path.join("a")
);
}
#[test]
fn test_scoped_join_symlink() {
// create temporary directory to emulate container rootfs with symlink
let rootfs_dir = tempdir().expect("failed to create tmpdir");
let rootfs_path = &rootfs_dir.path();
DirBuilder::new()
.recursive(true)
.create(rootfs_dir.path().join("b/c"))
.unwrap();
fs::symlink("b/c", rootfs_dir.path().join("a")).unwrap();
let target = rootfs_path.join("b/c");
assert_eq!(scoped_join(&rootfs_path, "a").unwrap(), target);
assert_eq!(scoped_join(&rootfs_path, "./a").unwrap(), target);
assert_eq!(scoped_join(&rootfs_path, "././a").unwrap(), target);
assert_eq!(scoped_join(&rootfs_path, "b/c/../../a").unwrap(), target);
assert_eq!(
scoped_join(&rootfs_path, "b/c/../../../.././a").unwrap(),
target
);
assert_eq!(scoped_join(&rootfs_path, "../../a").unwrap(), target);
assert_eq!(scoped_join(&rootfs_path, "./../a").unwrap(), target);
assert_eq!(scoped_join(&rootfs_path, "a/../../../a").unwrap(), target);
assert_eq!(scoped_join(&rootfs_path, "a/../../../b/c").unwrap(), target);
}
#[test]
fn test_scoped_join_symlink_loop() {
// create temporary directory to emulate container rootfs with symlink
let rootfs_dir = tempdir().expect("failed to create tmpdir");
let rootfs_path = &rootfs_dir.path();
fs::symlink("/endpoint_b", rootfs_path.join("endpoint_a")).unwrap();
fs::symlink("/endpoint_a", rootfs_path.join("endpoint_b")).unwrap();
scoped_join(rootfs_path, "endpoint_a").unwrap_err();
}
#[test]
fn test_scoped_join_unicode_character() {
// create temporary directory to emulate container rootfs with symlink
let rootfs_dir = tempdir().expect("failed to create tmpdir");
let rootfs_path = &rootfs_dir.path().canonicalize().unwrap();
let path = scoped_join(rootfs_path, "您好").unwrap();
assert_eq!(path, rootfs_path.join("您好"));
let path = scoped_join(rootfs_path, "../../../您好").unwrap();
assert_eq!(path, rootfs_path.join("您好"));
let path = scoped_join(rootfs_path, "。。/您好").unwrap();
assert_eq!(path, rootfs_path.join("。。/您好"));
let path = scoped_join(rootfs_path, "您好/../../test").unwrap();
assert_eq!(path, rootfs_path.join("test"));
}
}


@@ -2,6 +2,7 @@
*.patch
*.swp
coverage.txt
coverage.txt.tmp
coverage.html
.git-commit
.git-commit.tmp


@@ -176,6 +176,9 @@ DEFDISABLEBLOCK := false
DEFSHAREDFS_CLH_VIRTIOFS := virtio-fs
DEFSHAREDFS_QEMU_VIRTIOFS := virtio-fs
DEFVIRTIOFSDAEMON := $(LIBEXECDIR)/kata-qemu/virtiofsd
ifeq ($(ARCH),amd64)
DEFVIRTIOFSDAEMON := $(LIBEXECDIR)/virtiofsd
endif
DEFVALIDVIRTIOFSDAEMONPATHS := [\"$(DEFVIRTIOFSDAEMON)\"]
# Default DAX mapping cache size in MiB
# If the value is 0, DAX is not enabled
@@ -596,7 +599,7 @@ hook:
go-test: $(GENERATED_FILES)
go clean -testcache
$(QUIET_TEST)../../ci/go-test.sh
$(QUIET_TEST)./go-test.sh
fast-test: $(GENERATED_FILES)
go clean -testcache


@@ -10,6 +10,7 @@ This repository contains the following components:
|-|-|
| `containerd-shim-kata-v2` | The [shimv2 runtime](../../docs/design/architecture/README.md#runtime) |
| `kata-runtime` | [utility program](../../docs/design/architecture/README.md#utility-program) |
| `kata-monitor` | [metrics collector daemon](cmd/kata-monitor/README.md) |
For details of the other Kata Containers repositories, see the
[repository summary](https://github.com/kata-containers/kata-containers).


@@ -0,0 +1,68 @@
# Kata monitor
## Overview
`kata-monitor` is a daemon able to collect and expose metrics related to all the Kata Containers workloads running on the same host.
Once started, it detects all the Kata Containers runtimes (`containerd-shim-kata-v2`) running on the system and exposes a few HTTP endpoints to allow retrieval of the available data.
The main endpoint is `/metrics`, which aggregates metrics from all the Kata workloads.
Available metrics include:
* Kata runtime metrics
* Kata agent metrics
* Kata guest OS metrics
* Hypervisor metrics
* Firecracker metrics
* Kata monitor metrics
All the provided metrics are in Prometheus format. While `kata-monitor` can run as a standalone daemon on any host running Kata Containers workloads, and can be used to retrieve profiling data from the running Kata runtimes, it is mainly expected to be deployed as a DaemonSet on a Kubernetes cluster, where Prometheus scrapes the metrics from the kata-monitor endpoints.
For more information on the Kata Containers metrics architecture and a detailed list of the available metrics provided by Kata monitor check the [Kata 2.0 Metrics Design](../../../../docs/design/kata-2-0-metrics.md) document.
## Usage
Each `kata-monitor` instance detects and monitors the Kata Containers workloads running on the same node.
### Kata monitor arguments
The `kata-monitor` binary accepts the following arguments:
* `--listen-address` _IP:PORT_
* `--runtime-endpoint` _PATH_TO_THE_CONTAINER_MANAGER_CRI_INTERFACE_
* `--log-level` _[ trace | debug | info | warn | error | fatal | panic ]_
The **listen-address** specifies the IP and TCP port where the kata-monitor HTTP endpoints will be exposed. It defaults to `127.0.0.1:8090`.
The **runtime-endpoint** is the CRI interface of a CRI-compliant container manager: it is used to retrieve the CRI `PodSandboxMetadata` (`uid`, `name` and `namespace`), which is attached to the Kata metrics through the labels `cri_uid`, `cri_name` and `cri_namespace`. It defaults to the containerd socket: `/run/containerd/containerd.sock`.
The **log-level** controls how verbose the logs should be. The default is `info`.
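For example, to expose the endpoints on all interfaces with debug logging:
```bash
$ kata-monitor --listen-address 0.0.0.0:8090 \
    --runtime-endpoint /run/containerd/containerd.sock \
    --log-level debug
```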
### Kata monitor HTTP endpoints
`kata-monitor` exposes the following endpoints:
* `/metrics` : get metrics from the Kata sandboxes.
* `/sandboxes` : list all the Kata sandboxes running on the host.
* `/agent-url` : get the agent URL of a Kata sandbox.
* `/debug/vars` : Internal data of the Kata runtime shim.
* `/debug/pprof/` : Golang profiling data of the Kata runtime shim: index page.
* `/debug/pprof/cmdline` : Golang profiling data of the Kata runtime shim: `cmdline` endpoint.
* `/debug/pprof/profile` : Golang profiling data of the Kata runtime shim: `profile` endpoint (CPU profiling).
* `/debug/pprof/symbol` : Golang profiling data of the Kata runtime shim: `symbol` endpoint.
* `/debug/pprof/trace` : Golang profiling data of the Kata runtime shim: `trace` endpoint.
**NOTE: The debug endpoints are available only if the [Kata Containers configuration file](https://github.com/kata-containers/kata-containers/blob/9d5b03a1b70bbd175237ec4b9f821d6ccee0a1f6/src/runtime/config/configuration-qemu.toml.in#L590-L592) includes** `enable_pprof = true` **in the** `[runtime]` **section**.
The `/sandboxes` endpoint lists the _sandbox IDs_ of all the detected Kata runtimes. If accessed via a web browser, it provides HTML links to the endpoints available for each sandbox.
In order to retrieve data for a specific Kata workload, the _sandbox ID_ should be passed in the query string using the _sandbox_ key. The `/agent-url` endpoint and all the `/debug/*` endpoints require the sandbox ID to be specified in the query string.
#### Examples
Retrieve the IDs of the available sandboxes:
```bash
$ curl 127.0.0.1:8090/sandboxes
```
output:
```
6fcf0a90b01e90d8747177aa466c3462d02e02a878bc393649df83d4c314af0c
df96b24bd49ec437c872c1a758edc084121d607ce1242ff5d2263a0e1b693343
```
Retrieve the `agent-url` of the sandbox with ID _df96b24bd49ec437c872c1a758edc084121d607ce1242ff5d2263a0e1b693343_:
```bash
$ curl 127.0.0.1:8090/agent-url?sandbox=df96b24bd49ec437c872c1a758edc084121d607ce1242ff5d2263a0e1b693343
```
output:
```
vsock://830455376:1024
```
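The debug endpoints can be queried the same way. For example, to grab a CPU profile from a sandbox's shim (this assumes `enable_pprof = true` is set, as noted above):
```bash
$ curl -o profile.out "127.0.0.1:8090/debug/pprof/profile?sandbox=df96b24bd49ec437c872c1a758edc084121d607ce1242ff5d2263a0e1b693343"
```
The downloaded profile can then be inspected with `go tool pprof profile.out`.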


@@ -175,6 +175,15 @@ func main() {
}
func indexPage(w http.ResponseWriter, r *http.Request) {
htmlResponse := kataMonitor.IfReturnHTMLResponse(w, r)
if htmlResponse {
indexPageHTML(w, r)
} else {
indexPageText(w, r)
}
}
func indexPageText(w http.ResponseWriter, r *http.Request) {
w.Write([]byte("Available HTTP endpoints:\n"))
spacing := 0
@@ -184,13 +193,35 @@ func indexPage(w http.ResponseWriter, r *http.Request) {
}
}
spacing = spacing + 3
formatter := fmt.Sprintf("%%-%ds: %%s\n", spacing)
formattedString := fmt.Sprintf("%%-%ds: %%s\n", spacing)
for _, endpoint := range endpoints {
w.Write([]byte(fmt.Sprintf(formattedString, endpoint.path, endpoint.desc)))
w.Write([]byte(fmt.Sprintf(formatter, endpoint.path, endpoint.desc)))
}
}
func indexPageHTML(w http.ResponseWriter, r *http.Request) {
w.Write([]byte("<h1>Available HTTP endpoints:</h1>\n"))
var formattedString string
needLinkPaths := []string{"/metrics", "/sandboxes"}
w.Write([]byte("<ul>"))
for _, endpoint := range endpoints {
formattedString = fmt.Sprintf("<b>%s</b>: %s\n", endpoint.path, endpoint.desc)
for _, linkPath := range needLinkPaths {
if linkPath == endpoint.path {
formattedString = fmt.Sprintf("<b><a href='%s'>%s</a></b>: %s\n", endpoint.path, endpoint.path, endpoint.desc)
break
}
}
formattedString = fmt.Sprintf("<li>%s</li>", formattedString)
w.Write([]byte(formattedString))
}
w.Write([]byte("</ul>"))
}
// initLog setup logger
func initLog() {
kataMonitorLog := logrus.WithFields(logrus.Fields{


@@ -8,7 +8,6 @@ package main
import (
"context"
"flag"
"os"
"testing"
"github.com/stretchr/testify/assert"
@@ -43,9 +42,7 @@ func TestFactoryCLIFunctionNoRuntimeConfig(t *testing.T) {
func TestFactoryCLIFunctionInit(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true)
assert.NoError(err)
@@ -92,9 +89,7 @@ func TestFactoryCLIFunctionInit(t *testing.T) {
func TestFactoryCLIFunctionDestroy(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true)
assert.NoError(err)
@@ -126,9 +121,7 @@ func TestFactoryCLIFunctionDestroy(t *testing.T) {
func TestFactoryCLIFunctionStatus(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true)
assert.NoError(err)
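The pattern applied throughout these test files: `testing.T.TempDir` creates a per-test directory and registers its removal automatically, so the `os.MkdirTemp` error handling and the `defer os.RemoveAll` cleanup can both be dropped. A minimal before/after sketch:

```go
package example

import (
	"os"
	"testing"
)

// Before: manual creation, error check, and cleanup.
func TestWithMkdirTemp(t *testing.T) {
	tmpdir, err := os.MkdirTemp("", "")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(tmpdir)
	_ = tmpdir
}

// After: the testing package owns the directory's lifecycle.
func TestWithTempDir(t *testing.T) {
	tmpdir := t.TempDir()
	_ = tmpdir
}
```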


@@ -71,11 +71,7 @@ func TestCCCheckCLIFunction(t *testing.T) {
func TestCheckCheckKernelModulesNoNesting(t *testing.T) {
assert := assert.New(t)
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
savedSysModuleDir := sysModuleDir
savedProcCPUInfo := procCPUInfo
@@ -91,7 +87,7 @@ func TestCheckCheckKernelModulesNoNesting(t *testing.T) {
procCPUInfo = savedProcCPUInfo
}()
err = os.MkdirAll(sysModuleDir, testDirMode)
err := os.MkdirAll(sysModuleDir, testDirMode)
if err != nil {
t.Fatal(err)
}
@@ -156,11 +152,7 @@ func TestCheckCheckKernelModulesNoNesting(t *testing.T) {
func TestCheckCheckKernelModulesNoUnrestrictedGuest(t *testing.T) {
assert := assert.New(t)
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
savedSysModuleDir := sysModuleDir
savedProcCPUInfo := procCPUInfo
@@ -176,7 +168,7 @@ func TestCheckCheckKernelModulesNoUnrestrictedGuest(t *testing.T) {
procCPUInfo = savedProcCPUInfo
}()
err = os.MkdirAll(sysModuleDir, testDirMode)
err := os.MkdirAll(sysModuleDir, testDirMode)
if err != nil {
t.Fatal(err)
}
@@ -255,11 +247,7 @@ func TestCheckHostIsVMContainerCapable(t *testing.T) {
assert := assert.New(t)
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
savedSysModuleDir := sysModuleDir
savedProcCPUInfo := procCPUInfo
@@ -275,7 +263,7 @@ func TestCheckHostIsVMContainerCapable(t *testing.T) {
procCPUInfo = savedProcCPUInfo
}()
err = os.MkdirAll(sysModuleDir, testDirMode)
err := os.MkdirAll(sysModuleDir, testDirMode)
if err != nil {
t.Fatal(err)
}
@@ -405,11 +393,7 @@ func TestArchKernelParamHandler(t *testing.T) {
func TestKvmIsUsable(t *testing.T) {
assert := assert.New(t)
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
savedKvmDevice := kvmDevice
fakeKVMDevice := filepath.Join(dir, "kvm")
@@ -419,7 +403,7 @@ func TestKvmIsUsable(t *testing.T) {
kvmDevice = savedKvmDevice
}()
err = kvmIsUsable()
err := kvmIsUsable()
assert.Error(err)
err = createEmptyFile(fakeKVMDevice)
@@ -457,9 +441,7 @@ foo : bar
func TestSetCPUtype(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
savedArchRequiredCPUFlags := archRequiredCPUFlags
savedArchRequiredCPUAttribs := archRequiredCPUAttribs


@@ -67,11 +67,7 @@ foo : bar
{validContents, validNormalizeVendorName, validNormalizeModelName, false},
}
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
savedProcCPUInfo := procCPUInfo
@@ -84,7 +80,7 @@ foo : bar
procCPUInfo = savedProcCPUInfo
}()
_, _, err = getCPUDetails()
_, _, err := getCPUDetails()
// ENOENT
assert.Error(t, err)
assert.True(t, os.IsNotExist(err))


@@ -9,7 +9,6 @@
package main
import (
"os"
"testing"
"github.com/stretchr/testify/assert"
@@ -18,9 +17,7 @@ import (
func testSetCPUTypeGeneric(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
savedArchRequiredCPUFlags := archRequiredCPUFlags
savedArchRequiredCPUAttribs := archRequiredCPUAttribs


@@ -7,7 +7,6 @@ package main
import (
"fmt"
"os"
"path/filepath"
"testing"
@@ -118,11 +117,7 @@ func TestArchKernelParamHandler(t *testing.T) {
func TestKvmIsUsable(t *testing.T) {
assert := assert.New(t)
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
savedKvmDevice := kvmDevice
fakeKVMDevice := filepath.Join(dir, "kvm")
@@ -132,7 +127,7 @@ func TestKvmIsUsable(t *testing.T) {
kvmDevice = savedKvmDevice
}()
err = kvmIsUsable()
err := kvmIsUsable()
assert.Error(err)
err = createEmptyFile(fakeKVMDevice)


@@ -7,7 +7,6 @@ package main
import (
"fmt"
"os"
"path/filepath"
"testing"
@@ -117,11 +116,7 @@ func TestArchKernelParamHandler(t *testing.T) {
func TestKvmIsUsable(t *testing.T) {
assert := assert.New(t)
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
savedKvmDevice := kvmDevice
fakeKVMDevice := filepath.Join(dir, "kvm")
@@ -131,7 +126,7 @@ func TestKvmIsUsable(t *testing.T) {
kvmDevice = savedKvmDevice
}()
err = kvmIsUsable()
err := kvmIsUsable()
assert.Error(err)
err = createEmptyFile(fakeKVMDevice)


@@ -155,11 +155,7 @@ func makeCPUInfoFile(path, vendorID, flags string) error {
// nolint: unused, deadcode
func genericTestGetCPUDetails(t *testing.T, validVendor string, validModel string, validContents string, data []testCPUDetail) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
savedProcCPUInfo := procCPUInfo
@@ -172,7 +168,7 @@ func genericTestGetCPUDetails(t *testing.T, validVendor string, validModel strin
procCPUInfo = savedProcCPUInfo
}()
_, _, err = getCPUDetails()
_, _, err := getCPUDetails()
// ENOENT
assert.Error(t, err)
assert.True(t, os.IsNotExist(err))
@@ -197,11 +193,7 @@ func genericTestGetCPUDetails(t *testing.T, validVendor string, validModel strin
func genericCheckCLIFunction(t *testing.T, cpuData []testCPUData, moduleData []testModuleData) {
assert := assert.New(t)
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
_, config, err := makeRuntimeConfig(dir)
assert.NoError(err)
@@ -307,15 +299,11 @@ func TestCheckGetCPUInfo(t *testing.T) {
{"foo\n\nbar\nbaz\n\n", "foo", false},
}
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
file := filepath.Join(dir, "cpuinfo")
// file doesn't exist
_, err = getCPUInfo(file)
_, err := getCPUInfo(file)
assert.Error(err)
for _, d := range data {
@@ -527,11 +515,7 @@ func TestCheckHaveKernelModule(t *testing.T) {
assert := assert.New(t)
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
savedModProbeCmd := modProbeCmd
savedSysModuleDir := sysModuleDir
@@ -545,7 +529,7 @@ func TestCheckHaveKernelModule(t *testing.T) {
sysModuleDir = savedSysModuleDir
}()
err = os.MkdirAll(sysModuleDir, testDirMode)
err := os.MkdirAll(sysModuleDir, testDirMode)
if err != nil {
t.Fatal(err)
}
@@ -577,11 +561,7 @@ func TestCheckHaveKernelModule(t *testing.T) {
func TestCheckCheckKernelModules(t *testing.T) {
assert := assert.New(t)
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
savedModProbeCmd := modProbeCmd
savedSysModuleDir := sysModuleDir
@@ -595,7 +575,7 @@ func TestCheckCheckKernelModules(t *testing.T) {
sysModuleDir = savedSysModuleDir
}()
err = os.MkdirAll(sysModuleDir, testDirMode)
err := os.MkdirAll(sysModuleDir, testDirMode)
if err != nil {
t.Fatal(err)
}
@@ -662,11 +642,7 @@ func TestCheckCheckKernelModulesUnreadableFile(t *testing.T) {
t.Skip(ktu.TestDisabledNeedNonRoot)
}
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
testData := map[string]kernelModule{
"foo": {
@@ -691,7 +667,7 @@ func TestCheckCheckKernelModulesUnreadableFile(t *testing.T) {
}()
modPath := filepath.Join(sysModuleDir, "foo/parameters")
err = os.MkdirAll(modPath, testDirMode)
err := os.MkdirAll(modPath, testDirMode)
assert.NoError(err)
modParamFile := filepath.Join(modPath, "param1")
@@ -710,11 +686,7 @@ func TestCheckCheckKernelModulesUnreadableFile(t *testing.T) {
func TestCheckCheckKernelModulesInvalidFileContents(t *testing.T) {
assert := assert.New(t)
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
testData := map[string]kernelModule{
"foo": {
@@ -739,7 +711,7 @@ func TestCheckCheckKernelModulesInvalidFileContents(t *testing.T) {
}()
modPath := filepath.Join(sysModuleDir, "foo/parameters")
err = os.MkdirAll(modPath, testDirMode)
err := os.MkdirAll(modPath, testDirMode)
assert.NoError(err)
modParamFile := filepath.Join(modPath, "param1")
@@ -755,11 +727,7 @@ func TestCheckCheckKernelModulesInvalidFileContents(t *testing.T) {
func TestCheckCLIFunctionFail(t *testing.T) {
assert := assert.New(t)
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
_, config, err := makeRuntimeConfig(dir)
assert.NoError(err)
@@ -788,11 +756,7 @@ func TestCheckCLIFunctionFail(t *testing.T) {
func TestCheckKernelParamHandler(t *testing.T) {
assert := assert.New(t)
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
savedModProbeCmd := modProbeCmd
savedSysModuleDir := sysModuleDir
@@ -870,9 +834,7 @@ func TestCheckKernelParamHandler(t *testing.T) {
func TestArchRequiredKernelModules(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
_, config, err := makeRuntimeConfig(tmpdir)
assert.NoError(err)
@@ -885,11 +847,7 @@ func TestArchRequiredKernelModules(t *testing.T) {
return
}
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
savedModProbeCmd := modProbeCmd
savedSysModuleDir := sysModuleDir


@@ -6,7 +6,6 @@
package main
import (
"os"
"testing"
"github.com/stretchr/testify/assert"
@@ -22,9 +21,7 @@ func getExpectedHostDetails(tmpdir string) (HostInfo, error) {
func TestEnvGetEnvInfoSetsCPUType(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
savedArchRequiredCPUFlags := archRequiredCPUFlags
savedArchRequiredCPUAttribs := archRequiredCPUAttribs


@@ -9,7 +9,6 @@
package main
import (
"os"
"testing"
"github.com/stretchr/testify/assert"
@@ -18,9 +17,7 @@ import (
func testEnvGetEnvInfoSetsCPUTypeGeneric(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
savedArchRequiredCPUFlags := archRequiredCPUFlags
savedArchRequiredCPUAttribs := archRequiredCPUAttribs


@@ -364,11 +364,7 @@ func TestEnvGetMetaInfo(t *testing.T) {
}
func TestEnvGetHostInfo(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
expectedHostDetails, err := getExpectedHostDetails(tmpdir)
assert.NoError(t, err)
@@ -389,13 +385,9 @@ func TestEnvGetHostInfo(t *testing.T) {
}
func TestEnvGetHostInfoNoProcCPUInfo(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
_, err = getExpectedHostDetails(tmpdir)
_, err := getExpectedHostDetails(tmpdir)
assert.NoError(t, err)
err = os.Remove(procCPUInfo)
@@ -406,13 +398,9 @@ func TestEnvGetHostInfoNoProcCPUInfo(t *testing.T) {
}
func TestEnvGetHostInfoNoOSRelease(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
_, err = getExpectedHostDetails(tmpdir)
_, err := getExpectedHostDetails(tmpdir)
assert.NoError(t, err)
err = os.Remove(osRelease)
@@ -423,13 +411,9 @@ func TestEnvGetHostInfoNoOSRelease(t *testing.T) {
}
func TestEnvGetHostInfoNoProcVersion(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
_, err = getExpectedHostDetails(tmpdir)
_, err := getExpectedHostDetails(tmpdir)
assert.NoError(t, err)
err = os.Remove(procVersion)
@@ -440,11 +424,7 @@ func TestEnvGetHostInfoNoProcVersion(t *testing.T) {
}
func TestEnvGetEnvInfo(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
// Run test twice to ensure the individual component debug+trace
// options are tested.
@@ -474,9 +454,7 @@ func TestEnvGetEnvInfo(t *testing.T) {
func TestEnvGetEnvInfoNoHypervisorVersion(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
configFile, config, err := makeRuntimeConfig(tmpdir)
assert.NoError(err)
@@ -501,20 +479,14 @@ func TestEnvGetEnvInfoNoHypervisorVersion(t *testing.T) {
func TestEnvGetEnvInfoAgentError(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
_, _, err = makeRuntimeConfig(tmpdir)
_, _, err := makeRuntimeConfig(tmpdir)
assert.NoError(err)
}
func TestEnvGetEnvInfoNoOSRelease(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
configFile, config, err := makeRuntimeConfig(tmpdir)
assert.NoError(t, err)
@@ -530,11 +502,7 @@ func TestEnvGetEnvInfoNoOSRelease(t *testing.T) {
}
func TestEnvGetEnvInfoNoProcCPUInfo(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
configFile, config, err := makeRuntimeConfig(tmpdir)
assert.NoError(t, err)
@@ -550,11 +518,7 @@ func TestEnvGetEnvInfoNoProcCPUInfo(t *testing.T) {
}
func TestEnvGetEnvInfoNoProcVersion(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
configFile, config, err := makeRuntimeConfig(tmpdir)
assert.NoError(t, err)
@@ -570,11 +534,7 @@ func TestEnvGetEnvInfoNoProcVersion(t *testing.T) {
}
func TestEnvGetRuntimeInfo(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
configFile, config, err := makeRuntimeConfig(tmpdir)
assert.NoError(t, err)
@@ -587,11 +547,7 @@ func TestEnvGetRuntimeInfo(t *testing.T) {
}
func TestEnvGetAgentInfo(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
_, config, err := makeRuntimeConfig(tmpdir)
assert.NoError(t, err)
@@ -726,11 +682,7 @@ func testEnvShowJSONSettings(t *testing.T, tmpdir string, tmpfile *os.File) erro
}
func TestEnvShowSettings(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
tmpfile, err := os.CreateTemp("", "envShowSettings-")
assert.NoError(t, err)
@@ -747,11 +699,7 @@ func TestEnvShowSettings(t *testing.T) {
}
func TestEnvShowSettingsInvalidFile(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
tmpfile, err := os.CreateTemp("", "envShowSettings-")
assert.NoError(t, err)
@@ -771,11 +719,7 @@ func TestEnvShowSettingsInvalidFile(t *testing.T) {
}
func TestEnvHandleSettings(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
configFile, config, err := makeRuntimeConfig(tmpdir)
assert.NoError(t, err)
@@ -805,9 +749,7 @@ func TestEnvHandleSettings(t *testing.T) {
func TestEnvHandleSettingsInvalidParams(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
configFile, _, err := makeRuntimeConfig(tmpdir)
assert.NoError(err)
@@ -859,11 +801,7 @@ func TestEnvHandleSettingsInvalidRuntimeConfigType(t *testing.T) {
}
func TestEnvCLIFunction(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
configFile, config, err := makeRuntimeConfig(tmpdir)
assert.NoError(t, err)
@@ -904,11 +842,7 @@ func TestEnvCLIFunction(t *testing.T) {
}
func TestEnvCLIFunctionFail(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
configFile, config, err := makeRuntimeConfig(tmpdir)
assert.NoError(t, err)
@@ -940,9 +874,7 @@ func TestEnvCLIFunctionFail(t *testing.T) {
func TestGetHypervisorInfo(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
_, config, err := makeRuntimeConfig(tmpdir)
assert.NoError(err)
@@ -962,9 +894,7 @@ func TestGetHypervisorInfo(t *testing.T) {
func TestGetHypervisorInfoSocket(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
_, config, err := makeRuntimeConfig(tmpdir)
assert.NoError(err)


@@ -0,0 +1,122 @@
// Copyright (c) 2022 Apple Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
package main
import (
"fmt"
"io/ioutil"
containerdshim "github.com/kata-containers/kata-containers/src/runtime/pkg/containerd-shim-v2"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
"github.com/kata-containers/kata-containers/src/runtime/pkg/utils/shimclient"
"github.com/urfave/cli"
)
var (
sandboxID string
isIPv6 bool
)
var iptablesSubCmds = []cli.Command{
getIPTablesCommand,
setIPTablesCommand,
}
var kataIPTablesCommand = cli.Command{
Name: "iptables",
Usage: "get or set iptables within the Kata Containers guest",
Subcommands: iptablesSubCmds,
Action: func(context *cli.Context) {
cli.ShowSubcommandHelp(context)
},
}
var getIPTablesCommand = cli.Command{
Name: "get",
Usage: "get iptables from the Kata Containers guest",
Flags: []cli.Flag{
cli.StringFlag{
Name: "sandbox-id",
Usage: "the target sandbox for getting the iptables",
Required: true,
Destination: &sandboxID,
},
cli.BoolFlag{
Name: "v6",
Usage: "indicate we're requesting ipv6 iptables",
Destination: &isIPv6,
},
},
Action: func(c *cli.Context) error {
// verify sandbox exists:
if err := katautils.VerifyContainerID(sandboxID); err != nil {
return err
}
url := containerdshim.IPTablesUrl
if isIPv6 {
url = containerdshim.IP6TablesUrl
}
body, err := shimclient.DoGet(sandboxID, defaultTimeout, url)
if err != nil {
return err
}
fmt.Println(string(body))
return nil
},
}
var setIPTablesCommand = cli.Command{
Name: "set",
Usage: "set iptables in a specifc Kata Containers guest based on file",
Flags: []cli.Flag{
cli.StringFlag{
Name: "sandbox-id",
Usage: "the target sandbox for setting the iptables",
Required: true,
Destination: &sandboxID,
},
cli.BoolFlag{
Name: "v6",
Usage: "indicate we're requesting ipv6 iptables",
Destination: &isIPv6,
},
},
Action: func(c *cli.Context) error {
iptablesFile := c.Args().Get(0)
// verify sandbox exists:
if err := katautils.VerifyContainerID(sandboxID); err != nil {
return err
}
// verify iptables were provided:
if iptablesFile == "" {
return fmt.Errorf("iptables file not provided")
}
if !katautils.FileExists(iptablesFile) {
return fmt.Errorf("iptables file does not exist: %s", iptablesFile)
}
// Read file into buffer, and make request to the appropriate shim
buf, err := ioutil.ReadFile(iptablesFile)
if err != nil {
return err
}
url := containerdshim.IPTablesUrl
if isIPv6 {
url = containerdshim.IP6TablesUrl
}
if err = shimclient.DoPut(sandboxID, defaultTimeout, url, "application/octet-stream", buf); err != nil {
return fmt.Errorf("Error observed when making iptables-set request(%s): %s", iptablesFile, err)
}
return nil
},
}
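A usage sketch of the new subcommand, as wired into `kata-runtime` below (the sandbox ID and file name are illustrative):

```bash
# dump the guest's iptables (add --v6 for ip6tables)
$ kata-runtime iptables get --sandbox-id <sandbox-id> > rules.txt
# push the (possibly edited) rules back into the guest
$ kata-runtime iptables set --sandbox-id <sandbox-id> rules.txt
```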


@@ -7,10 +7,11 @@ package main
import (
"encoding/json"
"fmt"
"net/url"
containerdshim "github.com/kata-containers/kata-containers/src/runtime/pkg/containerd-shim-v2"
"github.com/kata-containers/kata-containers/src/runtime/pkg/direct-volume"
volume "github.com/kata-containers/kata-containers/src/runtime/pkg/direct-volume"
"github.com/kata-containers/kata-containers/src/runtime/pkg/utils/shimclient"
"github.com/urfave/cli"
@@ -54,7 +55,10 @@ var addCommand = cli.Command{
},
},
Action: func(c *cli.Context) error {
return volume.Add(volumePath, mountInfo)
if err := volume.Add(volumePath, mountInfo); err != nil {
return cli.NewExitError(err.Error(), 1)
}
return nil
},
}
@@ -69,7 +73,10 @@ var removeCommand = cli.Command{
},
},
Action: func(c *cli.Context) error {
return volume.Remove(volumePath)
if err := volume.Remove(volumePath); err != nil {
return cli.NewExitError(err.Error(), 1)
}
return nil
},
}
@@ -83,13 +90,14 @@ var statsCommand = cli.Command{
Destination: &volumePath,
},
},
Action: func(c *cli.Context) (string, error) {
Action: func(c *cli.Context) error {
stats, err := Stats(volumePath)
if err != nil {
return "", err
return cli.NewExitError(err.Error(), 1)
}
return string(stats), nil
fmt.Println(string(stats))
return nil
},
}
@@ -109,7 +117,10 @@ var resizeCommand = cli.Command{
},
},
Action: func(c *cli.Context) error {
return Resize(volumePath, size)
if err := Resize(volumePath, size); err != nil {
return cli.NewExitError(err.Error(), 1)
}
return nil
},
}
@@ -119,8 +130,14 @@ func Stats(volumePath string) ([]byte, error) {
if err != nil {
return nil, err
}
urlSafeDevicePath := url.PathEscape(volumePath)
body, err := shimclient.DoGet(sandboxId, defaultTimeout, containerdshim.DirectVolumeStatUrl+"/"+urlSafeDevicePath)
volumeMountInfo, err := volume.VolumeMountInfo(volumePath)
if err != nil {
return nil, err
}
urlSafeDevicePath := url.PathEscape(volumeMountInfo.Device)
body, err := shimclient.DoGet(sandboxId, defaultTimeout,
fmt.Sprintf("%s?%s=%s", containerdshim.DirectVolumeStatUrl, containerdshim.DirectVolumePathKey, urlSafeDevicePath))
if err != nil {
return nil, err
}
@@ -133,13 +150,18 @@ func Resize(volumePath string, size uint64) error {
if err != nil {
return err
}
volumeMountInfo, err := volume.VolumeMountInfo(volumePath)
if err != nil {
return err
}
resizeReq := containerdshim.ResizeRequest{
VolumePath: volumePath,
VolumePath: volumeMountInfo.Device,
Size: size,
}
encoded, err := json.Marshal(resizeReq)
if err != nil {
return err
}
return shimclient.DoPost(sandboxId, defaultTimeout, containerdshim.DirectVolumeResizeUrl, encoded)
return shimclient.DoPost(sandboxId, defaultTimeout, containerdshim.DirectVolumeResizeUrl, "application/json", encoded)
}
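With this change, stats and resize requests identify the volume by its backing device from the stored mount info rather than by the raw volume path. A usage sketch (the `--volume-path` flag name is inferred from the `volumePath` destination above, and the path is illustrative):
```bash
$ kata-runtime direct-volume stats --volume-path /path/to/my/volume
```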


@@ -125,6 +125,7 @@ var runtimeCommands = []cli.Command{
kataMetricsCLICommand,
factoryCLICommand,
kataVolumeCommand,
kataIPTablesCommand,
}
// runtimeBeforeSubcommands is the function to run before command-line


@@ -258,14 +258,12 @@ func TestMainBeforeSubCommands(t *testing.T) {
func TestMainBeforeSubCommandsInvalidLogFile(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "katatest")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
logFile := filepath.Join(tmpdir, "log")
// create the file as the wrong type to force a failure
err = os.MkdirAll(logFile, testDirMode)
err := os.MkdirAll(logFile, testDirMode)
assert.NoError(err)
set := flag.NewFlagSet("", 0)
@@ -281,9 +279,7 @@ func TestMainBeforeSubCommandsInvalidLogFile(t *testing.T) {
func TestMainBeforeSubCommandsInvalidLogFormat(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "katatest")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
logFile := filepath.Join(tmpdir, "log")
@@ -302,7 +298,7 @@ func TestMainBeforeSubCommandsInvalidLogFormat(t *testing.T) {
ctx := createCLIContext(set)
err = beforeSubcommands(ctx)
err := beforeSubcommands(ctx)
assert.Error(err)
assert.NotNil(kataLog.Logger.Out)
}
@@ -310,9 +306,7 @@ func TestMainBeforeSubCommandsInvalidLogFormat(t *testing.T) {
func TestMainBeforeSubCommandsLoadConfigurationFail(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "katatest")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
logFile := filepath.Join(tmpdir, "log")
configFile := filepath.Join(tmpdir, "config")
@@ -345,9 +339,7 @@ func TestMainBeforeSubCommandsLoadConfigurationFail(t *testing.T) {
func TestMainBeforeSubCommandsShowCCConfigPaths(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "katatest")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
set := flag.NewFlagSet("", 0)
set.Bool("show-default-config-paths", true, "")
@@ -409,9 +401,7 @@ func TestMainBeforeSubCommandsShowCCConfigPaths(t *testing.T) {
func TestMainFatal(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "katatest")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
var exitStatus int
savedExitFunc := exitFunc
@@ -633,9 +623,7 @@ func TestMainCreateRuntime(t *testing.T) {
func TestMainVersionPrinter(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "katatest")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
savedOutputFile := defaultOutputFile


@@ -17,18 +17,14 @@ import (
)
func TestFileExists(t *testing.T) {
dir, err := os.MkdirTemp("", "katatest")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
file := filepath.Join(dir, "foo")
assert.False(t, katautils.FileExists(file),
fmt.Sprintf("File %q should not exist", file))
err = createEmptyFile(file)
err := createEmptyFile(file)
if err != nil {
t.Fatal(err)
}
@@ -54,14 +50,10 @@ func TestGetKernelVersion(t *testing.T) {
{validContents, validVersion, false},
}
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
subDir := filepath.Join(tmpdir, "subdir")
err = os.MkdirAll(subDir, testDirMode)
err := os.MkdirAll(subDir, testDirMode)
assert.NoError(t, err)
_, err = getKernelVersion()
@@ -103,11 +95,7 @@ func TestGetDistroDetails(t *testing.T) {
const unknown = "<<unknown>>"
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
testOSRelease := filepath.Join(tmpdir, "os-release")
testOSReleaseClr := filepath.Join(tmpdir, "os-release-clr")
@@ -131,7 +119,7 @@ VERSION_ID="%s"
`, nonClrExpectedName, nonClrExpectedVersion)
subDir := filepath.Join(tmpdir, "subdir")
err = os.MkdirAll(subDir, testDirMode)
err := os.MkdirAll(subDir, testDirMode)
assert.NoError(t, err)
// override


@@ -125,7 +125,8 @@ virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@
#
# Format example:
# ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"]
#
# Examples:
# Set virtiofsd log level to debug: ["-o", "log_level=debug"] or ["-d"]
# see `virtiofsd -h` for possible options.
virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@
@@ -179,6 +180,78 @@ block_device_driver = "virtio-blk"
# but it will not abort container execution.
#guest_hook_path = "/usr/share/oci/hooks"
#
# These options are related to the network rate limiter at the VMM level, and
# are based on the Cloud Hypervisor I/O throttling. They are disabled by
# default and we strongly advise users to refer to the official Cloud
# Hypervisor documentation for a better understanding of its internals:
# https://github.com/cloud-hypervisor/cloud-hypervisor/blob/main/docs/io_throttling.md
#
# Bandwidth rate limiter options
#
# net_rate_limiter_bw_max_rate controls network I/O bandwidth (size in bits/sec
# for SB/VM).
# The same value is used for inbound and outbound bandwidth.
# Default 0-sized value means unlimited rate.
#net_rate_limiter_bw_max_rate = 0
#
# net_rate_limiter_bw_one_time_burst increases the initial max rate and this
# initial extra credit does *NOT* affect the overall limit and can be used for
# an *initial* burst of data.
# This is *optional* and only takes effect if net_rate_limiter_bw_max_rate is
# set to a non-zero value.
#net_rate_limiter_bw_one_time_burst = 0
#
# Operation rate limiter options
#
# net_rate_limiter_ops_max_rate controls the network I/O operations rate (in
# ops/sec for SB/VM).
# The same value is used for inbound and outbound bandwidth.
# Default 0-sized value means unlimited rate.
#net_rate_limiter_ops_max_rate = 0
#
# net_rate_limiter_ops_one_time_burst increases the initial max rate and this
# initial extra credit does *NOT* affect the overall limit and can be used for
# an *initial* burst of data.
# This is *optional* and only takes effect if net_rate_limiter_ops_max_rate is
# set to a non-zero value.
#net_rate_limiter_ops_one_time_burst = 0
#
# These options are related to the disk rate limiter at the VMM level, and
# are based on the Cloud Hypervisor I/O throttling. They are disabled by
# default and we strongly advise users to refer to the official Cloud
# Hypervisor documentation for a better understanding of its internals:
# https://github.com/cloud-hypervisor/cloud-hypervisor/blob/main/docs/io_throttling.md
#
# Bandwidth rate limiter options
#
# disk_rate_limiter_bw_max_rate controls disk I/O bandwidth (size in bits/sec
# for SB/VM).
# The same value is used for inbound and outbound bandwidth.
# Default 0-sized value means unlimited rate.
#disk_rate_limiter_bw_max_rate = 0
#
# disk_rate_limiter_bw_one_time_burst increases the initial max rate and this
# initial extra credit does *NOT* affect the overall limit and can be used for
# an *initial* burst of data.
# This is *optional* and only takes effect if disk_rate_limiter_bw_max_rate is
# set to a non-zero value.
#disk_rate_limiter_bw_one_time_burst = 0
#
# Operation rate limiter options
#
# disk_rate_limiter_ops_max_rate controls the disk I/O operations rate (in
# ops/sec for SB/VM).
# The same value is used for inbound and outbound bandwidth.
# Default 0-sized value means unlimited rate.
#disk_rate_limiter_ops_max_rate = 0
#
# disk_rate_limiter_ops_one_time_burst increases the initial max rate and this
# initial extra credit does *NOT* affect the overall limit and can be used for
# an *initial* burst of data.
# This is *optional* and only takes effect if disk_rate_limiter_ops_max_rate is
# set to a non-zero value.
#disk_rate_limiter_ops_one_time_burst = 0
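#
# Example (illustrative values, not defaults): cap each sandbox to roughly
# 100 Mbit/s of network bandwidth and 1000 disk operations per second:
#net_rate_limiter_bw_max_rate = 104857600
#disk_rate_limiter_ops_max_rate = 1000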
[agent.@PROJECT_TYPE@]
# If enabled, make the agent display debug-level messages.
# (default: disabled)
@@ -324,3 +397,30 @@ experimental=@DEFAULTEXPFEATURES@
# If enabled, user can run pprof tools with shim v2 process through kata-monitor.
# (default: false)
# enable_pprof = true
# WARNING: None of the options in the following section have been implemented yet.
# This section was added as a placeholder. DO NOT USE IT!
[image]
# Container image service.
#
# Offload the CRI image management service to the Kata agent.
# (default: false)
#service_offload = true
# Container image decryption keys provisioning.
# Applies only if service_offload is true.
# Keys can be provisioned locally (e.g. through a special command or
# a local file) or remotely (usually after the guest is remotely attested).
# The provision setting is a complete URL that lets the Kata agent decide
# which method to use in order to fetch the keys.
#
# Keys can be stored in a local file, in a measured and attested initrd:
#provision=data:///local/key/file
#
# Keys can be fetched through a special command or binary from the
# initrd (guest) image, e.g. a firmware call:
#provision=file:///path/to/bin/fetcher/in/guest
#
# Keys can be remotely provisioned. The Kata agent fetches them from e.g.
# a HTTPS URL:
#provision=https://my-key-broker.foo/tenant/<tenant-id>


@@ -168,6 +168,8 @@ virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@
#
# Format example:
# ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"]
# Examples:
# Set virtiofsd log level to debug: ["-o", "log_level=debug"] or ["-d"]
#
# see `virtiofsd -h` for possible options.
virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@
@@ -387,6 +389,9 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
# be default_memory.
#enable_guest_swap = true
# Use a legacy serial device for the guest console if available and implemented for the architecture. Default false
#use_legacy_serial = true
[factory]
# VM templating support. Once enabled, new VMs are created from template
# using vm cloning. They will share the same initial kernel, initramfs and

src/runtime/go-test.sh Executable file

@@ -0,0 +1,167 @@
#!/bin/bash
#
# Copyright (c) 2017-2018 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
set -e
script_name=${0##*/}
typeset -A long_options
long_options=(
[help]="Show usage"
[package:]="Specify test package to run"
)
# Set up go test flags
go_test_flags="${KATA_GO_TEST_FLAGS}"
if [ -z "$go_test_flags" ]; then
# KATA_GO_TEST_TIMEOUT can be set to any value accepted by
# "go test -timeout X"
go_test_flags="-timeout ${KATA_GO_TEST_TIMEOUT:-30s}"
# -race flag is not supported on s390x
[ "$(go env GOARCH)" != "s390x" ] && go_test_flags+=" -race"
# s390x requires special linker flags
[ "$(go env GOARCH)" = s390x ] && go_test_flags+=" -ldflags '-extldflags -Wl,--s390-pgste'"
fi
# The "master" coverage file that contains the coverage results for
# all packages run under all scenarios.
test_coverage_file="coverage.txt"
# Temporary coverage file created for a "go test" run. The results in
# this file will be added to the master coverage file.
tmp_coverage_file="${test_coverage_file}.tmp"
warn()
{
local msg="$*"
echo >&2 "WARNING: $msg"
}
usage()
{
cat <<EOF
Usage: $script_name [options]
Options:
EOF
local option
local description
local long_option_names="${!long_options[@]}"
# Sort space-separated list by converting to newline separated list
# and back again.
long_option_names=$(echo "$long_option_names"|tr ' ' '\n'|sort|tr '\n' ' ')
# Display long options
for option in ${long_option_names}
do
description=${long_options[$option]}
# Remove any trailing colon which is for getopt(1) alone.
option=$(echo "$option"|sed 's/:$//g')
printf " --%-10.10s # %s\n" "$option" "$description"
done
}
# Run a command as either root or the current user (which might still be root).
#
# If the first argument is "root", run using sudo, else run as the current
# user. All arguments after the first will be treated as the command to run.
run_as_user()
{
local user="$1"
shift
local cmd=$*
if [ "$user" = root ]; then
# use a shell to ensure PATH is correct.
sudo -E PATH="$PATH" sh -c "$cmd"
else
eval "$cmd"
fi
}
# Test a single golang package
test_go_package()
{
local -r pkg="$1"
local -r user="$2"
printf "INFO: Running 'go test' as %s user on package '%s' with flags '%s'\n" \
"$user" "$pkg" "$go_test_flags"
run_as_user "$user" go test "$go_test_flags" -covermode=atomic -coverprofile=$tmp_coverage_file "$pkg"
# Merge test results into the master coverage file.
run_as_user "$user" tail -n +2 "$tmp_coverage_file" >> "$test_coverage_file"
rm -f "$tmp_coverage_file"
}
# Run all tests and generate a test coverage file.
test_coverage()
{
echo "mode: atomic" > "$test_coverage_file"
users="current"
if [ "$(id -u)" -eq 0 ]; then
warn "Already running as root so will not re-run tests as non-root user."
warn "As a result, only a subset of tests will be run"
warn "(run this script as a non-privileged to ensure all tests are run)."
else
# Run the unit-tests *twice* (since some must run as
# root and others must run as non-root), combining the
# resulting test coverage files.
users+=" root"
fi
echo "INFO: Currently running as user '$(id -un)'"
for user in $users; do
test_go_package "$package" "$user"
done
}
main()
{
local long_option_names="${!long_options[@]}"
local args
args=$(getopt \
-n "$script_name" \
-a \
--options="h" \
--longoptions="$long_option_names" \
-- "$@") \
|| { usage >&2; exit 1; }
package="./..."
eval set -- "$args"
while [ $# -gt 1 ]
do
case "$1" in
-h|--help) usage; exit 0 ;;
--package) package="$2"; shift 2;;
--) shift; break ;;
esac
shift
done
test_coverage
}
main "$@"


@@ -50,7 +50,6 @@ func TestCreateSandboxSuccess(t *testing.T) {
}()
tmpdir, bundlePath, ociConfigFile := ktu.SetupOCIConfigFile(t)
-// defer os.RemoveAll(tmpdir)
runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true)
assert.NoError(err)
@@ -99,7 +98,6 @@ func TestCreateSandboxFail(t *testing.T) {
assert := assert.New(t)
tmpdir, bundlePath, ociConfigFile := ktu.SetupOCIConfigFile(t)
-defer os.RemoveAll(tmpdir)
runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true)
assert.NoError(err)
@@ -137,7 +135,6 @@ func TestCreateSandboxConfigFail(t *testing.T) {
assert := assert.New(t)
tmpdir, bundlePath, _ := ktu.SetupOCIConfigFile(t)
-defer os.RemoveAll(tmpdir)
runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true)
assert.NoError(err)
@@ -187,7 +184,6 @@ func TestCreateContainerSuccess(t *testing.T) {
}
tmpdir, bundlePath, ociConfigFile := ktu.SetupOCIConfigFile(t)
-defer os.RemoveAll(tmpdir)
runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true)
assert.NoError(err)
@@ -227,7 +223,6 @@ func TestCreateContainerFail(t *testing.T) {
assert := assert.New(t)
tmpdir, bundlePath, ociConfigFile := ktu.SetupOCIConfigFile(t)
-defer os.RemoveAll(tmpdir)
runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true)
assert.NoError(err)
@@ -278,7 +273,6 @@ func TestCreateContainerConfigFail(t *testing.T) {
}()
tmpdir, bundlePath, ociConfigFile := ktu.SetupOCIConfigFile(t)
-defer os.RemoveAll(tmpdir)
runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true)
assert.NoError(err)
@@ -382,9 +376,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err err
func TestCreateLoadRuntimeConfig(t *testing.T) {
assert := assert.New(t)
-tmpdir, err := os.MkdirTemp("", "")
-assert.NoError(err)
-defer os.RemoveAll(tmpdir)
+tmpdir := t.TempDir()
config, err := createAllRuntimeConfigFiles(tmpdir, "qemu")
assert.NoError(err)
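The pattern in the hunks above recurs throughout this diff: `t.TempDir()` (Go 1.15+) replaces the manual `os.MkdirTemp` plus `defer os.RemoveAll` pair, because the testing package registers the cleanup itself and fails the test if it cannot create the directory. A minimal sketch of the two styles side by side, using a hypothetical test not taken from this diff:

package example

import (
	"os"
	"path/filepath"
	"testing"
)

func TestTempDirPattern(t *testing.T) {
	// Old pattern: manual creation and cleanup. Error-prone if the test
	// forgets the defer or exits before registering it.
	old, err := os.MkdirTemp("", "kata-")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(old)

	// New pattern: creation errors fail the test, and removal is
	// registered automatically for when the test finishes.
	tmpdir := t.TempDir()
	if err := os.WriteFile(filepath.Join(tmpdir, "f"), []byte("x"), 0o600); err != nil {
		t.Fatal(err)
	}
}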


@@ -7,7 +7,6 @@
package containerdshim
import (
"os"
"testing"
taskAPI "github.com/containerd/containerd/runtime/v2/task"
@@ -25,8 +24,8 @@ func TestDeleteContainerSuccessAndFail(t *testing.T) {
MockID: testSandboxID,
}
-rootPath, bundlePath, _ := ktu.SetupOCIConfigFile(t)
-defer os.RemoveAll(rootPath)
+_, bundlePath, _ := ktu.SetupOCIConfigFile(t)
_, err := compatoci.ParseConfigJSON(bundlePath)
assert.NoError(err)


@@ -10,6 +10,7 @@ import (
"io"
"os"
sysexec "os/exec"
+goruntime "runtime"
"sync"
"syscall"
"time"
@@ -31,6 +32,7 @@ import (
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
"github.com/kata-containers/kata-containers/src/runtime/pkg/utils"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/compatoci"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
@@ -234,9 +236,19 @@ func (s *service) StartShim(ctx context.Context, opts cdshim.StartOpts) (_ strin
cmd.ExtraFiles = append(cmd.ExtraFiles, f)
+goruntime.LockOSThread()
+if os.Getenv("SCHED_CORE") != "" {
+if err := utils.Create(utils.ProcessGroup); err != nil {
+return "", errors.Wrap(err, "enable sched core support")
+}
+}
if err := cmd.Start(); err != nil {
return "", err
}
+goruntime.UnlockOSThread()
defer func() {
if retErr != nil {
cmd.Process.Kill()
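Background for the `SCHED_CORE` block above: core scheduling (Linux 5.14+) tags tasks with a cookie, and only tasks sharing a cookie may share an SMT core, so the shim and the hypervisor it spawns are isolated from unrelated tenants. The body of `utils.Create(utils.ProcessGroup)` is not shown in this diff; a plausible reading, sketched below under that assumption with constant values copied from include/uapi/linux/prctl.h, is a `PR_SCHED_CORE` prctl applied to the shim's process group:

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

// Values mirror include/uapi/linux/prctl.h; not the runtime's actual utils package.
const (
	prSchedCore       = 62 // PR_SCHED_CORE
	prSchedCoreCreate = 1  // PR_SCHED_CORE_CREATE
	pidTypePGID       = 2  // PIDTYPE_PGID: apply to the whole process group
)

func main() {
	// pid 0 selects the caller; PIDTYPE_PGID extends the new cookie to
	// every task in its process group. Children inherit the cookie across
	// fork, which is why the shim creates it before cmd.Start().
	if err := unix.Prctl(prSchedCore, prSchedCoreCreate, 0, pidTypePGID, 0); err != nil {
		fmt.Println("enable sched core support:", err)
	}
}

The `LockOSThread`/`UnlockOSThread` pair presumably keeps the prctl and the subsequent `cmd.Start()` on the same OS thread, so the cookie is in place before the child process is forked.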


@@ -41,8 +41,7 @@ func TestServiceCreate(t *testing.T) {
assert := assert.New(t)
-tmpdir, bundleDir, _ := ktu.SetupOCIConfigFile(t)
-defer os.RemoveAll(tmpdir)
+_, bundleDir, _ := ktu.SetupOCIConfigFile(t)
ctx := context.Background()


@@ -29,11 +29,17 @@ import (
"github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go"
"github.com/prometheus/common/expfmt"
"github.com/sirupsen/logrus"
)
const (
+DirectVolumePathKey = "path"
+AgentUrl = "/agent-url"
DirectVolumeStatUrl = "/direct-volume/stats"
DirectVolumeResizeUrl = "/direct-volume/resize"
+IPTablesUrl = "/iptables"
+IP6TablesUrl = "/ip6tables"
+MetricsUrl = "/metrics"
)
var (
@@ -139,7 +145,16 @@ func decodeAgentMetrics(body string) []*dto.MetricFamily {
}
func (s *service) serveVolumeStats(w http.ResponseWriter, r *http.Request) {
-volumePath, err := url.PathUnescape(strings.TrimPrefix(r.URL.Path, DirectVolumeStatUrl))
+val := r.URL.Query().Get(DirectVolumePathKey)
+if val == "" {
+msg := fmt.Sprintf("Required parameter %s not found", DirectVolumePathKey)
+shimMgtLog.Info(msg)
+w.WriteHeader(http.StatusBadRequest)
+w.Write([]byte(msg))
+return
+}
+volumePath, err := url.PathUnescape(val)
if err != nil {
shimMgtLog.WithError(err).Error("failed to unescape the volume stat url path")
w.WriteHeader(http.StatusInternalServerError)
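The net effect of this hunk: the volume path moves out of the URL path, where an embedded `/` confuses routing, into the `path` query parameter named by `DirectVolumePathKey`. A small sketch of how a client builds the new URL:

package main

import (
	"fmt"
	"net/url"
)

func main() {
	// Query-encode the volume path; previously it was appended to the URL
	// path itself, which an unescaped "/" would mangle.
	u := url.URL{Path: "/direct-volume/stats"} // DirectVolumeStatUrl
	q := u.Query()
	q.Set("path", "/a/b/c") // DirectVolumePathKey = "path"
	u.RawQuery = q.Encode()
	fmt.Println(u.String()) // /direct-volume/stats?path=%2Fa%2Fb%2Fc
}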
@@ -184,6 +199,48 @@ func (s *service) serveVolumeResize(w http.ResponseWriter, r *http.Request) {
w.Write([]byte(""))
}
+func (s *service) ip6TablesHandler(w http.ResponseWriter, r *http.Request) {
+s.genericIPTablesHandler(w, r, true)
+}
+func (s *service) ipTablesHandler(w http.ResponseWriter, r *http.Request) {
+s.genericIPTablesHandler(w, r, false)
+}
+func (s *service) genericIPTablesHandler(w http.ResponseWriter, r *http.Request, isIPv6 bool) {
+logger := shimMgtLog.WithFields(logrus.Fields{"handler": "iptables", "ipv6": isIPv6})
+switch r.Method {
+case http.MethodPut:
+body, err := ioutil.ReadAll(r.Body)
+if err != nil {
+logger.WithError(err).Error("failed to read request body")
+w.WriteHeader(http.StatusInternalServerError)
+w.Write([]byte(err.Error()))
+return
+}
+if err = s.sandbox.SetIPTables(context.Background(), isIPv6, body); err != nil {
+logger.WithError(err).Error("failed to set IPTables")
+w.WriteHeader(http.StatusInternalServerError)
+w.Write([]byte(err.Error()))
+}
+w.Write([]byte(""))
+case http.MethodGet:
+buf, err := s.sandbox.GetIPTables(context.Background(), isIPv6)
+if err != nil {
+logger.WithError(err).Error("failed to get IPTables")
+w.WriteHeader(http.StatusInternalServerError)
+w.Write([]byte(err.Error()))
+}
+w.Write(buf)
+default:
+w.WriteHeader(http.StatusNotImplemented)
+return
+}
+}
func (s *service) startManagementServer(ctx context.Context, ociSpec *specs.Spec) {
// the metrics socket lives under the sandbox's bundle path
metricsAddress := SocketAddress(s.id)
@@ -204,10 +261,12 @@ func (s *service) startManagementServer(ctx context.Context, ociSpec *specs.Spec
// bind handler
m := http.NewServeMux()
m.Handle("/metrics", http.HandlerFunc(s.serveMetrics))
m.Handle("/agent-url", http.HandlerFunc(s.agentURL))
m.Handle(MetricsUrl, http.HandlerFunc(s.serveMetrics))
m.Handle(AgentUrl, http.HandlerFunc(s.agentURL))
m.Handle(DirectVolumeStatUrl, http.HandlerFunc(s.serveVolumeStats))
m.Handle(DirectVolumeResizeUrl, http.HandlerFunc(s.serveVolumeResize))
+m.Handle(IPTablesUrl, http.HandlerFunc(s.ipTablesHandler))
+m.Handle(IP6TablesUrl, http.HandlerFunc(s.ip6TablesHandler))
s.mountPprofHandle(m, ociSpec)
// register shim metrics
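A sketch of exercising the new endpoints from the host. The management server listens on a per-sandbox unix socket (`SocketAddress(s.id)` above); the socket path below is an assumption for illustration, and in practice `kata-runtime iptables` and `shimclient` resolve it from the sandbox ID:

package main

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"net"
	"net/http"
)

func main() {
	// Hypothetical socket path; the real one is derived from the sandbox ID.
	const sock = "/run/vc/sbs/<sandbox-id>/shim-monitor.sock"

	// Standard trick for HTTP over a unix socket: dial the socket directly
	// and use a dummy host in the URL.
	client := &http.Client{Transport: &http.Transport{
		DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) {
			return (&net.Dialer{}).DialContext(ctx, "unix", sock)
		},
	}}

	// GET the guest's current rules (use /ip6tables for IPv6).
	resp, err := client.Get("http://shim/iptables")
	if err != nil {
		panic(err)
	}
	rules, _ := io.ReadAll(resp.Body)
	resp.Body.Close()
	fmt.Printf("%s", rules)

	// PUT a rule payload back; the handler forwards the body to the guest
	// agent unmodified.
	req, _ := http.NewRequest(http.MethodPut, "http://shim/iptables", bytes.NewReader(rules))
	if resp, err := client.Do(req); err == nil {
		resp.Body.Close()
	}
}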


@@ -26,9 +26,7 @@ func TestNewTtyIOFifoReopen(t *testing.T) {
assert := assert.New(t)
ctx := context.TODO()
-testDir, err := os.MkdirTemp("", "kata-")
-assert.NoError(err)
-defer os.RemoveAll(testDir)
+testDir := t.TempDir()
fifoPath, err := os.MkdirTemp(testDir, "fifo-path-")
assert.NoError(err)
@@ -104,9 +102,7 @@ func TestIoCopy(t *testing.T) {
testBytes2 := []byte("Test2")
testBytes3 := []byte("Test3")
-testDir, err := os.MkdirTemp("", "kata-")
-assert.NoError(err)
-defer os.RemoveAll(testDir)
+testDir := t.TempDir()
fifoPath, err := os.MkdirTemp(testDir, "fifo-path-")
assert.NoError(err)


@@ -78,7 +78,7 @@ func wait(ctx context.Context, s *service, c *container, execID string) (int32,
shimLog.WithField("sandbox", s.sandbox.ID()).Error("failed to delete sandbox")
}
} else {
-if _, err = s.sandbox.StopContainer(ctx, c.id, false); err != nil {
+if _, err = s.sandbox.StopContainer(ctx, c.id, true); err != nil {
shimLog.WithError(err).WithField("container", c.id).Warn("stop container failed")
}
}


@@ -6,17 +6,36 @@
package volume
import (
b64 "encoding/base64"
"encoding/json"
"errors"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
)
const (
mountInfoFileName = "mountInfo.json"
+FSGroupMetadataKey = "fsGroup"
+FSGroupChangePolicyMetadataKey = "fsGroupChangePolicy"
)
+// FSGroupChangePolicy holds policies that will be used for applying fsGroup to a volume.
+// This type and the allowed values are tracking the PodFSGroupChangePolicy defined in
+// https://github.com/kubernetes/kubernetes/blob/master/staging/src/k8s.io/api/core/v1/types.go
+// It is up to the client using the direct-assigned volume feature (e.g. CSI drivers) to determine
+// the optimal setting for this change policy (i.e. from Pod spec or assuming volume ownership
+// based on the storage offering).
+type FSGroupChangePolicy string
+const (
+// FSGroupChangeAlways indicates that volume's ownership should always be changed.
+FSGroupChangeAlways FSGroupChangePolicy = "Always"
+// FSGroupChangeOnRootMismatch indicates that volume's ownership will be changed
+// only when ownership of root directory does not match with the desired group id.
+FSGroupChangeOnRootMismatch FSGroupChangePolicy = "OnRootMismatch"
+)
var kataDirectVolumeRootPath = "/run/kata-containers/shared/direct-volumes"
@@ -37,19 +56,20 @@ type MountInfo struct {
// Add writes the mount info of a direct volume into a filesystem path known to Kata Container.
func Add(volumePath string, mountInfo string) error {
-volumeDir := filepath.Join(kataDirectVolumeRootPath, volumePath)
+volumeDir := filepath.Join(kataDirectVolumeRootPath, b64.URLEncoding.EncodeToString([]byte(volumePath)))
stat, err := os.Stat(volumeDir)
-if err != nil && !errors.Is(err, os.ErrNotExist) {
-return err
-}
-if stat != nil && !stat.IsDir() {
-return fmt.Errorf("%s should be a directory", volumeDir)
-}
-if errors.Is(err, os.ErrNotExist) {
+if err != nil {
+if !errors.Is(err, os.ErrNotExist) {
+return err
+}
if err := os.MkdirAll(volumeDir, 0700); err != nil {
return err
}
}
+if stat != nil && !stat.IsDir() {
+return fmt.Errorf("%s should be a directory", volumeDir)
+}
var deserialized MountInfo
if err := json.Unmarshal([]byte(mountInfo), &deserialized); err != nil {
return err
@@ -60,14 +80,12 @@ func Add(volumePath string, mountInfo string) error {
// Remove deletes the direct volume path including all the files inside it.
func Remove(volumePath string) error {
-// Find the base of the volume path to delete the whole volume path
-base := strings.SplitN(volumePath, string(os.PathSeparator), 2)[0]
-return os.RemoveAll(filepath.Join(kataDirectVolumeRootPath, base))
+return os.RemoveAll(filepath.Join(kataDirectVolumeRootPath, b64.URLEncoding.EncodeToString([]byte(volumePath))))
}
// VolumeMountInfo retrieves the mount info of a direct volume.
func VolumeMountInfo(volumePath string) (*MountInfo, error) {
-mountInfoFilePath := filepath.Join(kataDirectVolumeRootPath, volumePath, mountInfoFileName)
+mountInfoFilePath := filepath.Join(kataDirectVolumeRootPath, b64.URLEncoding.EncodeToString([]byte(volumePath)), mountInfoFileName)
if _, err := os.Stat(mountInfoFilePath); err != nil {
return nil, err
}
@@ -84,16 +102,17 @@ func VolumeMountInfo(volumePath string) (*MountInfo, error) {
// RecordSandboxId associates a sandbox id with a direct volume.
func RecordSandboxId(sandboxId string, volumePath string) error {
-mountInfoFilePath := filepath.Join(kataDirectVolumeRootPath, volumePath, mountInfoFileName)
+encodedPath := b64.URLEncoding.EncodeToString([]byte(volumePath))
+mountInfoFilePath := filepath.Join(kataDirectVolumeRootPath, encodedPath, mountInfoFileName)
if _, err := os.Stat(mountInfoFilePath); err != nil {
return err
}
-return ioutil.WriteFile(filepath.Join(kataDirectVolumeRootPath, volumePath, sandboxId), []byte(""), 0600)
+return ioutil.WriteFile(filepath.Join(kataDirectVolumeRootPath, encodedPath, sandboxId), []byte(""), 0600)
}
func GetSandboxIdForVolume(volumePath string) (string, error) {
-files, err := ioutil.ReadDir(filepath.Join(kataDirectVolumeRootPath, volumePath))
+files, err := ioutil.ReadDir(filepath.Join(kataDirectVolumeRootPath, b64.URLEncoding.EncodeToString([]byte(volumePath))))
if err != nil {
return "", err
}
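Why every lookup now encodes the path: a raw volumePath such as "/a/b/c" contains separators, so joining it under the root either nests directories (forcing the old strings.SplitN base-path hack in Remove) or risks colliding with sibling paths. URL-safe base64 flattens the whole path into one reversible, filesystem-safe directory name:

package main

import (
	b64 "encoding/base64"
	"fmt"
)

func main() {
	volumePath := "/a/b/c"

	// One flat directory name per volume path, with no "/" inside it.
	enc := b64.URLEncoding.EncodeToString([]byte(volumePath))
	fmt.Println(enc) // L2EvYi9j

	// The encoding is reversible, so the original path can be recovered.
	dec, _ := b64.URLEncoding.DecodeString(enc)
	fmt.Println(string(dec)) // /a/b/c
}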


@@ -6,6 +6,7 @@
package volume
import (
b64 "encoding/base64"
"encoding/json"
"errors"
"os"
@@ -18,16 +19,17 @@ import (
func TestAdd(t *testing.T) {
var err error
-kataDirectVolumeRootPath, err = os.MkdirTemp(os.TempDir(), "add-test")
-assert.Nil(t, err)
-defer os.RemoveAll(kataDirectVolumeRootPath)
+kataDirectVolumeRootPath = t.TempDir()
var volumePath = "/a/b/c"
-var basePath = "a"
actual := MountInfo{
VolumeType: "block",
Device: "/dev/sda",
FsType: "ext4",
-Options: []string{"journal_dev", "noload"},
+Metadata: map[string]string{
+FSGroupMetadataKey: "3000",
+FSGroupChangePolicyMetadataKey: string(FSGroupChangeOnRootMismatch),
+},
+Options: []string{"journal_dev", "noload"},
}
buf, err := json.Marshal(actual)
assert.Nil(t, err)
@@ -41,22 +43,22 @@ func TestAdd(t *testing.T) {
assert.Equal(t, expected.Device, actual.Device)
assert.Equal(t, expected.FsType, actual.FsType)
assert.Equal(t, expected.Options, actual.Options)
+assert.Equal(t, expected.Metadata, actual.Metadata)
+_, err = os.Stat(filepath.Join(kataDirectVolumeRootPath, b64.URLEncoding.EncodeToString([]byte(volumePath))))
assert.Nil(t, err)
// Remove the file
err = Remove(volumePath)
assert.Nil(t, err)
-_, err = os.Stat(filepath.Join(kataDirectVolumeRootPath, basePath))
+_, err = os.Stat(filepath.Join(kataDirectVolumeRootPath, b64.URLEncoding.EncodeToString([]byte(volumePath))))
assert.True(t, errors.Is(err, os.ErrNotExist))
+// Test invalid mount info json
+assert.Error(t, Add(volumePath, "{invalid json}"))
+_, err = os.Stat(filepath.Join(kataDirectVolumeRootPath))
+assert.Nil(t, err)
}
func TestRecordSandboxId(t *testing.T) {
var err error
-kataDirectVolumeRootPath, err = os.MkdirTemp(os.TempDir(), "recordSanboxId-test")
-assert.Nil(t, err)
-defer os.RemoveAll(kataDirectVolumeRootPath)
+kataDirectVolumeRootPath = t.TempDir()
var volumePath = "/a/b/c"
mntInfo := MountInfo{
@@ -82,9 +84,7 @@ func TestRecordSandboxId(t *testing.T) {
func TestRecordSandboxIdNoMountInfoFile(t *testing.T) {
var err error
-kataDirectVolumeRootPath, err = os.MkdirTemp(os.TempDir(), "recordSanboxId-test")
-assert.Nil(t, err)
-defer os.RemoveAll(kataDirectVolumeRootPath)
+kataDirectVolumeRootPath = t.TempDir()
var volumePath = "/a/b/c"
sandboxId := uuid.Generate().String()


@@ -15,6 +15,7 @@ import (
"sync"
"time"
+containerdshim "github.com/kata-containers/kata-containers/src/runtime/pkg/containerd-shim-v2"
mutils "github.com/kata-containers/kata-containers/src/runtime/pkg/utils"
"github.com/kata-containers/kata-containers/src/runtime/pkg/utils/shimclient"
"github.com/prometheus/client_golang/prometheus"
@@ -78,6 +79,21 @@ func (km *KataMonitor) ProcessMetricsRequest(w http.ResponseWriter, r *http.Requ
scrapeDurationsHistogram.Observe(float64(time.Since(start).Nanoseconds() / int64(time.Millisecond)))
}()
+// this is likely the same as `kata-runtime metrics <SANDBOX>`.
+sandboxID, err := getSandboxIDFromReq(r)
+if err == nil && sandboxID != "" {
+metrics, err := GetSandboxMetrics(sandboxID)
+if err != nil {
+w.WriteHeader(http.StatusInternalServerError)
+w.Write([]byte(err.Error()))
+return
+}
+w.Write([]byte(metrics))
+return
+}
+// if no sandbox is provided, get metrics for all sandboxes.
// prepare writer for writing response.
contentType := expfmt.Negotiate(r.Header)
@@ -224,7 +240,7 @@ func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error {
}
func getParsedMetrics(sandboxID string, sandboxMetadata sandboxCRIMetadata) ([]*dto.MetricFamily, error) {
-body, err := shimclient.DoGet(sandboxID, defaultTimeout, "metrics")
+body, err := shimclient.DoGet(sandboxID, defaultTimeout, containerdshim.MetricsUrl)
if err != nil {
return nil, err
}
@@ -234,7 +250,7 @@ func getParsedMetrics(sandboxID string, sandboxMetadata sandboxCRIMetadata) ([]*
// GetSandboxMetrics will get sandbox's metrics from shim
func GetSandboxMetrics(sandboxID string) (string, error) {
-body, err := shimclient.DoGet(sandboxID, defaultTimeout, "metrics")
+body, err := shimclient.DoGet(sandboxID, defaultTimeout, containerdshim.MetricsUrl)
if err != nil {
return "", err
}
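For completeness, a sketch of querying the monitor end to end. Both the listen address and the query parameter name are assumptions for illustration; the parameter consumed by getSandboxIDFromReq is not shown in this diff:

package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Assumed defaults: kata-monitor on 127.0.0.1:8090, and "sandbox" as
	// the query parameter read by getSandboxIDFromReq.
	sandboxID := "<sandbox-id>" // placeholder

	// With a sandbox ID, metrics are fetched from that sandbox's shim and
	// returned directly; without it, all sandboxes are aggregated.
	resp, err := http.Get("http://127.0.0.1:8090/metrics?sandbox=" + sandboxID)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	body, _ := io.ReadAll(resp.Body)
	fmt.Printf("%s", body)
}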

Some files were not shown because too many files have changed in this diff.