Compare commits

...

67 Commits

Author SHA1 Message Date
dependabot[bot]
20d9147c0b build(deps): bump docker/login-action from 3.4.0 to 4.0.0
Bumps [docker/login-action](https://github.com/docker/login-action) from 3.4.0 to 4.0.0.
- [Release notes](https://github.com/docker/login-action/releases)
- [Commits](74a5d14239...b45d80f862)

---
updated-dependencies:
- dependency-name: docker/login-action
  dependency-version: 4.0.0
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-03-23 14:33:40 +00:00
Fabiano Fidêncio
1ec97d25e7 Merge pull request #12704 from stevenhorsman/security-fixes-23-mar-26
Security fixes 23 mar 26
2026-03-23 15:27:07 +01:00
Fabiano Fidêncio
aa6890eae1 Merge pull request #12675 from manuelh-dev/mahuber/cdh-storage-options
agent: add mkfs_opts parameter to cdh_secure_mount
2026-03-23 15:18:38 +01:00
Fabiano Fidêncio
fe817bb47b Merge pull request #12705 from fidencio/topic/tests-nginx-connectibity-2nd-try
tests: nginx-connectivity: Use `-O index.html` to override the downloaded file
2026-03-23 13:08:51 +01:00
Fabiano Fidêncio
514a2b1a7c Merge pull request #12264 from fidencio/topic/nvidia-gpu-cc-use-nydus-snapshotter
nvidia: cc: Use nydus-snapshotter
2026-03-23 12:50:15 +01:00
stevenhorsman
2edb588ed9 kata-ctl: Pin micro_http
the micro_http crate was just pointing the the main branch and hadn't been updated for
around 3 years, so pin to the latest for stability and update to remediate RUSTSEC-2024-0002

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2026-03-23 10:34:28 +00:00
stevenhorsman
9871256771 versions: Bump cloud-hypervisor to v51
In v51 the license was added, so try bumping to this version
to solve the cargo deny issue

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2026-03-23 10:34:28 +00:00
dependabot[bot]
8de7f29981 agent-ctl: Bump aws-lc-rs to 1.16.2
Bump aws-lc-rs, so that aws-lc-sys updates to 0.39.0 to remediate
RUSTSEC-2026-0044 and https://osv.dev/vulnerability/RUSTSEC-2026-0048

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2026-03-23 10:34:28 +00:00
dependabot[bot]
1c63738b80 build(deps): bump aws-lc-fips-sys in /src/tools/agent-ctl
Bumps [aws-lc-fips-sys](https://github.com/aws/aws-lc-rs) from 0.13.12 to 0.13.13.
- [Release notes](https://github.com/aws/aws-lc-rs/releases)
- [Commits](https://github.com/aws/aws-lc-rs/compare/aws-lc-fips-sys/v0.13.12...aws-lc-fips-sys/v0.13.13)

---
updated-dependencies:
- dependency-name: aws-lc-fips-sys
  dependency-version: 0.13.13
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-03-23 10:34:28 +00:00
dependabot[bot]
6e79a9d6ad build(deps): bump rustls-webpki in /src/tools/agent-ctl
Bumps [rustls-webpki](https://github.com/rustls/webpki) from 0.103.3 to 0.103.10.
- [Release notes](https://github.com/rustls/webpki/releases)
- [Commits](https://github.com/rustls/webpki/compare/v/0.103.3...v/0.103.10)

---
updated-dependencies:
- dependency-name: rustls-webpki
  dependency-version: 0.103.10
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-03-23 10:34:27 +00:00
dependabot[bot]
8df9cf35df build(deps): bump rustls-webpki in /tools/packaging/kata-deploy/binary
Bumps [rustls-webpki](https://github.com/rustls/webpki) from 0.103.8 to 0.103.10.
- [Release notes](https://github.com/rustls/webpki/releases)
- [Commits](https://github.com/rustls/webpki/compare/v/0.103.8...v/0.103.10)

---
updated-dependencies:
- dependency-name: rustls-webpki
  dependency-version: 0.103.10
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-03-23 10:34:27 +00:00
dependabot[bot]
ef32923461 build(deps): bump tar from 0.4.44 to 0.4.45
Bumps [tar](https://github.com/alexcrichton/tar-rs) from 0.4.44 to 0.4.45.
- [Commits](https://github.com/alexcrichton/tar-rs/compare/0.4.44...0.4.45)

---
updated-dependencies:
- dependency-name: tar
  dependency-version: 0.4.45
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-03-23 10:34:27 +00:00
stevenhorsman
85e17c2e77 deps: Bump rustls-webpki
Bump rusttls-webpki to 0.103.10 to remediate RUSTSEC-2026-0049

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2026-03-23 10:34:27 +00:00
stevenhorsman
c3868f8e60 deps: Bump aws-lc-rs to 1.16.2
Bump aws-lc-rs, so that aws-lc-sys updates to 0.39.0 to remediate
RUSTSEC-2026-0044 and https://osv.dev/vulnerability/RUSTSEC-2026-0048

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2026-03-23 10:34:27 +00:00
stevenhorsman
27417d9d15 ci: Add more crates to dependabot groups
Add aws-lc, and rustls-webpki, so that in future the different
component bumps are all done together

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2026-03-23 10:34:27 +00:00
Fabiano Fidêncio
83f37f4beb tests: nginx-connectivity: Override index.html (2nd try)
We need to explicitly pass `-O index.html` as the busybox' wget has a
different behaviour than GNU's wget.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-03-23 11:11:44 +01:00
Fabiano Fidêncio
e44dfccf7a Revert "tests: nginx-connectivity: Allow overriding the downloded file"
This reverts commit 4403289123.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-03-23 11:06:23 +01:00
Hyounggyu Choi
1035504492 Merge pull request #12701 from fidencio/topic/tests-arm-nginx-connectivity
tests: nginx-connectivity: Allow overriding the downloded file
2026-03-23 10:37:25 +01:00
Steve Horsman
20cb65b1fb Merge pull request #12624 from lifupan/bump_rust_vmms
runtime-rs: Bump rust vmms for dragonball
2026-03-23 08:56:47 +00:00
Fabiano Fidêncio
864f181faf Merge pull request #12694 from manuelh-dev/mahuber/nv-test-timeout
tests: nvidia: Increase run test timeout
2026-03-23 09:13:20 +01:00
Fabiano Fidêncio
642b5661ff Merge pull request #12651 from manuelh-dev/mahuber/doc-update-nvidia-gpu-op
docs: Update NVIDIA GPU passthrough QEMU scenario
2026-03-23 09:01:02 +01:00
Fabiano Fidêncio
4403289123 tests: nginx-connectivity: Allow overriding the downloded file
In case a wget fails for one reason or another, it'll leave behind an
'index.html' file.  Let's make sure we allow overriding that file so the
retry loop doesn't fail for no reason.

Fixes: #12670

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-03-23 04:08:24 +01:00
Alex Lyn
d2c2ec6e23 Merge pull request #12633 from LandonTClipp/docs_materialx
docs: Move to mkdocs-material, port Helm to docs site
2026-03-23 09:29:25 +08:00
Fupan Li
608f378bff dragonball: make sure the nydus's worker thread access network
Since the dragonball's vmm thread had been joined in the pod's
netns, which wouldn't access the network, thus we should make
sure the nydus's worker thread join into the runD's main thread's
netns which would access the network.

Signed-off-by: Fupan Li <fupan.lfp@antgroup.com>
2026-03-22 22:44:24 +08:00
Fabiano Fidêncio
f14895bdc4 Merge pull request #12673 from manuelh-dev/mahuber/release-doc-update
docs: Update release process notes
2026-03-22 13:11:03 +01:00
Fabiano Fidêncio
fd716c017d Merge pull request #12567 from agamdua/ebpf-confs
kernel: Add debug kernel with eBPF configs to static tarball builds
2026-03-22 13:06:54 +01:00
Fabiano Fidêncio
740d380b8e tests: nvidia: cc: Use nydus-snapshotter
So we can test what we just changed in the config files.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-03-22 10:10:34 +01:00
Fabiano Fidêncio
6194510e90 nvidia: cc: Use nydus-snapshotter
We've been using `experimental_force_guest_pull`, but now that we have a
containerd release that should work more reliably with the multi
snapshotter setup, we want to give it a try.

Note: We need containerd 2.2.2+.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
2026-03-22 10:10:34 +01:00
Agam Dua
7e3fd74779 kernel: bump config version
With debug/ebpf updates in place, let's bump the kata config version.

Signed-off-by: Agam Dua <agam_dua@apple.com>
Co-authored-by: Eric Ernst <eric_ernst@apple.com>
2026-03-20 15:04:15 -07:00
Agam Dua
f6319da73d tests: Add eBPF and dwarves to spell check dictionary
Add missing terms to the spell check dictionary to fix CI failures
for kernel debug documentation:

- eBPF
- dwarves: Linux package with DWARF/BTF tools (pahole) required for
  CONFIG_DEBUG_INFO_BTF kernel option

Also fix the casing of "ebpf" to "eBPF" in the kernel README to match
the official naming convention.

Signed-off-by: Agam Dua <agam_dua@apple.com>
2026-03-20 15:04:08 -07:00
Agam Dua
91d6c39f06 kernel: Fix debug build and add debug symbols to installation
Fixed a bug with the debug kernel build where common/ was repeated
after the common path variable, resulting in the debug
confs never being picked up.

This exposed a subsequent bug where the debug conf
was included in other builds, this is also fixed by creating a
separate directory for debug confs with one file at the moment,
debug.conf that contains debug configurations and bpf specific
configs.

To enable kernel builds (specifically for bpf) the dwarves package was added
to the kernel dockerfile for the pahole package.

Signed-off-by: Agam Dua <agam_dua@apple.com>
2026-03-20 14:50:23 -07:00
Agam Dua
5ab0744c25 ci: Add pipeline for building and distributing the debug kernel
Add the debug kernel to the kata tarball alongside the other kernels.

Also update the kernel README documentation to describe the new debug
kernel build process.

Signed-off-by: Agam Dua <agam_dua@apple.com>
2026-03-20 14:50:23 -07:00
Agam Dua
e905b74267 kernel: Add eBPF configs for debug builds
Adds a BPF section in the debug.conf kernel configuration options
 to enable eBPF and BTF support for debug kernel builds.

Signed-off-by: Agam Dua <agam_dua@apple.com>
2026-03-20 14:50:23 -07:00
LandonTClipp
5333e45313 docs: Fix static-checks.sh when running locally
This fixes the test_dir variable in static-checks.sh so that
when a --repo-path is provided, the test_dir variable uses that
for the location instead of the GOPATH location.

Signed-off-by: LandonTClipp <11232769+LandonTClipp@users.noreply.github.com>
2026-03-20 14:51:45 -05:00
LandonTClipp
795869152d docs: Move to mkdocs-material, port Helm to docs site
This supersedes https://github.com/kata-containers/kata-containers/pull/12622.
I replaced Zensical with mkdocs-materialx. Materialx is a fork of mkdocs-material
created after mkdocs-material was put into maintenance mode. We'll use this
platform until Zensical is more feature complete.

Added a few of the existing docs into the site to make a more user-friendly flow.

Signed-off-by: LandonTClipp <11232769+LandonTClipp@users.noreply.github.com>
2026-03-20 14:51:39 -05:00
Manuel Huber
8903b12d34 tests: nvidia: Increase run test timeout
Increase the timeout as a few new features and tests are going to be
onboarded for the NVIDIA GPU CI.

Signed-off-by: Manuel Huber <manuelh@nvidia.com>
2026-03-20 11:12:52 -07:00
Manuel Huber
476f550977 docs: Update NVIDIA GPU passthrough QEMU scenario
With the upcoming GPU operator 26.3 relase and recent changes to
kata-containers, we adapt this documentation with notes on multi
GPU passthrough, support for TDX, changed deployment instructions,
and with various other minor improvements.

Signed-off-by: Manuel Huber <manuelh@nvidia.com>
2026-03-20 10:53:14 -07:00
Manuel Huber
ae59cf26a0 kata-deploy: Check kata-tarball size limits
For kata tarballs we eventually release to GitHub, check their
size against the GitHub size limit. With this, we fail in case of
an ongoing release process in 'CI | Publish Kata Containers payload'
instead of only later on in the 'Release Kata Containers' action,
and we fail during PR builds, avoiding this situation at all.

Signed-off-by: Manuel Huber <manuelh@nvidia.com>
2026-03-20 10:40:55 -07:00
RuoqingHe
cfc1836a31 Merge pull request #12672 from stevenhorsman/agent-security-fixes
agent: Bump tracing-subscriber
2026-03-20 17:37:16 +08:00
Steve Horsman
7ab6e11e10 Merge pull request #12678 from kata-containers/dependabot/go_modules/src/runtime/google.golang.org/grpc-1.79.3
build(deps): bump google.golang.org/grpc from 1.72.0 to 1.79.3 in /src/runtime
2026-03-20 08:49:35 +00:00
Steve Horsman
e475fb2116 Merge pull request #12680 from kata-containers/dependabot/go_modules/src/tools/csi-kata-directvolume/google.golang.org/grpc-1.79.3
build(deps): bump google.golang.org/grpc from 1.63.2 to 1.79.3 in /src/tools/csi-kata-directvolume
2026-03-20 08:49:27 +00:00
RuoqingHe
f62a6b6ab2 Merge pull request #12677 from stevenhorsman/cspell-action
Switch to use cspell for spell checking
2026-03-20 13:27:36 +08:00
Manuel Huber
4afb55154a docs: Update release process notes
Update the Release-Process.md file with some clarifications on the
release process.

Signed-off-by: Manuel Huber <manuelh@nvidia.com>
2026-03-19 15:14:23 -07:00
stevenhorsman
38a655487f vsock-exporter: Switch bincode for serde_json
bincode is not maintained, so switch to serde_json to
resolve RUSTSEC-2025-0141

Assisted-By: Bob
Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2026-03-19 10:45:17 +00:00
stevenhorsman
e1d7d5bef8 agent: Remove async-std
It's a dev-dependency that doesn't seem to be used, so
remove it and resolve RUSTSEC-2025-0052

Assisted-By: Bob
Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2026-03-19 10:45:17 +00:00
stevenhorsman
e4eda5e1d8 agent: Bump tracing-subscriber
- Bump tracing-subscriber to 0.3.20 to resolve RUSTSEC-2025-0055
- Switch deprecated `slog_info!` for `slog::info!`

Generated-By: Bob
Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2026-03-19 10:45:17 +00:00
stevenhorsman
e62df07b6a static-checks: Delete kata-spell-check
The old hunspell based spell-check was causing contributors
challenges and proving a barrier to doc updates. We've replaced
it with a cspell based-solution, so clean up the old approach.

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2026-03-19 10:22:54 +00:00
stevenhorsman
44ec815f77 gatekeeper: Add check-spelling to required
Add the new job to the list of static checks that runs on doc updates.

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2026-03-19 10:22:54 +00:00
stevenhorsman
c2cedd7c02 workflows: Add spellcheck workflow
Add a separate spellcheck workflow, so we can replace
the complex hunspell approach embedded in static-checks

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2026-03-19 10:22:54 +00:00
stevenhorsman
d06dadd8ef docs: Spelling updates
Either fixing typos, or including program/repo name in
backticks

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2026-03-19 10:22:54 +00:00
stevenhorsman
829a32ee67 spellcheck: Add cspell files
Add cspell config and initial dictionary

Assisted-by: Bob (dictionary ordering and catergorisation)
Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2026-03-19 10:22:54 +00:00
dependabot[bot]
2f5415d8f5 build(deps): bump google.golang.org/grpc
Bumps [google.golang.org/grpc](https://github.com/grpc/grpc-go) from 1.63.2 to 1.79.3.
- [Release notes](https://github.com/grpc/grpc-go/releases)
- [Commits](https://github.com/grpc/grpc-go/compare/v1.63.2...v1.79.3)

---
updated-dependencies:
- dependency-name: google.golang.org/grpc
  dependency-version: 1.79.3
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-03-19 10:03:45 +00:00
dependabot[bot]
3876a80208 build(deps): bump google.golang.org/grpc in /src/runtime
Bumps [google.golang.org/grpc](https://github.com/grpc/grpc-go) from 1.72.0 to 1.79.3.
- [Release notes](https://github.com/grpc/grpc-go/releases)
- [Commits](https://github.com/grpc/grpc-go/compare/v1.72.0...v1.79.3)

---
updated-dependencies:
- dependency-name: google.golang.org/grpc
  dependency-version: 1.79.3
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-03-19 10:03:30 +00:00
Manuel Huber
62d74bb1fd agent: add mkfs_opts parameter to cdh_secure_mount
Add an mkfs_opts parameter to cdh_secure_mount so that its users
can parametrize these options depending on their needs. For now,
there is two users providing explicit values (container image
layer storage and container data storage features).

Signed-off-by: Manuel Huber <manuelh@nvidia.com>
2026-03-18 15:37:32 -07:00
Fupan Li
eabb98ecab dragonball: fix the issue of type miss match with api change
For aarch64, it should match the api for get_one_reg and
set_one_reg.

Signed-off-by: Fupan Li <fupan.lfp@antgroup.com>
2026-03-16 14:41:53 +08:00
Fupan Li
c1b7069e50 tools: fix the genpolicy building issue
Add the new helper item bring by the cargo bump

Signed-off-by: Fupan Li <fupan.lfp@antgroup.com>
2026-03-12 10:58:04 +00:00
Fupan Li
fddd1e8b6e dragonball: update the Cargo.lock and rm the unused crate
update the Cargo.lock  and rm the unused crate

Signed-off-by: Fupan Li <fupan.lfp@antgroup.com>
2026-03-12 10:58:04 +00:00
Fupan Li
d6178d78b1 dragonball: fix the tcp test address for 127.0.0.2
Fix TCP test addresses from 127.0.0.2 to 127.0.0.1 for vsock backend
tests

Signed-off-by: Fupan Li <fupan.lfp@antgroup.com>
2026-03-12 10:58:04 +00:00
Fupan Li
1c7b14e282 dragonball: Fix the feature gating for host devices
Fix feature-gating for PCI/virtio in dragonball
device_manager and mptable test

Signed-off-by: Fupan Li <fupan.lfp@antgroup.com>
2026-03-12 10:58:04 +00:00
Fupan Li
e9bda42b01 dragonball: fix the failed UT tests
Fix dragonball make check: clippy and format errors

Signed-off-by: Fupan Li <fupan.lfp@antgroup.com>
2026-03-12 10:58:03 +00:00
Fupan Li
a66c93caaa dragonball: add GuestRegionCollection error
add GuestRegionCollection error variant with proper
error context preservation

Signed-off-by: Fupan Li <fupan.lfp@antgroup.com>
2026-03-12 10:58:03 +00:00
Fupan Li
17454c0969 dragonball: Fix remaining warnings
remove unused imports (ioctl_ioc_nr, std::io) that cause -D warnings failures

Signed-off-by: Fupan Li <fupan.lfp@antgroup.com>
2026-03-12 10:58:03 +00:00
Fupan Li
f8617241f4 dragonball: Fix dragonball compilation errors for upgraded
Fix dragonball compilation errors for upgraded
dependencies (vm-memory 0.17.1, kvm-ioctls 0.24.0, vfio-ioctls 0.5.2)

Signed-off-by: Fupan Li <fupan.lfp@antgroup.com>
2026-03-12 10:58:03 +00:00
Fupan Li
d0f0dc2008 dragonball: fix the dbs-virtio-devices compiled errors
Update dbs-virtio-devices to compile with:
- virtio-bindings 0.2.x: VIRTIO_F_VERSION_1, VIRTIO_F_NOTIFY_ON_EMPTY,
  VIRTIO_F_RING_PACKED moved from virtio_blk/virtio_net/virtio_ring to
  virtio_config module.
- virtio-queue 0.17.0: Descriptor no longer exported at top level, use
  desc::split::Descriptor instead.
- vhost 0.15.0: Master->Frontend, VhostUserMaster->VhostUserFrontend,
  MasterReqHandler->FrontendReqHandler,
  VhostUserMasterReqHandler->VhostUserFrontendReqHandler,
  SLAVE_REQ->BACKEND_REQ, SLAVE_SEND_FD->BACKEND_SEND_FD,
  set_slave_request_fd->set_backend_request_fd.
  FS slave messages (VhostUserFSSlaveMsg etc.) removed from vhost crate;
  SlaveReqHandler now implements VhostUserFrontendReqHandler with
  handle_config_change only.
- fuse-backend-rs 0.14.0: Handle CachePolicy::Metadata variant,
  fix get_rootfs() returning tuple, use buffer-based I/O for Ufile
  since ReadVolatile/WriteVolatile are not implemented for Box<dynUfile>.
- vm-memory 0.17.1: GuestRegionMmap::new returns Option instead of
  Result, mmap::Error removed.

Signed-off-by: Fupan Li <fupan.lfp@antgroup.com>
2026-03-12 10:58:03 +00:00
Fupan Li
3e39c1fad3 dragonball: fix the issue of kvm-binding upgraded
Fix the compiling errors caused by kvm-binding and
kvm-ioctls upgraded.

Signed-off-by: Fupan Li <fupan.lfp@antgroup.com>
2026-03-12 10:58:03 +00:00
Fupan Li
a6a81124cb runtime-rs: fix the api changes for vm-memory 0.17.1 API
Rename vm-memory GuestMemory methods for 0.17.1 upgrade
Rename read_from -> read_volatile_from, write_to -> write_volatile_to,
read_exact_from -> read_exact_volatile_from, and write_all_to ->
write_all_volatile_to across all dragonball Rust source files.
Change bitmap() return type from &Self::B to BS<'_, Self::B>
Move as_slice/as_mut_slice from GuestMemoryRegion trait impl to inherent
impl block, using get_host_address for mmap regions
Update GuestMemory impl: remove type I, use impl Iterator return type
Replace Error with GuestRegionCollectionError for region collection errors
Fix VolatileSlice::with_bitmap call to include mmap parameter
Fix test: use ptr_guard().as_ptr() instead of removed as_ptr()

Signed-off-by: Fupan Li <fupan.lfp@antgroup.com>
2026-03-12 10:58:03 +00:00
Fupan Li
8d09a0e7e7 runtime-rs: Bump the rust-vmm related crates
vm-memory 0.10.0 → =0.17.1
vmm-sys-util 0.11.0 → 0.15.0
kvm-bindings 0.6.0 → 0.14.0
kvm-ioctls =0.12.1 → 0.24.0
virtio-queue 0.7.0 → 0.17.0
virtio-bindings 0.1.0 → 0.2.0
fuse-backend-rs 0.10.5 → 0.14.0

Signed-off-by: Fupan Li <fupan.lfp@antgroup.com>
2026-03-12 10:58:03 +00:00
220 changed files with 7542 additions and 7349 deletions

37
.cspell.yaml Normal file
View File

@@ -0,0 +1,37 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/streetsidesoftware/cspell/main/cspell.schema.json
version: "0.2"
language: en,en-GB
dictionaryDefinitions:
- name: kata-terms
path: ./tests/spellcheck/kata-dictionary.txt
addWords: true
dictionaries:
- en-GB
- en_US
- bash
- git
- golang
- k8s
- python
- rust
- companies
- mnemonics
- peopleNames
- softwareTerms
- networking-terms
- kata-terms
ignoreRegExpList:
- /@[a-z\d](?:[a-z\d]|-(?=[a-z\d])){0,38}/gi # Ignores github handles
# Ignore code blocks
- /^\s*`{3,}[\s\S]*?^\s*`{3,}/gm
- /`[^`\n]+`/g
ignorePaths:
- "**/vendor/**" # vendor files aren't owned by us
- "**/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/**" # Generated files
- "**/requirements.txt"
useGitignore: true

View File

@@ -37,9 +37,9 @@ updates:
# create groups for common dependencies, so they can all go in a single PR
# We can extend this as we see more frequent groups
groups:
bit-vec:
aws-libcrypto:
patterns:
- bit-vec
- aws-lc-*
bumpalo:
patterns:
- bumpalo
@@ -67,6 +67,9 @@ updates:
rustix:
patterns:
- rustix
rustls-webpki:
patterns:
- rustls-webpki
slab:
patterns:
- slab

View File

@@ -47,6 +47,7 @@ jobs:
- coco-guest-components
- firecracker
- kernel
- kernel-debug
- kernel-dragonball-experimental
- kernel-nvidia-gpu
- nydus
@@ -68,7 +69,7 @@ jobs:
steps:
- name: Login to Kata Containers quay.io
if: ${{ inputs.push-to-registry == 'yes' }}
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: quay.io
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}
@@ -120,7 +121,7 @@ jobs:
version: "1.2.0"
# for pushing attestations to the registry
- uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
- uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
if: ${{ env.PERFORM_ATTESTATION == 'yes' }}
with:
registry: ghcr.io
@@ -171,7 +172,7 @@ jobs:
steps:
- name: Login to Kata Containers quay.io
if: ${{ inputs.push-to-registry == 'yes' }}
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: quay.io
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}
@@ -265,7 +266,7 @@ jobs:
steps:
- name: Login to Kata Containers quay.io
if: ${{ inputs.push-to-registry == 'yes' }}
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: quay.io
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}
@@ -347,6 +348,16 @@ jobs:
./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-artifacts versions.yaml
env:
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
- name: Check kata tarball size (GitHub release asset limit)
run: |
# https://docs.github.com/en/repositories/releasing-projects-on-github/about-releases#storage-and-bandwidth-quotas
GITHUB_ASSET_MAX_BYTES=2147483648
tarball_size=$(stat -c "%s" kata-static.tar.zst)
if [[ "${tarball_size}" -ge "${GITHUB_ASSET_MAX_BYTES}" ]]; then
echo "::error::tarball size (${tarball_size} bytes) >= GitHub release asset limit (${GITHUB_ASSET_MAX_BYTES} bytes)"
exit 1
fi
echo "tarball size: ${tarball_size} bytes"
- name: store-artifacts
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
@@ -374,7 +385,7 @@ jobs:
steps:
- name: Login to Kata Containers quay.io
if: ${{ inputs.push-to-registry == 'yes' }}
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: quay.io
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}
@@ -447,6 +458,16 @@ jobs:
./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-tools-artifacts versions.yaml kata-tools-static.tar.zst
env:
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
- name: Check kata-tools tarball size (GitHub release asset limit)
run: |
# https://docs.github.com/en/repositories/releasing-projects-on-github/about-releases#storage-and-bandwidth-quotas
GITHUB_ASSET_MAX_BYTES=2147483648
tarball_size=$(stat -c "%s" kata-tools-static.tar.zst)
if [[ "${tarball_size}" -ge "${GITHUB_ASSET_MAX_BYTES}" ]]; then
echo "::error::tarball size (${tarball_size} bytes) >= GitHub release asset limit (${GITHUB_ASSET_MAX_BYTES} bytes)"
exit 1
fi
echo "tarball size: ${tarball_size} bytes"
- name: store-artifacts
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:

View File

@@ -45,6 +45,7 @@ jobs:
- cloud-hypervisor
- firecracker
- kernel
- kernel-debug
- kernel-dragonball-experimental
- kernel-nvidia-gpu
- kernel-cca-confidential
@@ -57,7 +58,7 @@ jobs:
steps:
- name: Login to Kata Containers quay.io
if: ${{ inputs.push-to-registry == 'yes' }}
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: quay.io
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}
@@ -108,7 +109,7 @@ jobs:
version: "1.2.0"
# for pushing attestations to the registry
- uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
- uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
if: ${{ env.PERFORM_ATTESTATION == 'yes' }}
with:
registry: ghcr.io
@@ -155,7 +156,7 @@ jobs:
steps:
- name: Login to Kata Containers quay.io
if: ${{ inputs.push-to-registry == 'yes' }}
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: quay.io
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}
@@ -246,7 +247,7 @@ jobs:
steps:
- name: Login to Kata Containers quay.io
if: ${{ inputs.push-to-registry == 'yes' }}
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: quay.io
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}
@@ -326,6 +327,16 @@ jobs:
./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-artifacts versions.yaml
env:
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
- name: Check kata tarball size (GitHub release asset limit)
run: |
# https://docs.github.com/en/repositories/releasing-projects-on-github/about-releases#storage-and-bandwidth-quotas
GITHUB_ASSET_MAX_BYTES=2147483648
tarball_size=$(stat -c "%s" kata-static.tar.zst)
if [[ "${tarball_size}" -ge "${GITHUB_ASSET_MAX_BYTES}" ]]; then
echo "::error::tarball size (${tarball_size} bytes) >= GitHub release asset limit (${GITHUB_ASSET_MAX_BYTES} bytes)"
exit 1
fi
echo "tarball size: ${tarball_size} bytes"
- name: store-artifacts
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:

View File

@@ -45,7 +45,7 @@ jobs:
steps:
- name: Login to Kata Containers quay.io
if: ${{ inputs.push-to-registry == 'yes' }}
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: quay.io
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}
@@ -103,7 +103,7 @@ jobs:
steps:
- name: Login to Kata Containers quay.io
if: ${{ inputs.push-to-registry == 'yes' }}
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: quay.io
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}
@@ -178,7 +178,7 @@ jobs:
steps:
- name: Login to Kata Containers quay.io
if: ${{ inputs.push-to-registry == 'yes' }}
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: quay.io
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}
@@ -262,6 +262,16 @@ jobs:
./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-artifacts versions.yaml
env:
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
- name: Check kata tarball size (GitHub release asset limit)
run: |
# https://docs.github.com/en/repositories/releasing-projects-on-github/about-releases#storage-and-bandwidth-quotas
GITHUB_ASSET_MAX_BYTES=2147483648
tarball_size=$(stat -c "%s" kata-static.tar.zst)
if [[ "${tarball_size}" -ge "${GITHUB_ASSET_MAX_BYTES}" ]]; then
echo "::error::tarball size (${tarball_size} bytes) >= GitHub release asset limit (${GITHUB_ASSET_MAX_BYTES} bytes)"
exit 1
fi
echo "tarball size: ${tarball_size} bytes"
- name: store-artifacts
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:

View File

@@ -52,7 +52,7 @@ jobs:
steps:
- name: Login to Kata Containers quay.io
if: ${{ inputs.push-to-registry == 'yes' }}
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: quay.io
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}
@@ -98,7 +98,7 @@ jobs:
echo "oci-digest=${oci_image#*@}" >> "$GITHUB_OUTPUT"
# for pushing attestations to the registry
- uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
- uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
if: ${{ env.PERFORM_ATTESTATION == 'yes' }}
with:
registry: ghcr.io
@@ -137,7 +137,7 @@ jobs:
steps:
- name: Login to Kata Containers quay.io
if: ${{ inputs.push-to-registry == 'yes' }}
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: quay.io
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}
@@ -264,7 +264,7 @@ jobs:
steps:
- name: Login to Kata Containers quay.io
if: ${{ inputs.push-to-registry == 'yes' }}
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: quay.io
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}
@@ -350,6 +350,16 @@ jobs:
./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-artifacts versions.yaml
env:
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
- name: Check kata tarball size (GitHub release asset limit)
run: |
# https://docs.github.com/en/repositories/releasing-projects-on-github/about-releases#storage-and-bandwidth-quotas
GITHUB_ASSET_MAX_BYTES=2147483648
tarball_size=$(stat -c "%s" kata-static.tar.zst)
if [[ "${tarball_size}" -ge "${GITHUB_ASSET_MAX_BYTES}" ]]; then
echo "::error::tarball size (${tarball_size} bytes) >= GitHub release asset limit (${GITHUB_ASSET_MAX_BYTES} bytes)"
exit 1
fi
echo "tarball size: ${tarball_size} bytes"
- name: store-artifacts
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:

View File

@@ -39,7 +39,7 @@ jobs:
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Login to Quay.io
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: ${{ env.REGISTRY }}
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}

View File

@@ -92,7 +92,7 @@ jobs:
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Login to Kata Containers ghcr.io
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: ghcr.io
username: ${{ github.actor }}

View File

@@ -201,7 +201,7 @@ jobs:
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Login to Kata Containers ghcr.io
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: ghcr.io
username: ${{ github.actor }}

View File

@@ -4,17 +4,18 @@ on:
branches:
- main
permissions: {}
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
deploy-docs:
name: deploy-docs
build:
runs-on: ubuntu-24.04
name: Build docs
permissions:
contents: read
pages: write
id-token: write
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
runs-on: ubuntu-latest
steps:
- uses: actions/configure-pages@983d7736d9b0ae728b81ab479565c72886d7745b # v5.0.0
- uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1
@@ -23,10 +24,30 @@ jobs:
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: 3.x
- run: pip install zensical
- run: zensical build --clean
- run: pip install -r docs/requirements.txt
- run: python3 -m mkdocs build --config-file ./mkdocs.yaml --site-dir site/
id: build
- uses: actions/upload-pages-artifact@7b1f4a764d45c48632c6b24a0339c27f5614fb0b # v4.0.0
with:
path: site
- uses: actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e # v4.0.5
id: deployment
with:
path: site/
name: github-pages
deploy:
needs: build
runs-on: ubuntu-24.04
name: Deploy docs
permissions:
pages: write
id-token: write
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
steps:
- name: Deploy to GitHub Pages
uses: actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e # v4.0.5
id: deployment
with:
artifact_name: github-pages

View File

@@ -150,7 +150,7 @@ jobs:
persist-credentials: false
- name: Login to Kata Containers quay.io
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: quay.io
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}

View File

@@ -81,7 +81,7 @@ jobs:
- name: Login to Kata Containers quay.io
if: ${{ inputs.registry == 'quay.io' }}
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: quay.io
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}
@@ -89,7 +89,7 @@ jobs:
- name: Login to Kata Containers ghcr.io
if: ${{ inputs.registry == 'ghcr.io' }}
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: ghcr.io
username: ${{ github.actor }}

View File

@@ -37,14 +37,14 @@ jobs:
runs-on: ubuntu-22.04
steps:
- name: Login to Kata Containers ghcr.io
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Login to Kata Containers quay.io
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: quay.io
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}

View File

@@ -37,14 +37,14 @@ jobs:
runs-on: ubuntu-24.04-arm
steps:
- name: Login to Kata Containers ghcr.io
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Login to Kata Containers quay.io
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: quay.io
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}

View File

@@ -34,14 +34,14 @@ jobs:
runs-on: ubuntu-24.04-ppc64le
steps:
- name: Login to Kata Containers ghcr.io
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Login to Kata Containers quay.io
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: quay.io
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}

View File

@@ -38,14 +38,14 @@ jobs:
runs-on: ubuntu-24.04-s390x
steps:
- name: Login to Kata Containers ghcr.io
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Login to Kata Containers quay.io
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: quay.io
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}

View File

@@ -92,14 +92,14 @@ jobs:
persist-credentials: false
- name: Login to Kata Containers ghcr.io
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Login to Kata Containers quay.io
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: quay.io
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}

View File

@@ -49,6 +49,8 @@ jobs:
KATA_HYPERVISOR: ${{ matrix.environment.vmm }}
KUBERNETES: kubeadm
KBS: ${{ matrix.environment.name == 'nvidia-gpu-snp' && 'true' || 'false' }}
SNAPSHOTTER: ${{ matrix.environment.name == 'nvidia-gpu-snp' && 'nydus' || '' }}
USE_EXPERIMENTAL_SNAPSHOTTER_SETUP: ${{ matrix.environment.name == 'nvidia-gpu-snp' && 'true' || 'false' }}
K8S_TEST_HOST_TYPE: baremetal
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -98,7 +100,7 @@ jobs:
run: bash tests/integration/kubernetes/gha-run.sh install-bats
- name: Run tests ${{ matrix.environment.vmm }}
timeout-minutes: 30
timeout-minutes: 60
run: bash tests/integration/kubernetes/gha-run.sh run-nv-tests
env:
NGC_API_KEY: ${{ secrets.NGC_API_KEY }}

30
.github/workflows/spellcheck.yaml vendored Normal file
View File

@@ -0,0 +1,30 @@
name: Spelling check
on: ["pull_request"]
permissions: {}
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
check-spelling:
name: check-spelling
runs-on: ubuntu-24.04
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
fetch-depth: 0
persist-credentials: false
- name: Check Spelling
uses: streetsidesoftware/cspell-action@9cd41bb518a24fefdafd9880cbab8f0ceba04d28 # 8.3.0
with:
files: |
**/*.md
**/*.rst
**/*.txt
incremental_files_only: true
config: ".cspell.yaml"

View File

@@ -138,7 +138,7 @@ jobs:
go-version: ${{ env.GO_VERSION }}
- name: Install system dependencies
run: |
sudo apt-get update && sudo apt-get -y install moreutils hunspell hunspell-en-gb hunspell-en-us pandoc
sudo apt-get update && sudo apt-get -y install moreutils
- name: Install open-policy-agent
run: |
cd "${GOPATH}/src/github.com/${GITHUB_REPOSITORY}"

1966
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -56,19 +56,19 @@ exclude = [
[workspace.dependencies]
# Rust-VMM crates
event-manager = "0.2.1"
kvm-bindings = "0.6.0"
kvm-ioctls = "=0.12.1"
linux-loader = "0.8.0"
event-manager = "0.4.0"
kvm-bindings = "0.14.0"
kvm-ioctls = "0.24.0"
linux-loader = "0.13.0"
seccompiler = "0.5.0"
vfio-bindings = "0.3.0"
vfio-ioctls = "0.1.0"
virtio-bindings = "0.1.0"
virtio-queue = "0.7.0"
vm-fdt = "0.2.0"
vm-memory = "0.10.0"
vm-superio = "0.5.0"
vmm-sys-util = "0.11.0"
vfio-bindings = "0.6.1"
vfio-ioctls = "0.5.0"
virtio-bindings = "0.2.0"
virtio-queue = "0.17.0"
vm-fdt = "0.3.0"
vm-memory = "=0.17.1"
vm-superio = "0.8.0"
vmm-sys-util = "0.15.0"
# Local dependencies from Dragonball Sandbox crates
dragonball = { path = "src/dragonball" }

View File

@@ -49,8 +49,11 @@ docs-url-alive-check:
build-and-publish-kata-debug:
bash tools/packaging/kata-debug/kata-debug-build-and-upload-payload.sh ${KATA_DEBUG_REGISTRY} ${KATA_DEBUG_TAG}
docs-serve:
docker run --rm -p 8000:8000 -v ./docs:/docs:ro -v ${PWD}/zensical.toml:/zensical.toml:ro zensical/zensical serve --config-file /zensical.toml -a 0.0.0.0:8000
docs-build:
docker build -t kata-docs:latest -f ./docs/Dockerfile ./docs
docs-serve: docs-build
docker run --rm -p 8000:8000 -v ${PWD}:/docs:ro kata-docs:latest serve --config-file /docs/mkdocs.yaml -a 0.0.0.0:8000
.PHONY: \
all \
@@ -59,4 +62,5 @@ docs-serve:
default \
static-checks \
docs-url-alive-check \
docs-build \
docs-serve

View File

@@ -378,7 +378,7 @@ that is used in the test" section. From there you can see exactly what you'll
have to use when deploying kata-deploy in your local cluster.
> [!NOTE]
> TODO: WAINER TO FINISH THIS PART BASED ON HIS PR TO RUN A LOCAL CI
> TODO: @wainersm TO FINISH THIS PART BASED ON HIS PR TO RUN A LOCAL CI
## Adding new runners

View File

@@ -98,7 +98,7 @@ Let's say the OCP pipeline passed running with
but failed running with
``quay.io/kata-containers/kata-deploy-ci:kata-containers-9f512c016e75599a4a921bd84ea47559fe610057-amd64``
and you'd like to know which PR caused the regression. You can either run with
all the 60 tags between or you can utilize the [bisecter](https://github.com/ldoktor/bisecter)
all the 60 tags between or you can utilize the [`bisecter`](https://github.com/ldoktor/bisecter)
to optimize the number of steps in between.
Before running the bisection you need a reproducer script. Sample one called

18
docs/.nav.yml Normal file
View File

@@ -0,0 +1,18 @@
# https://lukasgeiter.github.io/mkdocs-awesome-nav/
nav:
- Home: index.md
- Getting Started:
- prerequisites.md
- installation.md
- Configuration:
- helm-configuration.md
- runtime-configuration.md
- Platform Support:
- hypervisors.md
- Guides:
- Use Cases:
- NVIDIA GPU Passthrough: use-cases/NVIDIA-GPU-passthrough-and-Kata-QEMU.md
- NVIDIA vGPU: use-cases/NVIDIA-GPU-passthrough-and-Kata.md
- Intel Discrete GPU: use-cases/Intel-Discrete-GPU-passthrough-and-Kata.md
- Misc:
- Architecture: design/architecture/

View File

@@ -730,7 +730,7 @@ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.debug_cons
##### Connecting to the debug console
Next, connect to the debug console. The VSOCKS paths vary slightly between each
Next, connect to the debug console. The VSOCK paths vary slightly between each
VMM solution.
In case of cloud-hypervisor, connect to the `vsock` as shown:

11
docs/Dockerfile Normal file
View File

@@ -0,0 +1,11 @@
# Copyright 2026 Kata Contributors
#
# SPDX-License-Identifier: Apache-2.0
#
FROM python:3.12-slim
WORKDIR /
COPY ./requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt
ENTRYPOINT ["python3", "-m", "mkdocs"]

View File

@@ -188,15 +188,14 @@ and compare them with standard tools (e.g. `diff(1)`).
# Spelling
Since this project uses a number of terms not found in conventional
dictionaries, we have a
[spell checking tool](https://github.com/kata-containers/kata-containers/tree/main/tests/cmd/check-spelling)
that checks both dictionary words and the additional terms we use.
dictionaries, we have a [kata-dictionary](../tests/spellcheck/kata-dictionary.txt)
that contains some project specific terms we use.
Run the spell checking tool on your document before raising a PR to ensure it
You can run the `cspell` checking tool on your document before raising a PR to ensure it
is free of mistakes.
If your document introduces new terms, you need to update the custom
dictionary used by the spell checking tool to incorporate the new words.
dictionary to incorporate the new words.
# Names

View File

@@ -32,23 +32,26 @@ their own validation and patch backporting from there.
## Release Process
### Bump the `VERSION` and `Chart.yaml` file
When the `kata-containers/kata-containers` repository is ready for a new
release, first create a PR to set the release in the [`VERSION`](./../VERSION)
file and update the `version` and `appVersion` in the
[`Chart.yaml`](./../tools/packaging/kata-deploy/helm-chart/kata-deploy/Chart.yaml)
file and have it merged.
### Lock the `main` branch
### Lock the `main` branch and announce release process
In order to prevent any PRs getting merged during the release process, and
slowing the release process down, by impacting the payload caches, we have
recently trialed setting the `main` branch to read only whilst the release
action runs.
recently trialed setting the `main` branch to read-only.
Once the `kata-containers/kata-containers` repository is ready for a new
release, lock the main branch until the release action has completed.
Notify the #kata-dev Slack channel about the ongoing release process.
Ideally, CI usage by others should be reduced to a minimum during the
ongoing release process.
> [!NOTE]
> Admin permission is needed to complete this task.
> Admin permission is needed to lock/unlock the `main` branch.
### Bump the `VERSION` and `Chart.yaml` file
Create a PR to set the release in the [`VERSION`](./../VERSION) file and to
update the `version` and `appVersion` fields in the
[`Chart.yaml`](./../tools/packaging/kata-deploy/helm-chart/kata-deploy/Chart.yaml)
file. Temporarily unlock the main branch to merge the PR.
### Wait for the `VERSION` bump PR payload publish to complete
@@ -60,7 +63,7 @@ and are cached, so that the release process can just download these artifacts
rather than needing to build them all, which takes time and can reveal errors in
infra.
### Check GitHub Actions
### Trigger the `Release Kata Containers` GitHub Action
We make use of [GitHub actions](https://github.com/features/actions) in the
[release](https://github.com/kata-containers/kata-containers/actions/workflows/release.yaml)

View File

Before

Width:  |  Height:  |  Size: 710 B

After

Width:  |  Height:  |  Size: 710 B

View File

@@ -231,12 +231,6 @@ Run the
[markdown checker](https://github.com/kata-containers/kata-containers/tree/main/tests/cmd/check-markdown)
on your documentation changes.
### Spell check
Run the
[spell checker](https://github.com/kata-containers/kata-containers/tree/main/tests/cmd/check-spelling)
on your documentation changes.
## Finally
You may wish to read the documentation that the

View File

@@ -43,7 +43,7 @@ To fulfill the [Kata design requirements](kata-design-requirements.md), and base
|`sandbox.AddInterface(inf)`| Add new NIC to the sandbox.|
|`sandbox.RemoveInterface(inf)`| Remove a NIC from the sandbox.|
|`sandbox.ListInterfaces()`| List all NICs and their configurations in the sandbox, return a `pbTypes.Interface` list.|
|`sandbox.UpdateRoutes(routes)`| Update the sandbox route table (e.g. for portmapping support), return a `pbTypes.Route` list.|
|`sandbox.UpdateRoutes(routes)`| Update the sandbox route table (e.g. for port mapping support), return a `pbTypes.Route` list.|
|`sandbox.ListRoutes()`| List the sandbox route table, return a `pbTypes.Route` list.|
### Sandbox Relay API

View File

@@ -8,7 +8,7 @@ The following benchmarking result shows the performance improvement compared wit
## Proposal - Bring `lazyload` ability to Kata Containers
`Nydusd` is a fuse/`virtiofs` daemon which is provided by `nydus` project and it supports `PassthroughFS` and [RAFS](https://github.com/dragonflyoss/image-service/blob/master/docs/nydus-design.md) (Registry Acceleration File System) natively, so in Kata Containers, we can use `nydusd` in place of `virtiofsd` and mount `nydus` image to guest in the meanwhile.
`Nydusd` is a fuse/`virtiofs` daemon which is provided by `nydus` project and it supports `PassthroughFS` and [`rafs`](https://github.com/dragonflyoss/image-service/blob/master/docs/nydus-design.md) (Registry Acceleration File System) natively, so in Kata Containers, we can use `nydusd` in place of `virtiofsd` and mount `nydus` image to guest in the meanwhile.
The process of creating/starting Kata Containers with `virtiofsd`,

264
docs/helm-configuration.md Normal file
View File

@@ -0,0 +1,264 @@
# Helm Configuration
## Parameters
The helm chart provides a comprehensive set of configuration options. You may view the parameters and their descriptions by going to the [GitHub source](https://github.com/kata-containers/kata-containers/blob/main/tools/packaging/kata-deploy/helm-chart/kata-deploy/values.yaml) or by using helm:
```sh
# List available kata-deploy chart versions:
# helm search repo kata-deploy-charts/kata-deploy --versions
#
# Then replace X.Y.Z below with the desired chart version:
helm show values --version X.Y.Z oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy
```
### shims
Kata ships with a number of pre-built artifacts and runtimes. You may selectively enable or disable specific shims. For example:
```yaml title="values.yaml"
shims:
disableAll: true
qemu:
enabled: true
qemu-nvidia-gpu:
enabled: true
qemu-nvidia-gpu-snp:
enabled: false
```
Shims can also have configuration options specific to them:
```yaml
qemu-nvidia-gpu:
enabled: ~
supportedArches:
- amd64
- arm64
allowedHypervisorAnnotations: []
containerd:
snapshotter: ""
runtimeClass:
# This label is automatically added by gpu-operator. Override it
# if you want to use a different label.
# Uncomment once GPU Operator v26.3 is out
# nodeSelector:
# nvidia.com/cc.ready.state: "false"
```
It's best to reference the default `values.yaml` file above for more details.
### Custom Runtimes
Kata allows you to create custom runtime configurations. This is done by overlaying one of the pre-existing runtime configs with user-provided configs. For example, we can use the `qemu-nvidia-gpu` as a base config and overlay our own parameters to it:
```yaml
customRuntimes:
enabled: false
runtimes:
my-gpu-runtime:
baseConfig: "qemu-nvidia-gpu" # Required: existing config to use as base
dropIn: | # Optional: overrides via config.d mechanism
[hypervisor.qemu]
default_memory = 1024
default_vcpus = 4
runtimeClass: |
kind: RuntimeClass
apiVersion: node.k8s.io/v1
metadata:
name: kata-my-gpu-runtime
labels:
app.kubernetes.io/managed-by: kata-deploy
handler: kata-my-gpu-runtime
overhead:
podFixed:
memory: "640Mi"
cpu: "500m"
scheduling:
nodeSelector:
katacontainers.io/kata-runtime: "true"
# Optional: CRI-specific configuration
containerd:
snapshotter: "nydus" # Configure containerd snapshotter (nydus, erofs, etc.)
crio:
pullType: "guest-pull" # Configure CRI-O runtime_pull_image = true
```
Again, view the default [`values.yaml`](#parameters) file for more details.
## Examples
We provide a few examples that you can pass to helm via the `-f`/`--values` flag.
### [`try-kata-tee.values.yaml`](https://github.com/kata-containers/kata-containers/blob/main/tools/packaging/kata-deploy/helm-chart/kata-deploy/try-kata-tee.values.yaml)
This file enables only the TEE (Trusted Execution Environment) shims for confidential computing:
```sh
helm install kata-deploy oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy \
--version VERSION \
-f try-kata-tee.values.yaml
```
Includes:
- `qemu-snp` - AMD SEV-SNP (amd64)
- `qemu-tdx` - Intel TDX (amd64)
- `qemu-se` - IBM Secure Execution for Linux (SEL) (s390x)
- `qemu-se-runtime-rs` - IBM Secure Execution for Linux (SEL) Rust runtime (s390x)
- `qemu-cca` - Arm Confidential Compute Architecture (arm64)
- `qemu-coco-dev` - Confidential Containers development (amd64, s390x)
- `qemu-coco-dev-runtime-rs` - Confidential Containers development Rust runtime (amd64, s390x)
### [`try-kata-nvidia-gpu.values.yaml`](https://github.com/kata-containers/kata-containers/blob/main/tools/packaging/kata-deploy/helm-chart/kata-deploy/try-kata-nvidia-gpu.values.yaml)
This file enables only the NVIDIA GPU-enabled shims:
```sh
helm install kata-deploy oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy \
--version VERSION \
-f try-kata-nvidia-gpu.values.yaml
```
Includes:
- `qemu-nvidia-gpu` - Standard NVIDIA GPU support (amd64, arm64)
- `qemu-nvidia-gpu-snp` - NVIDIA GPU with AMD SEV-SNP (amd64)
- `qemu-nvidia-gpu-tdx` - NVIDIA GPU with Intel TDX (amd64)
### `nodeSelector`
We can deploy Kata only to specific nodes using `nodeSelector`
```sh
# First, label the nodes where you want kata-containers to be installed
$ kubectl label nodes worker-node-1 kata-containers=enabled
$ kubectl label nodes worker-node-2 kata-containers=enabled
# Then install the chart with `nodeSelector`
$ helm install kata-deploy \
--set nodeSelector.kata-containers="enabled" \
"${CHART}" --version "${VERSION}"
```
You can also use a values file:
```yaml title="values.yaml"
nodeSelector:
kata-containers: "enabled"
node-type: "worker"
```
```sh
$ helm install kata-deploy -f values.yaml "${CHART}" --version "${VERSION}"
```
### Multiple Kata installations on the Same Node
For debugging, testing and other use-case it is possible to deploy multiple
versions of Kata on the very same node. All the needed artifacts are getting the
`multiInstallSuffix` appended to distinguish each installation. **BEWARE** that one
needs at least **containerd-2.0** since this version has drop-in conf support
which is a prerequisite for the `multiInstallSuffix` to work properly.
```sh
$ helm install kata-deploy-cicd \
-n kata-deploy-cicd \
--set env.multiInstallSuffix=cicd \
--set env.debug=true \
"${CHART}" --version "${VERSION}"
```
Note: `runtimeClasses` are automatically created by Helm (via
`runtimeClasses.enabled=true`, which is the default).
Now verify the installation by examining the `runtimeClasses`:
```sh
$ kubectl get runtimeClasses
NAME HANDLER AGE
kata-clh-cicd kata-clh-cicd 77s
kata-cloud-hypervisor-cicd kata-cloud-hypervisor-cicd 77s
kata-dragonball-cicd kata-dragonball-cicd 77s
kata-fc-cicd kata-fc-cicd 77s
kata-qemu-cicd kata-qemu-cicd 77s
kata-qemu-coco-dev-cicd kata-qemu-coco-dev-cicd 77s
kata-qemu-nvidia-gpu-cicd kata-qemu-nvidia-gpu-cicd 77s
kata-qemu-nvidia-gpu-snp-cicd kata-qemu-nvidia-gpu-snp-cicd 77s
kata-qemu-nvidia-gpu-tdx-cicd kata-qemu-nvidia-gpu-tdx-cicd 76s
kata-qemu-runtime-rs-cicd kata-qemu-runtime-rs-cicd 77s
kata-qemu-se-runtime-rs-cicd kata-qemu-se-runtime-rs-cicd 77s
kata-qemu-snp-cicd kata-qemu-snp-cicd 77s
kata-qemu-tdx-cicd kata-qemu-tdx-cicd 77s
kata-stratovirt-cicd kata-stratovirt-cicd 77s
```
## RuntimeClass Node Selectors for TEE Shims
**Manual configuration:** Any `nodeSelector` you set under `shims.<shim>.runtimeClass.nodeSelector`
is **always applied** to that shim's RuntimeClass, whether or not NFD is present. Use this when
you want to pin TEE workloads to specific nodes (e.g. without NFD, or with custom labels).
**Auto-inject when NFD is present:** If you do *not* set a `runtimeClass.nodeSelector` for a
TEE shim, the chart can **automatically inject** NFD-based labels when NFD is detected in the
cluster (deployed by this chart with `node-feature-discovery.enabled=true` or found externally):
- AMD SEV-SNP shims: `amd.feature.node.kubernetes.io/snp: "true"`
- Intel TDX shims: `intel.feature.node.kubernetes.io/tdx: "true"`
- IBM Secure Execution for Linux (SEL) shims (s390x): `feature.node.kubernetes.io/cpu-security.se.enabled: "true"`
The chart uses Helm's `lookup` function to detect NFD (by looking for the
`node-feature-discovery-worker` DaemonSet). Auto-inject only runs when NFD is detected and
no manual `runtimeClass.nodeSelector` is set for that shim.
**Note**: NFD detection requires cluster access. During `helm template` (dry-run without a
cluster), external NFD is not seen, so auto-injected labels are not added. Manual
`runtimeClass.nodeSelector` values are still applied in all cases.
## Customizing Configuration with Drop-in Files
When kata-deploy installs Kata Containers, the base configuration files should not
be modified directly. Instead, use drop-in configuration files to customize
settings. This approach ensures your customizations survive kata-deploy upgrades.
### How Drop-in Files Work
The Kata runtime reads the base configuration file and then applies any `.toml`
files found in the `config.d/` directory alongside it. Files are processed in
alphabetical order, with later files overriding earlier settings.
### Creating Custom Drop-in Files
To add custom settings, create a `.toml` file in the appropriate `config.d/`
directory. Use a numeric prefix to control the order of application.
**Reserved prefixes** (used by kata-deploy):
- `10-*`: Core kata-deploy settings
- `20-*`: Debug settings
- `30-*`: Kernel parameters
**Recommended prefixes for custom settings**: `50-89`
### Drop-In Config Examples
#### Adding Custom Kernel Parameters
```bash
# SSH into the node or use kubectl exec
sudo mkdir -p /opt/kata/share/defaults/kata-containers/runtimes/qemu/config.d/
sudo cat > /opt/kata/share/defaults/kata-containers/runtimes/qemu/config.d/50-custom.toml << 'EOF'
[hypervisor.qemu]
kernel_params = "my_param=value"
EOF
```
#### Changing Default Memory Size
```bash
sudo cat > /opt/kata/share/defaults/kata-containers/runtimes/qemu/config.d/50-memory.toml << 'EOF'
[hypervisor.qemu]
default_memory = 4096
EOF
```

View File

@@ -23,7 +23,7 @@ workloads with isolated sandboxes (i.e. Kata Containers).
As a result, the CRI implementations extended their semantics for the requirements:
- At the beginning, [Frakti](https://github.com/kubernetes/frakti) checks the network configuration of a Pod, and
- At the beginning, [`Frakti`](https://github.com/kubernetes/frakti) checks the network configuration of a Pod, and
treat Pod with `host` network as trusted, while others are treated as untrusted.
- The containerd introduced an annotation for untrusted Pods since [v1.0](https://github.com/containerd/cri/blob/v1.0.0-rc.0/docs/config.md):
```yaml

View File

@@ -18,7 +18,7 @@ The host kernel must be equal to or later than upstream version [6.11](https://c
[`sev-utils`](https://github.com/amd/sev-utils/blob/coco-202501150000/docs/snp.md) is an easy way to install the required host kernel with the `setup-host` command. However, it will also build compatible guest kernel, OVMF, and QEMU components which are not necessary as these components are packaged with kata. The `sev-utils` script utility can be used with these additional components to test the memory encrypted launch and attestation of a base QEMU SNP guest.
For a simplified way to build just the upstream compatible host kernel, use the Confidential Containers fork of [AMDESE AMDSEV](https://github.com/confidential-containers/amdese-amdsev/tree/amd-snp-202501150000). Individual components can be built by running the following command:
For a simplified way to build just the upstream compatible host kernel, use the Confidential Containers fork of [`amdese-amdsev`](https://github.com/confidential-containers/amdese-amdsev/tree/amd-snp-202501150000). Individual components can be built by running the following command:
```
./build.sh kernel host --install
@@ -65,7 +65,7 @@ $ ./configure --enable-virtfs --target-list=x86_64-softmmu --enable-debug
$ make -j "$(nproc)"
$ popd
```
- Create cert-chain for SNP attestation ( using [snphost](https://github.com/virtee/snphost/blob/main/docs/snphost.1.adoc) )
- Create cert-chain for SNP attestation ( using [`snphost`](https://github.com/virtee/snphost/blob/main/docs/snphost.1.adoc) )
```bash
$ git clone https://github.com/virtee/snphost.git && cd snphost/
$ cargo build
@@ -178,4 +178,3 @@ sudo reboot
```bash
sudo rmmod kvm_amd && sudo modprobe kvm_amd sev_snp=0
```

View File

@@ -315,7 +315,7 @@ $ kata-agent-ctl connect --server-address "unix:///var/run/kata/$PODID/root/kata
### compact_threshold
Control the mem-agent compaction function compact threshold.<br>
compact_threshold is the pages number.<br>
When examining the /proc/pagetypeinfo, if there's an increase in the number of movable pages of orders smaller than the compact_order compared to the amount following the previous compaction period, and this increase surpasses a certain threshold specifically, more than compact_threshold number of pages, or the number of free pages has decreased by compact_threshold since the previous compaction. Current compact run period will not do compaction because there is no enough fragmented pages to be compaction.<br>
When examining the `/proc/pagetypeinfo`, if there's an increase in the number of movable pages of orders smaller than the compact_order compared to the amount following the previous compaction period, and this increase surpasses a certain threshold specifically, more than compact_threshold number of pages, or the number of free pages has decreased by compact_threshold since the previous compaction. Current compact run period will not do compaction because there is no enough fragmented pages to be compaction.<br>
This design aims to minimize the impact of unnecessary compaction calls on system performance.<br>
Default to 1024.

View File

@@ -8,7 +8,7 @@
> **Note:** `cri-tools` is only used for debugging and validation purpose, and don't use it to run production workloads.
> **Note:** For how to install and configure `cri-tools` with CRI runtimes like `containerd` or CRI-O, please also refer to other [howtos](./README.md).
> **Note:** For how to install and configure `cri-tools` with CRI runtimes like `containerd` or CRI-O, please also refer to other [how-tos](./README.md).
## Use `crictl` run Pods in Kata containers

View File

@@ -16,83 +16,38 @@ which hypervisors you may wish to investigate further.
## Types
| Hypervisor | Written in | Architectures | Type |
|-|-|-|-|
|[Cloud Hypervisor] | rust | `aarch64`, `x86_64` | Type 2 ([KVM]) |
|[Firecracker] | rust | `aarch64`, `x86_64` | Type 2 ([KVM]) |
|[QEMU] | C | all | Type 2 ([KVM]) | `configuration-qemu.toml` |
|[`Dragonball`] | rust | `aarch64`, `x86_64` | Type 2 ([KVM]) |
|[StratoVirt] | rust | `aarch64`, `x86_64` | Type 2 ([KVM]) |
| Hypervisor | Written in | Architectures | GPU Support | Intel TDX | AMD SEV-SNP |
|-|-|-|-|-|-|
|[Cloud Hypervisor](#cloud-hypervisor) | rust | `aarch64`, `x86_64` | :x: | :x: | :x: |
|[Firecracker](#firecracker) | rust | `aarch64`, `x86_64` | :x: | :x: | :x: |
|[QEMU](#qemu) | C | all | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|[Dragonball](#dragonball) | rust | `aarch64`, `x86_64` | :x: | :x: | :x: |
|StratoVirt | rust | `aarch64`, `x86_64` | :x: | :x: | :x: |
## Determine currently configured hypervisor
Each Kata runtime is configured for a specific hypervisor through the runtime's configuration file. For example:
```bash
$ kata-runtime kata-env | awk -v RS= '/\[Hypervisor\]/' | grep Path
```toml title="/opt/kata/share/defaults/kata-containers/configuration.toml"
[hypervisor.qemu]
path = "/opt/kata/bin/qemu-system-x86_64"
```
## Choose a Hypervisor
```toml title="/opt/kata/share/defaults/kata-containers/configuration-clh.toml"
[hypervisor.clh]
path = "/opt/kata/bin/cloud-hypervisor"
```
The table below provides a brief summary of some of the differences between
the hypervisors:
## Cloud Hypervisor
| Hypervisor | Summary | Features | Limitations | Container Creation speed | Memory density | Use cases | Comment |
|-|-|-|-|-|-|-|-|
|[Cloud Hypervisor] | Low latency, small memory footprint, small attack surface | Minimal | | excellent | excellent | High performance modern cloud workloads | |
|[Firecracker] | Very slimline | Extremely minimal | Doesn't support all device types | excellent | excellent | Serverless / FaaS | |
|[QEMU] | Lots of features | Lots | | good | good | Good option for most users | |
|[`Dragonball`] | Built-in VMM, low CPU and memory overhead| Minimal | | excellent | excellent | Optimized for most container workloads | `out-of-the-box` Kata Containers experience |
|[StratoVirt] | Unified architecture supporting three scenarios: VM, container, and serverless | Extremely minimal(`MicroVM`) to Lots(`StandardVM`) | | excellent | excellent | Common container workloads | `StandardVM` type of StratoVirt for Kata is under development |
[Cloud Hypervisor](https://www.cloudhypervisor.org/) is a more modern hypervisor written in Rust.
For further details, see the [Virtualization in Kata Containers](design/virtualization.md) document and the official documentation for each hypervisor.
## Firecracker
## Hypervisor configuration files
[Firecracker](https://firecracker-microvm.github.io/) is a minimal and lightweight hypervisor created for the AWS Lambda product.
Since each hypervisor offers different features and options, Kata Containers
provides a separate
[configuration file](../src/runtime/README.md#configuration)
for each. The configuration files contain comments explaining which options
are available, their default values and how each setting can be used.
## QEMU
| Hypervisor | Golang runtime config file | golang runtime short name | golang runtime default | rust runtime config file | rust runtime short name | rust runtime default |
|-|-|-|-|-|-|-|
| [Cloud Hypervisor] | [`configuration-clh.toml`](../src/runtime/config/configuration-clh.toml.in) | `clh` | | [`configuration-cloud-hypervisor.toml`](../src/runtime-rs/config/configuration-cloud-hypervisor.toml.in) | `cloud-hypervisor` | |
| [Firecracker] | [`configuration-fc.toml`](../src/runtime/config/configuration-fc.toml.in) | `fc` | | | | |
| [QEMU] | [`configuration-qemu.toml`](../src/runtime/config/configuration-qemu.toml.in) | `qemu` | yes | [`configuration-qemu.toml`](../src/runtime-rs/config/configuration-qemu-runtime-rs.toml.in) | `qemu` | |
| [`Dragonball`] | | | | [`configuration-dragonball.toml`](../src/runtime-rs/config/configuration-dragonball.toml.in) | `dragonball` | yes |
| [StratoVirt] | [`configuration-stratovirt.toml`](../src/runtime/config/configuration-stratovirt.toml.in) | `stratovirt` | | | | |
QEMU is the best supported hypervisor for NVIDIA-based GPUs and for confidential computing use-cases (such as Intel TDX and AMD SEV-SNP). Runtimes that use this are normally named `kata-qemu-nvidia-gpu-*`. The Kata project focuses primarily on QEMU runtimes for GPU support.
> **Notes:**
>
> - The short names specified are used by the [`kata-manager`](../utils/README.md) tool.
> - As shown by the default columns, each runtime type has its own default hypervisor.
> - The [golang runtime](../src/runtime) is the current default runtime.
> - The [rust runtime](../src/runtime-rs), also known as `runtime-rs`,
> is the newer runtime written in the rust language.
> - See the [Configuration](../README.md#configuration) for further details.
> - The configuration file links in the table link to the "source"
> versions: these are not usable configuration files as they contain
> variables that need to be expanded:
> - The links are provided for reference only.
> - The final (installed) versions, where all variables have been
> expanded, are built from these source configuration files.
> - The pristine configuration files are usually installed in the
> `/opt/kata/share/defaults/kata-containers/` or
> `/usr/share/defaults/kata-containers/` directories.
> - Some hypervisors may have the same name for both golang and rust
> runtimes, but the file contents may differ.
> - If there is no configuration file listed for the golang or
> rust runtimes, this either means the hypervisor cannot be run with
> a particular runtime, or that a driver has not yet been made
> available for that runtime.
## Dragonball
## Switch configured hypervisor
To switch the configured hypervisor, you only need to run a single command.
See [the `kata-manager` documentation](../utils/README.md#choose-a-hypervisor) for further details.
[Cloud Hypervisor]: https://github.com/cloud-hypervisor/cloud-hypervisor
[Firecracker]: https://github.com/firecracker-microvm/firecracker
[KVM]: https://en.wikipedia.org/wiki/Kernel-based_Virtual_Machine
[QEMU]: http://www.qemu.org
[`Dragonball`]: https://github.com/kata-containers/kata-containers/blob/main/src/dragonball
[StratoVirt]: https://gitee.com/openeuler/stratovirt
Dragonball is a special hypervisor created by the Ant Group that runs in the same process as the Rust-based containerd shim.

94
docs/index.md Normal file
View File

@@ -0,0 +1,94 @@
# Kata Containers
Kata Containers is an open source community working to build a secure container runtime with lightweight virtual machines (VM's) that feel and perform like standard Linux containers, but provide stronger workload isolation using hardware virtualization technology as a second layer of defense.
## How it Works
Kata implements the [Open Containers Runtime Specification](https://github.com/opencontainers/runtime-spec). More specifically, it implements a containerd shim that implements the expected interface for managing container lifecycles. The default containerd runtime of `runc` spawns a container like this:
```mermaid
flowchart TD
subgraph Host
containerd
runc
process[Container Process]
containerd --> runc --> process
end
```
When containerd receives a request to spawn a container, it will pull the container image down and then call out to the runc shim (usually located at `/usr/local/bin/containerd-shim-runc-v2`). runc will then create various process isolation resources like Linux namespaces (networking, PIDs, mounts etc), seccomp filters, Linux capability reductions, and then spawn the process inside of those resources. This process runs in the host kernel.
Kata spawns containers like this:
```mermaid
flowchart TD
subgraph Host
containerdOuter[containerd]
kata
containerdOuter --> kata
kata --> kataAgent
subgraph VM
kataAgent[Kata Agent]
process[Container Process]
kataAgent --> process
end
end
```
The container process spawned inside of the VM allows us to isolate the guest kernel from the host system. This is the fundamental principle of how Kata achieves its isolation boundaries.
## Example
When Kata is installed in a system, a number of artifacts are laid down. containerd's config will be modified as such:
```toml title="/etc/containerd/config.toml"
imports = ["/opt/kata/containerd/config.d/kata-deploy.toml"]
```
This file will contain configuration for various flavors of Kata runtimes. We can see the vanilla CPU runtime config here:
```toml title="/opt/kata/containerd/config.d/kata-deploy.toml"
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.kata-qemu]
runtime_type = "io.containerd.kata-qemu.v2"
runtime_path = "/opt/kata/bin/containerd-shim-kata-v2"
privileged_without_host_devices = true
pod_annotations = ["io.katacontainers.*"]
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.kata-qemu.options]
ConfigPath = "/opt/kata/share/defaults/kata-containers/configuration-qemu.toml"
```
Because containerd's CRI is aware of the Kata runtimes, we can spawn Kubernetes pods:
```yaml
apiVersion: v1
kind: Pod
metadata:
name: test
spec:
runtimeClassName: kata-qemu
containers:
- name: test
image: "quay.io/libpod/ubuntu:latest"
command: ["/bin/bash", "-c"]
args: ["echo hello"]
```
We can also spawn a Kata container by submitting a request to containerd like so:
<div class="annotate" markdown>
```sh
$ ctr image pull quay.io/libpod/ubuntu:latest
$ ctr run --runtime "io.containerd.kata.v2" --runtime-config-path /opt/kata/share/defaults/kata-containers/configuration-qemu.toml --rm -t "quay.io/libpod/ubuntu:latest" foo sh
# echo hello
hello
```
</div>
!!! tip
`ctr` is not aware of the CRI config in `/etc/containerd/config.toml`. This is why you must specify the `--runtime-config-path`. Additionally, the `--runtime` value is converted into a specific binary name which containerd then searches for in its `PATH`. See the [containerd docs](https://github.com/containerd/containerd/blob/release/2.2/core/runtime/v2/README.md#usage) for more details.

64
docs/installation.md Normal file
View File

@@ -0,0 +1,64 @@
# Installation
## Helm Chart
[helm](https://helm.sh/docs/intro/install/) can be used to install templated kubernetes manifests.
### Prerequisites
- **Kubernetes ≥ v1.22** v1.22 is the first release where the CRI v1 API
became the default and `RuntimeClass` left alpha. The chart depends on those
stable interfaces; earlier clusters need `featuregates` or CRI shims that are
out of scope.
- **Kata Release 3.12** - v3.12.0 introduced publishing the helm-chart on the
release page for easier consumption, since v3.8.0 we shipped the helm-chart
via source code in the kata-containers `GitHub` repository.
- CRIcompatible runtime (containerd or CRIO). If one wants to use the
`multiInstallSuffix` feature one needs at least **containerd-2.0** which
supports drop-in config files
- Nodes must allow loading kernel modules and installing Kata artifacts (the
chart runs privileged containers to do so)
### `helm install`
```sh
# Install directly from the official ghcr.io OCI registry
# update the VERSION X.YY.Z to your needs or just use the latest
export VERSION=$(curl -sSL https://api.github.com/repos/kata-containers/kata-containers/releases/latest | jq .tag_name | tr -d '"')
export CHART="oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy"
$ helm install kata-deploy "${CHART}" --version "${VERSION}"
# See everything you can configure
$ helm show values "${CHART}" --version "${VERSION}"
```
This installs the `kata-deploy` DaemonSet and the default Kata `RuntimeClass`
resources on your cluster.
To see what versions of the chart are available:
```sh
$ helm show chart oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy
```
### `helm uninstall`
```sh
$ helm uninstall kata-deploy -n kube-system
```
During uninstall, Helm will report that some resources were kept due to the
resource policy (`ServiceAccount`, `ClusterRole`, `ClusterRoleBinding`). This
is **normal**. A post-delete hook Job runs after uninstall and removes those
resources so no cluster-wide `RBAC` is left behind.
## Pre-Built Release
Kata can also be installed using the pre-built releases: https://github.com/kata-containers/kata-containers/releases
This method does not have any facilities for artifact lifecycle management.

116
docs/prerequisites.md Normal file
View File

@@ -0,0 +1,116 @@
# Prerequisites
## Kubernetes
If using Kubernetes, at least version `v1.22` is recommended. This is the first release that the CRI v1 API and the `RuntimeClass` left alpha.
## containerd
Kata requires a [CRI](https://kubernetes.io/docs/concepts/containers/cri/)-compatible container runtime. containerd is commonly used for Kata. We recommend installing containerd using your platform's package distribution mechanism. We recommend at least the latest version of containerd v2.1.x.[^1]
### Debian/Ubuntu
To install on Debian-based systems:
```sh
$ apt update
$ apt install containerd
$ systemctl status containerd
● containerd.service - containerd container runtime
Loaded: loaded (/etc/systemd/system/containerd.service; enabled; preset: enabled)
Drop-In: /etc/systemd/system/containerd.service.d
└─http-proxy.conf
Active: active (running) since Wed 2026-02-25 22:58:13 UTC; 5 days ago
Docs: https://containerd.io
Main PID: 3767885 (containerd)
Tasks: 540
Memory: 70.7G (peak: 70.8G)
CPU: 4h 9min 26.153s
CGroup: /runtime.slice/containerd.service
├─ 12694 /usr/local/bin/container
```
### Fedora/RedHat
To install on Fedora-based systems:
```
$ yum install containerd
```
??? help
Documentation assistance is requested for more specific instructions on Fedora systems.
### Pre-Built Releases
Many Linux distributions will not package the latest versions of containerd. If you find that your distribution provides very old versions of containerd, it's recommended to upgrade with the [pre-built releases](https://github.com/containerd/containerd/releases).
#### Executable
Download the latest release of containerd:
```sh
$ wget https://github.com/containerd/containerd/releases/download/v${VERSION}/containerd-${VERSION}-linux-${PLATFORM}.tar.gz
# Extract to the current directory
$ tar -xf ./containerd*.tar.gz
# Extract to root if you want it installed to its final location.
$ tar -C / -xf ./*.tar.gz
```
### Containerd Config
Containerd requires a config file at `/etc/containerd/config.toml`. This needs to be populated with a simple default config:
```sh
$ /usr/local/bin/containerd config default > /etc/containerd/config.toml
```
### Systemd Unit File
Install the systemd unit file:
```sh
$ wget -O /etc/systemd/system/containerd.service https://raw.githubusercontent.com/containerd/containerd/main/containerd.service
```
!!! info
- You must modify the `ExecStart` line to the location of the installed containerd executable.
- containerd's `PATH` variable must allow it to find `containerd-shim-kata-v2`. You can do this by either creating a symlink from `/usr/local/bin/containerd-shim-kata-v2` to `/opt/kata/bin/containerd-shim-kata-v2` or by modifying containerd's `PATH` variable to search in `/opt/kata/bin/`. See the Environment= command in systemd.exec(5) for further details.
Reload systemd and start containerd:
```sh
$ systemctl daemon-reload
$ systemctl enable --now containerd
$ systemctl start containerd
$ systemctl status containerd
```
More details can be found on the [containerd installation docs](https://github.com/containerd/containerd/blob/main/docs/getting-started.md).
### Enable CRI
If you're using Kubernetes, you must enable the containerd Container Runtime Interface (CRI) plugin:
```sh
$ ctr plugins ls | grep cri
io.containerd.cri.v1 images - ok
io.containerd.cri.v1 runtime linux/amd64 ok
io.containerd.grpc.v1 cri - ok
```
If these are not enabled, you'll need to remove it from the `disabled_plugins` section of the containerd config.
[^1]: Kata makes use of containerd's drop-in config merging in `/etc/containerd/config.d/` which is only available starting from containerd v2. containerd v1 may work, but some Kata features will not work as expected.
## runc
The default `runc` runtime needs to be installed for non-kata containers. More details can be found at the [containerd docs](https://github.com/containerd/containerd/blob/979c80d8a5d7fc7be34102a1ada53ae5a0ff09e8/docs/RUNC.md).

9
docs/requirements.txt Normal file
View File

@@ -0,0 +1,9 @@
mkdocs-materialx==10.0.9
mkdocs-glightbox==0.4.0
mkdocs-macros-plugin==1.5.0
mkdocs-awesome-nav==3.3.0
mkdocs-open-in-new-tab==1.0.8
mkdocs-redirects==1.2.2
CairoSVG==2.9.0
pillow==12.1.1
click==8.2.1

View File

@@ -0,0 +1,56 @@
# Runtime Configuration
The containerd shims (both the Rust and Go implementations) take configuration files to control their behavior. These files are in `/opt/kata/share/defaults/kata-containers/`. An example excerpt:
```toml title="/opt/kata/share/defaults/kata-containers/configuration.toml"
[hypervisor.qemu]
path = "/opt/kata/bin/qemu-system-x86_64"
kernel = "/opt/kata/share/kata-containers/vmlinux.container"
image = "/opt/kata/share/kata-containers/kata-containers.img"
machine_type = "q35"
# rootfs filesystem type:
# - ext4 (default)
# - xfs
# - erofs
rootfs_type = "ext4"
# Enable running QEMU VMM as a non-root user.
# By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as
# a non-root random user. See documentation for the limitations of this mode.
rootless = false
# List of valid annotation names for the hypervisor
# Each member of the list is a regular expression, which is the base name
# of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path"
enable_annotations = ["enable_iommu", "virtio_fs_extra_args", "kernel_params"]
```
These files should never be modified directly. If you wish to create a modified version of these files, you may create your own [custom runtime](helm-configuration.md#custom-runtimes). For example, to modify the image path, we provide these values to helm:
```yaml title="values.yaml"
customRuntimes:
enabled: true
runtimes:
my-gpu-runtime:
baseConfig: "qemu-nvidia-gpu"
dropIn: |
[hypervisor.qemu]
image = "/path/to/custom-image.img"
runtimeClass: |
kind: RuntimeClass
apiVersion: node.k8s.io/v1
metadata:
name: kata-my-gpu-runtime
labels:
app.kubernetes.io/managed-by: kata-deploy
handler: kata-my-gpu-runtime
overhead:
podFixed:
memory: "640Mi"
cpu: "500m"
scheduling:
nodeSelector:
katacontainers.io/kata-runtime: "true"
```

View File

@@ -175,7 +175,7 @@ specific).
##### Dragonball networking
For Dragonball, the `virtio-net` backend default is within Dragonbasll's VMM.
For Dragonball, the `virtio-net` backend default is within Dragonball's VMM.
#### virtio-vsock

View File

@@ -1,11 +1,12 @@
# Enabling NVIDIA GPU workloads using GPU passthrough with Kata Containers
This page provides:
1. A description of the components involved when running GPU workloads with
Kata Containers using the NVIDIA TEE and non-TEE GPU runtime classes.
1. An explanation of the orchestration flow on a Kubernetes node for this
scenario.
1. A deployment guide enabling to utilize these runtime classes.
1. A deployment guide to utilize these runtime classes.
The goal is to educate readers familiar with Kubernetes and Kata Containers
on NVIDIA's reference implementation which is reflected in Kata CI's build
@@ -18,58 +19,56 @@ Confidential Containers.
> **Note:**
>
> The current supported mode for enabling GPU workloads in the TEE scenario
> is single GPU passthrough (one GPU per pod) on AMD64 platforms (AMD SEV-SNP
> being the only supported TEE scenario so far with support for Intel TDX being
> on the way).
> The currently supported modes for enabling GPU workloads in the TEE
> scenario are: (1) singleGPU passthrough (one physical GPU per pod) and
> (2) multi-GPU passthrough on NVSwitch (NVLink) based HGX systems
> (for example, HGX Hopper (SXM) and HGX Blackwell / HGX B200).
## Component Overview
Before providing deployment guidance, we describe the components involved to
support running GPU workloads. We start from a top to bottom perspective
from the NVIDIA GPU operator via the Kata runtime to the components within
from the NVIDIA GPU Operator via the Kata runtime to the components within
the NVIDIA GPU Utility Virtual Machine (UVM) root filesystem.
### NVIDIA GPU Operator
A central component is the
[NVIDIA GPU operator](https://github.com/NVIDIA/gpu-operator) which can be
deployed onto your cluster as a helm chart. Installing the GPU operator
[NVIDIA GPU Operator](https://github.com/NVIDIA/gpu-operator) which can be
deployed onto your cluster as a helm chart. Installing the GPU Operator
delivers various operands on your nodes in the form of Kubernetes DaemonSets.
These operands are vital to support the flow of orchestrating pod manifests
using NVIDIA GPU runtime classes with GPU passthrough on your nodes. Without
getting into the details, the most important operands and their
responsibilities are:
- **nvidia-vfio-manager:** Binding discovered NVIDIA GPUs to the `vfio-pci`
driver for VFIO passthrough.
- **nvidia-vfio-manager:** Binding discovered NVIDIA GPUs and nvswitches to
the `vfio-pci` driver for VFIO passthrough.
- **nvidia-cc-manager:** Transitioning GPUs into confidential computing (CC)
and non-CC mode (see the
[NVIDIA/k8s-cc-manager](https://github.com/NVIDIA/k8s-cc-manager)
repository).
- **nvidia-kata-manager:** Creating host-side CDI specifications for GPU
passthrough, resulting in the file `/var/run/cdi/nvidia.yaml`, containing
`kind: nvidia.com/pgpu` (see the
[NVIDIA/k8s-kata-manager](https://github.com/NVIDIA/k8s-kata-manager)
repository).
- **nvidia-sandbox-device-plugin** (see the
[NVIDIA/sandbox-device-plugin](https://github.com/NVIDIA/sandbox-device-plugin)
repository):
- Creating host-side CDI specifications for GPU passthrough,
resulting in the file `/var/run/cdi/nvidia.yaml`, containing
`kind: nvidia.com/pgpu`
- Allocating GPUs during pod deployment.
- Discovering NVIDIA GPUs, their capabilities, and advertising these to
the Kubernetes control plane (allocatable resources as type
`nvidia.com/pgpu` resources will appear for the node and GPU Device IDs
will be registered with Kubelet). These GPUs can thus be allocated as
container resources in your pod manifests. See below GPU operator
container resources in your pod manifests. See below GPU Operator
deployment instructions for the use of the key `pgpu`, controlled via a
variable.
To summarize, the GPU operator manages the GPUs on each node, allowing for
To summarize, the GPU Operator manages the GPUs on each node, allowing for
simple orchestration of pod manifests using Kata Containers. Once the cluster
with GPU operator and Kata bits is up and running, the end user can schedule
with GPU Operator and Kata bits is up and running, the end user can schedule
Kata NVIDIA GPU workloads, using resource limits and the
`kata-qemu-nvidia-gpu` or `kata-qemu-nvidia-gpu-snp` runtime classes, for
example:
`kata-qemu-nvidia-gpu`, `kata-qemu-nvidia-gpu-tdx` or
`kata-qemu-nvidia-gpu-snp` runtime classes, for example:
```yaml
apiVersion: v1
@@ -213,7 +212,7 @@ API and kernel drivers, interacting with the pass-through GPU device.
An additional step is exercised in our CI samples: when using images from an
authenticated registry, the guest-pull mechanism triggers attestation using
trustee's Key Broker Service (KBS) for secure release of the NGC API
Trustee's Key Broker Service (KBS) for secure release of the NGC API
authentication key used to access the NVCR container registry. As part of
this, the attestation agent exercises composite attestation and transitions
the GPU into `Ready` state (without this, the GPU has to explicitly be
@@ -232,24 +231,40 @@ NVIDIA GPU CI validation jobs. Note that, this setup:
- uses the genpolicy tool to attach Kata agent security policies to the pod
manifest
- has dedicated (composite) attestation tests, a CUDA vectorAdd test, and a
NIM/RA test sample with secure API key release
NIM/RA test sample with secure API key release using sealed secrets.
A similar deployment guide and scenario description can be found in NVIDIA resources
under
[Early Access: NVIDIA GPU Operator with Confidential Containers based on Kata](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/confidential-containers.html).
[NVIDIA Confidential Containers Overview (Early Access)](https://docs.nvidia.com/datacenter/cloud-native/confidential-containers/latest/overview.html).
### Feature Set
The NVIDIA stack for Kata Containers leverages features for the confidential
computing scenario from both the confidential containers open source project
and from the Kata Containers source tree, such as:
- composite attestation using Trustee and the NVIDIA Remote Attestation
Service NRAS
- generating kata agent security policies using the genpolicy tool
- use of signed sealed secrets
- access to authenticated registries for container image guest-pull
- container image signature verification and encrypted container images
- ephemeral container data and image layer storage
### Requirements
The requirements for the TEE scenario are:
- Ubuntu 25.10 as host OS
- CPU with AMD SEV-SNP support with proper BIOS/UEFI version and settings
- CPU with AMD SEV-SNP or Intel TDX support with proper BIOS/UEFI version
and settings
- CC-capable Hopper/Blackwell GPU with proper VBIOS version.
BIOS and VBIOS configuration is out of scope for this guide. Other resources,
such as the documentation found on the
[NVIDIA Trusted Computing Solutions](https://docs.nvidia.com/nvtrust/index.html)
page and the above linked NVIDIA documentation, provide guidance on
page, on the
[Secure AI Compatibility Matrix](https://www.nvidia.com/en-us/data-center/solutions/confidential-computing/secure-ai-compatibility-matrix/)
page, and on the above linked NVIDIA documentation, provide guidance on
selecting proper hardware and on properly configuring its firmware and OS.
### Installation
@@ -257,12 +272,16 @@ selecting proper hardware and on properly configuring its firmware and OS.
#### Containerd and Kubernetes
First, set up your Kubernetes cluster. For instance, in Kata CI, our NVIDIA
jobs use a single-node vanilla Kubernetes cluster with a 2.x containerd
version and Kata's current supported Kubernetes version. We set this cluster
up using the `deploy_k8s` function from `tests/integration/kubernetes/gha-run.sh`
as follows:
jobs use a single-node vanilla Kubernetes cluster with a 2.1 containerd
version and Kata's current supported Kubernetes version. This cluster is
being set up using the `deploy_k8s` function from the script file
`tests/integration/kubernetes/gha-run.sh`. If you intend to run this script,
follow these steps, and make sure you have `yq` and `helm` installed. Note
that, these scripts query the GitHub API, so creating and declaring a
personal access token prevents rate limiting issues.
You can execute the function as follows:
```bash
$ export GH_TOKEN="<your-gh-pat>"
$ export KUBERNETES="vanilla"
$ export CONTAINER_ENGINE="containerd"
$ export CONTAINER_ENGINE_VERSION="v2.1"
@@ -276,8 +295,11 @@ $ deploy_k8s
> `runtimeRequestTimeout` timeout value than the two minute default timeout.
> Using the guest-pull mechanism, pulling large images may take a significant
> amount of time and may delay container start, possibly leading your Kubelet
> to de-allocate your pod before it transitions from the *container created*
> to the *container running* state.
> to de-allocate your pod before it transitions from the *container creating*
> to the *container running* state. The NVIDIA shim configurations use a
> `create_container_timeout` of 1200s, which is the equivalent value on shim
> side, controlling the time the shim allows for a container to remain in
> *container creating* state.
> **Note:**
>
@@ -291,7 +313,7 @@ $ deploy_k8s
#### GPU Operator
Assuming you have the helm tools installed, deploy the latest version of the
GPU Operator as a helm chart (minimum version: `v25.10.0`):
GPU Operator as a helm chart (minimum version: `v26.3.0`):
```bash
$ helm repo add nvidia https://helm.ngc.nvidia.com/nvidia && helm repo update
@@ -300,33 +322,27 @@ $ helm install --wait --generate-name \
nvidia/gpu-operator \
--set sandboxWorkloads.enabled=true \
--set sandboxWorkloads.defaultWorkload=vm-passthrough \
--set kataManager.enabled=true \
--set kataManager.config.runtimeClasses=null \
--set kataManager.repository=nvcr.io/nvidia/cloud-native \
--set kataManager.image=k8s-kata-manager \
--set kataManager.version=v0.2.4 \
--set ccManager.enabled=true \
--set ccManager.defaultMode=on \
--set ccManager.repository=nvcr.io/nvidia/cloud-native \
--set ccManager.image=k8s-cc-manager \
--set ccManager.version=v0.2.0 \
--set sandboxDevicePlugin.repository=nvcr.io/nvidia/cloud-native \
--set sandboxDevicePlugin.image=nvidia-sandbox-device-plugin \
--set sandboxDevicePlugin.version=v0.0.1 \
--set 'sandboxDevicePlugin.env[0].name=P_GPU_ALIAS' \
--set 'sandboxDevicePlugin.env[0].value=pgpu' \
--set sandboxWorkloads.mode=kata \
--set nfd.enabled=true \
--set nfd.nodefeaturerules=true
```
> **Note:**
>
> For heterogeneous clusters with different GPU types, you can omit
> the `P_GPU_ALIAS` environment variable lines. This will cause the sandbox
> device plugin to create GPU model-specific resource types (e.g.,
> `nvidia.com/GH100_H100L_94GB`) instead of the generic `nvidia.com/pgpu`,
> which in turn can be used by pods through respective resource limits.
> For simplicity, this guide uses the generic alias.
> For heterogeneous clusters with different GPU types, you can specify an
> empty `P_GPU_ALIAS` environment variable for the sandbox device plugin:
> `- --set 'sandboxDevicePlugin.env[0].name=P_GPU_ALIAS' \`
> `- --set 'sandboxDevicePlugin.env[0].value=""' \`
> This will cause the sandbox device plugin to create GPU model-specific
> resource types (e.g., `nvidia.com/GH100_H100L_94GB`) instead of the
> default `pgpu` type, which usually results in advertising a resource of
> type `nvidia.com/pgpu`
> The exposed device resource types can be used for pods by specifying
> respective resource limits.
> Your node's nvswitches are exposed as resources of type
> `nvidia.com/nvswitch` by default. Using the variable `NVSWITCH_ALIAS`
> allows to control the advertising behavior similar to the `P_GPU_ALIAS`
> variable.
> **Note:**
>
@@ -351,8 +367,7 @@ $ helm install kata-deploy \
--create-namespace \
-f "https://raw.githubusercontent.com/kata-containers/kata-containers/refs/tags/${VERSION}/tools/packaging/kata-deploy/helm-chart/kata-deploy/try-kata-nvidia-gpu.values.yaml" \
--set nfd.enabled=false \
--set shims.qemu-nvidia-gpu-tdx.enabled=false \
--wait --timeout 10m --atomic \
--wait --timeout 10m \
"${CHART}" --version "${VERSION}"
```
@@ -382,31 +397,22 @@ mode which requires entering a licensing agreement with NVIDIA, see the
### Cluster validation and preparation
If you did not use the `sandboxWorkloads.defaultWorkload=vm-passthrough`
parameter during GPU operator deployment, label your nodes for GPU VM
parameter during GPU Operator deployment, label your nodes for GPU VM
passthrough, for the example of using all nodes for GPU passthrough, run:
```bash
$ kubectl label nodes --all nvidia.com/gpu.workload.config=vm-passthrough --overwrite
```
Check if the `nvidia-cc-manager` pod is running if you intend to run GPU TEE
scenarios. If not, you need to manually label the node as CC capable. Current
GPU Operator node feature rules do not yet recognize all CC capable GPU PCI
IDs. Run the following command:
```bash
$ kubectl label nodes --all nvidia.com/cc.capable=true
```
After this, assure the `nvidia-cc-manager` pod is running. With the suggested
parameters for GPU Operator deployment, the `nvidia-cc-manager` will
automatically transition the GPU into CC mode.
With the suggested parameters for GPU Operator deployment, the
`nvidia-cc-manager` operand will automatically transition the GPU into CC
mode.
After deployment, you can transition your node(s) to the desired CC state,
using either the `on` or `off` value, depending on your scenario. For the
non-CC scenario, transition to the `off` state via:
using either the `on`, `ppcie`, or `off` value, depending on your scenario.
For the non-CC scenario, transition to the `off` state via:
`kubectl label nodes --all nvidia.com/cc.mode=off` and wait until all pods
are back running. When an actual change is exercised, various GPU operator
are back running. When an actual change is exercised, various GPU Operator
operands will be restarted.
Ensure all pods are running:
@@ -425,9 +431,10 @@ $ lspci -nnk -d 10de:
### Run the CUDA vectorAdd sample
Create the following file:
Create the pod manifest with:
```yaml
```bash
$ cat > cuda-vectoradd-kata.yaml.in << 'EOF'
apiVersion: v1
kind: Pod
metadata:
@@ -445,6 +452,7 @@ spec:
limits:
nvidia.com/pgpu: "1"
memory: 16Gi
EOF
```
Depending on your scenario and on the CC state, export your desired runtime
@@ -477,6 +485,17 @@ To stop the pod, run: `kubectl delete pod cuda-vectoradd-kata`.
### Next steps
#### Use multi-GPU passthrough
If you have machines supporting multi-GPU passthrough, use a pod deployment
manifest which uses 8 pgpu and 4 nvswitch resources.
On the NVIDIA Hopper architecture multi-GPU passthrough uses protected PCIe
(PPCIE) which claims exclusive use of the nvswitches for a single CVM. In
this case, transition your relevant node(s) GPU mode to `ppcie` mode.
The NVIDIA Blackwell architecture uses NVLink encryption which places the
switches outside of the Trusted Computing Base (TCB) and so does not
require a separate switch setting.
#### Transition between CC and non-CC mode
Use the previously described node labeling approach to transition between
@@ -492,7 +511,7 @@ and a basic NIM/RAG deployment. Running CI tests for the TEE GPU scenario
requires KBS to be deployed (except for the CUDA vectorAdd test). The best
place to get started running these tests locally is to look into our
[NVIDIA CI workflow manifest](https://github.com/kata-containers/kata-containers/blob/main/.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml)
and into the underling
and into the underlying
[run_kubernetes_nv_tests.sh](https://github.com/kata-containers/kata-containers/blob/main/tests/integration/kubernetes/run_kubernetes_nv_tests.sh)
script. For example, to run the CUDA vectorAdd scenario against the TEE GPU
runtime class use the following commands:
@@ -547,6 +566,22 @@ With GPU passthrough being supported by the
you can use the tool to create a Kata agent security policy. Our CI deploys
all sample pod manifests with a Kata agent security policy.
Note that, using containerd 2.1 in upstream's CI, we use the following
modification to the genpolicy default settings:
```bash
[
{
"op": "replace",
"path": "/kata_config/oci_version",
"value": "1.2.1"
}
]
```
This modification is applied via the genpolicy drop-in configuration file
`src\tools\genpolicy\drop-in-examples\20-oci-1.2.1-drop-in.json`.
When using a newer containerd version, such as containerd 2.2, the OCI
version field needs to be adjusted to "1.3.0", for instance.
#### Deploy pods using your own containers and manifests
You can author pod manifests leveraging your own containers, for instance,
@@ -564,6 +599,3 @@ following annotation in the manifest:
>
> - musl-based container images (e.g., using Alpine), or distro-less
> containers are not supported.
> - for the TEE scenario, only single-GPU passthrough per pod is supported,
> so your pod resource limit must be: `nvidia.com/pgpu: "1"` (on a system
> with multiple GPUs, you can thus pass through one GPU per pod).

91
mkdocs.yaml Normal file
View File

@@ -0,0 +1,91 @@
site_name: "Kata Containers Docs"
site_description: "Developer and user documentation for the Kata Containers project."
site_author: "Kata Containers Community"
repo_url: "https://github.com/kata-containers/kata-containers"
site_url: "https://kata-containers.github.io/kata-containers"
edit_uri: "edit/main/docs/"
repo_name: kata-containers
theme:
name: materialx
favicon: "assets/images/favicon.svg"
logo: "assets/images/favicon.svg"
topbar_style: glass
palette:
- media: "(prefers-color-scheme)"
toggle:
icon: material/brightness-auto
name: Switch to light mode
- media: "(prefers-color-scheme: light)"
scheme: default
primary: blue
accent: light blue
toggle:
icon: material/weather-sunny
name: Switch to dark mode
- media: "(prefers-color-scheme: dark)"
scheme: slate
primary: cyan
accent: cyan
toggle:
icon: material/brightness-4
name: Switch to system preference
features:
- content.action.edit
- content.action.view
- content.code.annotate
- content.code.copy
- content.code.select
- content.footnote.tooltips
- content.tabs.link
- content.tooltips
- navigation.expand
- navigation.indexes
- navigation.path
- navigation.sections
- navigation.tabs
- navigation.tracking
- navigation.top
- navigation.instant
- navigation.instant.prefetch
- navigation.instant.progress
- toc.follow
markdown_extensions:
- abbr
- admonition
- attr_list
- def_list
- footnotes
- md_in_html
- pymdownx.arithmatex:
generic: true
- pymdownx.emoji:
emoji_index: !!python/name:material.extensions.emoji.twemoji
emoji_generator: !!python/name:material.extensions.emoji.to_svg
- pymdownx.details
- pymdownx.highlight:
anchor_linenums: true
line_spans: __span
pygments_lang_class: true
auto_title: true
- pymdownx.keys
- pymdownx.magiclink
- pymdownx.superfences:
custom_fences:
- name: mermaid
class: mermaid
format: !!python/name:pymdownx.superfences.fence_code_format
- pymdownx.inlinehilite
- pymdownx.tabbed:
alternate_style: true
- pymdownx.tilde
- pymdownx.caret
- pymdownx.mark
- toc:
permalink: true
plugins:
- search
- awesome-nav

1830
src/agent/Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -63,9 +63,9 @@ cgroups = { package = "cgroups-rs", git = "https://github.com/kata-containers/cg
# Tracing
tracing = "0.1.41"
tracing-subscriber = "0.2.18"
tracing-opentelemetry = "0.13.0"
opentelemetry = { version = "0.14.0", features = ["rt-tokio-current-thread"] }
tracing-subscriber = "0.3.20"
tracing-opentelemetry = "0.17.0"
opentelemetry = { version = "0.17.0", features = ["rt-tokio"] }
# Configuration
serde = { version = "1.0.129", features = ["derive"] }
@@ -78,7 +78,6 @@ strum_macros = "0.26.2"
tempfile = "3.19.1"
which = "4.3.0"
rstest = "0.18.0"
async-std = { version = "1.12.0", features = ["attributes"] }
# Local dependencies
kata-agent-policy = { path = "policy" }
@@ -195,7 +194,6 @@ pv_core = { git = "https://github.com/ibm-s390-linux/s390-tools", rev = "4942504
tempfile.workspace = true
which.workspace = true
rstest.workspace = true
async-std.workspace = true
test-utils.workspace = true

View File

@@ -89,7 +89,7 @@ pub fn baremount(
let destination_str = destination.to_string_lossy();
if let Ok(m) = get_linux_mount_info(destination_str.deref()) {
if m.fs_type == fs_type && !flags.contains(MsFlags::MS_REMOUNT) {
slog_info!(logger, "{source:?} is already mounted at {destination:?}");
slog::info!(logger, "{source:?} is already mounted at {destination:?}");
return Ok(());
}
}

View File

@@ -2322,6 +2322,7 @@ async fn cdh_handler_trusted_storage(oci: &mut Spec) -> Result<()> {
&dev_major_minor,
"luks2",
KATA_IMAGE_WORK_DIR,
"-E lazy_journal_init",
)
.await?;
break;
@@ -2336,6 +2337,7 @@ pub(crate) async fn cdh_secure_mount(
device_id: &str,
encrypt_type: &str,
mount_point: &str,
mkfs_opts: &str,
) -> Result<()> {
if !confidential_data_hub::is_cdh_client_initialized() {
return Ok(());
@@ -2345,11 +2347,12 @@ pub(crate) async fn cdh_secure_mount(
info!(
sl(),
"cdh_secure_mount: device_type {}, device_id {}, encrypt_type {}, integrity {}",
"cdh_secure_mount: device_type {}, device_id {}, encrypt_type {}, integrity {}, mkfs_opts {}",
device_type,
device_id,
encrypt_type,
integrity
integrity,
mkfs_opts
);
let options = std::collections::HashMap::from([
@@ -2357,7 +2360,7 @@ pub(crate) async fn cdh_secure_mount(
("sourceType".to_string(), "empty".to_string()),
("targetType".to_string(), "fileSystem".to_string()),
("filesystemType".to_string(), "ext4".to_string()),
("mkfsOpts".to_string(), "-E lazy_journal_init".to_string()),
("mkfsOpts".to_string(), mkfs_opts.to_string()),
("encryptionType".to_string(), encrypt_type.to_string()),
("dataIntegrity".to_string(), integrity),
]);

View File

@@ -59,8 +59,14 @@ async fn handle_block_storage(
.contains(&"encryption_key=ephemeral".to_string());
if has_ephemeral_encryption {
crate::rpc::cdh_secure_mount("block-device", dev_num, "luks2", &storage.mount_point)
.await?;
crate::rpc::cdh_secure_mount(
"block-device",
dev_num,
"luks2",
&storage.mount_point,
"-O ^has_journal -m 0 -i 163840 -I 128",
)
.await?;
set_ownership(logger, storage)?;
new_device(storage.mount_point.clone())
} else {

View File

@@ -5,7 +5,8 @@
use anyhow::Result;
use opentelemetry::sdk::propagation::TraceContextPropagator;
use opentelemetry::{global, sdk::trace::Config, trace::TracerProvider};
use opentelemetry::trace::TracerProvider;
use opentelemetry::{global, sdk::trace::Config};
use slog::{info, o, Logger};
use std::collections::HashMap;
use tracing_opentelemetry::OpenTelemetryLayer;
@@ -23,15 +24,12 @@ pub fn setup_tracing(name: &'static str, logger: &Logger) -> Result<()> {
let config = Config::default();
let builder = opentelemetry::sdk::trace::TracerProvider::builder()
.with_batch_exporter(exporter, opentelemetry::runtime::TokioCurrentThread)
.with_batch_exporter(exporter, opentelemetry::runtime::Tokio)
.with_config(config);
let provider = builder.build();
// We don't need a versioned tracer.
let version = None;
let tracer = provider.get_tracer(name, version);
let tracer = provider.tracer(name);
let _global_provider = global::set_tracer_provider(provider);

View File

@@ -10,7 +10,7 @@ libc.workspace = true
thiserror.workspace = true
opentelemetry = { workspace = true, features = ["serialize"] }
tokio-vsock.workspace = true
bincode = "1.3.3"
serde_json = "1.0"
byteorder = "1.4.3"
slog = { workspace = true, features = [
"dynamic-keys",

View File

@@ -58,7 +58,7 @@ pub enum Error {
#[error("connection error: {0}")]
ConnectionError(String),
#[error("serialisation error: {0}")]
SerialisationError(#[from] bincode::Error),
SerialisationError(#[from] serde_json::Error),
#[error("I/O error: {0}")]
IOError(#[from] std::io::Error),
}
@@ -81,8 +81,7 @@ async fn write_span(
let mut writer = writer.lock().await;
let encoded_payload: Vec<u8> =
bincode::serialize(&span).map_err(|e| make_io_error(e.to_string()))?;
serde_json::to_vec(span).map_err(|e| make_io_error(e.to_string()))?;
let payload_len: u64 = encoded_payload.len() as u64;
let mut payload_len_as_bytes: [u8; HEADER_SIZE_BYTES as usize] =

View File

@@ -50,6 +50,7 @@ vm-memory = { workspace = true, features = ["backend-mmap"] }
crossbeam-channel = "0.5.6"
vfio-bindings = { workspace = true, optional = true }
vfio-ioctls = { workspace = true, optional = true }
kata-sys-util = { path = "../libs/kata-sys-util" }
[dev-dependencies]
slog-async = "2.7.0"

View File

@@ -1,16 +1,15 @@
// Copyright (C) 2022 Alibaba Cloud. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use std::io::{Read, Write};
use std::sync::atomic::Ordering;
use std::sync::Arc;
use vm_memory::bitmap::{Bitmap, BS};
use vm_memory::guest_memory::GuestMemoryIterator;
use vm_memory::mmap::{Error, NewBitmap};
use vm_memory::mmap::NewBitmap;
use vm_memory::{
guest_memory, AtomicAccess, Bytes, FileOffset, GuestAddress, GuestMemory, GuestMemoryRegion,
GuestRegionMmap, GuestUsize, MemoryRegionAddress, VolatileSlice,
GuestRegionCollectionError, GuestRegionMmap, GuestUsize, MemoryRegionAddress, ReadVolatile,
VolatileSlice, WriteVolatile,
};
use crate::GuestRegionRaw;
@@ -67,63 +66,63 @@ impl<B: Bitmap> Bytes<MemoryRegionAddress> for GuestRegionHybrid<B> {
}
}
fn read_from<F>(
fn read_volatile_from<F>(
&self,
addr: MemoryRegionAddress,
src: &mut F,
count: usize,
) -> guest_memory::Result<usize>
where
F: Read,
F: ReadVolatile,
{
match self {
GuestRegionHybrid::Mmap(region) => region.read_from(addr, src, count),
GuestRegionHybrid::Raw(region) => region.read_from(addr, src, count),
GuestRegionHybrid::Mmap(region) => region.read_volatile_from(addr, src, count),
GuestRegionHybrid::Raw(region) => region.read_volatile_from(addr, src, count),
}
}
fn read_exact_from<F>(
fn read_exact_volatile_from<F>(
&self,
addr: MemoryRegionAddress,
src: &mut F,
count: usize,
) -> guest_memory::Result<()>
where
F: Read,
F: ReadVolatile,
{
match self {
GuestRegionHybrid::Mmap(region) => region.read_exact_from(addr, src, count),
GuestRegionHybrid::Raw(region) => region.read_exact_from(addr, src, count),
GuestRegionHybrid::Mmap(region) => region.read_exact_volatile_from(addr, src, count),
GuestRegionHybrid::Raw(region) => region.read_exact_volatile_from(addr, src, count),
}
}
fn write_to<F>(
fn write_volatile_to<F>(
&self,
addr: MemoryRegionAddress,
dst: &mut F,
count: usize,
) -> guest_memory::Result<usize>
where
F: Write,
F: WriteVolatile,
{
match self {
GuestRegionHybrid::Mmap(region) => region.write_to(addr, dst, count),
GuestRegionHybrid::Raw(region) => region.write_to(addr, dst, count),
GuestRegionHybrid::Mmap(region) => region.write_volatile_to(addr, dst, count),
GuestRegionHybrid::Raw(region) => region.write_volatile_to(addr, dst, count),
}
}
fn write_all_to<F>(
fn write_all_volatile_to<F>(
&self,
addr: MemoryRegionAddress,
dst: &mut F,
count: usize,
) -> guest_memory::Result<()>
where
F: Write,
F: WriteVolatile,
{
match self {
GuestRegionHybrid::Mmap(region) => region.write_all_to(addr, dst, count),
GuestRegionHybrid::Raw(region) => region.write_all_to(addr, dst, count),
GuestRegionHybrid::Mmap(region) => region.write_all_volatile_to(addr, dst, count),
GuestRegionHybrid::Raw(region) => region.write_all_volatile_to(addr, dst, count),
}
}
@@ -168,7 +167,7 @@ impl<B: Bitmap> GuestMemoryRegion for GuestRegionHybrid<B> {
}
}
fn bitmap(&self) -> &Self::B {
fn bitmap(&self) -> BS<'_, Self::B> {
match self {
GuestRegionHybrid::Mmap(region) => region.bitmap(),
GuestRegionHybrid::Raw(region) => region.bitmap(),
@@ -189,20 +188,6 @@ impl<B: Bitmap> GuestMemoryRegion for GuestRegionHybrid<B> {
}
}
unsafe fn as_slice(&self) -> Option<&[u8]> {
match self {
GuestRegionHybrid::Mmap(region) => region.as_slice(),
GuestRegionHybrid::Raw(region) => region.as_slice(),
}
}
unsafe fn as_mut_slice(&self) -> Option<&mut [u8]> {
match self {
GuestRegionHybrid::Mmap(region) => region.as_mut_slice(),
GuestRegionHybrid::Raw(region) => region.as_mut_slice(),
}
}
fn get_slice(
&self,
offset: MemoryRegionAddress,
@@ -223,6 +208,39 @@ impl<B: Bitmap> GuestMemoryRegion for GuestRegionHybrid<B> {
}
}
impl<B: Bitmap> GuestRegionHybrid<B> {
/// Returns a slice corresponding to the region.
///
/// # Safety
/// This is safe because we mapped the area at addr ourselves, so this slice will not
/// overflow. However, it is possible to alias.
pub unsafe fn as_slice(&self) -> Option<&[u8]> {
match self {
GuestRegionHybrid::Mmap(region) => {
let addr = region.get_host_address(MemoryRegionAddress(0)).ok()?;
Some(std::slice::from_raw_parts(addr, region.len() as usize))
}
GuestRegionHybrid::Raw(region) => region.as_slice(),
}
}
/// Returns a mutable slice corresponding to the region.
///
/// # Safety
/// This is safe because we mapped the area at addr ourselves, so this slice will not
/// overflow. However, it is possible to alias.
#[allow(clippy::mut_from_ref)]
pub unsafe fn as_mut_slice(&self) -> Option<&mut [u8]> {
match self {
GuestRegionHybrid::Mmap(region) => {
let addr = region.get_host_address(MemoryRegionAddress(0)).ok()?;
Some(std::slice::from_raw_parts_mut(addr, region.len() as usize))
}
GuestRegionHybrid::Raw(region) => region.as_mut_slice(),
}
}
}
/// [`GuestMemory`](trait.GuestMemory.html) implementation that manage hybrid types of guest memory
/// regions.
///
@@ -248,7 +266,9 @@ impl<B: Bitmap> GuestMemoryHybrid<B> {
/// * `regions` - The vector of regions.
/// The regions shouldn't overlap and they should be sorted
/// by the starting address.
pub fn from_regions(mut regions: Vec<GuestRegionHybrid<B>>) -> Result<Self, Error> {
pub fn from_regions(
mut regions: Vec<GuestRegionHybrid<B>>,
) -> Result<Self, GuestRegionCollectionError> {
Self::from_arc_regions(regions.drain(..).map(Arc::new).collect())
}
@@ -264,9 +284,11 @@ impl<B: Bitmap> GuestMemoryHybrid<B> {
/// * `regions` - The vector of `Arc` regions.
/// The regions shouldn't overlap and they should be sorted
/// by the starting address.
pub fn from_arc_regions(regions: Vec<Arc<GuestRegionHybrid<B>>>) -> Result<Self, Error> {
pub fn from_arc_regions(
regions: Vec<Arc<GuestRegionHybrid<B>>>,
) -> Result<Self, GuestRegionCollectionError> {
if regions.is_empty() {
return Err(Error::NoMemoryRegion);
return Err(GuestRegionCollectionError::NoMemoryRegion);
}
for window in regions.windows(2) {
@@ -274,11 +296,11 @@ impl<B: Bitmap> GuestMemoryHybrid<B> {
let next = &window[1];
if prev.start_addr() > next.start_addr() {
return Err(Error::UnsortedMemoryRegions);
return Err(GuestRegionCollectionError::UnsortedMemoryRegions);
}
if prev.last_addr() >= next.start_addr() {
return Err(Error::MemoryRegionOverlap);
return Err(GuestRegionCollectionError::MemoryRegionOverlap);
}
}
@@ -292,7 +314,7 @@ impl<B: Bitmap> GuestMemoryHybrid<B> {
pub fn insert_region(
&self,
region: Arc<GuestRegionHybrid<B>>,
) -> Result<GuestMemoryHybrid<B>, Error> {
) -> Result<GuestMemoryHybrid<B>, GuestRegionCollectionError> {
let mut regions = self.regions.clone();
regions.push(region);
regions.sort_by_key(|x| x.start_addr());
@@ -310,7 +332,7 @@ impl<B: Bitmap> GuestMemoryHybrid<B> {
&self,
base: GuestAddress,
size: GuestUsize,
) -> Result<(GuestMemoryHybrid<B>, Arc<GuestRegionHybrid<B>>), Error> {
) -> Result<(GuestMemoryHybrid<B>, Arc<GuestRegionHybrid<B>>), GuestRegionCollectionError> {
if let Ok(region_index) = self.regions.binary_search_by_key(&base, |x| x.start_addr()) {
if self.regions.get(region_index).unwrap().len() as GuestUsize == size {
let mut regions = self.regions.clone();
@@ -319,32 +341,13 @@ impl<B: Bitmap> GuestMemoryHybrid<B> {
}
}
Err(Error::InvalidGuestRegion)
Err(GuestRegionCollectionError::NoMemoryRegion)
}
}
/// An iterator over the elements of `GuestMemoryHybrid`.
///
/// This struct is created by `GuestMemory::iter()`. See its documentation for more.
pub struct Iter<'a, B>(std::slice::Iter<'a, Arc<GuestRegionHybrid<B>>>);
impl<'a, B> Iterator for Iter<'a, B> {
type Item = &'a GuestRegionHybrid<B>;
fn next(&mut self) -> Option<Self::Item> {
self.0.next().map(AsRef::as_ref)
}
}
impl<'a, B: 'a> GuestMemoryIterator<'a, GuestRegionHybrid<B>> for GuestMemoryHybrid<B> {
type Iter = Iter<'a, B>;
}
impl<B: Bitmap + 'static> GuestMemory for GuestMemoryHybrid<B> {
type R = GuestRegionHybrid<B>;
type I = Self;
fn num_regions(&self) -> usize {
self.regions.len()
}
@@ -359,15 +362,15 @@ impl<B: Bitmap + 'static> GuestMemory for GuestMemoryHybrid<B> {
index.map(|x| self.regions[x].as_ref())
}
fn iter(&self) -> Iter<'_, B> {
Iter(self.regions.iter())
fn iter(&self) -> impl Iterator<Item = &GuestRegionHybrid<B>> {
self.regions.iter().map(AsRef::as_ref)
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::Seek;
use std::io::{Read, Seek, Write};
use vm_memory::{GuestMemoryError, MmapRegion};
use vmm_sys_util::tempfile::TempFile;
@@ -654,14 +657,14 @@ mod tests {
// Rewind file pointer after write operation.
file_to_write_mmap_region.rewind().unwrap();
guest_region
.read_from(write_addr, &mut file_to_write_mmap_region, size_of_file)
.read_volatile_from(write_addr, &mut file_to_write_mmap_region, size_of_file)
.unwrap();
let mut file_read_from_mmap_region = TempFile::new().unwrap().into_file();
file_read_from_mmap_region
.set_len(size_of_file as u64)
.unwrap();
guest_region
.write_all_to(write_addr, &mut file_read_from_mmap_region, size_of_file)
.write_all_volatile_to(write_addr, &mut file_read_from_mmap_region, size_of_file)
.unwrap();
// Rewind file pointer after write operation.
file_read_from_mmap_region.rewind().unwrap();
@@ -679,7 +682,7 @@ mod tests {
let invalid_addr = MemoryRegionAddress(0x900);
assert!(matches!(
guest_region
.read_from(invalid_addr, &mut file_to_write_mmap_region, size_of_file)
.read_volatile_from(invalid_addr, &mut file_to_write_mmap_region, size_of_file)
.err()
.unwrap(),
GuestMemoryError::InvalidBackendAddress
@@ -689,7 +692,7 @@ mod tests {
let invalid_addr = MemoryRegionAddress(0x900);
assert!(matches!(
guest_region
.write_to(invalid_addr, &mut file_read_from_mmap_region, size_of_file)
.write_volatile_to(invalid_addr, &mut file_read_from_mmap_region, size_of_file)
.err()
.unwrap(),
GuestMemoryError::InvalidBackendAddress
@@ -719,14 +722,14 @@ mod tests {
// Rewind file pointer after write operation.
file_to_write_mmap_region.rewind().unwrap();
guest_region
.read_from(write_addr, &mut file_to_write_mmap_region, size_of_file)
.read_volatile_from(write_addr, &mut file_to_write_mmap_region, size_of_file)
.unwrap();
let mut file_read_from_mmap_region = TempFile::new().unwrap().into_file();
file_read_from_mmap_region
.set_len(size_of_file as u64)
.unwrap();
guest_region
.write_all_to(write_addr, &mut file_read_from_mmap_region, size_of_file)
.write_all_volatile_to(write_addr, &mut file_read_from_mmap_region, size_of_file)
.unwrap();
// Rewind file pointer after write operation.
file_read_from_mmap_region.rewind().unwrap();
@@ -744,7 +747,7 @@ mod tests {
let invalid_addr = MemoryRegionAddress(0x900);
assert!(matches!(
guest_region
.read_from(invalid_addr, &mut file_to_write_mmap_region, size_of_file)
.read_volatile_from(invalid_addr, &mut file_to_write_mmap_region, size_of_file)
.err()
.unwrap(),
GuestMemoryError::InvalidBackendAddress
@@ -754,7 +757,7 @@ mod tests {
let invalid_addr = MemoryRegionAddress(0x900);
assert!(matches!(
guest_region
.write_to(invalid_addr, &mut file_read_from_mmap_region, size_of_file)
.write_volatile_to(invalid_addr, &mut file_read_from_mmap_region, size_of_file)
.err()
.unwrap(),
GuestMemoryError::InvalidBackendAddress
@@ -788,14 +791,14 @@ mod tests {
.unwrap();
file_to_write_mmap_region.rewind().unwrap();
guest_mmap_region
.read_exact_from(write_addr, &mut file_to_write_mmap_region, size_of_file)
.read_exact_volatile_from(write_addr, &mut file_to_write_mmap_region, size_of_file)
.unwrap();
let mut file_read_from_mmap_region = TempFile::new().unwrap().into_file();
file_read_from_mmap_region
.set_len(size_of_file as u64)
.unwrap();
guest_mmap_region
.write_all_to(write_addr, &mut file_read_from_mmap_region, size_of_file)
.write_all_volatile_to(write_addr, &mut file_read_from_mmap_region, size_of_file)
.unwrap();
file_read_from_mmap_region.rewind().unwrap();
let mut content = String::new();
@@ -818,14 +821,14 @@ mod tests {
.unwrap();
file_to_write_raw_region.rewind().unwrap();
guest_raw_region
.read_exact_from(write_addr, &mut file_to_write_raw_region, size_of_file)
.read_exact_volatile_from(write_addr, &mut file_to_write_raw_region, size_of_file)
.unwrap();
let mut file_read_from_raw_region = TempFile::new().unwrap().into_file();
file_read_from_raw_region
.set_len(size_of_file as u64)
.unwrap();
guest_raw_region
.write_all_to(write_addr, &mut file_read_from_raw_region, size_of_file)
.write_all_volatile_to(write_addr, &mut file_read_from_raw_region, size_of_file)
.unwrap();
file_read_from_raw_region.rewind().unwrap();
let mut content = String::new();
@@ -842,7 +845,11 @@ mod tests {
let invalid_addr = MemoryRegionAddress(0x900);
assert!(matches!(
guest_mmap_region
.read_exact_from(invalid_addr, &mut file_to_write_mmap_region, size_of_file)
.read_exact_volatile_from(
invalid_addr,
&mut file_to_write_mmap_region,
size_of_file
)
.err()
.unwrap(),
GuestMemoryError::InvalidBackendAddress
@@ -852,7 +859,7 @@ mod tests {
let invalid_addr = MemoryRegionAddress(0x900);
assert!(matches!(
guest_mmap_region
.write_all_to(invalid_addr, &mut file_read_from_mmap_region, size_of_file)
.write_all_volatile_to(invalid_addr, &mut file_read_from_mmap_region, size_of_file)
.err()
.unwrap(),
GuestMemoryError::InvalidBackendAddress
@@ -862,7 +869,7 @@ mod tests {
let invalid_addr = MemoryRegionAddress(0x900);
assert!(matches!(
guest_raw_region
.read_exact_from(invalid_addr, &mut file_to_write_raw_region, size_of_file)
.read_exact_volatile_from(invalid_addr, &mut file_to_write_raw_region, size_of_file)
.err()
.unwrap(),
GuestMemoryError::InvalidBackendAddress
@@ -872,7 +879,7 @@ mod tests {
let invalid_addr = MemoryRegionAddress(0x900);
assert!(matches!(
guest_raw_region
.write_all_to(invalid_addr, &mut file_read_from_raw_region, size_of_file)
.write_all_volatile_to(invalid_addr, &mut file_read_from_raw_region, size_of_file)
.err()
.unwrap(),
GuestMemoryError::InvalidBackendAddress
@@ -1076,13 +1083,16 @@ mod tests {
let guest_region = GuestMemoryHybrid::<()>::from_regions(regions);
assert!(matches!(
guest_region.err().unwrap(),
Error::UnsortedMemoryRegions
GuestRegionCollectionError::UnsortedMemoryRegions
));
// Error no memory region case.
let regions = Vec::<GuestRegionHybrid<()>>::new();
let guest_region = GuestMemoryHybrid::<()>::from_regions(regions);
assert!(matches!(guest_region.err().unwrap(), Error::NoMemoryRegion));
assert!(matches!(
guest_region.err().unwrap(),
GuestRegionCollectionError::NoMemoryRegion
));
}
#[test]

View File

@@ -1,7 +1,6 @@
// Copyright (C) 2022 Alibaba Cloud. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use std::io::{Read, Write};
use std::sync::atomic::Ordering;
use vm_memory::bitmap::{Bitmap, BS};
@@ -9,7 +8,7 @@ use vm_memory::mmap::NewBitmap;
use vm_memory::volatile_memory::compute_offset;
use vm_memory::{
guest_memory, volatile_memory, Address, AtomicAccess, Bytes, FileOffset, GuestAddress,
GuestMemoryRegion, GuestUsize, MemoryRegionAddress, VolatileSlice,
GuestMemoryRegion, GuestUsize, MemoryRegionAddress, ReadVolatile, VolatileSlice, WriteVolatile,
};
/// Guest memory region for virtio-fs DAX window.
@@ -73,67 +72,67 @@ impl<B: Bitmap> Bytes<MemoryRegionAddress> for GuestRegionRaw<B> {
.map_err(Into::into)
}
fn read_from<F>(
fn read_volatile_from<F>(
&self,
addr: MemoryRegionAddress,
src: &mut F,
count: usize,
) -> guest_memory::Result<usize>
where
F: Read,
F: ReadVolatile,
{
let maddr = addr.raw_value() as usize;
self.as_volatile_slice()
.unwrap()
.read_from::<F>(maddr, src, count)
.read_volatile_from::<F>(maddr, src, count)
.map_err(Into::into)
}
fn read_exact_from<F>(
fn read_exact_volatile_from<F>(
&self,
addr: MemoryRegionAddress,
src: &mut F,
count: usize,
) -> guest_memory::Result<()>
where
F: Read,
F: ReadVolatile,
{
let maddr = addr.raw_value() as usize;
self.as_volatile_slice()
.unwrap()
.read_exact_from::<F>(maddr, src, count)
.read_exact_volatile_from::<F>(maddr, src, count)
.map_err(Into::into)
}
fn write_to<F>(
fn write_volatile_to<F>(
&self,
addr: MemoryRegionAddress,
dst: &mut F,
count: usize,
) -> guest_memory::Result<usize>
where
F: Write,
F: WriteVolatile,
{
let maddr = addr.raw_value() as usize;
self.as_volatile_slice()
.unwrap()
.write_to::<F>(maddr, dst, count)
.write_volatile_to::<F>(maddr, dst, count)
.map_err(Into::into)
}
fn write_all_to<F>(
fn write_all_volatile_to<F>(
&self,
addr: MemoryRegionAddress,
dst: &mut F,
count: usize,
) -> guest_memory::Result<()>
where
F: Write,
F: WriteVolatile,
{
let maddr = addr.raw_value() as usize;
self.as_volatile_slice()
.unwrap()
.write_all_to::<F>(maddr, dst, count)
.write_all_volatile_to::<F>(maddr, dst, count)
.map_err(Into::into)
}
@@ -170,8 +169,8 @@ impl<B: Bitmap> GuestMemoryRegion for GuestRegionRaw<B> {
self.guest_base
}
fn bitmap(&self) -> &Self::B {
&self.bitmap
fn bitmap(&self) -> BS<'_, Self::B> {
self.bitmap.slice_at(0)
}
fn get_host_address(&self, addr: MemoryRegionAddress) -> guest_memory::Result<*mut u8> {
@@ -186,18 +185,6 @@ impl<B: Bitmap> GuestMemoryRegion for GuestRegionRaw<B> {
None
}
unsafe fn as_slice(&self) -> Option<&[u8]> {
// This is safe because we mapped the area at addr ourselves, so this slice will not
// overflow. However, it is possible to alias.
Some(std::slice::from_raw_parts(self.addr, self.size))
}
unsafe fn as_mut_slice(&self) -> Option<&mut [u8]> {
// This is safe because we mapped the area at addr ourselves, so this slice will not
// overflow. However, it is possible to alias.
Some(std::slice::from_raw_parts_mut(self.addr, self.size))
}
fn get_slice(
&self,
offset: MemoryRegionAddress,
@@ -216,6 +203,7 @@ impl<B: Bitmap> GuestMemoryRegion for GuestRegionRaw<B> {
(self.addr as usize + offset) as *mut _,
count,
self.bitmap.slice_at(offset),
None,
)
})
}
@@ -226,6 +214,27 @@ impl<B: Bitmap> GuestMemoryRegion for GuestRegionRaw<B> {
}
}
impl<B: Bitmap> GuestRegionRaw<B> {
/// Returns a slice corresponding to the region.
///
/// # Safety
/// This is safe because we mapped the area at addr ourselves, so this slice will not
/// overflow. However, it is possible to alias.
pub unsafe fn as_slice(&self) -> Option<&[u8]> {
Some(std::slice::from_raw_parts(self.addr, self.size))
}
/// Returns a mutable slice corresponding to the region.
///
/// # Safety
/// This is safe because we mapped the area at addr ourselves, so this slice will not
/// overflow. However, it is possible to alias.
#[allow(clippy::mut_from_ref)]
pub unsafe fn as_mut_slice(&self) -> Option<&mut [u8]> {
Some(std::slice::from_raw_parts_mut(self.addr, self.size))
}
}
#[cfg(test)]
mod tests {
extern crate vmm_sys_util;
@@ -348,7 +357,7 @@ mod tests {
unsafe { GuestRegionRaw::<()>::new(GuestAddress(0x10_0000), &mut buf as *mut _, 1024) };
let s = m.get_slice(MemoryRegionAddress(2), 3).unwrap();
assert_eq!(s.as_ptr(), &mut buf[2] as *mut _);
assert_eq!(s.ptr_guard().as_ptr(), &buf[2] as *const _);
}
/*
@@ -600,7 +609,7 @@ mod tests {
File::open(Path::new("c:\\Windows\\system32\\ntoskrnl.exe")).unwrap()
};
gm.write_obj(!0u32, addr).unwrap();
gm.read_exact_from(addr, &mut file, mem::size_of::<u32>())
gm.read_exact_volatile_from(addr, &mut file, mem::size_of::<u32>())
.unwrap();
let value: u32 = gm.read_obj(addr).unwrap();
if cfg!(unix) {
@@ -610,7 +619,7 @@ mod tests {
}
let mut sink = Vec::new();
gm.write_all_to(addr, &mut sink, mem::size_of::<u32>())
gm.write_all_volatile_to(addr, &mut sink, mem::size_of::<u32>())
.unwrap();
if cfg!(unix) {
assert_eq!(sink, vec![0; mem::size_of::<u32>()]);

View File

@@ -113,20 +113,23 @@ arm64_sys_reg!(MPIDR_EL1, 3, 0, 0, 0, 5);
/// * `mem` - Reserved DRAM for current VM.
pub fn setup_regs(vcpu: &VcpuFd, cpu_id: u8, boot_ip: u64, fdt_address: u64) -> Result<()> {
// Get the register index of the PSTATE (Processor State) register.
vcpu.set_one_reg(arm64_core_reg!(pstate), PSTATE_FAULT_BITS_64 as u128)
.map_err(Error::SetCoreRegister)?;
vcpu.set_one_reg(
arm64_core_reg!(pstate),
&(PSTATE_FAULT_BITS_64 as u128).to_le_bytes(),
)
.map_err(Error::SetCoreRegister)?;
// Other vCPUs are powered off initially awaiting PSCI wakeup.
if cpu_id == 0 {
// Setting the PC (Processor Counter) to the current program address (kernel address).
vcpu.set_one_reg(arm64_core_reg!(pc), boot_ip as u128)
vcpu.set_one_reg(arm64_core_reg!(pc), &(boot_ip as u128).to_le_bytes())
.map_err(Error::SetCoreRegister)?;
// Last mandatory thing to set -> the address pointing to the FDT (also called DTB).
// "The device tree blob (dtb) must be placed on an 8-byte boundary and must
// not exceed 2 megabytes in size." -> https://www.kernel.org/doc/Documentation/arm64/booting.txt.
// We are choosing to place it the end of DRAM. See `get_fdt_addr`.
vcpu.set_one_reg(arm64_core_reg!(regs), fdt_address as u128)
vcpu.set_one_reg(arm64_core_reg!(regs), &(fdt_address as u128).to_le_bytes())
.map_err(Error::SetCoreRegister)?;
}
Ok(())
@@ -157,9 +160,10 @@ pub fn is_system_register(regid: u64) -> bool {
///
/// * `vcpu` - Structure for the VCPU that holds the VCPU's fd.
pub fn read_mpidr(vcpu: &VcpuFd) -> Result<u64> {
vcpu.get_one_reg(MPIDR_EL1)
.map(|value| value as u64)
.map_err(Error::GetSysRegister)
let mut reg_data = 0u128.to_le_bytes();
vcpu.get_one_reg(MPIDR_EL1, &mut reg_data)
.map_err(Error::GetSysRegister)?;
Ok(u128::from_le_bytes(reg_data) as u64)
}
#[cfg(test)]

View File

@@ -10,10 +10,10 @@ This repository contains the following submodules:
| Name | Arch| Description |
| --- | --- | --- |
| [`bootparam`](src/x86_64/bootparam.rs) | x86_64 | Magic addresses externally used to lay out x86_64 VMs |
| [fdt](src/aarch64/fdt.rs) | aarch64| Create FDT for Aarch64 systems |
| [layout](src/x86_64/layout.rs) | x86_64 | x86_64 layout constants |
| [layout](src/aarch64/layout.rs/) | aarch64 | aarch64 layout constants |
| [mptable](src/x86_64/mptable.rs) | x86_64 | MP Table configurations used for defining VM boot status |
| [`fdt`](src/aarch64/fdt.rs) | aarch64| Create FDT for Aarch64 systems |
| [`layout`](src/x86_64/layout.rs) | x86_64 | x86_64 layout constants |
| [`layout`](src/aarch64/layout.rs/) | aarch64 | aarch64 layout constants |
| [`mptable`](src/x86_64/mptable.rs) | x86_64 | MP Table configurations used for defining VM boot status |
## Acknowledgement

View File

@@ -10,7 +10,6 @@
use libc::c_char;
use std::collections::HashMap;
use std::io;
use std::mem;
use std::result;
use std::slice;
@@ -205,7 +204,7 @@ pub fn setup_mptable<M: GuestMemory>(
return Err(Error::AddressOverflow);
}
mem.read_from(base_mp, &mut io::repeat(0), mp_size)
mem.write_slice(&vec![0u8; mp_size], base_mp)
.map_err(|_| Error::Clear)?;
{
@@ -452,23 +451,11 @@ mod tests {
let mpc_offset = GuestAddress(u64::from(mpf_intel.0.physptr));
let mpc_table: MpcTableWrapper = mem.read_obj(mpc_offset).unwrap();
struct Sum(u8);
impl io::Write for Sum {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
for v in buf.iter() {
self.0 = self.0.wrapping_add(*v);
}
Ok(buf.len())
}
fn flush(&mut self) -> io::Result<()> {
Ok(())
}
}
let mut sum = Sum(0);
mem.write_to(mpc_offset, &mut sum, mpc_table.0.length as usize)
let mut buf = Vec::new();
mem.write_volatile_to(mpc_offset, &mut buf, mpc_table.0.length as usize)
.unwrap();
assert_eq!(sum.0, 0);
let sum: u8 = buf.iter().fold(0u8, |acc, &v| acc.wrapping_add(v));
assert_eq!(sum, 0);
}
#[test]

View File

@@ -25,7 +25,7 @@ use std::collections::HashMap;
use std::io::{Error, ErrorKind};
use std::sync::{Arc, Mutex};
use kvm_bindings::{kvm_irq_routing, kvm_irq_routing_entry};
use kvm_bindings::{kvm_irq_routing_entry, KvmIrqRouting as KvmIrqRoutingWrapper};
use kvm_ioctls::VmFd;
use super::*;
@@ -196,26 +196,18 @@ impl KvmIrqRouting {
}
fn set_routing(&self, routes: &HashMap<u64, kvm_irq_routing_entry>) -> Result<()> {
// Allocate enough buffer memory.
let elem_sz = std::mem::size_of::<kvm_irq_routing>();
let total_sz = std::mem::size_of::<kvm_irq_routing_entry>() * routes.len() + elem_sz;
let elem_cnt = total_sz.div_ceil(elem_sz);
let mut irq_routings = Vec::<kvm_irq_routing>::with_capacity(elem_cnt);
irq_routings.resize_with(elem_cnt, Default::default);
let mut irq_routing = KvmIrqRoutingWrapper::new(routes.len())
.map_err(|_| Error::other("Failed to create KvmIrqRouting"))?;
// Prepare the irq_routing header.
let irq_routing = &mut irq_routings[0];
irq_routing.nr = routes.len() as u32;
irq_routing.flags = 0;
// Safe because we have just allocated enough memory above.
let irq_routing_entries = unsafe { irq_routing.entries.as_mut_slice(routes.len()) };
for (idx, entry) in routes.values().enumerate() {
irq_routing_entries[idx] = *entry;
{
let irq_routing_entries = irq_routing.as_mut_slice();
for (idx, entry) in routes.values().enumerate() {
irq_routing_entries[idx] = *entry;
}
}
self.vm_fd
.set_gsi_routing(irq_routing)
.set_gsi_routing(&irq_routing)
.map_err(from_sys_util_errno)?;
Ok(())

View File

@@ -3,7 +3,7 @@
This crate is a collection of modules that provides helpers and utilities to create a TDX Dragonball VM.
Currently this crate involves:
- tdx-ioctls
- `tdx-ioctls`
## Acknowledgement

View File

@@ -11,7 +11,7 @@ use kvm_bindings::{CpuId, __IncompleteArrayField, KVMIO};
use thiserror::Error;
use vmm_sys_util::fam::{FamStruct, FamStructWrapper};
use vmm_sys_util::ioctl::ioctl_with_val;
use vmm_sys_util::{generate_fam_struct_impl, ioctl_ioc_nr, ioctl_iowr_nr};
use vmm_sys_util::{generate_fam_struct_impl, ioctl_iowr_nr};
/// Tdx capability list.
pub type TdxCaps = FamStructWrapper<TdxCapabilities>;

View File

@@ -13,7 +13,7 @@ use std::os::raw::*;
use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
use vmm_sys_util::ioctl::{ioctl_with_mut_ref, ioctl_with_ref, ioctl_with_val};
use vmm_sys_util::{ioctl_ioc_nr, ioctl_iow_nr};
use vmm_sys_util::ioctl_iow_nr;
use crate::net::net_gen;

View File

@@ -23,15 +23,15 @@ dbs-address-space = { workspace = true }
dbs-boot = { workspace = true }
epoll = ">=4.3.1, <4.3.2"
io-uring = "0.5.2"
fuse-backend-rs = { version = "0.10.5", optional = true }
fuse-backend-rs = { version = "0.14.0", optional = true }
kvm-bindings = { workspace = true }
kvm-ioctls = { workspace = true }
libc = "0.2.119"
log = "0.4.14"
nix = "0.24.3"
nydus-api = "0.3.1"
nydus-rafs = "0.3.2"
nydus-storage = "0.6.4"
nydus-api = "0.4.1"
nydus-rafs = "0.4.1"
nydus-storage = "0.7.2"
rlimit = "0.7.0"
serde = "1.0.27"
serde_json = "1.0.9"
@@ -42,8 +42,9 @@ virtio-queue = { workspace = true }
vmm-sys-util = { workspace = true }
vm-memory = { workspace = true, features = ["backend-mmap"] }
sendfd = "0.4.3"
vhost-rs = { version = "0.6.1", package = "vhost", optional = true }
vhost-rs = { version = "0.15.0", package = "vhost", optional = true }
timerfd = "1.0"
kata-sys-util = { workspace = true}
[dev-dependencies]
vm-memory = { workspace = true, features = ["backend-mmap", "backend-atomic"] }
@@ -63,7 +64,7 @@ virtio-fs-pro = [
]
virtio-mem = ["virtio-mmio"]
virtio-balloon = ["virtio-mmio"]
vhost = ["virtio-mmio", "vhost-rs/vhost-user-master", "vhost-rs/vhost-kern"]
vhost = ["virtio-mmio", "vhost-rs/vhost-user-frontend", "vhost-rs/vhost-kern"]
vhost-net = ["vhost", "vhost-rs/vhost-net"]
vhost-user = ["vhost"]
vhost-user-fs = ["vhost-user"]

View File

@@ -34,7 +34,7 @@ use dbs_utils::epoll_manager::{
use dbs_utils::metric::{IncMetric, SharedIncMetric, SharedStoreMetric, StoreMetric};
use log::{debug, error, info, trace};
use serde::Serialize;
use virtio_bindings::bindings::virtio_blk::VIRTIO_F_VERSION_1;
use virtio_bindings::bindings::virtio_config::VIRTIO_F_VERSION_1;
use virtio_queue::{QueueOwnedT, QueueSync, QueueT};
use vm_memory::{
ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryRegion,

View File

@@ -20,6 +20,7 @@ use dbs_utils::{
};
use log::{debug, error, info, warn};
use virtio_bindings::bindings::virtio_blk::*;
use virtio_bindings::bindings::virtio_config::VIRTIO_F_VERSION_1;
use virtio_queue::QueueT;
use vm_memory::GuestMemoryRegion;
use vmm_sys_util::eventfd::{EventFd, EFD_NONBLOCK};

View File

@@ -2,13 +2,13 @@
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
use std::io::{self, Seek, SeekFrom, Write};
use std::io::{self, Read, Seek, SeekFrom, Write};
use std::ops::Deref;
use std::result;
use log::error;
use virtio_bindings::bindings::virtio_blk::*;
use virtio_queue::{Descriptor, DescriptorChain};
use virtio_queue::{desc::split::Descriptor, DescriptorChain};
use vm_memory::{ByteValued, Bytes, GuestAddress, GuestMemory, GuestMemoryError};
use crate::{
@@ -231,13 +231,19 @@ impl Request {
for io in data_descs {
match self.request_type {
RequestType::In => {
mem.read_from(GuestAddress(io.data_addr), disk, io.data_len)
let mut buf = vec![0u8; io.data_len];
disk.read_exact(&mut buf)
.map_err(|e| ExecuteError::Read(GuestMemoryError::IOError(e)))?;
mem.write_slice(&buf, GuestAddress(io.data_addr))
.map_err(ExecuteError::Read)?;
len += io.data_len;
}
RequestType::Out => {
mem.write_to(GuestAddress(io.data_addr), disk, io.data_len)
let mut buf = vec![0u8; io.data_len];
mem.read_slice(&mut buf, GuestAddress(io.data_addr))
.map_err(ExecuteError::Write)?;
disk.write_all(&buf)
.map_err(|e| ExecuteError::Write(GuestMemoryError::IOError(e)))?;
}
RequestType::Flush => match disk.flush() {
Ok(_) => {}

View File

@@ -2,6 +2,7 @@
//
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
use kata_sys_util::netns::NetnsGuard;
use std::any::Any;
use std::collections::HashMap;
use std::ffi::CString;
@@ -29,7 +30,7 @@ use nydus_api::ConfigV2;
use nydus_rafs::blobfs::{BlobFs, Config as BlobfsConfig};
use nydus_rafs::{fs::Rafs, RafsIoRead};
use rlimit::Resource;
use virtio_bindings::bindings::virtio_blk::VIRTIO_F_VERSION_1;
use virtio_bindings::bindings::virtio_config::VIRTIO_F_VERSION_1;
use virtio_queue::QueueT;
use vm_memory::{
FileOffset, GuestAddress, GuestAddressSpace, GuestRegionMmap, GuestUsize, MmapRegion,
@@ -233,6 +234,7 @@ impl<AS: GuestAddressSpace> VirtioFs<AS> {
CachePolicy::Always => Duration::from_secs(CACHE_ALWAYS_TIMEOUT),
CachePolicy::Never => Duration::from_secs(CACHE_NONE_TIMEOUT),
CachePolicy::Auto => Duration::from_secs(CACHE_AUTO_TIMEOUT),
CachePolicy::Metadata => Duration::from_secs(CACHE_AUTO_TIMEOUT),
}
}
@@ -453,6 +455,11 @@ impl<AS: GuestAddressSpace> VirtioFs<AS> {
prefetch_list_path: Option<String>,
) -> FsResult<()> {
debug!("http_server rafs");
// We need to make sure the nydus worker thread in the runD main process's network namespace
// instead of the vmm thread's netns, which wouldn't access the host network.
let _netns_guard =
NetnsGuard::new("/proc/self/ns/net").map_err(|e| FsError::BackendFs(e.to_string()))?;
let file = Path::new(&source);
let (mut rafs, rafs_cfg) = match config.as_ref() {
Some(cfg) => {
@@ -541,7 +548,7 @@ impl<AS: GuestAddressSpace> VirtioFs<AS> {
)));
}
};
let any_fs = rootfs.deref().as_any();
let any_fs = rootfs.0.deref().as_any();
if let Some(fs_swap) = any_fs.downcast_ref::<Rafs>() {
let mut file = <dyn RafsIoRead>::from_file(&source)
.map_err(|e| FsError::BackendFs(format!("RafsIoRead failed: {e:?}")))?;
@@ -611,8 +618,7 @@ impl<AS: GuestAddressSpace> VirtioFs<AS> {
};
let region = Arc::new(
GuestRegionMmap::new(mmap_region, GuestAddress(guest_addr))
.map_err(Error::InsertMmap)?,
GuestRegionMmap::new(mmap_region, GuestAddress(guest_addr)).ok_or(Error::InsertMmap)?,
);
self.handler.insert_region(region.clone())?;

View File

@@ -245,8 +245,8 @@ pub enum Error {
#[error("set user memory region failed: {0}")]
SetUserMemoryRegion(kvm_ioctls::Error),
/// Inserting mmap region failed.
#[error("inserting mmap region failed: {0}")]
InsertMmap(vm_memory::mmap::Error),
#[error("inserting mmap region failed")]
InsertMmap,
/// Failed to set madvise on guest memory region.
#[error("failed to set madvice() on guest memory region")]
Madvise(#[source] nix::Error),

View File

@@ -30,7 +30,7 @@ use dbs_utils::epoll_manager::{
};
use kvm_ioctls::VmFd;
use log::{debug, error, info, trace, warn};
use virtio_bindings::bindings::virtio_blk::VIRTIO_F_VERSION_1;
use virtio_bindings::bindings::virtio_config::VIRTIO_F_VERSION_1;
use virtio_queue::{DescriptorChain, QueueOwnedT, QueueSync, QueueT};
use vm_memory::{
ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryError,
@@ -1389,7 +1389,7 @@ pub(crate) mod tests {
.map_err(Error::NewMmapRegion)?;
let region =
Arc::new(GuestRegionMmap::new(mmap_region, guest_addr).map_err(Error::InsertMmap)?);
Arc::new(GuestRegionMmap::new(mmap_region, guest_addr).ok_or(Error::InsertMmap)?);
Ok(region)
}

View File

@@ -22,6 +22,7 @@ use dbs_utils::net::{net_gen, MacAddr, Tap};
use dbs_utils::rate_limiter::{BucketUpdate, RateLimiter, TokenType};
use libc;
use log::{debug, error, info, trace, warn};
use virtio_bindings::bindings::virtio_config::VIRTIO_F_VERSION_1;
use virtio_bindings::bindings::virtio_net::*;
use virtio_queue::{QueueOwnedT, QueueSync, QueueT};
use vm_memory::{Bytes, GuestAddress, GuestAddressSpace, GuestMemoryRegion, GuestRegionMmap};

View File

@@ -6,7 +6,7 @@ use log::{debug, error, warn};
use virtio_bindings::bindings::virtio_net::{
virtio_net_ctrl_hdr, virtio_net_ctrl_mq, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
};
use virtio_queue::{Descriptor, DescriptorChain};
use virtio_queue::{desc::split::Descriptor, DescriptorChain};
use vm_memory::{Bytes, GuestMemory};
use crate::{DbsGuestAddressSpace, Error as VirtioError, Result as VirtioResult};

View File

@@ -26,6 +26,7 @@ use vhost_rs::vhost_user::message::VhostUserVringAddrFlags;
#[cfg(not(test))]
use vhost_rs::VhostBackend;
use vhost_rs::{VhostUserMemoryRegionInfo, VringConfigData};
use virtio_bindings::bindings::virtio_config::{VIRTIO_F_NOTIFY_ON_EMPTY, VIRTIO_F_VERSION_1};
use virtio_bindings::bindings::virtio_net::*;
use virtio_bindings::bindings::virtio_ring::*;
use virtio_queue::{DescriptorChain, QueueT};

View File

@@ -25,7 +25,7 @@ use vhost_rs::vhost_user::message::{
VhostUserConfigFlags, VhostUserProtocolFeatures, VhostUserVirtioFeatures,
VHOST_USER_CONFIG_OFFSET,
};
use vhost_rs::vhost_user::{Master, VhostUserMaster};
use vhost_rs::vhost_user::{Frontend, VhostUserFrontend};
use vhost_rs::{Error as VhostError, VhostBackend};
use virtio_bindings::bindings::virtio_blk::{VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_SEG_MAX};
use virtio_queue::QueueT;
@@ -231,7 +231,7 @@ impl VhostUserBlockDevice {
info!("vhost-user-blk: try to connect to {vhost_socket:?}");
// Connect to the vhost-user socket.
let mut master = Master::connect(&vhost_socket, 1).map_err(VirtIoError::VhostError)?;
let mut master = Frontend::connect(&vhost_socket, 1).map_err(VirtIoError::VhostError)?;
info!("vhost-user-blk: get features");
let avail_features = master.get_features().map_err(VirtIoError::VhostError)?;
@@ -290,11 +290,11 @@ impl VhostUserBlockDevice {
})
}
fn reconnect_to_server(&mut self) -> VirtIoResult<Master> {
fn reconnect_to_server(&mut self) -> VirtIoResult<Frontend> {
if !Path::new(self.vhost_socket.as_str()).exists() {
return Err(VirtIoError::InternalError);
}
let master = Master::connect(&self.vhost_socket, 1).map_err(VirtIoError::VhostError)?;
let master = Frontend::connect(&self.vhost_socket, 1).map_err(VirtIoError::VhostError)?;
Ok(master)
}
@@ -360,7 +360,7 @@ impl VhostUserBlockDevice {
if !Path::new(self.vhost_socket.as_str()).exists() {
return Err(ActivateError::InternalError);
}
let master = Master::connect(String::from(self.vhost_socket.as_str()), 1)
let master = Frontend::connect(String::from(self.vhost_socket.as_str()), 1)
.map_err(VirtIoError::VhostError)?;
self.endpoint.set_master(master);
@@ -388,7 +388,7 @@ impl VhostUserBlockDevice {
R: GuestMemoryRegion + Send + Sync + 'static,
>(
&mut self,
master: Master,
master: Frontend,
config: EndpointParam<AS, Q, R>,
ops: &mut EventOps,
) -> std::result::Result<(), VirtIoError> {

View File

@@ -10,10 +10,10 @@ use dbs_utils::epoll_manager::{EventOps, EventSet, Events};
use log::*;
use vhost_rs::vhost_user::message::{VhostUserProtocolFeatures, VhostUserVringAddrFlags};
use vhost_rs::vhost_user::{
Error as VhostUserError, Listener as VhostUserListener, Master, VhostUserMaster,
Error as VhostUserError, Frontend, Listener as VhostUserListener, VhostUserFrontend,
};
use vhost_rs::{Error as VhostError, VhostBackend, VhostUserMemoryRegionInfo, VringConfigData};
use virtio_bindings::bindings::virtio_net::VIRTIO_F_RING_PACKED;
use virtio_bindings::bindings::virtio_config::VIRTIO_F_RING_PACKED;
use virtio_queue::QueueT;
use vm_memory::{
Address, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryRegion, MemoryRegionAddress,
@@ -50,7 +50,7 @@ impl Listener {
}
// Wait for an incoming connection until success.
pub fn accept(&self) -> VirtioResult<(Master, u64)> {
pub fn accept(&self) -> VirtioResult<(Frontend, u64)> {
loop {
match self.try_accept() {
Ok(Some((master, mut feature))) => {
@@ -65,14 +65,14 @@ impl Listener {
}
}
pub fn try_accept(&self) -> VirtioResult<Option<(Master, u64)>> {
pub fn try_accept(&self) -> VirtioResult<Option<(Frontend, u64)>> {
let sock = match self.listener.accept() {
Ok(Some(conn)) => conn,
Ok(None) => return Ok(None),
Err(e) => return Err(e.into()),
};
let mut master = Master::from_stream(sock, 1);
let mut master = Frontend::from_stream(sock, 1);
info!("{}: try to get virtio features from slave.", self.name);
match Endpoint::initialize(&mut master) {
Ok(Some(features)) => Ok(Some((master, features))),
@@ -159,8 +159,8 @@ impl<AS: GuestAddressSpace, Q: QueueT, R: GuestMemoryRegion> EndpointParam<'_, A
/// Caller needs to ensure mutual exclusive access to the object.
pub(super) struct Endpoint {
/// Underlying vhost-user communication endpoint.
conn: Option<Master>,
old: Option<Master>,
conn: Option<Frontend>,
old: Option<Frontend>,
/// Token to register epoll event for the underlying socket.
slot: u32,
/// Identifier string for logs.
@@ -168,7 +168,7 @@ pub(super) struct Endpoint {
}
impl Endpoint {
pub fn new(master: Master, slot: u32, name: String) -> Self {
pub fn new(master: Frontend, slot: u32, name: String) -> Self {
Endpoint {
conn: Some(master),
old: None,
@@ -186,7 +186,7 @@ impl Endpoint {
/// * - Ok(Some(avial_features)): virtio features from the slave
/// * - Ok(None): underlying communicaiton channel gets broken during negotiation
/// * - Err(e): error conditions
fn initialize(master: &mut Master) -> VirtioResult<Option<u64>> {
fn initialize(master: &mut Frontend) -> VirtioResult<Option<u64>> {
// 1. Seems that some vhost-user slaves depend on the get_features request to driver its
// internal state machine.
// N.B. it's really TDD, we just found it works in this way. Any spec about this?
@@ -242,7 +242,7 @@ impl Endpoint {
pub fn negotiate<AS: GuestAddressSpace, Q: QueueT, R: GuestMemoryRegion>(
&mut self,
config: &EndpointParam<AS, Q, R>,
mut old: Option<&mut Master>,
mut old: Option<&mut Frontend>,
) -> VirtioResult<()> {
let guard = config.virtio_config.lock_guest_memory();
let mem = guard.deref();
@@ -286,19 +286,19 @@ impl Endpoint {
);
// Setup slave channel if SLAVE_REQ protocol feature is set
if protocol_features.contains(VhostUserProtocolFeatures::SLAVE_REQ) {
if protocol_features.contains(VhostUserProtocolFeatures::BACKEND_REQ) {
match config.slave_req_fd {
Some(fd) => master.set_slave_request_fd(&fd)?,
Some(fd) => master.set_backend_request_fd(&fd)?,
None => {
error!(
"{}: Protocol feature SLAVE_REQ is set but not slave channel fd",
"{}: Protocol feature BACKEND_REQ is set but not slave channel fd",
self.name
);
return Err(VhostError::VhostUserProtocol(VhostUserError::InvalidParam).into());
}
}
} else {
info!("{}: has no SLAVE_REQ protocol feature set", self.name);
info!("{}: has no BACKEND_REQ protocol feature set", self.name);
}
// 6. check number of queues supported
@@ -454,7 +454,7 @@ impl Endpoint {
/// Restore communication with the vhost-user slave on reconnect.
pub fn reconnect<AS: GuestAddressSpace, Q: QueueT, R: GuestMemoryRegion>(
&mut self,
master: Master,
master: Frontend,
config: &EndpointParam<AS, Q, R>,
ops: &mut EventOps,
) -> VirtioResult<()> {
@@ -515,7 +515,11 @@ impl Endpoint {
}
/// Deregister the underlying socket from the epoll controller.
pub fn deregister_epoll_event(&self, master: &Master, ops: &mut EventOps) -> VirtioResult<()> {
pub fn deregister_epoll_event(
&self,
master: &Frontend,
ops: &mut EventOps,
) -> VirtioResult<()> {
info!(
"{}: unregister epoll event for fd {}.",
self.name,
@@ -529,7 +533,7 @@ impl Endpoint {
.map_err(VirtioError::EpollMgr)
}
pub fn set_master(&mut self, master: Master) {
pub fn set_master(&mut self, master: Frontend) {
self.conn = Some(master);
}
}

View File

@@ -3,10 +3,7 @@
// SPDX-License-Identifier: Apache-2.0
use std::any::Any;
use std::io;
use std::marker::PhantomData;
use std::ops::Deref;
use std::os::fd::AsRawFd;
use std::sync::{Arc, Mutex, MutexGuard};
use dbs_device::resources::{DeviceResources, ResourceConstraint};
@@ -15,18 +12,15 @@ use dbs_utils::epoll_manager::{
};
use kvm_bindings::kvm_userspace_memory_region;
use kvm_ioctls::VmFd;
use libc::{c_void, off64_t, pread64, pwrite64};
use log::*;
use vhost_rs::vhost_user::message::{
VhostUserFSSlaveMsg, VhostUserFSSlaveMsgFlags, VhostUserProtocolFeatures,
VhostUserVirtioFeatures, VHOST_USER_FS_SLAVE_ENTRIES,
use vhost_rs::vhost_user::message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures};
use vhost_rs::vhost_user::{
Frontend, FrontendReqHandler, HandlerResult, VhostUserFrontendReqHandler,
};
use vhost_rs::vhost_user::{HandlerResult, Master, MasterReqHandler, VhostUserMasterReqHandler};
use vhost_rs::VhostBackend;
use virtio_queue::QueueT;
use vm_memory::{
GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryRegion, GuestRegionMmap, GuestUsize,
MmapRegion,
GuestAddress, GuestAddressSpace, GuestMemoryRegion, GuestRegionMmap, GuestUsize, MmapRegion,
};
use crate::ConfigResult;
@@ -50,6 +44,7 @@ const NUM_QUEUE_OFFSET: usize = 1;
const MASTER_SLOT: u32 = 0;
const SLAVE_REQ_SLOT: u32 = 1;
#[allow(dead_code)]
struct SlaveReqHandler<AS: GuestAddressSpace> {
/// the address of memory region allocated for virtiofs
cache_offset: u64,
@@ -69,6 +64,7 @@ struct SlaveReqHandler<AS: GuestAddressSpace> {
impl<AS: GuestAddressSpace> SlaveReqHandler<AS> {
// Make sure request is within cache range
#[allow(dead_code)]
fn is_req_valid(&self, offset: u64, len: u64) -> bool {
// TODO: do we need to validate alignment here?
match offset.checked_add(len) {
@@ -78,274 +74,24 @@ impl<AS: GuestAddressSpace> SlaveReqHandler<AS> {
}
}
impl<AS: GuestAddressSpace> VhostUserMasterReqHandler for SlaveReqHandler<AS> {
impl<AS: GuestAddressSpace> VhostUserFrontendReqHandler for SlaveReqHandler<AS> {
fn handle_config_change(&self) -> HandlerResult<u64> {
trace!(target: "vhost-fs", "{}: SlaveReqHandler::handle_config_change()", self.id);
debug!("{}: unhandle device_config_change event", self.id);
Ok(0)
}
fn fs_slave_map(&self, fs: &VhostUserFSSlaveMsg, fd: &dyn AsRawFd) -> HandlerResult<u64> {
trace!(target: "vhost-fs", "{}: SlaveReqHandler::fs_slave_map()", self.id);
for i in 0..VHOST_USER_FS_SLAVE_ENTRIES {
let offset = fs.cache_offset[i];
let len = fs.len[i];
// Ignore if the length is 0.
if len == 0 {
continue;
}
debug!(
"{}: fs_slave_map: offset={:x} len={:x} cache_size={:x}",
self.id, offset, len, self.cache_size
);
if !self.is_req_valid(offset, len) {
debug!(
"{}: fs_slave_map: Wrong offset or length, offset={:x} len={:x} cache_size={:x}",
self.id, offset, len, self.cache_size
);
return Err(std::io::Error::from_raw_os_error(libc::EINVAL));
}
let addr = self.mmap_cache_addr + offset;
let flags = fs.flags[i];
let ret = unsafe {
libc::mmap(
addr as *mut libc::c_void,
len as usize,
flags.bits() as i32,
libc::MAP_SHARED | libc::MAP_FIXED,
fd.as_raw_fd(),
fs.fd_offset[i] as libc::off_t,
)
};
if ret == libc::MAP_FAILED {
let e = std::io::Error::last_os_error();
error!("{}: fs_slave_map: mmap failed, {}", self.id, e);
return Err(e);
}
let ret = unsafe { libc::close(fd.as_raw_fd()) };
if ret == -1 {
let e = std::io::Error::last_os_error();
error!("{}: fs_slave_map: close failed, {}", self.id, e);
return Err(e);
}
}
Ok(0)
}
fn fs_slave_unmap(&self, fs: &VhostUserFSSlaveMsg) -> HandlerResult<u64> {
trace!(target: "vhost-fs", "{}: SlaveReqHandler::fs_slave_map()", self.id);
for i in 0..VHOST_USER_FS_SLAVE_ENTRIES {
let offset = fs.cache_offset[i];
let mut len = fs.len[i];
// Ignore if the length is 0.
if len == 0 {
continue;
}
debug!(
"{}: fs_slave_unmap: offset={:x} len={:x} cache_size={:x}",
self.id, offset, len, self.cache_size
);
// Need to handle a special case where the slave ask for the unmapping
// of the entire mapping.
if len == 0xffff_ffff_ffff_ffff {
len = self.cache_size;
}
if !self.is_req_valid(offset, len) {
error!(
"{}: fs_slave_map: Wrong offset or length, offset={:x} len={:x} cache_size={:x}",
self.id, offset, len, self.cache_size
);
return Err(std::io::Error::from_raw_os_error(libc::EINVAL));
}
let addr = self.mmap_cache_addr + offset;
#[allow(clippy::unnecessary_cast)]
let ret = unsafe {
libc::mmap(
addr as *mut libc::c_void,
len as usize,
libc::PROT_NONE,
libc::MAP_ANONYMOUS | libc::MAP_PRIVATE | libc::MAP_FIXED,
-1,
0 as libc::off_t,
)
};
if ret == libc::MAP_FAILED {
let e = std::io::Error::last_os_error();
error!("{}: fs_slave_map: mmap failed, {}", self.id, e);
return Err(e);
}
}
Ok(0)
}
fn fs_slave_sync(&self, fs: &VhostUserFSSlaveMsg) -> HandlerResult<u64> {
trace!(target: "vhost-fs", "{}: SlaveReqHandler::fs_slave_sync()", self.id);
for i in 0..VHOST_USER_FS_SLAVE_ENTRIES {
let offset = fs.cache_offset[i];
let len = fs.len[i];
// Ignore if the length is 0.
if len == 0 {
continue;
}
debug!(
"{}: fs_slave_sync: offset={:x} len={:x} cache_size={:x}",
self.id, offset, len, self.cache_size
);
if !self.is_req_valid(offset, len) {
error!(
"{}: fs_slave_map: Wrong offset or length, offset={:x} len={:x} cache_size={:x}",
self.id, offset, len, self.cache_size
);
return Err(std::io::Error::from_raw_os_error(libc::EINVAL));
}
let addr = self.mmap_cache_addr + offset;
let ret =
unsafe { libc::msync(addr as *mut libc::c_void, len as usize, libc::MS_SYNC) };
if ret == -1 {
let e = std::io::Error::last_os_error();
error!("{}: fs_slave_sync: msync failed, {}", self.id, e);
return Err(e);
}
}
Ok(0)
}
fn fs_slave_io(&self, fs: &VhostUserFSSlaveMsg, fd: &dyn AsRawFd) -> HandlerResult<u64> {
trace!(target: "vhost-fs", "{}: SlaveReqHandler::fs_slave_io()", self.id);
let guard = self.mem.memory();
let mem = guard.deref();
let mut done: u64 = 0;
for i in 0..VHOST_USER_FS_SLAVE_ENTRIES {
// Ignore if the length is 0.
if fs.len[i] == 0 {
continue;
}
let mut foffset = fs.fd_offset[i];
let mut len = fs.len[i] as usize;
let gpa = fs.cache_offset[i];
let cache_end = self.cache_offset + self.cache_size;
let efault = libc::EFAULT;
debug!(
"{}: fs_slave_io: gpa={:x} len={:x} foffset={:x} cache_offset={:x} cache_size={:x}",
self.id, gpa, len, foffset, self.cache_offset, self.cache_size
);
let mut ptr = if gpa >= self.cache_offset && gpa < cache_end {
let offset = gpa
.checked_sub(self.cache_offset)
.ok_or_else(|| io::Error::from_raw_os_error(efault))?;
let end = gpa
.checked_add(fs.len[i])
.ok_or_else(|| io::Error::from_raw_os_error(efault))?;
if end >= cache_end {
error!( "{}: fs_slave_io: Wrong gpa or len (gpa={:x} len={:x} cache_offset={:x}, cache_size={:x})", self.id, gpa, len, self.cache_offset, self.cache_size );
return Err(io::Error::from_raw_os_error(efault));
}
self.mmap_cache_addr + offset
} else {
// gpa is a RAM addr.
mem.get_host_address(GuestAddress(gpa))
.map_err(|e| {
error!(
"{}: fs_slave_io: Failed to find RAM region associated with gpa 0x{:x}: {:?}",
self.id, gpa, e
);
io::Error::from_raw_os_error(efault)
})? as u64
};
while len > 0 {
let ret = if (fs.flags[i] & VhostUserFSSlaveMsgFlags::MAP_W)
== VhostUserFSSlaveMsgFlags::MAP_W
{
debug!("{}: write: foffset={:x}, len={:x}", self.id, foffset, len);
unsafe {
pwrite64(
fd.as_raw_fd(),
ptr as *const c_void,
len,
foffset as off64_t,
)
}
} else {
debug!("{}: read: foffset={:x}, len={:x}", self.id, foffset, len);
unsafe { pread64(fd.as_raw_fd(), ptr as *mut c_void, len, foffset as off64_t) }
};
if ret < 0 {
let e = std::io::Error::last_os_error();
if (fs.flags[i] & VhostUserFSSlaveMsgFlags::MAP_W)
== VhostUserFSSlaveMsgFlags::MAP_W
{
error!("{}: fs_slave_io: pwrite failed, {}", self.id, e);
} else {
error!("{}: fs_slave_io: pread failed, {}", self.id, e);
}
return Err(e);
}
if ret == 0 {
// EOF
let e = io::Error::new(
io::ErrorKind::UnexpectedEof,
"failed to access whole buffer",
);
error!("{}: fs_slave_io: IO error, {}", self.id, e);
return Err(e);
}
len -= ret as usize;
foffset += ret as u64;
ptr += ret as u64;
done += ret as u64;
}
let ret = unsafe { libc::close(fd.as_raw_fd()) };
if ret == -1 {
let e = std::io::Error::last_os_error();
error!("{}: fs_slave_io: close failed, {}", self.id, e);
return Err(e);
}
}
Ok(done)
}
}
pub struct VhostUserFsHandler<
AS: GuestAddressSpace,
Q: QueueT,
R: GuestMemoryRegion,
S: VhostUserMasterReqHandler,
S: VhostUserFrontendReqHandler,
> {
config: VirtioDeviceConfig<AS, Q, R>,
device: Arc<Mutex<VhostUserFsDevice>>,
slave_req_handler: Option<MasterReqHandler<S>>,
slave_req_handler: Option<FrontendReqHandler<S>>,
id: String,
}
@@ -354,7 +100,7 @@ where
AS: 'static + GuestAddressSpace + Send + Sync,
Q: QueueT + Send + 'static,
R: GuestMemoryRegion + Send + Sync + 'static,
S: 'static + Send + VhostUserMasterReqHandler,
S: 'static + Send + VhostUserFrontendReqHandler,
{
fn process(&mut self, events: Events, _ops: &mut EventOps) {
trace!(target: "vhost-fs", "{}: VhostUserFsHandler::process({})", self.id, events.data());
@@ -425,7 +171,7 @@ impl VhostUserFsDevice {
// Connect to the vhost-user socket.
info!("{VHOST_USER_FS_NAME}: try to connect to {path:?}");
let num_queues = NUM_QUEUE_OFFSET + req_num_queues;
let master = Master::connect(path, num_queues as u64).map_err(VirtioError::VhostError)?;
let master = Frontend::connect(path, num_queues as u64).map_err(VirtioError::VhostError)?;
info!("{VHOST_USER_FS_NAME}: get features");
let avail_features = master.get_features().map_err(VirtioError::VhostError)?;
@@ -475,7 +221,7 @@ impl VhostUserFsDevice {
let mut features = VhostUserProtocolFeatures::MQ | VhostUserProtocolFeatures::REPLY_ACK;
if self.is_dax_on() {
features |=
VhostUserProtocolFeatures::SLAVE_REQ | VhostUserProtocolFeatures::SLAVE_SEND_FD;
VhostUserProtocolFeatures::BACKEND_REQ | VhostUserProtocolFeatures::BACKEND_SEND_FD;
}
features
}
@@ -484,7 +230,7 @@ impl VhostUserFsDevice {
AS: GuestAddressSpace,
Q: QueueT,
R: GuestMemoryRegion,
S: VhostUserMasterReqHandler,
S: VhostUserFrontendReqHandler,
>(
&mut self,
handler: &VhostUserFsHandler<AS, Q, R, S>,
@@ -621,7 +367,7 @@ where
mem: config.vm_as.clone(),
id: device.device_info.driver_name.clone(),
});
let req_handler = MasterReqHandler::new(vu_master_req_handler)
let req_handler = FrontendReqHandler::new(vu_master_req_handler)
.map_err(|e| ActivateError::VhostActivate(vhost_rs::Error::VhostUserProtocol(e)))?;
Some(req_handler)
@@ -748,7 +494,7 @@ where
let guest_mmap_region = Arc::new(
GuestRegionMmap::new(mmap_region, GuestAddress(guest_addr))
.map_err(VirtioError::InsertMmap)?,
.ok_or(VirtioError::InsertMmap)?,
);
Ok(Some(VirtioSharedMemoryList {

View File

@@ -12,7 +12,7 @@ use dbs_utils::epoll_manager::{EpollManager, EventOps, Events, MutEventSubscribe
use dbs_utils::net::MacAddr;
use log::{debug, error, info, trace, warn};
use vhost_rs::vhost_user::{
Error as VhostUserError, Master, VhostUserProtocolFeatures, VhostUserVirtioFeatures,
Error as VhostUserError, Frontend, VhostUserProtocolFeatures, VhostUserVirtioFeatures,
};
use vhost_rs::Error as VhostError;
use virtio_bindings::bindings::virtio_net::{
@@ -59,7 +59,7 @@ struct VhostUserNetDevice {
impl VhostUserNetDevice {
fn new(
master: Master,
master: Frontend,
mut avail_features: u64,
listener: Listener,
guest_mac: Option<&MacAddr>,

View File

@@ -14,13 +14,14 @@ use vhost_rs::vhost_user::message::{
VhostUserVringAddr, VhostUserVringState, MAX_MSG_SIZE,
};
use vhost_rs::vhost_user::Error;
use vm_memory::ByteValued;
use vmm_sys_util::sock_ctrl_msg::ScmSocket;
use vmm_sys_util::tempfile::TempFile;
pub const MAX_ATTACHED_FD_ENTRIES: usize = 32;
pub(crate) trait Req:
Clone + Copy + Debug + PartialEq + Eq + PartialOrd + Ord + Into<u32>
Clone + Copy + Debug + PartialEq + Eq + PartialOrd + Ord + Into<u32> + Send + Sync
{
fn is_valid(&self) -> bool;
}
@@ -215,6 +216,10 @@ impl<R: Req> Default for VhostUserMsgHeader<R> {
}
}
// SAFETY: VhostUserMsgHeader is a packed struct with only primitive (u32) fields and PhantomData.
// All bit patterns are valid, and it has no padding bytes.
unsafe impl<R: Req> ByteValued for VhostUserMsgHeader<R> {}
/// Unix domain socket endpoint for vhost-user connection.
pub(crate) struct Endpoint<R: Req> {
sock: UnixStream,

View File

@@ -99,13 +99,13 @@ mod tests {
#[test]
fn test_tcp_backend_bind() {
let tcp_sock_addr = String::from("127.0.0.2:9000");
let tcp_sock_addr = String::from("127.0.0.1:9000");
assert!(VsockTcpBackend::new(tcp_sock_addr).is_ok());
}
#[test]
fn test_tcp_backend_accept() {
let tcp_sock_addr = String::from("127.0.0.2:9001");
let tcp_sock_addr = String::from("127.0.0.1:9001");
let mut vsock_backend = VsockTcpBackend::new(tcp_sock_addr.clone()).unwrap();
let _stream = TcpStream::connect(&tcp_sock_addr).unwrap();
@@ -115,7 +115,7 @@ mod tests {
#[test]
fn test_tcp_backend_communication() {
let tcp_sock_addr = String::from("127.0.0.2:9002");
let tcp_sock_addr = String::from("127.0.0.1:9002");
let test_string = String::from("TEST");
let mut buffer = [0; 10];
@@ -139,7 +139,7 @@ mod tests {
#[test]
fn test_tcp_backend_connect() {
let tcp_sock_addr = String::from("127.0.0.2:9003");
let tcp_sock_addr = String::from("127.0.0.1:9003");
let vsock_backend = VsockTcpBackend::new(tcp_sock_addr).unwrap();
// tcp backend don't support peer connection
assert!(vsock_backend.connect(0).is_err());
@@ -147,14 +147,14 @@ mod tests {
#[test]
fn test_tcp_backend_type() {
let tcp_sock_addr = String::from("127.0.0.2:9004");
let tcp_sock_addr = String::from("127.0.0.1:9004");
let vsock_backend = VsockTcpBackend::new(tcp_sock_addr).unwrap();
assert_eq!(vsock_backend.r#type(), VsockBackendType::Tcp);
}
#[test]
fn test_tcp_backend_vsock_stream() {
let tcp_sock_addr = String::from("127.0.0.2:9005");
let tcp_sock_addr = String::from("127.0.0.1:9005");
let _vsock_backend = VsockTcpBackend::new(tcp_sock_addr.clone()).unwrap();
let vsock_stream = TcpStream::connect(&tcp_sock_addr).unwrap();

View File

@@ -17,7 +17,7 @@
/// backend.
use std::ops::{Deref, DerefMut};
use virtio_queue::{Descriptor, DescriptorChain};
use virtio_queue::{desc::split::Descriptor, DescriptorChain};
use vm_memory::{Address, GuestMemory};
use super::defs;

View File

@@ -118,11 +118,15 @@ pub enum AddressManagerError {
/// Failure in accessing the memory located at some address.
#[error("address manager failed to access guest memory located at 0x{0:x}")]
AccessGuestMemory(u64, #[source] vm_memory::mmap::Error),
AccessGuestMemory(u64, #[source] vm_memory::GuestMemoryError),
/// Failed to create GuestMemory
#[error("address manager failed to create guest memory object")]
CreateGuestMemory(#[source] vm_memory::Error),
CreateGuestMemory(#[source] vm_memory::GuestMemoryError),
/// Failed to insert/manage guest memory region collection
#[error("address manager failed to manage guest memory region collection")]
GuestRegionCollection(#[source] vm_memory::GuestRegionCollectionError),
/// Failure in initializing guest memory.
#[error("address manager failed to initialize guest memory")]
@@ -328,7 +332,7 @@ impl AddressSpaceMgr {
vm_memory = vm_memory
.insert_region(mmap_reg.clone())
.map_err(AddressManagerError::CreateGuestMemory)?;
.map_err(AddressManagerError::GuestRegionCollection)?;
self.map_to_kvm(res_mgr, &param, reg, mmap_reg)?;
}
@@ -488,8 +492,11 @@ impl AddressSpaceMgr {
self.configure_thp_and_prealloc(&region, &mmap_reg)?;
}
let reg = GuestRegionImpl::new(mmap_reg, region.start_addr())
.map_err(AddressManagerError::CreateGuestMemory)?;
let reg = GuestRegionImpl::new(mmap_reg, region.start_addr()).ok_or(
AddressManagerError::GuestRegionCollection(
vm_memory::GuestRegionCollectionError::NoMemoryRegion,
),
)?;
Ok(Arc::new(reg))
}

View File

@@ -31,7 +31,7 @@ pub enum BalloonDeviceError {
/// guest memory error
#[error("failed to access guest memory, {0}")]
GuestMemoryError(#[source] vm_memory::mmap::Error),
GuestMemoryError(#[source] vm_memory::GuestMemoryError),
/// create balloon device error
#[error("failed to create virtio-balloon device, {0}")]

View File

@@ -557,15 +557,14 @@ impl MemRegionFactory for MemoryRegionFactory {
);
// All value should be valid.
let memory_region = Arc::new(
GuestRegionMmap::new(mmap_region, guest_addr).map_err(VirtioError::InsertMmap)?,
);
let memory_region =
Arc::new(GuestRegionMmap::new(mmap_region, guest_addr).ok_or(VirtioError::InsertMmap)?);
let vm_as_new = self
.vm_as
.memory()
.insert_region(memory_region.clone())
.map_err(VirtioError::InsertMmap)?;
.map_err(|_| VirtioError::InsertMmap)?;
self.vm_as.lock().unwrap().replace(vm_as_new);
self.address_space.insert_region(region).map_err(|e| {
error!(self.logger, "failed to insert address space region: {}", e);

View File

@@ -78,7 +78,7 @@ impl DeviceVirtioRegionHandler {
) -> std::result::Result<(), VirtioError> {
let vm_as_new = self.vm_as.memory().insert_region(region).map_err(|e| {
error!("DeviceVirtioRegionHandler failed to insert guest memory region: {e:?}.");
VirtioError::InsertMmap(e)
VirtioError::InsertMmap
})?;
// Do not expect poisoned lock here, so safe to unwrap().
self.vm_as.lock().unwrap().replace(vm_as_new);

View File

@@ -13,6 +13,7 @@ use arc_swap::ArcSwap;
use dbs_address_space::AddressSpace;
#[cfg(target_arch = "aarch64")]
use dbs_arch::{DeviceType, MMIODeviceInfo};
#[cfg(feature = "host-device")]
use dbs_boot::layout::MMIO_LOW_END;
use dbs_device::device_manager::{Error as IoManagerError, IoManager, IoManagerContext};
use dbs_device::resources::DeviceResources;
@@ -24,7 +25,6 @@ use dbs_legacy_devices::ConsoleHandler;
use dbs_pci::CAPABILITY_BAR_SIZE;
use dbs_utils::epoll_manager::EpollManager;
use kvm_ioctls::VmFd;
use virtio_queue::QueueSync;
#[cfg(feature = "dbs-virtio-devices")]
use dbs_device::resources::ResourceConstraint;
@@ -41,6 +41,7 @@ use dbs_virtio_devices::{
#[cfg(feature = "host-device")]
use dbs_pci::VfioPciDevice;
#[cfg(feature = "host-device")]
use dbs_pci::VirtioPciDevice;
#[cfg(all(feature = "hotplug", feature = "dbs-upcall"))]
use dbs_upcall::{
@@ -59,6 +60,7 @@ use crate::resource_manager::ResourceManager;
use crate::vm::{KernelConfigInfo, Vm, VmConfigInfo};
use crate::IoManagerCached;
#[cfg(feature = "host-device")]
use vm_memory::GuestRegionMmap;
/// Virtual machine console device manager.
@@ -187,18 +189,23 @@ pub enum DeviceMgrError {
/// Error from Vfio Pci
#[error("failed to do vfio pci operation: {0:?}")]
VfioPci(#[source] dbs_pci::VfioPciError),
#[cfg(feature = "host-device")]
/// Error from Virtio Pci
#[error("failed to do virtio pci operation")]
VirtioPci,
#[cfg(feature = "host-device")]
/// PCI system manager error
#[error("Pci system manager error")]
PciSystemManager,
#[cfg(feature = "host-device")]
/// Dragonball pci system error
#[error("pci error: {0:?}")]
PciError(#[source] dbs_pci::Error),
#[cfg(feature = "host-device")]
/// Virtio Pci system error
#[error("virtio pci error: {0:?}")]
VirtioPciError(#[source] dbs_pci::VirtioPciDeviceError),
#[cfg(feature = "host-device")]
/// Unsupported pci device type
#[error("unsupported pci device type")]
InvalidPciDeviceType,
@@ -315,6 +322,7 @@ pub struct DeviceOpContext {
virtio_devices: Vec<Arc<dyn DeviceIo>>,
#[cfg(feature = "host-device")]
vfio_manager: Option<Arc<Mutex<VfioDeviceMgr>>>,
#[cfg(feature = "host-device")]
pci_system_manager: Arc<Mutex<PciSystemManager>>,
vm_config: Option<VmConfigInfo>,
shared_info: Arc<RwLock<InstanceInfo>>,
@@ -366,6 +374,7 @@ impl DeviceOpContext {
shared_info,
#[cfg(feature = "host-device")]
vfio_manager: None,
#[cfg(feature = "host-device")]
pci_system_manager: device_mgr.pci_system_manager.clone(),
}
}
@@ -659,6 +668,7 @@ pub struct DeviceManager {
vhost_user_net_manager: VhostUserNetDeviceMgr,
#[cfg(feature = "host-device")]
pub(crate) vfio_manager: Arc<Mutex<VfioDeviceMgr>>,
#[cfg(feature = "host-device")]
pub(crate) pci_system_manager: Arc<Mutex<PciSystemManager>>,
}
@@ -674,15 +684,21 @@ impl DeviceManager {
let irq_manager = Arc::new(KvmIrqManager::new(vm_fd.clone()));
let io_manager = Arc::new(ArcSwap::new(Arc::new(IoManager::new())));
let io_lock = Arc::new(Mutex::new(()));
#[cfg(feature = "host-device")]
let io_context = DeviceManagerContext::new(io_manager.clone(), io_lock.clone());
#[cfg(feature = "host-device")]
let mut mgr = PciSystemManager::new(irq_manager.clone(), io_context, res_manager.clone())?;
#[cfg(feature = "host-device")]
let requirements = mgr.resource_requirements();
#[cfg(feature = "host-device")]
let resources = res_manager
.allocate_device_resources(&requirements, USE_SHARED_IRQ)
.map_err(DeviceMgrError::ResourceError)?;
#[cfg(feature = "host-device")]
mgr.activate(resources)?;
#[cfg(feature = "host-device")]
let pci_system_manager = Arc::new(Mutex::new(mgr));
Ok(DeviceManager {
@@ -720,6 +736,7 @@ impl DeviceManager {
pci_system_manager.clone(),
logger,
))),
#[cfg(feature = "host-device")]
pci_system_manager,
})
}
@@ -1251,6 +1268,7 @@ impl DeviceManager {
}
/// Create an Virtio PCI transport layer device for the virtio backend device.
#[cfg(feature = "host-device")]
pub fn create_virtio_pci_device(
mut device: DbsVirtioDevice,
ctx: &mut DeviceOpContext,
@@ -1366,6 +1384,7 @@ impl DeviceManager {
}
/// Create an Virtio PCI transport layer device for the virtio backend device.
#[cfg(feature = "host-device")]
pub fn register_virtio_pci_device(
device: Arc<dyn DeviceIo>,
ctx: &DeviceOpContext,
@@ -1385,6 +1404,7 @@ impl DeviceManager {
}
/// Deregister Virtio device from IoManager
#[cfg(feature = "host-device")]
pub fn deregister_virtio_device(
device: &Arc<dyn DeviceIo>,
ctx: &mut DeviceOpContext,
@@ -1405,11 +1425,15 @@ impl DeviceManager {
}
/// Destroy/Deregister resources for a Virtio PCI
#[cfg(feature = "host-device")]
fn destroy_pci_device(
device: Arc<dyn DeviceIo>,
ctx: &mut DeviceOpContext,
dev_id: u8,
) -> std::result::Result<(), DeviceMgrError> {
use virtio_queue::QueueSync;
use vm_memory::GuestRegionMmap;
// unregister IoManager
Self::deregister_virtio_device(&device, ctx)?;
// unregister Resource manager
@@ -1489,6 +1513,7 @@ impl DeviceManager {
}
/// Teardown the Virtio PCI or MMIO transport layer device associated with the virtio backend device.
#[cfg(feature = "dbs-virtio-devices")]
pub fn destroy_virtio_device(
device: Arc<dyn DeviceIo>,
ctx: &mut DeviceOpContext,
@@ -1496,12 +1521,18 @@ impl DeviceManager {
if let Some(mmio_dev) = device.as_any().downcast_ref::<DbsMmioV2Device>() {
Self::destroy_mmio_device(device.clone(), ctx)?;
mmio_dev.remove();
} else if let Some(pci_dev) = device.as_any().downcast_ref::<VirtioPciDevice<
GuestAddressSpaceImpl,
QueueSync,
GuestRegionMmap,
>>() {
Self::destroy_pci_device(device.clone(), ctx, pci_dev.device_id())?;
}
#[cfg(feature = "host-device")]
{
use virtio_queue::QueueSync;
use vm_memory::GuestRegionMmap;
if let Some(pci_dev) = device.as_any().downcast_ref::<VirtioPciDevice<
GuestAddressSpaceImpl,
QueueSync,
GuestRegionMmap,
>>() {
Self::destroy_pci_device(device.clone(), ctx, pci_dev.device_id())?;
}
}
Ok(())
@@ -1572,18 +1603,25 @@ mod tests {
let irq_manager = Arc::new(KvmIrqManager::new(vm_fd.clone()));
let io_manager = Arc::new(ArcSwap::new(Arc::new(IoManager::new())));
let io_lock = Arc::new(Mutex::new(()));
#[cfg(feature = "host-device")]
let io_context = DeviceManagerContext::new(io_manager.clone(), io_lock.clone());
#[cfg(feature = "host-device")]
let mut mgr =
PciSystemManager::new(irq_manager.clone(), io_context, res_manager.clone())
.unwrap();
#[cfg(feature = "host-device")]
let requirements = mgr.resource_requirements();
#[cfg(feature = "host-device")]
let resources = res_manager
.allocate_device_resources(&requirements, USE_SHARED_IRQ)
.map_err(DeviceMgrError::ResourceError)
.unwrap();
#[cfg(feature = "host-device")]
mgr.activate(resources).unwrap();
#[cfg(feature = "host-device")]
let pci_system_manager = Arc::new(Mutex::new(mgr));
DeviceManager {
@@ -1619,6 +1657,7 @@ mod tests {
pci_system_manager.clone(),
&logger,
))),
#[cfg(feature = "host-device")]
pci_system_manager,
logger,

View File

@@ -406,9 +406,11 @@ impl VfioDeviceMgr {
if let Some(vfio_container) = self.vfio_container.as_ref() {
Ok(vfio_container.clone())
} else {
let kvm_dev_fd = Arc::new(self.get_kvm_dev_fd()?);
let vfio_container =
Arc::new(VfioContainer::new(kvm_dev_fd).map_err(VfioDeviceError::VfioIoctlError)?);
let kvm_dev_fd = self.get_kvm_dev_fd()?;
let vfio_dev_fd = Arc::new(vfio_ioctls::VfioDeviceFd::new_from_kvm(kvm_dev_fd));
let vfio_container = Arc::new(
VfioContainer::new(Some(vfio_dev_fd)).map_err(VfioDeviceError::VfioIoctlError)?,
);
self.vfio_container = Some(vfio_container.clone());
Ok(vfio_container)

View File

@@ -43,7 +43,7 @@ impl Vcpu {
#[allow(clippy::too_many_arguments)]
pub fn new_aarch64(
id: u8,
vcpu_fd: Arc<VcpuFd>,
vcpu_fd: VcpuFd,
io_mgr: IoManagerCached,
exit_evt: EventFd,
vcpu_state_event: EventFd,

View File

@@ -274,7 +274,7 @@ enum VcpuEmulation {
/// A wrapper around creating and using a kvm-based VCPU.
pub struct Vcpu {
// vCPU fd used by the vCPU
fd: Arc<VcpuFd>,
fd: VcpuFd,
// vCPU id info
id: u8,
// Io manager Cached for facilitating IO operations
@@ -317,7 +317,7 @@ pub struct Vcpu {
}
// Using this for easier explicit type-casting to help IDEs interpret the code.
type VcpuCell = Cell<Option<*const Vcpu>>;
type VcpuCell = Cell<Option<*mut Vcpu>>;
impl Vcpu {
thread_local!(static TLS_VCPU_PTR: VcpuCell = const { Cell::new(None) });
@@ -332,7 +332,7 @@ impl Vcpu {
if cell.get().is_some() {
return Err(VcpuError::VcpuTlsInit);
}
cell.set(Some(self as *const Vcpu));
cell.set(Some(self as *mut Vcpu));
Ok(())
})
}
@@ -369,13 +369,13 @@ impl Vcpu {
/// dereferencing from pointer an already borrowed `Vcpu`.
unsafe fn run_on_thread_local<F>(func: F) -> Result<()>
where
F: FnOnce(&Vcpu),
F: FnOnce(&mut Vcpu),
{
Self::TLS_VCPU_PTR.with(|cell: &VcpuCell| {
if let Some(vcpu_ptr) = cell.get() {
// Dereferencing here is safe since `TLS_VCPU_PTR` is populated/non-empty,
// and it is being cleared on `Vcpu::drop` so there is no dangling pointer.
let vcpu_ref: &Vcpu = &*vcpu_ptr;
let vcpu_ref: &mut Vcpu = &mut *vcpu_ptr;
func(vcpu_ref);
Ok(())
} else {
@@ -436,7 +436,7 @@ impl Vcpu {
/// Extract the vcpu running logic for test mocking.
#[cfg(not(test))]
pub fn emulate(fd: &VcpuFd) -> std::result::Result<VcpuExit<'_>, kvm_ioctls::Error> {
pub fn emulate(fd: &mut VcpuFd) -> std::result::Result<VcpuExit<'_>, kvm_ioctls::Error> {
fd.run()
}
@@ -444,7 +444,7 @@ impl Vcpu {
///
/// Returns error or enum specifying whether emulation was handled or interrupted.
fn run_emulation(&mut self) -> Result<VcpuEmulation> {
match Vcpu::emulate(&self.fd) {
match Vcpu::emulate(&mut self.fd) {
Ok(run) => {
match run {
#[cfg(target_arch = "x86_64")]
@@ -455,8 +455,9 @@ impl Vcpu {
}
#[cfg(target_arch = "x86_64")]
VcpuExit::IoOut(addr, data) => {
if !self.check_io_port_info(addr, data)? {
let _ = self.io_mgr.pio_write(addr, data);
let data = data.to_vec();
if !self.check_io_port_info(addr, &data)? {
let _ = self.io_mgr.pio_write(addr, &data);
}
self.metrics.exit_io_out.inc();
Ok(VcpuEmulation::Handled)
@@ -493,14 +494,14 @@ impl Vcpu {
VcpuExit::SystemEvent(event_type, event_flags) => match event_type {
KVM_SYSTEM_EVENT_RESET | KVM_SYSTEM_EVENT_SHUTDOWN => {
info!(
"Received KVM_SYSTEM_EVENT: type: {event_type}, event: {event_flags}"
"Received KVM_SYSTEM_EVENT: type: {event_type}, event: {event_flags:?}"
);
Ok(VcpuEmulation::Stopped)
}
_ => {
self.metrics.failures.inc();
error!(
"Received KVM_SYSTEM_EVENT signal type: {event_type}, flag: {event_flags}"
"Received KVM_SYSTEM_EVENT signal type: {event_type}, flag: {event_flags:?}"
);
Err(VcpuError::VcpuUnhandledKvmExit)
}
@@ -765,7 +766,7 @@ impl Vcpu {
/// Get vcpu file descriptor.
pub fn vcpu_fd(&self) -> &VcpuFd {
self.fd.as_ref()
&self.fd
}
pub fn metrics(&self) -> Arc<VcpuMetrics> {
@@ -804,7 +805,7 @@ pub mod tests {
FailEntry(u64, u32),
InternalError,
Unknown,
SystemEvent(u32, u64),
SystemEvent(u32, Vec<u64>),
Error(i32),
}
@@ -813,7 +814,7 @@ pub mod tests {
}
impl Vcpu {
pub fn emulate(_fd: &VcpuFd) -> std::result::Result<VcpuExit<'_>, kvm_ioctls::Error> {
pub fn emulate(_fd: &mut VcpuFd) -> std::result::Result<VcpuExit<'_>, kvm_ioctls::Error> {
let res = &*EMULATE_RES.lock().unwrap();
match res {
EmulationCase::IoIn => Ok(VcpuExit::IoIn(0, &mut [])),
@@ -828,7 +829,8 @@ pub mod tests {
EmulationCase::InternalError => Ok(VcpuExit::InternalError),
EmulationCase::Unknown => Ok(VcpuExit::Unknown),
EmulationCase::SystemEvent(event_type, event_flags) => {
Ok(VcpuExit::SystemEvent(*event_type, *event_flags))
let flags = event_flags.clone().into_boxed_slice();
Ok(VcpuExit::SystemEvent(*event_type, Box::leak(flags)))
}
EmulationCase::Error(e) => Err(kvm_ioctls::Error::new(*e)),
}
@@ -839,7 +841,7 @@ pub mod tests {
fn create_vcpu() -> (Vcpu, Receiver<VcpuStateEvent>) {
let kvm_context = KvmContext::new(None).unwrap();
let vm = kvm_context.kvm().create_vm().unwrap();
let vcpu_fd = Arc::new(vm.create_vcpu(0).unwrap());
let vcpu_fd = vm.create_vcpu(0).unwrap();
let io_manager = IoManagerCached::new(Arc::new(ArcSwap::new(Arc::new(IoManager::new()))));
let supported_cpuid = kvm_context
.supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
@@ -875,7 +877,7 @@ pub mod tests {
let kvm = Kvm::new().unwrap();
let vm = Arc::new(kvm.create_vm().unwrap());
let _kvm_context = KvmContext::new(Some(kvm.as_raw_fd())).unwrap();
let vcpu_fd = Arc::new(vm.create_vcpu(0).unwrap());
let vcpu_fd = vm.create_vcpu(0).unwrap();
let io_manager = IoManagerCached::new(Arc::new(ArcSwap::new(Arc::new(IoManager::new()))));
let reset_event_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap();
let vcpu_state_event = EventFd::new(libc::EFD_NONBLOCK).unwrap();
@@ -947,17 +949,19 @@ pub mod tests {
assert!(matches!(res, Err(VcpuError::VcpuUnhandledKvmExit)));
// KVM_SYSTEM_EVENT_RESET
*(EMULATE_RES.lock().unwrap()) = EmulationCase::SystemEvent(KVM_SYSTEM_EVENT_RESET, 0);
*(EMULATE_RES.lock().unwrap()) =
EmulationCase::SystemEvent(KVM_SYSTEM_EVENT_RESET, vec![0]);
let res = vcpu.run_emulation();
assert!(matches!(res, Ok(VcpuEmulation::Stopped)));
// KVM_SYSTEM_EVENT_SHUTDOWN
*(EMULATE_RES.lock().unwrap()) = EmulationCase::SystemEvent(KVM_SYSTEM_EVENT_SHUTDOWN, 0);
*(EMULATE_RES.lock().unwrap()) =
EmulationCase::SystemEvent(KVM_SYSTEM_EVENT_SHUTDOWN, vec![0]);
let res = vcpu.run_emulation();
assert!(matches!(res, Ok(VcpuEmulation::Stopped)));
// Other system event
*(EMULATE_RES.lock().unwrap()) = EmulationCase::SystemEvent(0, 0);
*(EMULATE_RES.lock().unwrap()) = EmulationCase::SystemEvent(0, vec![0]);
let res = vcpu.run_emulation();
assert!(matches!(res, Err(VcpuError::VcpuUnhandledKvmExit)));

View File

@@ -189,7 +189,7 @@ pub struct VcpuResizeInfo {
#[derive(Default)]
pub(crate) struct VcpuInfo {
pub(crate) vcpu: Option<Vcpu>,
vcpu_fd: Option<Arc<VcpuFd>>,
vcpu_fd: Option<VcpuFd>,
handle: Option<VcpuHandle>,
tid: u32,
}
@@ -541,18 +541,13 @@ impl VcpuManager {
}
// We will reuse the kvm's vcpufd after first creation, for we can't
// create vcpufd with same id in one kvm instance.
let kvm_vcpu = match &self.vcpu_infos[cpu_index as usize].vcpu_fd {
Some(vcpu_fd) => vcpu_fd.clone(),
None => {
let vcpu_fd = Arc::new(
self.vm_fd
.create_vcpu(cpu_index as u64)
.map_err(VcpuError::VcpuFd)
.map_err(VcpuManagerError::Vcpu)?,
);
self.vcpu_infos[cpu_index as usize].vcpu_fd = Some(vcpu_fd.clone());
vcpu_fd
}
let kvm_vcpu = match self.vcpu_infos[cpu_index as usize].vcpu_fd.take() {
Some(vcpu_fd) => vcpu_fd,
None => self
.vm_fd
.create_vcpu(cpu_index as u64)
.map_err(VcpuError::VcpuFd)
.map_err(VcpuManagerError::Vcpu)?,
};
let mut vcpu = self.create_vcpu_arch(cpu_index, kvm_vcpu, request_ts)?;
@@ -777,7 +772,7 @@ impl VcpuManager {
fn create_vcpu_arch(
&self,
cpu_index: u8,
vcpu_fd: Arc<VcpuFd>,
vcpu_fd: VcpuFd,
request_ts: TimestampUs,
) -> Result<Vcpu> {
// It's safe to unwrap because guest_kernel always exist until vcpu manager done
@@ -806,7 +801,7 @@ impl VcpuManager {
fn create_vcpu_arch(
&self,
cpu_index: u8,
vcpu_fd: Arc<VcpuFd>,
vcpu_fd: VcpuFd,
request_ts: TimestampUs,
) -> Result<Vcpu> {
Vcpu::new_aarch64(

View File

@@ -45,7 +45,7 @@ impl Vcpu {
#[allow(clippy::too_many_arguments)]
pub fn new_x86_64(
id: u8,
vcpu_fd: Arc<VcpuFd>,
vcpu_fd: VcpuFd,
io_mgr: IoManagerCached,
cpuid: CpuId,
exit_evt: EventFd,

View File

@@ -642,7 +642,7 @@ impl Vm {
image: &mut F,
) -> std::result::Result<InitrdConfig, LoadInitrdError>
where
F: Read + Seek,
F: Read + Seek + vm_memory::ReadVolatile,
{
use crate::error::LoadInitrdError::*;
@@ -666,7 +666,7 @@ impl Vm {
// Load the image into memory
vm_memory
.read_from(GuestAddress(address), image, size)
.read_volatile_from(GuestAddress(address), image, size)
.map_err(|_| LoadInitrd)?;
Ok(InitrdConfig {
@@ -1132,7 +1132,7 @@ pub mod tests {
let vm_memory = vm.address_space.vm_memory().unwrap();
vm_memory.write_obj(code, load_addr).unwrap();
let vcpu_fd = vm.vm_fd().create_vcpu(0).unwrap();
let mut vcpu_fd = vm.vm_fd().create_vcpu(0).unwrap();
let mut vcpu_sregs = vcpu_fd.get_sregs().unwrap();
assert_ne!(vcpu_sregs.cs.base, 0);
assert_ne!(vcpu_sregs.cs.selector, 0);

View File

@@ -4,7 +4,7 @@ Safe Path
A library to safely handle filesystem paths, typically for container runtimes.
There are often path related attacks, such as symlink based attacks, TOCTTOU attacks. The `safe-path` crate
There are often path related attacks, such as symlink based attacks, time-of-check to time-of-use (TOCTOU) attacks. The `safe-path` crate
provides several functions and utility structures to protect against path resolution related attacks.
## Support

View File

@@ -22,7 +22,7 @@ slog = { workspace = true }
slog-scope = { workspace = true }
thiserror = { workspace = true }
tokio = { workspace = true, features = ["sync", "fs", "process", "io-util"] }
vmm-sys-util = "0.11.0"
vmm-sys-util = "0.15.0"
rand = { workspace = true }
path-clean = "1.0.1"
lazy_static = { workspace = true }

View File

@@ -15,13 +15,13 @@ serde = { workspace = true, features = ["rc", "derive"] }
serde_json = { workspace = true }
tokio = { workspace = true, features = ["sync", "rt"] }
nix = "0.26.2"
thiserror = { workspace = true }
thiserror = "2.0.18"
# Cloud Hypervisor public HTTP API functions
# Note that the version specified is not necessarily the version of CH
# being used. This version is used to pin the CH config structure
# which is relatively static.
api_client = { git = "https://github.com/cloud-hypervisor/cloud-hypervisor", crate = "api_client", tag = "v27.0" }
api_client = { git = "https://github.com/cloud-hypervisor/cloud-hypervisor", tag = "v51.0" }
# Local dependencies
kata-types = { workspace = true }

View File

@@ -135,7 +135,7 @@ pub async fn cloud_hypervisor_vm_netdev_add_with_fds(
"PUT",
"vm.add-net",
Some(&serialised),
request_fds,
&request_fds,
)
.map_err(|e| anyhow!(e))?;

View File

@@ -15,7 +15,7 @@ use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
use libc::ifreq;
use vmm_sys_util::ioctl::{ioctl_with_mut_ref, ioctl_with_ref, ioctl_with_val};
use vmm_sys_util::{ioctl_ioc_nr, ioctl_iow_nr};
use vmm_sys_util::ioctl_iow_nr;
// As defined in the Linux UAPI:
// https://elixir.bootlin.com/linux/v4.17/source/include/uapi/linux/if.h#L33
pub(crate) const IFACE_NAME_MAX_LEN: usize = 16;

View File

@@ -56,10 +56,10 @@ require (
go.opentelemetry.io/otel/exporters/jaeger v1.0.0
go.opentelemetry.io/otel/sdk v1.40.0
go.opentelemetry.io/otel/trace v1.40.0
golang.org/x/oauth2 v0.30.0
golang.org/x/oauth2 v0.34.0
golang.org/x/sys v0.40.0
google.golang.org/grpc v1.72.0
google.golang.org/protobuf v1.36.7
google.golang.org/grpc v1.79.3
google.golang.org/protobuf v1.36.10
k8s.io/apimachinery v0.33.0
k8s.io/cri-api v0.33.0
k8s.io/kubelet v0.33.0
@@ -135,7 +135,7 @@ require (
golang.org/x/sync v0.19.0 // indirect
golang.org/x/text v0.33.0 // indirect
google.golang.org/genproto v0.0.0-20250303144028-a0af3efb3deb // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20250313205543-e70fdf4c4cb4 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
sigs.k8s.io/yaml v1.4.0 // indirect

Some files were not shown because too many files have changed in this diff Show More