Compare commits


857 Commits

Author SHA1 Message Date
Bin Liu
a8756887f6 Merge pull request #1594 from bergwolf/action
action: fix missing qemu tag
2021-03-31 16:58:03 +08:00
Fabiano Fidêncio
a85d235e0e Merge pull request #1587 from fidencio/wip/update-install-docs-for-sle-and-opensuse
Remove installation guides for SLE and openSUSE
2021-03-31 09:54:21 +02:00
Peng Tao
8a1c6c3ff0 action: fix missing qemu tag
Otherwise it breaks the qemu build.

Fixes: #1593
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-03-31 11:47:16 +08:00
Fabiano Fidêncio
bf707209df Merge pull request #1384 from fidencio/wip/update-kernel-config-for-overlayfs
kernel: Enable OVERLAY_FS_{METACOPY,XINO_AUTO}
2021-03-30 23:20:20 +02:00
Fabiano Fidêncio
a9ff9c8707 docs: Remove openSUSE installation guide
The content of the openSUSE installation guide is related to the 1.x
packages, as openSUSE doesn't provide katacontainers 2.x packages.

Fixes: #1585

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-03-30 22:24:19 +02:00
Fabiano Fidêncio
2888ceb024 docs: Remove SLE installation guide
The content of the SLE installation guide is related to the 1.x
packages, as SUSE doesn't provide katacontainers 2.x packages.

Fixes: #1586

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-03-30 22:23:43 +02:00
Carlos Venegas
8e48fecc2c Merge pull request #1540 from jcvenegas/2021-03-23/kernel-5.10.x
versions: kernel 5.10.x
2021-03-30 12:12:53 -06:00
Chelsea Mafrica
e5aa4e7eb4 Merge pull request #1563 from Jakob-Naucke/s390x-missing-contexts
virtcontainers: Fix missing contexts in s390x
2021-03-30 09:38:28 -07:00
Carlos Venegas
c748a9c278 Merge pull request #1549 from jcvenegas/2021-03-24/makefile-enable-dax-env-var
runtime: makefile allow override DAX value
2021-03-30 10:06:16 -06:00
Eric Ernst
3b1e5cbe9b Merge pull request #1569 from bergwolf/2.1-alpha1-branch-bump
# Kata Containers 2.1-alpha1
2021-03-30 06:05:54 -07:00
Peng Tao
75f9963872 release: Kata Containers 2.1-alpha1
- test: install mock hook binary before test
- cgroups: fix the issue of get wrong online cpus
- build: remove unused variables from Makefile
- container: on cleanup, rm container directory for mounts path
- runtime: add support for QEMU 6
- agent: Enable clean shutdown
- runtime: fix virtiofsd RO volume sharing
- runtime: update virtcontainers API documentation
- runtime: Fix trace span ordering and static checks
- fix runtime UTs and enable static check
- kata-deploy: Use the correct tag for 2.1-alpha1 release
- ci: fix virtiofs-experimental build
- Verify container ID
- rustjail: rework execute_hook
- agent: Allow server address to be specified on kernel command-line
- agent: disconnect rpc get_oom_event when destroy_sandbox.
- docs: Update QAT instructions to work with Kata 2.0 repos
- agent: Update Cargo.lock for earlier dependency change
- osbuilder: fixing USE_DOCKER for ppc64le
- agent: Fix unused import warning in unit tests
- [forwardport] Fixup systemd cgroup handling
- runtime: Fix ordering of trace spans
- runtime: add support for readonly sandbox bindmounts
- Bump to QEMU 5.2.0 - respin
- runtime: return hypervisor Pid in TaskExit event
- agent: makefile: Add codecov target
- Bump to QEMU 5.2.0
- exec: ensure sup groups are added to agent request
- rustjail: fix the issue of home_dir function
- osbuilder: Port QAT Dockerfile to 2.0 repo
- agent-ctl: Unbreak build
- rustjail: fix the issue of bind mount device file from guest
- musl/arm64: decompression before use the tarball.
- osbuilder/arm64: build musl toolchain from source code if needed
- runtime: Fix missing 'name' field on containerd-shim-v2 logs
- agent: don't error if virtiofs share is already mounted
- shimv2: return the hypervisor's pid as the container pid
- runtime: check if error loading runtime config
- agent: fix clippy for rustc 1.5
- agent: Upgrade tokio to 1.2.0
- rustjail: fix blkio conversion
- agent: Agent invokes OCI hooks with wrong PID
- kata-deploy: stop mentioning qemu-virtiofsd, as the default qemu supports virtiofsd already (plus some cleanups)
- agent: Stop receive message from Receiver if got None
- Remove "Docker" & "[kata] runtime" references from the release scripts & process documentation
- kata-deploy: Ensure CRI-O uses shimv2 & the "vm" runtime type
- makefile: agent: Add self documented help
- runtime: connect guest debug console bypass kata-monitor
- Clean up PCI path handling
- runtime: Create tracer later in shimv2
- Agent: OCI hooks return malformed json
- osbuilder: Allow image registry to be customizable
- docs: Update licensing strategy to use kata 2.0 repository
- runtime: clh-config: add runtime hooks to the clh toml
- Fix Snap CI
- runtime: cpuset: when creating container, don't pass cpuset details
- agent: Remove bogus check from list_interfaces() unit test
- cli: Add aliases for `kata-` options
- github: Only run kata-deploy-test on pull-requests
- docs: Fix the installation directory of virtiofsd
- osbuilder: Fix USE_DOCKER on s390x
- Add katacontainers end-to-end arch image
- Build for glibc on s390x
- packaging: Fix vmlinux kernel install on s390x
- ci: Upgrade to yq 3.4.1
- kernel: Don't fail if "experimental" dir doesn't exist
- kata-deploy: Remove kata-deploy-docker.sh
- runtime: migrate from opentracing to opentelemetry
- rustjail: use rlimit crate
- rustjail: get all capabilities dynamically
- agent: README update to install protoc for ppc64le
- qemu: Add security fixes for CVE-2020-35517
- Fix lints and remove allow attributes which silence these warnings
- arm64: enable acpi for qemu/virt.
- osbuilder: Enforcing LIBC=gnu to rootfs build for ppc64le
- Fix async problems
- kata-monitor: set buildmode to exe to avoid build failing
- osbuilder: add description for how to use DISTRO variable
- kata-monitor: allow for building for alpine
- shimv2: log a warning and continue on post-stop hook failure
- kernel: Updates to kernel config for ppc64le
- agent: add secure_join to prevent softlink escape
- rustjail: fix the issue of container's cgroup root path
- osbuilder: remove traces of cmake
- versions: Update cloud-hypervisor to release v0.12.0
- clh: Use vanilla kernel.
- osbuilder: miscellaneous fixes/improvements
- branch: change 2.0-dev to main
- snap: Don't release Kata Alpha/RC in snap store
- Migrate to rtnetlink
- agent: Add underscore for constants
- github: Update ubuntu version to 20.04
- agent: implement NVDIMM/PMEM block driver
- rustjail: fix the issue of missing destroy container cgroups
- agent: switch to async runtime

9a4e866 container: on cleanup, rm container directory for mounts path
48e5e4f test: install mock hook binary before test
1d44881 uevent: Add shutdown channel for task
d8d5b4c signal: Move to a new module
011f7d7 logging: Rework for shutdown
7d5f88c agent: Enable clean shutdown
dcb39c6 main: Create logger task
2cf2897 main: Use task list for stopping tasks
039df1d main: Refactor main logic into new async function
2a648fa logging: Use guard to make threaded logging safe
38f0d8d config: Fix assert_error testing macro
3f46e63 cgroups: fix the issue of getting wrong online cpus
e349244 runtime: fix virtiofsd RO volume sharing
532ff7c runtime: update virtcontainers API documentation
6fcfea8 runtime: Fix static check errors
f3ebbb1 runtime: Fix trace span ordering
5a3ee7d snap: Use qemu.version to build snap
0f78a5d kernel: rename experimental kernel symlink.
f791052 qemu: Build experimental qemu.
1555bfd runtime: add support for QEMU 6
fc0f93a actions: enable unit tests in PR check
74192d1 runtime: fix static check errors
a2dee1f runtime: fix vm factory UT failure
076bc50 agent-ctl: update Cargo.lock
0153f76 runtime: gofmt code
60f6315 kata-deploy: Use the correct tag for 2.1-alpha1 release
b0e51e5 qemu: Improve cache build
bc587da qemu: Add suffix for qemu binaries.
5493517 qemu: add CACHE_TIMEOUT
98d01ce qemu: Apply patches for specific versions.
190f813 runtime/katautils: PFlash should be initialized
b2ec5a4 runtime: fix cleanupSandboxBindMounts panic
9b689ea runtime/cli: fix TestMainBeforeSubCommandsLoadConfigurationFail failure
8e71c4f runtime: fix missing context argument in mocked sandbox APIs
8ff62be runtime: fix vcmock build failure
0e4b28e rustjail: rework execute_hook
a09e58f packaging: Use local file for assets.
451b45f agent: Make use of test consts for error messages
8c4d334 agent: disconnect rpc get_oom_event when destroy_sandbox.
07cfa4c qemu: patches: Fail if not patches directory
e221c45 versions: Update qemu database
5abdd2a qemu: move 5.0.0 patches to its own dir.
259c179 docs: Update QAT instructions to work with Kata 2.0 repos
34e7d5e agent: Validate CID
b265870 runtime: Validate CID
12e9f7f runtime: Add missing test mock function
ea51c17 agent: Allow server address to be specified on kernel command-line
4bf84b4 runtime: Add contexts to calls in unit tests
9e4932a runtime: use root span for shimv2 tracing
6b0dc60 runtime: Fix ordering of trace spans
3a77e4e build: remove unused variables from Makefile
d7cb3df cgroups: Add systemd detection when creating cgroup manager
f659871 cgroups: remove unused SystemdCgroup variable and accessor/mutators
b0e966c agent: Fix unused import warning in unit tests
d5a9d56 agent: Update Cargo.lock for earlier dependency change
0f7950f packaging: configure QEMU with -O2
224c50f snap: Package virtiofsd and fix path
f0d4985 exec: ensure sup groups are added to agent request
b034458 runtime: return hypervisor Pid in TaskExit event
81607e3 rustjail: fix the issue of home_dir function
c258ea2 agent-ctl: Function parameter cleanup
fcd45de agent-ctl: Unbreak build
efe625d build: Remove whitespace
48ed8f3 runtime: add support for readonly sandbox bindmounts
7ae349c agent: makefile: Add codecov target
f580d33 musl/arm64: decompression before use the tarball.
2da058e osbuild: build musl toolchain from source if needed
6417067 osbuilder: Port QAT Dockerfile to 2.0 repo
85601cd snap: Update for QEMU 5.2.0
88cef33 versions: update QEMU to 5.2.0
21bdaaf runtime: Fix missing 'name' field on containerd-shim-v2 logs
74a893f packaging: Refactor version comparisons on configure-hypervisor.sh
34dc861 rustjail: fix the issue of bind mount device file from guest
0f70983 runtime: check if error loading runtime config
6f72076 agent: fix clippy for rustc 1.5
4a21472 agent: Fix test
02079db agent: upgrade tokio to 1.0
a42dc74 agent: Agent invokes OCI hooks with wrong PID
17e9a2c agent: don't error if virtiofs share is already mounted
947913f agent/protocols: Remove cargo:rerun-if-changed in build.rs
bc0ac52 shimv2: return the hypervisor's pid as the container pid
10ed3da release: Rename runtime-release-notes to release-notes
f5dab6a release: We're not compatible with Docker.
2c8ea0a kata-deploy: Add copyright to the kata-deploy's Dockerfile
4e494e3 packaging: Remove NEMU mentions
f21c54a kata-deploy: QEMU, for 2.x, already includes virtiofs
657bd78 kata-deploy: Get rid of references to the docker script
dcea086 rustjail: fix blkio conversion
bc34cbb agent: Stop receive message from Receiver if got None
01481d6 kata-deploy: Ensure CRI-O uses the VM runtime type
d1c7173 kata-deploy: Move the containerd workarounds to their own functions
5013634 kata-deploy: Stop shipping kata-{clh,fc,qemu,qemu-virtiofs} binaries
2270f19 kata-deploy: Update README to reflect the current distributed artifacts
a494c4d makefile: agent: Add self documented help
72cb928 vhost-user-blk: Use PciPath type for vhost user devices
74f5b5f runtime/block: Use PciPath type through block code
32b40f5 runtime/network: Use PciPath type through network handling
87c5823 agent/device: Add unit test for pcipath_to_sysfs()
066ce7a agent/device: Pass root bus sysfs path to pcipath_to_sysfs()
fda48a9 agent/device: Use pci::Path type, name things consistently
c12b86d agent/device: Generalize PCI path resolution to any number of bridges
3715c57 agent/device: Rename and clarify semantics of get_pci_device_address()
7e92831 protocols: Update PCI path names / terminology in agent protocol def
8e5fd8e runtime: Introduce PciSlot and PciPath types
7464d05 agent: PCI path type
b22259a agent: PCI slot type
8c2f9e6 gitignore: Ignore *~ editor backup files
b412e15 osbuilder: Port QAT Dockerfile to 2.0 repo
5096103 osbuilder: fixing USE_DOCKER for ppc64le
a44b272 runtime: Create tracer later in shimv2
49bdbac osbuilder: Allow image registry to be customizable
fdc573d docs: Update licensing strategy to use kata 2.0 repository
2e2749a runtime: clh-config: add runtime hooks to the clh toml
ef72926 ci: snap: run snap CI on every pull request
919d512 snap: fix kernel setup
d054841 ci: snap: build targets that not need sudo first
a115338 ci: snap: define proxy variables
df14d38 Agent: OCI hooks return malformed json
3721351 runtime: cpuset: when creating container, don't pass cpuset details
c9c7c12 agent: Remove bogus check from list_interfaces() unit test
cb6d2f3 osbuilder: alphabetize fields
056d742 docs: Update documentation with new prefixless config options
fdcde79 cli: use new prefixless config options in tools scripts
02ee8b0 cli: Add aliases for kata- options
c6bc43b docs: Fix broken link to fluentbit.io docs
20b27a1 docs: Fix the installation directory of virtiofsd
11fe6a3 osbuilder: Fix USE_DOCKER on s390x
10f1c30 kata-runtime: use filepath.Join() to compose file path
f4ae9c8 docs: Update Developer-Guide.md
9963428 docs: update document for using debug console
44cde6e runtime: connect guest debug console bypass kata-monitor
3406502 runtime: add jaeger configuration items
fbab262 kernel: Don't fail if "experimental" dir doesn't exist
e1dce3a rustjail: use rlimit crate
8045104 ci: Upgrade to yq 3.4.1
3d3e4dc packaging: Fix vmlinux kernel install on s390x
a252d86 rustjail: get all capabilities dynamically
62cbaf4 kata-deploy: Remove kata-deploy-docker.sh
50fea9f github: Only run kata-deploy-test on pull-requests
b548114 qemu: Add security fixes for CVE-2020-35517
11680ef agent: README update to install protoc for ppc64le
f16ab49 agent: fix non_camel_case_types lint and stop hiding the warning
8ffe4d6 agent: fix unused_parens lint and stop hiding the warning
f70ca69 agent: remove #![allow(unused_unsafe)]
e28bf7a agent: fix dead_code lint
05da23a agent: fix non_snake_case lint and remove ![allow(non_snake_case)]
afb4197 osbuilder: Build for glibc on s390x
a1cedc5 agent: Build for glibc on s390x
9f237aa docs: add katacontainers end-to-end arch image
254b98d rustjail: fix unit test test_process
b25575b agent: remove crate signal-hook which are no longer used
b1880b3 rustjail: remove unnecessary #[async_trait]
83e9414 rustjail: add unittest test_execute_hook
d204100 rustjail: close stdin in execute_hook after it was sent
bb08131 rustjail: fix fork/child in execute_hook
17df9b1 runtime: migrate from opentracing to opentelemetry
71aeb92 osbuilder: updates for feedback
8e2b19a osbuilder: add description for how to use DISTRO variable
b6c2a60 kata-monitor: set buildmode to exe to avoid build failing
9f7a7a4 osbuilder: Enforcing LIBC=gnu to rootfs build for ppc64le
a88b896 kernel: Updates to kernel config for ppc64le
b7a1f75 arm64: enable acpi for qemu/virt.
448771f rustjail: fix the issue of container's cgroup root path
fd39f0f osbuilder: Add "Agent init" on terms glossary
e111093 agent: add secure_join to prevent softlink escape
1273e48 osbuilder: Fix urls to repositories
ba9fa49 osbuilder: Use Fedora and CentOS registries
c2d14cd versions: Update cloud-hypervisor to release v0.12.0
0e57393 shimv2: log a warning and continue on post-start hook failure
e7043fe shimv2: log a warning and continue on post-stop hook failure
3718df6 osbuilder: Remove leftover pieces related to cmake
d1bf829 kernel: ACPI: Always build evged for stable kernel
6f3d591 clh: Use vanilla kernel.
fd5592d branch: change 2.0-dev to main
2b880d2 snap: Don't release Kata Alpha/RC in snap store
14a63cc agent: Add underscore for constants
fa93831 agent: Address linter and tests
96762ab agent: Remove old netlink crate
0ea8243 github: Update ubuntu version to 20.04
33367be agent: Integrate netlink
23f3aef agent: Implement new netlink module
12551de agent: implement NVDIMM/PMEM block driver
6abb1be rustjail: fix the issue of missing destroy container cgroups
fe67f57 agent: set edition = "2018" in .rustfmt.toml to fix rustfmt about async fn
df68771 agent-ctl: Update ttrpc to 0.4.14 for agent-ctl
37e285b agent: Make debug console async
f3bd439 agent: fix tests for async functions
9f79ddb agent: use tokio Notify instead of epoll to fix #1160
332fa4c agent: switch to async runtime
5561755 agent: Initial switch to async runtime
2f1cb79 kata-monitor: allow for building for alpine

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-03-30 07:36:36 +00:00
Tim Zhang
b58fb25d88 Merge pull request #1555 from liubin/fix/1554-install-hook-before-test
test: install mock hook binary before test
2021-03-30 14:01:56 +08:00
Eric Ernst
05680b86c4 Merge pull request #1537 from lifupan/main
cgroups: fix the issue of get wrong online cpus
2021-03-29 15:56:03 -07:00
Eric Ernst
460117a1a6 Merge pull request #1510 from littlejawa/issue_1003
build: remove unused variables from Makefile
2021-03-29 14:54:09 -07:00
Carlos Venegas
0b502d15b2 runtime: makefile allow override DAX value
Allow enable DAX using env variable

Fixes: #1547

Signed-off-by: Carlos Venegas <jos.c.venegas.munoz@intel.com>
2021-03-29 21:28:22 +00:00
Carlos Venegas
a65519b9d3 versions: keep using kernel 5.4.x for ARM
ARM CI fails with the new kernel. Let's use 5.4.x until
that is fixed.

Depends-on: github.com/kata-containers/tests#3363

Signed-off-by: Carlos Venegas <jos.c.venegas.munoz@intel.com>
2021-03-29 21:24:14 +00:00
Eric Ernst
24214a536a Merge pull request #1560 from egernst/fix-1559
container: on cleanup, rm container directory for mounts path
2021-03-29 14:14:52 -07:00
GabyCT
17840cb573 Merge pull request #1546 from devimc/2021-03-24/supportQEMU6
runtime: add support for QEMU 6
2021-03-29 14:33:16 -06:00
Eric Ernst
6dfe3acf93 Merge pull request #1535 from jodh-intel/agent-shutdown
agent: Enable clean shutdown
2021-03-29 12:26:01 -07:00
Eric Ernst
9a4e866654 container: on cleanup, rm container directory for mounts path
A wrong path was being used for the container directory when
virtiofs is utilized. This resulted in a warning message in the
logs when a container is killed or completes:

level=warning msg="Could not remove container share dir"

Without proper removal here, the directories would only be cleaned up
later, when the shared path is removed as part of stopping the sandbox.

Fixes: #1559

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2021-03-29 11:39:39 -07:00
Jakob Naucke
31ced01eba virtcontainers: Fix missing contexts in s390x
#1389 has added a context for many signatures to improve trace spans.
Functions specific to s390x lack this. Add context where required. This
affects some common code signatures, since some functions that do not
require context on other architectures do require it on s390x.
Also remove an unnecessary import in test_qemu_s390x.go.

Fixes: #1562

Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-03-29 17:49:27 +02:00
bin
48e5e4f2f3 test: install mock hook binary before test
`make test` depends on the mock hook binary in the virtcontainers
directory, so install it before running the tests.

Also run the tests as a normal user and as root in GitHub Actions.

Fixes: #1554

Signed-off-by: bin <bin@hyper.sh>
2021-03-29 22:40:45 +08:00
James O. D. Hunt
1d448813a1 uevent: Add shutdown channel for task
Allow the uevent task to shutdown on request.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-03-29 14:32:12 +01:00
James O. D. Hunt
d8d5b4cd1d signal: Move to a new module
Move the signal handling code into a new module and refactor into the
main handler and a new SIGCHLD handling function to make the code
simpler and easier to understand.

Also added a unit test for shutdown.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-03-29 14:32:12 +01:00
James O. D. Hunt
011f7d785a logging: Rework for shutdown
Make changes to logger thread to allow the logger to be replaced with
a NOP logger (required for agent shutdown).

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-03-29 14:32:12 +01:00
James O. D. Hunt
7d5f88c0ad agent: Enable clean shutdown
The agent doesn't normally shutdown: it doesn't need to be as it is
killed *after* the workload has finished. However, a clean and ordered
shutdown sequence is required to support agent tracing, since all trace
spans need to be completed to ensure a valid trace transaction.

Enable a controlled shutdown by allowing the main threads (tasks) to be
stopped.

To allow this to happen, each thread is now passed a shutdown channel
which it must listen to asynchronously, and shut down the thread if
activity is detected on that channel.

Since some threads are created for I/O and the standard `io::copy`
cannot be stopped, add a new `interruptable_io_copier()` function
which shares the same semantics as `io::copy()`, but is also
passed a shutdown channel so that asynchronous I/O operations can be
stopped cleanly.

Fixes: #1531.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-03-29 14:32:12 +01:00
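For illustration only, here is a minimal tokio-based sketch of a copier with the contract the message describes for `interruptable_io_copier()` (same semantics as `io::copy()`, plus a shutdown channel); the signature, the `watch` channel, and the demo `main` are assumptions, not the agent's actual code:

```rust
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};
use tokio::sync::watch;

// Copy reader -> writer like io::copy(), but stop early if the shutdown
// channel fires. Returns the number of bytes copied.
pub async fn interruptable_io_copier<R, W>(
    mut reader: R,
    mut writer: W,
    mut shutdown: watch::Receiver<bool>,
) -> std::io::Result<u64>
where
    R: AsyncRead + Unpin,
    W: AsyncWrite + Unpin,
{
    let mut total: u64 = 0;
    let mut buf = [0u8; 8192];

    loop {
        tokio::select! {
            // Shutdown requested (or the sender went away): stop copying.
            _ = shutdown.changed() => break,
            result = reader.read(&mut buf) => {
                let n = result?;
                if n == 0 {
                    break; // EOF
                }
                writer.write_all(&buf[..n]).await?;
                total += n as u64;
            }
        }
    }

    writer.flush().await?;
    Ok(total)
}

#[tokio::main]
async fn main() -> std::io::Result<()> {
    let (shutdown_tx, shutdown_rx) = watch::channel(false);
    let input: &[u8] = b"hello from the guest\n";
    // Copy finishes at EOF here; sending on shutdown_tx would stop it early.
    let copied = interruptable_io_copier(input, tokio::io::stdout(), shutdown_rx).await?;
    eprintln!("copied {} bytes", copied);
    drop(shutdown_tx);
    Ok(())
}
```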
James O. D. Hunt
dcb39c61f1 main: Create logger task
Encapsulate the logic for handling the task that displays logger output
into a new function to simplify the code and remove another anonymous
async block.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-03-29 14:32:11 +01:00
James O. D. Hunt
2cf2897d31 main: Use task list for stopping tasks
Maintain a list of tasks and wait on them all before main returns.

This is preparatory work for the agent shutdown: all tasks that are
started need to be added to the list. This aggregation makes it easier
to identify what needs to stop before the agent can exit cleanly.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-03-29 14:32:11 +01:00
James O. D. Hunt
039df1d727 main: Refactor main logic into new async function
Move most of the main logic into a separate async function. This makes
the code clearer and avoids the anonymous async block.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-03-29 14:32:11 +01:00
James O. D. Hunt
2a648fa760 logging: Use guard to make threaded logging safe
Return a guard variable from `create_logger()` which the caller can
implicitly drop to guarantee that all threads started by the async log
drain are stopped.

This fixes a long-standing bug [1] whereby the agent could panic with
the following error, generated by the `slog` logging crate:

```
slog::Fuse Drain: Custom { kind: Other, error: "serde serialization error: Bad file descriptor (os error 9)" }
```

[1] - See https://github.com/kata-containers/kata-containers/issues/171.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-03-29 14:32:11 +01:00
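The guard-on-drop idea above can be sketched generically; the real agent uses the slog crates, so the names, channel, and `main` below are illustrative only:

```rust
use std::sync::mpsc;
use std::thread;

// Guard owning the logging thread; dropping it drains and joins that thread.
pub struct LoggerGuard {
    tx: Option<mpsc::Sender<String>>,
    handle: Option<thread::JoinHandle<()>>,
}

impl LoggerGuard {
    pub fn log(&self, msg: &str) {
        if let Some(tx) = &self.tx {
            let _ = tx.send(msg.to_string());
        }
    }
}

impl Drop for LoggerGuard {
    fn drop(&mut self) {
        // Dropping the sender ends the worker's recv() loop...
        drop(self.tx.take());
        // ...and joining guarantees every queued record was written.
        if let Some(handle) = self.handle.take() {
            let _ = handle.join();
        }
    }
}

pub fn create_logger() -> LoggerGuard {
    let (tx, rx) = mpsc::channel::<String>();
    let handle = thread::spawn(move || {
        while let Ok(record) = rx.recv() {
            eprintln!("{}", record);
        }
    });
    LoggerGuard { tx: Some(tx), handle: Some(handle) }
}

fn main() {
    let guard = create_logger();
    guard.log("agent started");
    // The implicit drop of `guard` at the end of main stops the logging
    // thread before the process exits, so nothing writes to a closed fd.
}
```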
James O. D. Hunt
38f0d8d3ce config: Fix assert_error testing macro
Fixed the `assert_error!()` test macro so that it correctly handles the
scenario where the test expects an error, but the actual result was `Ok`
(no error).

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-03-29 14:32:11 +01:00
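A hedged sketch of the behaviour described (names are illustrative, not the agent's actual macro): the macro has to fail both when the error text differs and when an error was expected but the result was `Ok`:

```rust
// The macro must fail the test both when the error text differs and when an
// error was expected but the result was Ok.
macro_rules! assert_error {
    ($result:expr, $expected_msg:expr) => {
        match $result {
            Ok(_) => panic!("expected error {:?}, got Ok", $expected_msg),
            Err(e) => assert_eq!(format!("{}", e), $expected_msg),
        }
    };
}

fn might_fail(fail: bool) -> Result<(), String> {
    if fail {
        Err("boom".to_string())
    } else {
        Ok(())
    }
}

fn main() {
    assert_error!(might_fail(true), "boom");
    // assert_error!(might_fail(false), "boom"); // would now panic: "expected error ..., got Ok"
}
```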
Bin Liu
594c47ab6c Merge pull request #1553 from bergwolf/ro-volumes
runtime: fix virtiofsd RO volume sharing
2021-03-29 20:43:34 +08:00
fupan.lfp
3f46e6379d cgroups: fix the issue of getting wrong online cpus
It's better to get the online CPUs from
"/sys/devices/system/cpu/online" instead of from the
cpuset cgroup, because there can be a latency
between a CPU coming online and it appearing in the
root cpuset cgroup.

Fixes: #1536

Signed-off-by: fupan.lfp <fupan.lfp@antfin.com>
2021-03-29 15:49:15 +08:00
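As a rough sketch of reading the online CPU set straight from sysfs (not the agent's actual code; the file holds ranges such as "0-3,5"):

```rust
use std::fs;

// Parse entries such as "0-3,5,7-8" from the sysfs "online" file.
fn online_cpus() -> std::io::Result<Vec<u32>> {
    let raw = fs::read_to_string("/sys/devices/system/cpu/online")?;
    let mut cpus = Vec::new();
    for part in raw.trim().split(',') {
        match part.split_once('-') {
            Some((start, end)) => {
                if let (Ok(s), Ok(e)) = (start.parse::<u32>(), end.parse::<u32>()) {
                    cpus.extend(s..=e);
                }
            }
            None => {
                if let Ok(cpu) = part.parse::<u32>() {
                    cpus.push(cpu);
                }
            }
        }
    }
    Ok(cpus)
}

fn main() -> std::io::Result<()> {
    println!("online cpus: {:?}", online_cpus()?);
    Ok(())
}
```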
Fupan Li
6932ac6522 Merge pull request #1460 from liubin/fix/1455
runtime: update virtcontainers API documentation
2021-03-29 15:24:41 +08:00
Peng Tao
e34924488b runtime: fix virtiofsd RO volume sharing
Right now we rely heavily on mount propagation to share host
files/directories with the guest. However, because virtiofsd
pivots and moves itself to a separate mount namespace, the remount
is not visible in virtiofsd's mount namespace, and that allows the
guest to write to a host RO volume.

To fix it, create a private RO mount and then move it to the host mounts
dir so that it will be present read-only in the host-guest shared dir.

Fixes: #1552
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-03-29 13:54:25 +08:00
bin
532ff7c909 runtime: update virtcontainers API documentation
The virtcontainers API documentation is outdated; update it from the latest
source.

Fixes: #1455

Signed-off-by: bin <bin@hyper.sh>
2021-03-29 11:50:53 +08:00
Chelsea Mafrica
b9489e6c6e Merge pull request #1551 from cmaf/fix-span-ordering-static-check
runtime: Fix trace span ordering and static checks
2021-03-26 16:35:23 -07:00
Carlos Venegas
c035cdb3ef versions: kernel 5.10.x
Linux 5.10.x is the new LTS branch; move
Kata to this more recent kernel branch.

Fixes: #1288

Signed-off-by: Carlos Venegas <jos.c.venegas.munoz@intel.com>
2021-03-26 17:58:09 +00:00
Chelsea Mafrica
6fcfea8dcf runtime: Fix static check errors
Fix comment formatting and unused variable to make static checks pass.

Fixes #1550

Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2021-03-25 12:59:01 -07:00
Chelsea Mafrica
f3ebbb1f1a runtime: Fix trace span ordering
Return ctx in trace() functions to correct span ordering.

Fixes #1550

Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2021-03-25 11:43:04 -07:00
Bin Liu
5b5b5cc611 Merge pull request #1539 from bergwolf/ut
fix runtime UTs and enable static check
2021-03-25 16:29:45 +08:00
Fabiano Fidêncio
deca207608 Merge pull request #1542 from fidencio/wip/kata-deploy-use-the-correct-image-for-alpha
kata-deploy: Use the correct tag for 2.1-alpha1 release
2021-03-25 08:32:06 +01:00
Carlos Venegas
753c7270ed Merge pull request #1422 from jcvenegas/2021-02-17/experimental-virtiofs
ci: fix virtiofs-experimental build
2021-03-24 14:47:27 -06:00
Carlos Venegas
5a3ee7d7e9 snap: Use qemu.version to build snap
Use only one key to refer to the version used by Kata.

Signed-off-by: Carlos Venegas <jos.c.venegas.munoz@intel.com>
2021-03-24 17:52:52 +00:00
Carlos Venegas
0f78a5dc96 kernel: rename experimental kernel symlink.
Use the -experimental suffix instead of virtiofs.

Signed-off-by: Carlos Venegas <jos.c.venegas.munoz@intel.com>
2021-03-24 17:52:52 +00:00
Carlos Venegas
f79105231c qemu: Build experimental qemu.
Split the qemu script so that experimental qemu is built using the
same Dockerfile.

Fixes: #1421

Depends-on: github.com/kata-containers/tests#3255

Signed-off-by: Carlos Venegas <jos.c.venegas.munoz@intel.com>
2021-03-24 17:52:07 +00:00
Julio Montes
1555bfd8b5 runtime: add support for QEMU 6
Use `on` and `off` to enable or disable features;
the `no` prefix is deprecated.

fixes #1545

Signed-off-by: Julio Montes <julio.montes@intel.com>
2021-03-24 10:55:35 -06:00
James O. D. Hunt
2fc7f75724 Merge pull request #1521 from jodh-intel/verify-cid
Verify container ID
2021-03-24 13:27:58 +00:00
Peng Tao
fc0f93aef9 actions: enable unit tests in PR check
Right now we only run UTs for the agent. We need to run them for *ALL*
components.

Fixes: #1538
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-03-24 20:12:14 +08:00
Peng Tao
74192d179d runtime: fix static check errors
It turns out we have managed to break the static checker in many
different places in the absence of a static checker in GitHub Actions.
Let's fix them while enabling the static checker in GitHub Actions...

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-03-24 20:10:19 +08:00
Peng Tao
a2dee1f6a0 runtime: fix vm factory UT failure
We need to use different mocked sockets, otherwise they conflict with each
other.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-03-24 18:21:21 +08:00
Peng Tao
076bc5075f agent-ctl: update Cargo.lock
Just building would result in these diffs. Let's include them in git.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-03-24 15:03:52 +08:00
Peng Tao
0153f76b07 runtime: gofmt code
Looks like we have merged a lot of code that is not properly formatted.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-03-24 14:37:46 +08:00
Bin Liu
018454be44 Merge pull request #1534 from Tim-Zhang/rework-execute_hook
rustjail: rework execute_hook
2021-03-24 14:09:09 +08:00
Fabiano Fidêncio
60f6315b2d kata-deploy: Use the correct tag for 2.1-alpha1 release
Let's ensure we use the appropriate tag for the release, even before it
has actually been created.

Fixes: #1493

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-03-24 00:29:03 +01:00
Carlos Venegas
b0e51e59fa qemu: Improve cache build
Add arguments and files as needed; if only one of them
changes, the build will restart from that change and
not from scratch.

Signed-off-by: Carlos Venegas <jos.c.venegas.munoz@intel.com>
2021-03-23 20:52:39 +00:00
Carlos Venegas
bc587da9f5 qemu: Add suffix for qemu binaries.
To build different qemu versions with the
same qemu code, add a suffix on install.

Signed-off-by: Carlos Venegas <jos.c.venegas.munoz@intel.com>
2021-03-23 20:52:39 +00:00
Carlos Venegas
5493517b4f qemu: add CACHE_TIMEOUT
Add a docker ARG that provides a date to invalidate the cache; if the date
changes, the image will be rebuilt. This is required to keep build
dependencies current with security fixes, while still taking advantage of
the docker cache to build qemu faster.

Signed-off-by: Carlos Venegas <jos.c.venegas.munoz@intel.com>
2021-03-23 20:52:39 +00:00
Carlos Venegas
98d01ce6b9 qemu: Apply patches for specific versions.
Today we apply patches per base branch. Having
two qemu versions on a similar base version can
cause problems if one of the trees already has a patch.
If a patch is needed only for one specific tag/commit,
add the patch only to that version.

Signed-off-by: Carlos Venegas <jos.c.venegas.munoz@intel.com>
2021-03-23 20:52:39 +00:00
Tim Zhang
40861fbab5 Merge pull request #1517 from jodh-intel/agent-server-address-cmdline
agent: Allow server address to be specified on kernel command-line
2021-03-23 19:33:25 +08:00
Peng Tao
190f813427 runtime/katautils: PFlash should be initialized
newQemuHypervisorConfig() sets it to an empty slice. We have to set the
same in the test config, otherwise it is nil and reflect.DeepEqual would
fail.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-03-23 17:49:47 +08:00
Peng Tao
b2ec5a43d5 runtime: fix cleanupSandboxBindMounts panic
Found in UT:
--- FAIL: TestKataCleanupSandbox (0.00s)
panic: runtime error: invalid memory address or nil pointer dereference [recovered]
        panic: runtime error: invalid memory address or nil pointer dereference

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-03-23 16:44:47 +08:00
Peng Tao
9b689ea1d7 runtime/cli: fix TestMainBeforeSubCommandsLoadConfigurationFail failure
Now that it is `kata-config`...

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-03-23 16:40:38 +08:00
Peng Tao
8e71c4fc7a runtime: fix missing context argument in mocked sandbox APIs
Missing context.Context in several APIs.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-03-23 16:19:46 +08:00
Peng Tao
8ff62beeb4 runtime: fix vcmock build failure
github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/vcmock
virtcontainers/pkg/vcmock/container.go:19:10: cannot use c.MockSandbox
(type *Sandbox) as type virtcontainers.VCSandbox in return argument:
        *Sandbox does not implement virtcontainers.VCSandbox (missing
	GetHypervisorPid method)
github.com/kata-containers/kata-containers/src/runtime/pkg/katautils

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-03-23 15:57:07 +08:00
Tim Zhang
0e4b28e838 rustjail: rework execute_hook
Fixes: #1532

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-03-22 20:20:30 +08:00
Bin Liu
dd6da22a11 Merge pull request #1525 from Tim-Zhang/disconnect-get-oom
agent: disconnect rpc get_oom_event when destroy_sandbox.
2021-03-22 15:19:31 +08:00
Chelsea Mafrica
7da3bdc68e Merge pull request #1363 from eadamsintel/qat-docs
docs: Update QAT instructions to work with Kata 2.0 repos
2021-03-19 18:33:19 -07:00
David Gibson
89e5fa7a2f Merge pull request #1506 from dgibson/bug1505
agent: Update Cargo.lock for earlier dependency change
2021-03-20 11:34:32 +11:00
Chelsea Mafrica
8c1abc3b57 Merge pull request #1418 from Amulyam24/fix-docker-ppc64le
osbuilder: fixing USE_DOCKER for ppc64le
2021-03-19 17:22:12 -07:00
Chelsea Mafrica
f26db684c8 Merge pull request #1508 from dgibson/bug1507
agent: Fix unused import warning in unit tests
2021-03-19 14:20:00 -07:00
Chelsea Mafrica
3369fc8b4b Merge pull request #1514 from fgiudici/port_cgroup_fix
[forwardport] Fixup systemd cgroup handling
2021-03-19 14:18:03 -07:00
Chelsea Mafrica
16c6c1e272 Merge pull request #1389 from cmaf/fix-span-ordering
runtime: Fix ordering of trace spans
2021-03-19 13:07:42 -07:00
Carlos Venegas
a09e58fa80 packaging: Use local file for assets.
When Kata used multiple repositories, the versions file was
downloaded. This is not needed anymore, as the file is part
of the same repository.

Signed-off-by: Carlos Venegas <jos.c.venegas.munoz@intel.com>
2021-03-19 15:50:41 +00:00
James O. D. Hunt
451b45f9d7 agent: Make use of test consts for error messages
Make use of the `const` values for error messages that were previously
only used for the unit tests. This guarantees consistency.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-03-19 09:17:31 +00:00
Tim Zhang
8c4d3346d4 agent: disconnect rpc get_oom_event when destroy_sandbox.
Otherwise it would block the shutdown of ttrpc.

Fixes: #1524

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-03-19 16:42:37 +08:00
Carlos Venegas
07cfa4ce22 qemu: patches: Fail if no patches directory
Fail if no patches directory is found. This helps prevent
building a new qemu version with missing patches.

Signed-off-by: Carlos Venegas <jos.c.venegas.munoz@intel.com>
2021-03-18 20:07:40 +00:00
Carlos Venegas
e221c45d7a versions: Update qemu database
Only use the 'version' key to build qemu.

The version can be any valid git target:
a branch, tag, or commit.

Using different keys to build is confusing.

Signed-off-by: Carlos Venegas <jos.c.venegas.munoz@intel.com>
2021-03-18 19:37:35 +00:00
Carlos Venegas
5abdd2aaf0 qemu: move 5.0.0 patches to its own dir.
Some patches have conflicts with the old experimental kernel. Move the patches to their own version-specific directory.

Signed-off-by: Carlos Venegas <jos.c.venegas.munoz@intel.com>
2021-03-18 19:25:55 +00:00
Adams, Eric
259c179162 docs: Update QAT instructions to work with Kata 2.0 repos
This fixes the guide to work with the Kata 2.0 repos and provides more
details on how to verify with ctr and Kubernetes.

Fixes: #1362

Signed-off-by: Adams, Eric <eric.adams@intel.com>
2021-03-18 11:19:46 -07:00
James O. D. Hunt
34e7d5ed97 agent: Validate CID
Validate the container ID as we cannot / should not rely on the
container manager / runtime to do this.

Fixes: #1520.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-03-18 11:19:34 +00:00
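The exact validation rules live in the agent and runtime; purely as an illustration of the kind of check meant by "validate the container ID", a sketch might look like:

```rust
// Assumed rules for this sketch: non-empty, bounded length, and a
// restricted character set; the real checks may differ.
fn valid_container_id(id: &str) -> bool {
    const MAX_LEN: usize = 64;
    !id.is_empty()
        && id.len() <= MAX_LEN
        && id
            .chars()
            .all(|c| c.is_ascii_alphanumeric() || matches!(c, '.' | '-' | '_'))
}

fn main() {
    assert!(valid_container_id("3f8d2a_sandbox-1"));
    assert!(!valid_container_id("../etc/passwd"));
}
```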
James O. D. Hunt
b265870997 runtime: Validate CID
Validate the container ID as we cannot rely on the container manager
doing this.

Fixes: #1520.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-03-18 11:19:32 +00:00
James O. D. Hunt
12e9f7f82c runtime: Add missing test mock function
Added a missing `vcmock.Sandbox.GetHypervisorPid()` function.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-03-17 10:37:47 +00:00
James O. D. Hunt
ea51c17b64 agent: Allow server address to be specified on kernel command-line
To make debugging and testing easier, allow the ttRPC server address to
be specified via `/proc/cmdline` as `agent.server_addr=`.

Fixes: #1516.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-03-17 08:22:03 +00:00
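A minimal sketch of picking up such an override from `/proc/cmdline`, assuming the documented `agent.server_addr=` form (not the agent's actual parser):

```rust
use std::fs;

// Scan the kernel command line for "agent.server_addr=<value>".
fn server_addr_from_cmdline() -> Option<String> {
    let cmdline = fs::read_to_string("/proc/cmdline").ok()?;
    cmdline
        .split_whitespace()
        .find_map(|param| param.strip_prefix("agent.server_addr=").map(str::to_string))
}

fn main() {
    match server_addr_from_cmdline() {
        Some(addr) => println!("agent server address override: {}", addr),
        None => println!("no agent.server_addr= on the kernel command line"),
    }
}
```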
Chelsea Mafrica
4bf84b4b2f runtime: Add contexts to calls in unit tests
Modify calls in unit tests to use context since many functions were
updated to accept local context to fix trace span ordering.

Fixes #1355

Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2021-03-16 17:39:28 -07:00
Chelsea Mafrica
9e4932a6e2 runtime: use root span for shimv2 tracing
Add rootCtx to the service struct in shimv2 to use as the parent of spans
created in shimv2, for more organized trace output.

Fixes #1355

Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2021-03-16 17:39:28 -07:00
Chelsea Mafrica
6b0dc60dda runtime: Fix ordering of trace spans
A significant number of trace calls did not use a parent context that
would create proper span ordering in trace output. Add local context to
functions for use in trace calls to facilitate proper span ordering.
Additionally, change whether trace function returns context in some
functions in virtcontainers and use existing context rather than
background context in bindMount() so that span exists as a child of a
parent span.

Fixes #1355

Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2021-03-16 17:39:28 -07:00
Fabiano Fidêncio
50f317dcff Merge pull request #1463 from egernst/bindmount-infra
runtime: add support for readonly sandbox bindmounts
2021-03-16 11:34:53 +01:00
Julien Ropé
3a77e4eb8c build: remove unused variables from Makefile
Some variables are initialized in the Makefile but never used.
Remove them to clean up the Makefile.

Fixes: #1003

Signed-off-by: Julien Ropé <jrope@redhat.com>
2021-03-16 08:51:16 +01:00
Eric Ernsteernst
d7cb3df0d2 cgroups: Add systemd detection when creating cgroup manager
Look at the provided cgroup path to determine whether systemd is being
used to manage the cgroups. With this, systemd cgroups are being detected
and created appropriately for the sandbox.

Fixes: #599

Signed-off-by: Eric Ernsteernst <eric@amperecomputing.com>

(forward port of https://github.com/kata-containers/runtime/pull/2817)
Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-03-16 08:27:14 +01:00
Eric Ernsteernst
f659871f55 cgroups: remove unused SystemdCgroup variable and accessor/mutators
Since we are now detecting this, there is no longer any need to keep this state.

Signed-off-by: Eric Ernsteernst <eric@amperecomputing.com>

(forward port of https://github.com/kata-containers/runtime/pull/2817)
Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-03-16 08:26:15 +01:00
Wainer Moschetta
943a7553be Merge pull request #1496 from wainersm/qemu_5_2-respin
Bump to QEMU 5.2.0 - respin
2021-03-12 09:48:14 -03:00
David Gibson
b0e966c3bd agent: Fix unused import warning in unit tests
This unneeded import was accidentally introduced by 81607e34.

fixes #1507

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-03-12 17:25:13 +11:00
David Gibson
d5a9d56e79 agent: Update Cargo.lock for earlier dependency change
Commit 81607e34 updated src/agent/rustjail/Cargo.toml, to remove an
unneeded dependency.  That causes cargo to update src/agent/Cargo.lock
on each build.  However, the change to Cargo.lock wasn't checked in
meaning anyone working on the agent code will get bogus diffs with every
build.  Check in the missing file to fix this.

fixes #1505

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-03-12 16:51:30 +11:00
Fupan Li
62d30ca2b6 Merge pull request #1498 from liubin/fix/1497-task-exit-pid
runtime: return hypervisor Pid in TaskExit event
2021-03-11 12:58:28 +08:00
Bo Chen
396565fa56 Merge pull request #1489 from jcvenegas/2021-03-03/codecov
agent: makefile: Add codecov target
2021-03-10 11:21:33 -08:00
Wainer dos Santos Moschetta
0f7950fb2a packaging: configure QEMU with -O2
Currently configure-hypervisor.sh doesn't set any optimization flag when
compiling QEMU >= 5.2.0, since the configure script implicitly sets -O2 on Ubuntu. But
in other environments, for example CentOS 7, no optimization is set and this
results in the compiler warning:

  # warning _FORTIFY_SOURCE requires compiling with optimization (-O)

To avoid this inconsistent behavior across different build environments, let's explicitly
set the -O2 flag.

Reported-by: Eric Ernst <eric.g.ernst@gmail.com>
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-03-10 11:16:52 -03:00
Wainer dos Santos Moschetta
224c50f420 snap: Package virtiofsd and fix path
This contains two fixes for virtiofsd on snap:
 * Removed the "-/usr/libexec" so that virtiofsd is copied to prime.
 * The configuration.toml expects virtiofsd in /usr/libexec/kata-qemu, so "kata-qemu" should be passed
   to the configure_hypervisor.sh script, which will then install the executable into the right directory.

Fixes #1238
Depends-on: github.com/kata-containers/kata-containers#1349
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-03-10 11:16:52 -03:00
Wainer Moschetta
6e496941a5 Merge pull request #1349 from wainersm/qemu_5_2
Bump to QEMU 5.2.0
2021-03-10 11:14:45 -03:00
Peng Tao
e0f191fbbc Merge pull request #1501 from egernst/fix-exec-groups
exec: ensure sup groups are added to agent request
2021-03-10 10:18:24 +08:00
Eric Ernst
f0d49851db exec: ensure sup groups are added to agent request
Extra groups were not being handled when exec'ing. Ensure
that these are handled.

Before this, running a pod with:
```
 ...snippet...
 securityContext:
   fsGroup: 266
   runAsGroup: 51020
   runAsUser: 264
```

And then exec'ing would not supply the fsGroup:
```
$ kubectl exec -it kata-bb  -- sh -c id
uid=264 gid=51020
```

Fixes: #1500

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2021-03-09 17:17:55 -08:00
Fabiano Fidêncio
9e90105092 Merge pull request #1495 from lifupan/fix_homedir
rustjail: fix the issue of home_dir function
2021-03-09 14:36:45 +01:00
bin
b034458960 runtime: return hypervisor Pid in TaskExit event
Other RPC calls return Pid of hypervisor, the TaskExit should
return the same Pid.

Fixes: #1497

Signed-off-by: bin <bin@hyper.sh>
2021-03-09 17:41:44 +08:00
GabyCT
856ffb85fc Merge pull request #1420 from eadamsintel/qat-dockerfile
osbuilder: Port QAT Dockerfile to 2.0 repo
2021-03-08 11:34:28 -06:00
fupan.lfp
81607e348e rustjail: fix the issue of home_dir function
The dirs crate's home_dir function depends on
libc's getpwuid_r API, but this API cannot be
statically linked against glibc, so we need an
alternative way to get the home dir from /etc/passwd.
For more info about this glibc issue, please see:
https://sourceware.org/bugzilla/show_bug.cgi?id=19341.

This commit reads and parses "/etc/passwd" directly and
fetches the corresponding uid's home dir.

Fixes: #675

Signed-off-by: fupan.lfp <fupan.lfp@antfin.com>
2021-03-08 21:51:23 +08:00
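A sketch of the approach the message describes, parsing `/etc/passwd` directly instead of calling `getpwuid_r` (illustrative only; the field layout is name:passwd:uid:gid:gecos:home:shell):

```rust
use std::fs;

// Look up the home directory for `uid` by reading /etc/passwd directly.
fn home_dir_for_uid(uid: u32) -> Option<String> {
    let passwd = fs::read_to_string("/etc/passwd").ok()?;
    for line in passwd.lines() {
        let fields: Vec<&str> = line.split(':').collect();
        if fields.len() >= 6 && fields[2].parse::<u32>().map(|u| u == uid).unwrap_or(false) {
            return Some(fields[5].to_string());
        }
    }
    None
}

fn main() {
    // uid 0 is root; on most systems this prints Some("/root").
    println!("{:?}", home_dir_for_uid(0));
}
```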
James O. D. Hunt
83d5a49941 Merge pull request #1472 from jodh-intel/unbreak-agent-ctl
agent-ctl: Unbreak build
2021-03-08 10:32:22 +00:00
Fupan Li
f6630ddd49 Merge pull request #1478 from lifupan/fix_device
rustjail: fix the issue of bind mount device file from guest
2021-03-08 09:55:00 +08:00
James O. D. Hunt
c258ea25d4 agent-ctl: Function parameter cleanup
Remove unused function parameters from the following types:

- `AgentCmdFp`: Removed the config parameter and made
  the context parameter the first (à la golang).

- `BuiltinCmdFp`: Removed the config and options parameters.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-03-05 09:54:50 +00:00
James O. D. Hunt
fcd45def53 agent-ctl: Unbreak build
The recent switch to an async rust agent broke the `agent-ctl` tool.
However, we didn't notice because that isn't being built by the CI.

Fix the breakage by passing a ttRPC context to all ttRPC API calls and
also build the tool as part of the static checks CI.

Fixes: #1471.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-03-05 09:54:50 +00:00
James O. D. Hunt
efe625dfc1 build: Remove whitespace
Zap trailing whitespace.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-03-05 09:54:50 +00:00
Eric Ernst
48ed8f3c4a runtime: add support for readonly sandbox bindmounts
If specified, sandbox_bind_mounts identifies host paths to be
mounted (ro) into the sandbox's shared path. This is only valid
if filesystem sharing is utilized.

The provided path(s) will be bindmounted (ro) into the shared fs directory on
the host, and thus mapped into the guest. If defaults are utilized,
these mounts should be available in the guest at
`/var/run/kata-containers/shared/containers/sandbox-mounts`

These will not be exposed to the container workloads, and are only
added for potential guest-services to consume (example: expose certs
into the guest that are available on the host).

Fixes: #1464

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2021-03-04 10:04:25 -08:00
Carlos Venegas
7ae349c511 agent: makefile: Add codecov target
Add target to run codecov report locally.

Useful for identifying which lines are still missing
unit-test coverage.

Fixes: #1487

Signed-off-by: Carlos Venegas <jos.c.venegas.munoz@intel.com>
2021-03-04 16:32:52 +00:00
Fabiano Fidêncio
acc4bc57f4 Merge pull request #1491 from jongwu/musl_tar
musl/arm64: decompression before use the tarball.
2021-03-04 08:58:03 +01:00
Jianyong Wu
f580d33cc9 musl/arm64: decompression before use the tarball.
In the last fix, the decompression op was deleted by mistake; add
it back.

Fixes: #1490
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2021-03-04 10:12:29 +08:00
GabyCT
ed3cb9bfa8 Merge pull request #1482 from jongwu/musl
osbuilder/arm64: build musl toolchain from source code if needed
2021-03-03 16:49:31 -06:00
Jianyong Wu
2da058ed7b osbuild: build musl toolchain from source if needed
Currently, the musl toolchain installation on arm64 just downloads from
a website, which is unsafe if the website becomes corrupted or
unavailable. So build the musl toolchain from source if it can't be downloaded.

Fixes: #1481
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2021-03-03 17:22:39 +08:00
Eric Ernst
6e1ef809e1 Merge pull request #1480 from wainersm/fix_shim_log
runtime: Fix missing 'name' field on containerd-shim-v2 logs
2021-03-02 11:46:09 -08:00
Adams, Eric
6417067d62 osbuilder: Port QAT Dockerfile to 2.0 repo
Update the Intel QAT Dockerfile to work with the 2.0 repos, fix some
bugs with building the Debian/Ubuntu rootfs, and update to the latest QAT
driver. Also update the copyright.

Fixes: #1419

Signed-off-by: Adams, Eric <eric.adams@intel.com>
2021-03-01 17:34:04 -08:00
Wainer dos Santos Moschetta
85601cd360 snap: Update for QEMU 5.2.0
QEMU 5.2.0 needs the ninja-build package installed in the build environment.

The default-configs were copied to $QEMU_SRC/default-configs but that does not
take any effect, so they are now copied to $QEMU_SRC/default-configs/devices
and the configs for i386 were updated.

Also, some arguments passed to configure had to change, as Meson was failing
due to inconsistent paths:

  ./meson.build:1:0: ERROR: The value of the 'libdir' option is '/usr/lib/qemu' which must be a subdir of the prefix '/snap/kata-containers/current/usr'.
  Note that if you pass a relative path, it is assumed to be a subdir of prefix.

Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-03-01 16:57:58 -05:00
Wainer dos Santos Moschetta
88cef33b76 versions: update QEMU to 5.2.0
This changes the version of QEMU used in the tests and CI.

The scripts/configure-hypervisor.sh was changed so that it:
  - Passes the `--enable-virtiofsd` flag
  - Does not compile with -O3, to avoid the warning:

    Program python3 found: YES (/usr/bin/python3)
    ../meson.build:104: WARNING: Consider using the built-in optimization level instead of using "-O3".
    ../meson.build:108: WARNING: Consider using the built-in optimization level instead of using "-O3".

The qemu.blacklist file was changed so that new and unneeded firmware files are removed from the
final tarball, except for qboot.rom, which is new but kept since it can be used with the microvm
machine type (in case we want to enable microvm in the future).

The patches which are applied on QEMU sources:
 - 0001-virtiofsd-Allow-to-build-it-without-the-tools.patch
   (Build fix for Meson - allows passing `--disable-tools --enable-virtiofsd`)
 - 0002-virtiofsd-extract-lo_do_open-from-lo_open.patch
   0003-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch
   0004-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch
   0005-virtiofsd-Add-_llseek-to-the-seccomp-whitelist.patch
   0006-virtiofsd-Add-restart_syscall-to-the-seccomp-whiteli.patch
   (Security fixes for virtiofsd)
 - 0007-9p-removing-coroutines-of-9p-to-increase-the-I-O-per.patch
   (Performance improvement for 9p driver)
 - 0008-hw-s390x-fix-build-for-virtio-9p-ccw.patch
   (Build fix for virtio-9p-ccw machine type)

Fixes: #1238

Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-03-01 16:57:50 -05:00
Wainer dos Santos Moschetta
21bdaaf84f runtime: Fix missing 'name' field on containerd-shim-v2 logs
Each Kata Containers application should generate log records with a specified
structure. Currently the required 'name' field is missing from
containerd-shim-v2's logs. This changes its logger to append the application
name to each and every emitted entry.

Fixes #1479
Related-to: github.com/kata-containers/tests/issues/3260
Suggested-by: James O. D. Hunt <james.o.hunt@intel.com>
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-03-01 16:52:24 -05:00
Wainer dos Santos Moschetta
74a893f732 packaging: Refactor version comparisons on configure-hypervisor.sh
The scripts/configure-hypervisor.sh splits the QEMU and GCC versions
into major and minor versions, then uses those values in shell conditionals
to compare versions. This is error prone, so this changes the script
to use the `sort -V -C` command for version comparisons.

Fixes: #1349
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-03-01 14:46:24 -05:00
Eric Ernst
90a18e228b Merge pull request #1457 from mxpv/shared
agent: don't error if virtiofs share is already mounted
2021-03-01 11:16:18 -08:00
fupan.lfp
34dc861cde rustjail: fix the issue of bind mount device file from guest
When passing guest device files to a container, the source
file isn't a regular file, but we still need to create
a corresponding destination file to bind mount the source file
onto. Thus it's better to check whether the source file
is a directory, rather than whether it is a regular file.

Fixes: #1477

Signed-off-by: fupan.lfp <fupan.lfp@antfin.com>
2021-03-01 21:20:01 +08:00
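An illustrative sketch of the destination-creation logic described above (not the rustjail code): create a directory when the bind-mount source is a directory, otherwise create an empty file so a device node still has a valid mount target:

```rust
use std::fs;
use std::path::Path;

// Create a suitable bind-mount target: a directory for directory sources,
// an empty file for anything else (regular files, device nodes, sockets...).
fn create_mount_destination(source: &Path, dest: &Path) -> std::io::Result<()> {
    if fs::metadata(source)?.is_dir() {
        fs::create_dir_all(dest)?;
    } else {
        if let Some(parent) = dest.parent() {
            fs::create_dir_all(parent)?;
        }
        fs::File::create(dest)?;
    }
    Ok(())
}

fn main() -> std::io::Result<()> {
    // Example: /dev/null is not a regular file, yet it still gets a file target.
    create_mount_destination(Path::new("/dev/null"), Path::new("/tmp/demo-mount-target"))
}
```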
Bin Liu
61f0291d63 Merge pull request #1452 from lifupan/main
shimv2: return the hypervisor's pid as the container pid
2021-03-01 15:48:01 +08:00
Eric Ernst
ddb283dd43 Merge pull request #1475 from egernst/yiiiiikes
runtime: check if error loading runtime config
2021-02-26 15:01:22 -08:00
Eric Ernst
0f7098339b runtime: check if error loading runtime config
Looks like we inadvertently removed the check on the loadRuntimeConfig
error return value. Adding it back...

Fixes: #1474

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2021-02-26 10:51:38 -08:00
Bin Liu
7587d2a8d6 Merge pull request #1462 from Tim-Zhang/fix-clippy-for-rust1.5
agent: fix clippy for rustc 1.5
2021-02-26 15:52:03 +08:00
Fupan Li
b5282fa224 Merge pull request #1305 from Tim-Zhang/upgrade-tokio-to-1.0
agent: Upgrade tokio to 1.2.0
2021-02-26 13:33:24 +08:00
Tim Zhang
6f720761ed agent: fix clippy for rustc 1.5
Fixes: #1461

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-02-25 17:04:54 +08:00
Bin Liu
735fe3f94a Merge pull request #1444 from ManaSugi/fix-blkio-weight
rustjail: fix blkio conversion
2021-02-25 15:20:20 +08:00
Tim Zhang
4a214720e8 agent: Fix test
Struct TtrpcContext has been changed in ttrpc@0.5

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-02-25 14:38:38 +08:00
Tim Zhang
02079dbb4f agent: upgrade tokio to 1.0
Fixes: #1257

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-02-25 14:38:32 +08:00
Eric Ernst
2f591f5642 Merge pull request #1459 from mxpv/pid
agent: Agent invokes OCI hooks with wrong PID
2021-02-24 20:08:56 -08:00
Maksym Pavlenko
a42dc74898 agent: Agent invokes OCI hooks with wrong PID
The agent sends a -1 PID when invoking OCI hooks.

The OCI state struct is initialized before the PID is obtained, so this PR moves
the `oci_state` call down, right after we get the id.

Fixes: #1458

Signed-off-by: Maksym Pavlenko <pavlenko.maksym@gmail.com>
2021-02-24 18:16:17 -08:00
Fupan Li
91ad176a06 Merge pull request #1434 from fidencio/wip/kata-deploy-cleanups
kata-deploy: stop mentioning qemu-virtiofsd, as the default qemu supports virtiofsd already (plus some cleanups)
2021-02-25 09:50:10 +08:00
Maksym Pavlenko
17e9a2cff5 agent: don't error if virtiofs share is already mounted
Port kata-containers/agent#883 to the Rust Agent.

In the event that the virtiofs device is already mounted at the
requested destination, don't error out. We'll check before attempting to
mount to see if the destination is already a mount point. If so, skip
doing the mount in the agent.

This facilitates mounting the sharedfs automatically in the guest before
the agent service starts.

Signed-off-by: Eric Ernst eric.g.ernst@gmail.com

Fixes: #1398

Signed-off-by: Maksym Pavlenko <pavlenko.maksym@gmail.com>
2021-02-24 17:05:59 -08:00
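A rough sketch of the "already mounted?" check, assuming a simple scan of `/proc/mounts` (the real agent may determine this differently, and the path below is only an example):

```rust
use std::fs;

// Return true if `path` appears as a mount point in /proc/mounts.
fn is_mount_point(path: &str) -> bool {
    fs::read_to_string("/proc/mounts")
        .map(|mounts| {
            mounts
                .lines()
                .filter_map(|line| line.split_whitespace().nth(1))
                .any(|mnt| mnt == path)
        })
        .unwrap_or(false)
}

fn main() {
    let dest = "/run/kata-containers/shared/containers"; // example path only
    if is_mount_point(dest) {
        println!("{} already mounted, skipping the mount", dest);
    } else {
        println!("{} not mounted yet", dest);
    }
}
```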
Tim Zhang
947913f6e5 agent/protocols: Remove cargo:rerun-if-changed in build.rs
So that the build.rs will be re-run if any file
within the package is changed.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-02-24 21:50:47 +08:00
Tim Zhang
2c42be0ca6 Merge pull request #1447 from liubin/liubin/fix-1369
agent: Stop receive message from Receiver if got None
2021-02-24 16:48:24 +08:00
Bin Liu
974f926292 Merge pull request #1450 from fidencio/wip/release-notes-script-still-mentions-docker
Remove "Docker" & "[kata] runtime" references from the release scripts & process documentation
2021-02-24 15:59:36 +08:00
fupan.lfp
bc0ac526a2 shimv2: return the hypervisor's pid as the container pid
Kata's hypervisor process is in the network namespace,
close to the container's process, and host metrics tools
such as cadvisor can use this pid to access the network namespace
and gather network metrics. Thus this commit replaces the shim's
pid with the hypervisor's pid.

Fixes: #1451

Signed-off-by: fupan.lfp <fupan.lfp@antfin.com>
2021-02-24 13:26:05 +08:00
Fabiano Fidêncio
10ed3da4eb release: Rename runtime-release-notes to release-notes
There's no runtime repo anymore, let's avoid making a reference to it,
which may end up confusing people reading the Release-Process file.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-02-23 17:25:40 +01:00
Fabiano Fidêncio
f5dab6af2e release: We're not compatible with Docker.
We don't support Docker as part of 2.x repository.

Fixes: #1449

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-02-23 17:25:40 +01:00
Fabiano Fidêncio
2c8ea0a8d1 kata-deploy: Add copyright to the kata-deploy's Dockerfile
All the work done on this file, apart from merging the 2.x repo, and now
removing unused lines, comes from Intel.

The reason it's being added is to silence a complaint from the static
checker.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-02-23 14:25:00 +01:00
Fabiano Fidêncio
4e494e34c9 packaging: Remove NEMU mentions
NEMU has been gone for some time already. Considering this, let's
just remove any mention of it from our project.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-02-23 14:24:54 +01:00
Fabiano Fidêncio
f21c54a985 kata-deploy: QEMU, for 2.x, already includes virtiofs
There's no reason to ship qemu & qemu-virtiofs when the former already
includes virtiofs support (and that's the default for 2.x deployments).

In case we will enable experimental qemu DAX support, we should add a
new target, a "qemu-experimental" target, as Carlos has been working on.

Fixes: #1424

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-02-23 14:24:48 +01:00
Fabiano Fidêncio
657bd789a3 kata-deploy: Get rid of references to the docker script
The docker script has been removed as part of
62cbaf4de4, but references to it were left
behind in the artifact-list.sh, release/kata-deploy-binaries.sh, and
kata-deploy/Dockerfile.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-02-23 14:20:39 +01:00
Fabiano Fidêncio
81109f89bd Merge pull request #1423 from fidencio/wip/kata-deploy-crio-plus-shimv2
kata-deploy: Ensure CRI-O uses shimv2 & the "vm" runtime type
2021-02-22 20:28:30 +01:00
Manabu Sugimoto
dcea08697a rustjail: fix blkio conversion
The BFQ weight controller uses the same BFQ weight scheme (i.e. 1->1000).
Therefore, there is no need to do the conversion.

More details here: https://github.com/opencontainers/runc/pull/2786

Fixes: #1440

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2021-02-23 00:26:57 +09:00
bin
bc34cbbce5 agent: Stop receive message from Receiver if got None
If the container has exited, the sender in the notifier watching OOM events
will be dropped after the loop exits, and recv() on the corresponding
receiver will get None.

This leads to two problems for the get_oom_event rpc call from the agent:

- returning a wrong OOM event.
- continuously returning OOM events.

Fixes: #1369

Signed-off-by: bin <bin@hyper.sh>
2021-02-22 21:56:07 +08:00
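The fix boils down to treating `None` from the receiver as end-of-stream. A small tokio sketch of that pattern (illustrative, not the agent code):

```rust
use tokio::sync::mpsc;

#[tokio::main]
async fn main() {
    let (tx, mut rx) = mpsc::channel::<String>(8);

    tokio::spawn(async move {
        let _ = tx.send("oom event for container A".to_string()).await;
        // `tx` is dropped when this task ends, e.g. after the container exits.
    });

    // recv() yields None once every sender is gone; `while let Some(..)`
    // exits cleanly instead of treating None as yet another event.
    while let Some(event) = rx.recv().await {
        println!("got event: {}", event);
    }
    println!("sender dropped, no more events");
}
```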
Fabiano Fidêncio
01481d6ac0 kata-deploy: Ensure CRI-O uses the VM runtime type
For Kata Containers 2.x, CRI-O should always be using the
`containerd-shim-kata-v2` binary, and always be configured to use the
"vm" runtime type, developed specifically for the shimv2, instead of the
default "oci" runtime type.

I've taken the liberty of trying to simplify the CRI-O script and make it
less error prone.  In the future, we can start dropping a configuration
file into /etc/crio/crio.conf.d and just remove it as part of the
cleanup, but that's left for later.

Fixes: #1357

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-02-20 09:27:33 +01:00
Fabiano Fidêncio
d1c717363d kata-deploy: Move the containerd workarounds to their own functions
Factoring those pieces of code to their own functions allows us to
easily re-use them when creating & cleaning up the CRI-O configuration
files, as CRI-O is also affected by the issues that are still open.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-02-20 09:27:24 +01:00
Fabiano Fidêncio
5013634e23 kata-deploy: Stop shipping kata-{clh,fc,qemu,qemu-virtiofs} binaries
Those binaries are not relevant for 2.x deployments.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-02-20 09:24:12 +01:00
Fabiano Fidêncio
2270f19ee1 kata-deploy: Update README to reflect the current distributed artifacts
Our list was based on what we used to ship for Kata Containers 1.x, not
even taking into account the shimv2 binary.

Let's update it in order to better reflect what we currently distribute.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-02-20 09:24:12 +01:00
Bin Liu
29d4abf23a Merge pull request #1437 from jcvenegas/2021-02-18/self-documented-makefile
makefile: agent: Add self documented help
2021-02-20 10:04:30 +08:00
Carlos Venegas
a494c4de23 makefile: agent: Add self documented help
Add comments that allow variables and targets to be self-documented

Fixes: #1436

Signed-off-by: Carlos Venegas <jos.c.venegas.munoz@intel.com>
2021-02-19 15:30:42 +00:00
Eric Ernst
4f67bf919d Merge pull request #1344 from liubin/fix/1329-improvements-for-console-access
runtime: connect guest debug console bypassing kata-monitor
2021-02-18 17:59:32 -08:00
David Gibson
a060b9a21b Merge pull request #1190 from dgibson/pcipath
Clean up PCI path handling
2021-02-19 12:23:27 +11:00
David Gibson
72cb9287a0 vhost-user-blk: Use PciPath type for vhost user devices
VhostUserDeviceAttrs::PCIAddr didn't actually store a PCI address
(DDDD:BB:DD.F), but rather a PCI path.  Use the PciPath type and
rename things to make that clearer.

TestHandleBlockVolume previously used the bizarre value "0001:01"
which is neither a PCI address nor a PCI path for this value.  Change
it to a valid PCI path - it appears the actual value didn't matter for
that test, as long as it was consistent.

Forward port of
3596058c67

fixes #1040

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-02-19 09:56:08 +11:00
David Gibson
74f5b5febe runtime/block: Use PciPath type through block code
BlockDrive::PCIAddr doesn't actually store a PCI address
(DDDD:BB:DD.F) but a PCI path.  Use the PciPath type and rename things
to make that clearer.

TestHandleBlockVolume() previously used a bizarre value "0002:01" for
the "PCI address" which was neither an actual PCI address, nor a PCI
path.  Update it to use a PCI path - the actual value appears not to
matter in this test, as long as its consistent throughout.

Forward port of
64751f377b

fixes #1040

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-02-19 09:56:08 +11:00
David Gibson
32b40f5fe4 runtime/network: Use PciPath type through network handling
The "PCI address" returned by Endpoint::PciPath() isn't actually a PCI
address (DDDD:BB:DD.F), but rather a PCI path.  Rename and use the
PciPath type to clean this up, along with the various parts of the
network code connected to it.

Forward port of
3e589713cf

fixes #1040

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-02-19 09:56:08 +11:00
David Gibson
87c5823c4b agent/device: Add unit test for pcipath_to_sysfs()
Port this test from the Kata 1 Go agent to the Kata 2 Rust agent.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-02-19 09:56:08 +11:00
David Gibson
066ce7ab51 agent/device: Pass root bus sysfs path to pcipath_to_sysfs()
Currently pcipath_to_sysfs() generates the path to the root bus node in
sysfs via create_pci_root_bus_path().  This is inconvenient for testing,
though, so instead make it take this as a parameter and generate the path
in the (single) caller.  As a bonus this will make life a bit easier when
we want to support machines with multiple PCI roots.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-02-19 09:56:08 +11:00
David Gibson
fda48a9bf0 agent/device: Use pci::Path type, name things consistently
pcipath_to_sysfs takes a PCI path, with a particular format.  A number of
places implicitly need strings in that format, and many of them repeat the
description.  To make things safer and briefer, use the pci::Path type for
the purpose more widely, and just describe the string formatting of it at
the type definition.

Then, update variable names and comments throughout to call things in
this format "PCI path", rather than "PCI identifier", which is vague,
or "PCI address" which is just plain wrong.  Likewise we change names and
comments which incorrectly refer to sysfs paths as a "PCI address".

This changes the grpc proto definitions, but because it's just
changing the name of a field without changing the field number, it
shouldn't change the actual protocol.

A loose forward port of
da4bc1d184

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-02-19 09:56:08 +11:00
David Gibson
c12b86dc82 agent/device: Generalize PCI path resolution to any number of bridges
Currently pcipath_to_sysfs(), which translates PCI paths into sysfs paths,
accepts only PCI paths with exactly 2 components, which represent PCI
devices separated from the root bus by exactly one PCI to PCI bridge (which
could be a virtual P2P bridge, such as a PCI-E root port).

There are cases we might reasonably want to support which have devices
either plugged directly into the root bus (zero bridges), or under
multiple layers of P2P bridge (a PCI-E switch would require at least 2
layers).

So, generalize pcipath_to_sysfs to support any number of components in the
PCI path.  We also make it use the new type for PCI paths internally rather
than plain strings.

This is a loose forward port of
9804b1e55d

fixes #1040

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-02-19 09:56:08 +11:00
David Gibson
3715c5775f agent/device: Rename and clarify semantics of get_pci_device_address()
get_pci_device_address() has pretty confusing semantics.  Both its input
and output are in other parts of the code described as a "PCI address", but
neither is *actually* a PCI address (in the standard DDDD:BB:DD.F format).

What it's really about is resolving a "PCI path" - that is way to locate a
PCI device by using it's slot number and the slot number of the bridge
leading to it - into a sysfs path.

Rename the function, and change a bunch of variable names to make those
semantics clearer.

Forward port of
https://github.com/kata-containers/agent/pull/855/commits/0eb612f06484

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-02-19 09:56:08 +11:00
David Gibson
7e92831c7a protocols: Update PCI path names / terminology in agent protocol def
Now that we have types to represent PCI paths on both the agent and
runtime sides, we can update the protocol definition to use clearer
terminology.

Note that this doesn't actually change the agent protocol, because it just
renames a field without changing its field ID or type.

While we're at it, fix a trivial rustfmt error in
src/agent/protocols/build.rs

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-02-19 09:56:08 +11:00
David Gibson
8e5fd8ee84 runtime: Introduce PciSlot and PciPath types
This is a dedicated data type for representing PCI paths, that is, PCI
devices described by the slot numbers of the bridges we need to reach
them.

There are a number of places that use strings with that structure for
things.  The plan is to use this data type to consolidate their
handling.  These are essentially Go equivalents of the pci::Slot and
pci::Path types introduced in the Rust agent.

Forward port of
185b3ab044

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-02-19 09:56:05 +11:00
David Gibson
7464d055a7 agent: PCI path type
Introduce a Rust type to represent a "PCI path" - that is a way of
locating a PCI device from a given root by listing the slots of all
the bridges leading to it and finally the slot of the device itself.

It's implemented as a vector of the previously added pci::Slot type,
and includes the necessary validation and conversions to/from strings.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-02-19 09:54:53 +11:00
David Gibson
b22259ad9b agent: PCI slot type
Add a Rust type for representing a PCI slot on a single bus.  This is
essentially just an integer from 0..31 (inclusive), but includes the
code for converting from integers with appropriate validation and for
formatting back to a string.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-02-19 09:54:53 +11:00
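
The two agent commits above introduce pci::Slot and pci::Path but only
describe them in prose. Below is a minimal illustrative Rust sketch of the
idea, not the actual agent code: the 0..=31 slot range comes from the commit
text, while the two-hex-digit slot formatting and the "/"-separated string
form are assumptions and may differ from the real types.

```rust
use std::fmt;
use std::str::FromStr;

/// A PCI slot on a single bus: an integer in 0..=31 (illustrative sketch).
#[derive(Clone, Copy, Debug, PartialEq)]
struct Slot(u8);

impl Slot {
    fn new(v: u8) -> Result<Self, String> {
        if v <= 31 {
            Ok(Slot(v))
        } else {
            Err(format!("PCI slot {} out of range (0-31)", v))
        }
    }
}

impl fmt::Display for Slot {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{:02x}", self.0) // two hex digits, e.g. "02" (assumed form)
    }
}

/// A PCI path: the slots of the bridges leading to a device, then the
/// device's own slot, e.g. "02/03" (assumed "/"-separated hex form).
#[derive(Clone, Debug, PartialEq)]
struct PciPath(Vec<Slot>);

impl FromStr for PciPath {
    type Err = String;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let slots = s
            .split('/')
            .map(|tok| {
                u8::from_str_radix(tok, 16)
                    .map_err(|e| format!("bad slot {:?}: {}", tok, e))
                    .and_then(Slot::new)
            })
            .collect::<Result<Vec<_>, _>>()?;
        if slots.is_empty() {
            return Err("empty PCI path".to_string());
        }
        Ok(PciPath(slots))
    }
}

fn main() {
    let path: PciPath = "02/03".parse().unwrap();
    println!("{} slots, device slot {}", path.0.len(), path.0.last().unwrap());
}
```

With types like these, code that resolves devices can take a typed path and
validate the string form once, instead of re-checking ad-hoc strings at
every call site.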
David Gibson
8c2f9e6949 gitignore: Ignore *~ editor backup files
We ignore some other formats for backup files, but add this one, used by
emacs.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-02-19 09:54:53 +11:00
Adams, Eric
b412e159f9 osbuilder: Port QAT Dockerfile to 2.0 repo
Update the Intel QAT Dockerfile to work with the 2.0 repos, fix some
bugs with building the Debian/Ubuntu rootfs, and update to the latest
QAT driver. Also update the copyright.

Fixes: #1419

Signed-off-by: Adams, Eric <eric.adams@intel.com>
2021-02-18 12:46:49 -08:00
James O. D. Hunt
a4e367506d Merge pull request #1428 from cmaf/fix-shimv2-configpath
runtime: Create tracer later in shimv2
2021-02-18 16:04:36 +00:00
Amulya Meka
5096103e7e osbuilder: fixing USE_DOCKER for ppc64le
For building the rootfs with docker, the glibc-based Rust target should be installed on ppc64le.
Additionally, protobuf-compiler is required on ppc64le as it is not present by default.

Fixes: #1417

Signed-off-by: Amulya Meka <amulmek1@in.ibm.com>
2021-02-18 05:33:51 +00:00
Chelsea Mafrica
a44b27291c runtime: Create tracer later in shimv2
Remove the loading of configuration from New(), because we do not know the
correct configuration file for the runtime until Create(), and so that it
is not loaded more than once. Start the tracer in create() so that it is
created after the runtime config is loaded in its original location.

Fixes #1411

Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2021-02-17 19:45:48 -08:00
Fupan Li
6eba265734 Merge pull request #1407 from mxpv/json
Agent: OCI hooks return malformed json
2021-02-18 11:36:49 +08:00
Eric Ernst
0a9cc357c6 Merge pull request #1394 from egernst/custom-registry
osbuilder: Allow image registry to be customizable
2021-02-17 17:48:02 -08:00
Eric Ernst
49bdbac606 osbuilder: Allow image registry to be customizable
Give the user a chance to specify their own registry in the event the
provided default is not accessible or desirable.

Fixes: #1393

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2021-02-17 16:49:58 -08:00
GabyCT
be83b739df Merge pull request #1414 from GabyCT/topic/updatelicensing
docs: Update licensing strategy to use kata 2.0 repository
2021-02-17 14:50:58 -06:00
Eric Ernst
78a5958a81 Merge pull request #1402 from egernst/clh-hooks
runtime: clh-config: add runtime hooks to the clh toml
2021-02-17 12:49:58 -08:00
Gabriela Cervantes
fdc573d500 docs: Update licensing strategy to use kata 2.0 repository
This PR updates the licensing strategy document to use the proper
tests repository for kata 2.0

Fixes #1413

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2021-02-15 11:10:01 -06:00
Eric Ernst
2e2749ad3f runtime: clh-config: add runtime hooks to the clh toml
Today hooks are only described in the QEMU toml. This shouldn't be VMM
specific -- let's make sure these are advertised for Cloud Hypervisor as
well.

Fixes: #1401

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2021-02-14 20:26:02 -08:00
Julio Montes
e830192fca Merge pull request #1387 from devimc/2021-02-09/FixSnapCI
Fix Snap CI
2021-02-12 10:27:28 -06:00
Julio Montes
ef72926beb ci: snap: run snap CI on every pull request
To make sure a pull request doesn't break the snap packages,
run the snap CI on every pull request.

Signed-off-by: Julio Montes <julio.montes@intel.com>
2021-02-12 09:18:45 -06:00
Julio Montes
919d51274d snap: fix kernel setup
kernel setup fails when `yes "n"` is used and `make oldconfig` doesn't
read anything from STDIN, `yes "n"` was added in the past as a
workaround to fix incomplete kernel configs.
Enable `build-kernel.sh` debug.

Signed-off-by: Julio Montes <julio.montes@intel.com>
2021-02-12 09:16:09 -06:00
Julio Montes
d054841430 ci: snap: build targets that don't need sudo first
`sudo` is required to build the image; once the image has been built
the permissions of some directories may change, so let's build the
targets that don't need `sudo` first.

Signed-off-by: Julio Montes <julio.montes@intel.com>
2021-02-12 09:05:42 -06:00
Julio Montes
a115338ddd ci: snap: define proxy variables
Define proxy variables before using them to fix the `unbound variable`
error.

fixes #1386

Signed-off-by: Julio Montes <julio.montes@intel.com>
2021-02-12 09:05:42 -06:00
Bo Chen
14bb24e4ca Merge pull request #1406 from egernst/fix-ctr-cpuset
runtime: cpuset: when creating container, don't pass cpuset details
2021-02-11 22:31:44 -08:00
Maksym Pavlenko
df14d386a5 Agent: OCI hooks return malformed json
This PR fixes wrong serialization of the OCI state object.
OCI hooks ended up with a JSON string with double quotes in the `state` field.

This happens because of confusion between the `Debug` and `Display` traits:
the `Debug` trait returns a string representation with double quotes.

Ideally we should not use `Debug` as part of the serialization process, so a
safer fix would be to move container states to the `oci` crate and simply
disallow wrong values in that field.

`ContainerState` in go spec: https://github.com/opencontainers/runtime-spec/blob/master/specs-go/state.go#L4

Fixes: #1404

Signed-off-by: Maksym Pavlenko <pavlenko.maksym@gmail.com>
2021-02-11 19:02:41 -08:00
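
To make the Debug/Display confusion described above concrete, here is a tiny
self-contained Rust example (not the agent's code); it only illustrates how
formatting a string with `{:?}` injects the extra quotes that ended up inside
the hooks' `state` JSON.

```rust
fn main() {
    let status = String::from("running");

    // Display ({}) prints the value as-is.
    let display = format!("{}", status); // running
    // Debug ({:?}) prints a *representation*, including surrounding quotes.
    let debug = format!("{:?}", status); // "running"

    // If the Debug form is pasted into a JSON document, the field value
    // carries an extra pair of quotes and the hook receives malformed JSON.
    let good = format!("{{\"status\": \"{}\"}}", display); // {"status": "running"}
    let bad = format!("{{\"status\": \"{}\"}}", debug);    // {"status": ""running""}

    println!("{}\n{}", good, bad);
    assert_ne!(good, bad);
}
```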
Eric Ernst
3721351324 runtime: cpuset: when creating container, don't pass cpuset details
Today we only clear out the cpuset details when doing an update call on
existing container/pods. This works in the case of Kubernetes, but not
in the case where we are explicitly setting the cpuset details at boot
time. For example, if you are running a single container via docker, e.g.:

docker run --cpuset-cpus 0-3 -it alpine sh

What would happen is that the cpuset info would be passed in with the
container spec in the create container request to the agent. At that point
in time, there'd only be the default number of CPUs available in the
guest (1), so you'd be left with cpusets set to 0. Next, we'd hotplug
the vCPUs, providing CPUs 0-4 in the guest, but the cpuset would never
be updated, leaving the application tied to CPU 0.

Ouch.

Until the day we support cpusets in the guest, let's make sure that we
start off clearing the cpuset fields.

Fixes: #1405

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2021-02-11 17:38:15 -08:00
Fabiano Fidêncio
96196e102e Merge pull request #1396 from dgibson/pointtopoint
agent: Remove bogus check from list_interfaces() unit test
2021-02-11 09:06:01 +01:00
David Gibson
c9c7c12440 agent: Remove bogus check from list_interfaces() unit test
The unit test for list_interfaces() checks that the hardware address
returned for each interface has non-zero length.  However, that need not be
the case.  Point-to-point devices, such as ppp, or tun devices in certain
configurations may not have a hardware address, which is represented as
a zero length address here.

This happens on my machine with a tun0 device created by OpenVPN.

fixes #1377

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-02-11 15:09:10 +11:00
Eric Ernst
cb6d2f3c40 osbuilder: alphabetize fields
Let's go ahead and list the usage info / fields in alphabetical order!

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2021-02-10 12:39:10 -08:00
Fabiano Fidêncio
e2c8c7e603 Merge pull request #1031 from knittl/feature/kata-option-aliases
cli: Add aliases for `kata-` options
2021-02-10 16:22:13 +01:00
Fabiano Fidêncio
2009ef4872 Merge pull request #1364 from fidencio/wip/only-run-kata-deploy-test-on-pull-requests
github: Only run kata-deploy-test on pull-requests
2021-02-10 13:59:30 +01:00
Daniel Knittl-Frank
056d742c17 docs: Update documentation with new prefixless config options
Remove the old config options from the documentation and replace them
with the new form (without the redundant `kata-` prefix).

Signed-off-by: Daniel Knittl-Frank <knittl89+git@googlemail.com>
2021-02-10 07:55:18 +01:00
Daniel Knittl-Frank
fdcde7968a cli: use new prefixless config options in tools scripts
Update all tools/packaging scripts to prefer the new options over the
old ones (e.g. `--config` instead of `--kata-config`).

Signed-off-by: Daniel Knittl-Frank <knittl89+git@googlemail.com>
2021-02-10 07:55:18 +01:00
Daniel Knittl-Frank
02ee8b0b8a cli: Add aliases for kata- options
Remove `kata-` prefix from options `kata-config` and
`kata-show-default-config-paths`.

Fixes #1011

Signed-off-by: Daniel Knittl-Frank <knittl89+git@googlemail.com>
2021-02-10 07:55:18 +01:00
Daniel Knittl-Frank
c6bc43b697 docs: Fix broken link to fluentbit.io docs
Fix link to external website in fluentd how-to.

Signed-off-by: Daniel Knittl-Frank <knittl89+git@googlemail.com>
2021-02-10 07:55:18 +01:00
Julio Montes
61b448c7fb Merge pull request #1385 from ManaSugi/change_virtiofsd_dir
docs: Fix the installation directory of virtiofsd
2021-02-09 10:49:33 -06:00
Manabu Sugimoto
20b27a16c9 docs: Fix the installation directory of virtiofsd
Change the installation directory of virtiofsd to kata-qemu.

Fixes: #1379

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2021-02-09 22:47:27 +09:00
Fabiano Fidêncio
f4fe97411e Merge pull request #1267 from Jakob-Naucke/s390x-fix-docker-rootfs-build
osbuilder: Fix USE_DOCKER on s390x
2021-02-09 14:13:35 +01:00
Fabiano Fidêncio
8c1e0d3002 kernel: Enable OVERLAY_FS_{METACOPY,XINO_AUTO}
* CONFIG_OVERLAY_FS_METACOPY is needed to have reasonable performance
  for chmod and similar calls;
* CONFIG_OVERLAY_FS_XINO_AUTO is recommended for POSIX compliance.

Fixes: #1075

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-02-09 13:01:01 +01:00
Jakob-Naucke
11fe6a3552 osbuilder: Fix USE_DOCKER on s390x
- Install the required protobuf-compiler on Ubuntu
- Install correct libc Rust target (glibc on s390x)
- Do not skip Rust installation on s390x

Fixes: #1266

Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-02-09 12:37:24 +01:00
bin
10f1c30f3d kata-runtime: use filepath.Join() to compose file path
Using filepath.Join() is safer than formatting the path string directly.

Signed-off-by: bin <bin@hyper.sh>
2021-02-09 19:37:20 +08:00
Bin Liu
f4ae9c8476 docs: Update Developer-Guide.md
Add a description of the difference between namespaces in containerd and Kubernetes.

Co-authored-by: Eric Ernst <eric.g.ernst@gmail.com>
Signed-off-by: bin <bin@hyper.sh>
2021-02-09 19:37:14 +08:00
bin
9963428a4d docs: update document for using debug console
Drop the instructions that use `kata-monitor` in favor of `kata-runtime exec`.

Fixes: #1329

Signed-off-by: bin <bin@hyper.sh>
2021-02-09 19:37:06 +08:00
bin
44cde6e464 runtime: connect guest debug console bypassing kata-monitor
Parse the agent socket address by conversation to improve the usability
of the guest debug console.

Fixes: #1329

Signed-off-by: bin <bin@hyper.sh>
2021-02-09 19:36:48 +08:00
Fabiano Fidêncio
d4c506f270 Merge pull request #1184 from zanetworker/add-e2e-arch-image
Add katacontainers end-to-end arch image
2021-02-09 12:26:23 +01:00
Fabiano Fidêncio
6dbc648c54 Merge pull request #1263 from Jakob-Naucke/s390x-glibc-agent
Build for glibc on s390x
2021-02-09 12:26:02 +01:00
Fabiano Fidêncio
9e2ac11086 Merge pull request #1265 from Jakob-Naucke/s390x-fix-vmlinux
packaging: Fix vmlinux kernel install on s390x
2021-02-09 11:24:04 +01:00
Fabiano Fidêncio
d6682e3168 Merge pull request #1261 from Jakob-Naucke/update-yq
ci: Upgrade to yq 3.4.1
2021-02-09 10:21:14 +01:00
Fabiano Fidêncio
548c459066 Merge pull request #1375 from fidencio/wip/fix-build-kernel
kernel: Don't fail if "experimental" dir doesn't exist
2021-02-09 10:05:54 +01:00
Bin Liu
4e6a39cd25 Merge pull request #1366 from fidencio/wip/kata-deploy-remove-mention-to-docker
kata-deploy: Remove kata-deploy-docker.sh
2021-02-09 16:11:44 +08:00
Fupan Li
5d1432210c Merge pull request #1352 from liubin/fix/migrate-opentracing-to-opentelemetry
runtime: migrate from opentracing to opentelemetry
2021-02-09 10:18:10 +08:00
bin
3406502706 runtime: add jaeger configuration items
Add configuration items to the Kata Containers
configuration file to let users specify the Jaeger
collector address and user/password.

Signed-off-by: bin <bin@hyper.sh>
2021-02-09 08:02:05 +08:00
Fabiano Fidêncio
fbab262f2d kernel: Don't fail if "experimental" dir doesn't exist
This directory has been automatically removed as there are no files inside
it, as part of d3c9862059

Let's improve the logic in the scripts to avoid failing in case the
"experimental" dir is not present.

Fixes: #1328

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-02-08 21:52:53 +01:00
Fupan Li
f3e9d4e7e3 Merge pull request #1373 from ManaSugi/use-rlimit-crate
rustjail: use rlimit crate
2021-02-08 23:15:37 +08:00
Manabu Sugimoto
e1dce3a369 rustjail: use rlimit crate
The current implementation of rustjail calls setrlimit directly.
This patch uses the rlimit crate for maintainability.

Fixes: #1372

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2021-02-08 18:43:56 +09:00
Jakob Naucke
8045104eaf ci: Upgrade to yq 3.4.1
Since the resolution of https://github.com/mikefarah/yq/issues/502,
the `yq` binary is no longer broken on s390x. This is an upgrade to
the latest v3 version of yq (v4 has new syntax).

Fixes: #1260

Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-02-08 09:29:57 +01:00
Jakob-Naucke
3d3e4dc147 packaging: Fix vmlinux kernel install on s390x
Installing the built uncompressed vmlinux kernel will not work on s390x;
QEMU will complain:
Linux kernel boot failure: An attempt to boot a vmlinux ELF image
failed.
This image does not contain all parts necessary for starting up. Use
bzImage or arch/s390/boot/compressed/vmlinux instead.
Hence, use that kernel image on s390x.

Fixes: #1264

Signed-off-by: Jakob-Naucke <jakob.naucke@ibm.com>
2021-02-08 09:25:26 +01:00
Fupan Li
d54c702539 Merge pull request #1368 from ManaSugi/get_caps_dyn
rustjail: get all capabilities dynamically
2021-02-08 16:00:51 +08:00
Manabu Sugimoto
a252d861e3 rustjail: get all capabilities dynamically
Determine the kernel capability set dynamically at runtime.

Fixes: #1370

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2021-02-07 16:39:14 +09:00
Fabiano Fidêncio
d4391d784d Merge pull request #1334 from clnperez/ppc64le-protoc
agent: README update to install protoc for ppc64le
2021-02-06 01:08:42 +01:00
Fabiano Fidêncio
7b5e56b274 Merge pull request #1360 from fidencio/wip/qemu-virtiofs-security-fixes
qemu: Add security fixes for CVE-2020-35517
2021-02-05 21:37:46 +01:00
Chelsea Mafrica
a12772c601 Merge pull request #1358 from Tim-Zhang/remove-allow
Fix lints and remove allow attributes which silence these warnings
2021-02-05 12:17:29 -08:00
Fabiano Fidêncio
62cbaf4de4 kata-deploy: Remove kata-deploy-docker.sh
Kata Containers 2.x is not supported outside of the Kubernetes world.
With this in mind, let's remove leftovers from the 1.x deployments &
documentation.

Fixes: #1356

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-02-05 15:18:43 +01:00
Fabiano Fidêncio
50fea9fa6f github: Only run kata-deploy-test on pull-requests
We're currently running kata-deploy-test for every issue opened, for
every comment in the issue.  Issues, themselves, shouldn't be triggering
those as they can't cause any code change.

With this in mind, let's restrict ourselves to running those on
pull requests only.

Fixes: #1341

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-02-05 11:00:32 +01:00
Fabiano Fidêncio
b548114f59 qemu: Add security fixes for CVE-2020-35517
This series is based on
https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg01787.html, and
was kindly brought up by David Gilbert.

Fixes: #1361

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-02-04 22:26:20 +01:00
Christy Norman
11680efe4e agent: README update to install protoc for ppc64le
Add a bit to the agent README about installing protoc manually for Power (ppc64le)

Fixes: #1068

Signed-off-by: Christy Norman <christy@linux.vnet.ibm.com>
2021-02-04 17:03:31 +00:00
Tim Zhang
f16ab49b5b agent: fix non_camel_case_types lint and stop hiding the warning
Fixes: #1359

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-02-04 21:36:21 +08:00
Tim Zhang
8ffe4d6748 agent: fix unused_parens lint and stop hiding the warning
Fixes: #1359

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-02-04 21:24:04 +08:00
Tim Zhang
f70ca69d0d agent: remove #![allow(unused_unsafe)]
Fixes: #1359

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-02-04 21:21:48 +08:00
Tim Zhang
e28bf7a59d agent: fix dead_code lint
Fixes: #1359

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-02-04 21:18:45 +08:00
Tim Zhang
05da23acb7 agent: fix non_snake_case lint and remove ![allow(non_snake_case)]
Fixes: #1359

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-02-04 21:18:37 +08:00
Jakob Naucke
afb4197813 osbuilder: Build for glibc on s390x
Since there is no Rust target for musl on s390x, builds on s390x should use
glibc. This commit removes the "Cannot build Rust agent on s390x" restriction
in rootfs.sh and only installs musl in the build environment when it is
required.

Fixes: #1262

Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-02-04 09:52:28 +01:00
Jakob Naucke
a1cedc567a agent: Build for glibc on s390x
Since there is no Rust target for musl on s390x, builds on s390x should
use glibc. This commit makes glibc the default on s390x as per the agent
Makefile.

Fixes: #1262

Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-02-04 09:51:23 +01:00
Adel Zaalouk
9f237aab54 docs: add katacontainers end-to-end arch image
Added a Kata Containers e2e flow figure; the figure can be used in many places or simply as a reference.
It contains pieces from Kata 1.x and Kata 2.0 (e.g., kata-monitor).

The figures are produced via excalidraw.com; the commit also includes the
source .excalidraw file, which can be used to modify the figure if needed.

Fixes #1185

Signed-off-by: Adel Zaalouk <azaalouk@redhat.com>
2021-02-04 08:51:36 +01:00
Chelsea Mafrica
38b5a43267 Merge pull request #1318 from jongwu/acpi
arm64: enable acpi for qemu/virt.
2021-02-03 16:37:49 -08:00
Chelsea Mafrica
d4bc5952b1 Merge pull request #1340 from jing-wang4/rootfs-ppc64le
osbuilder: Enforcing LIBC=gnu to rootfs build for ppc64le
2021-02-03 16:06:05 -08:00
Eric Ernst
a1361608a9 Merge pull request #1353 from Tim-Zhang/fix-async
Fix async problems
2021-02-03 14:49:52 -08:00
Chelsea Mafrica
dbfcd4a679 Merge pull request #1345 from liubin/fix/1343-kata-monitor-build-failed
kata-monitor: set buildmode to exe to avoid build failing
2021-02-03 14:40:12 -08:00
Tim Zhang
254b98dd2f rustjail: fix unit test test_process
test_process has an assertion that waitpid(-1) will fail
because there is no child process in most cases.

But if there is any child process forked by another unit test,
the test test_process will fail, because waitpid(-1) will wait
for any child process, including processes created by other
unit tests.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-02-03 22:27:50 +08:00
Tim Zhang
b25575b430 agent: remove the signal-hook crate, which is no longer used
It has been replaced by tokio::signal.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-02-03 18:30:18 +08:00
Tim Zhang
b1880b3e80 rustjail: remove unnecessary #[async_trait]
Remove unnecessary #[async_trait]

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-02-03 18:30:15 +08:00
Tim Zhang
83e9414f4f rustjail: add unittest test_execute_hook
use xargs to test execute_hook.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-02-03 18:30:15 +08:00
Tim Zhang
d2041001ed rustjail: close stdin in execute_hook after it was sent
So that the hook program can receive EOF.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-02-03 18:30:15 +08:00
Tim Zhang
bb08131151 rustjail: fix fork/child in execute_hook
Tokio in a forked child does not work well, as it easily deadlocks.
https://github.com/tokio-rs/tokio/issues/1541

Fixes: #1348

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-02-03 18:30:11 +08:00
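
A rough sketch of the pattern the three rustjail commits above describe, not
the agent's actual execute_hook: spawn the hook with tokio's process support
rather than fork(), write the OCI state to its stdin, drop the handle so the
hook sees EOF, and bound the wait with a timeout. The helper name, arguments,
the xargs path and the 10-second timeout are illustrative, and tokio is
assumed to be built with its process/io/time/macros features.

```rust
use std::process::Stdio;
use std::time::Duration;

use tokio::io::AsyncWriteExt;
use tokio::process::Command;
use tokio::time::timeout;

// Illustrative only: run a hook binary, feed it the OCI state on stdin,
// close stdin so the hook gets EOF, and give it 10 seconds to finish.
async fn run_hook(path: &str, args: &[&str], state_json: &str) -> Result<(), String> {
    let mut child = Command::new(path)
        .args(args)
        .stdin(Stdio::piped())
        .spawn()
        .map_err(|e| format!("spawn {}: {}", path, e))?;

    // Write the state, then drop the handle: dropping closes the pipe,
    // which is what lets the hook read to EOF and exit.
    if let Some(mut stdin) = child.stdin.take() {
        stdin
            .write_all(state_json.as_bytes())
            .await
            .map_err(|e| format!("write state: {}", e))?;
        drop(stdin);
    }

    // Avoid hanging forever if the hook misbehaves.
    let status = timeout(Duration::from_secs(10), child.wait())
        .await
        .map_err(|_| "hook timed out".to_string())?
        .map_err(|e| format!("wait: {}", e))?;

    if status.success() {
        Ok(())
    } else {
        Err(format!("hook exited with {}", status))
    }
}

#[tokio::main]
async fn main() {
    // Mirrors the unit-test idea above: xargs simply echoes what it reads.
    // The xargs path is an assumption and may differ per distribution.
    run_hook("/usr/bin/xargs", &["echo"], "{\"id\":\"container\"}")
        .await
        .unwrap();
}
```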
bin
17df9b119d runtime: migrate from opentracing to opentelemetry
This commit includes two changes:
- migrate from opentracing to opentelemetry
- add jaeger configuration items

Fixes: #1351

Signed-off-by: bin <bin@hyper.sh>
2021-02-03 17:30:49 +08:00
Jing Wang
71aeb920aa osbuilder: updates for feedback
Updates for review feedback.

Signed-off-by: Jing Wang <jing.wang4@ibm.com>
2021-02-02 14:41:23 +00:00
Tim Zhang
9a02c81eb9 Merge pull request #1313 from liubin/doc/add-distro-desc
osbuilder: add description for how to use DISTRO variable
2021-02-02 15:19:57 +08:00
bin
8e2b19ac77 osbuilder: add description for how to use DISTRO variable
Fixes: #1312

Signed-off-by: bin <bin@hyper.sh>
2021-02-02 14:52:09 +08:00
bin
b6c2a60509 kata-monitor: set buildmode to exe to avoid build failing
CGO_ENABLED=0 and -buildmode=pie are not compatible and may lead to build failures on some OSes.
Specify buildmode=exe to override the value set in BUILDFLAGS.

Fixes: #1343

Signed-off-by: bin <bin@hyper.sh>
2021-02-02 14:47:21 +08:00
Chelsea Mafrica
6be910bdc1 Merge pull request #1134 from egernst/kata-monitor-cleanup
kata-monitor: allow for building for alpine
2021-02-01 16:19:36 -08:00
James O. D. Hunt
de9487744f Merge pull request #1253 from snir911/fix-poststop
shimv2: log a warning and continue on post-stop hook failure
2021-02-01 14:44:39 +00:00
Jing Wang
9f7a7a4f86 osbuilder: Enforcing LIBC=gnu to rootfs build for ppc64le
Enforce LIBC=gnu in rootfs.sh for ppc64le, instead of erroring out and exiting if it is not set.

Fixes: #1339
Signed-off-by: Jing Wang <jing.wang4@ibm.com>
2021-01-29 20:22:31 +00:00
Fabiano Fidêncio
7e996daf12 Merge pull request #1338 from jing-wang4/kernel-build-ppc64le
kernel: Updates to kernel config for ppc64le
2021-01-29 20:49:34 +01:00
Jing Wang
a88b8969b6 kernel: Updates to kernel config for ppc64le
Need to enable virtiofs and IPv6 in the kernel config for ppc64le.

Fixes: #1333
Signed-off-by: Jing Wang <jing.wang4@ibm.com>
2021-01-29 18:13:56 +00:00
Jianyong Wu
b7a1f752c0 arm64: enable acpi for qemu/virt.
ACPI is enabled for Kata 1.x; port and rebase the code for 2.x,
including:
runtime: enable pflash;
agent: add acpi support for pci bus path;
packaging: enable CONFIG_RTC_DRV_EFI;

Fixes: #1317
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2021-01-29 22:12:43 +08:00
Fupan Li
5e39980858 Merge pull request #1216 from houstar/2.0-dev
agent: add secure_join to prevent softlink escape
2021-01-28 10:41:02 +08:00
Tim Zhang
07118afe93 Merge pull request #1320 from lifupan/main
rustjail: fix the issue of container's cgroup root path
2021-01-27 19:04:53 +08:00
fupan.lfp
448771f53d rustjail: fix the issue of container's cgroup root path
We should create the container's cgroup under the system's
default cgroup path, such as "/sys/fs/cgroup/<subsystem>",
instead of under the kata-agent process's cgroup path,
which would be under the systemd cgroup, such as
"/sys/fs/cgroup/systemd/system.slice/kata-agent.service".

Fixes: #1319

Signed-off-by: fupan.lfp <fupan.lfp@antfin.com>
2021-01-27 15:38:45 +08:00
Bo Chen
03c5ecefef Merge pull request #1306 from dgibson/no-cmake
osbuilder: remove traces of cmake
2021-01-26 16:26:02 -08:00
Carlos Venegas
3e3bfb9a42 Merge pull request #1321 from likebreath/clh_v0.12.0
versions: Update cloud-hypervisor to release v0.12.0
2021-01-26 17:07:02 -06:00
Carlos Venegas
5e0e35073d Merge pull request #1302 from jcvenegas/2020-1-19/non-experimental-virtiofs-2.x
clh: Use vanilla kernel.
2021-01-26 16:14:06 -06:00
Fabiano Fidêncio
7083261402 Merge pull request #1322 from wainersm/osbuilder_misc-1
osbuilder: miscelaneous fixes/improvements
2021-01-26 22:28:51 +01:00
Wainer dos Santos Moschetta
fd39f0fa00 osbuilder: Add "Agent init" on terms glossary
Include in the terms glossary some words about the agent init, as
later on the README explains how the agent can be switched between
systemd and init.

Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-01-26 10:51:27 -05:00
Qingyuan Hou
e111093b83 agent: add secure_join to prevent softlink escape
This patch fixes a security issue: if a container image has an
unsafe symlink in the container rootfs, attackers can exploit
this symlink to attack the guest system, e.g. to make directories
or files on the guest.

CVE-2015-3629

Fixes: #1219

Signed-off-by: Qingyuan Hou <qingyuan.hou@linux.alibaba.com>
2021-01-26 23:51:23 +08:00
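
The commit above only names the technique, so the following is a simplified
sketch of the idea rather than the agent's actual secure_join: join the
untrusted path onto the rootfs, resolve symlinks, and reject the result if it
lands outside the rootfs. Unlike the real implementation, which resolves
components manually so it also handles paths that do not exist yet, this
version leans on canonicalize() and therefore requires the path to exist.

```rust
use std::io;
use std::path::{Component, Path, PathBuf};

fn secure_join(rootfs: &Path, untrusted: &Path) -> io::Result<PathBuf> {
    // Strip leading "/" and clamp ".." so the join cannot escape by construction.
    let mut relative = PathBuf::new();
    for comp in untrusted.components() {
        match comp {
            Component::Normal(c) => relative.push(c),
            Component::ParentDir => {
                relative.pop();
            }
            _ => {} // RootDir, CurDir (and Prefix on Windows) are ignored
        }
    }

    let joined = rootfs.join(&relative);

    // canonicalize() follows symlinks; a symlink pointing outside the rootfs
    // therefore shows up as a resolved path that no longer starts with it.
    let resolved = joined.canonicalize()?;
    let root = rootfs.canonicalize()?;

    if resolved.starts_with(&root) {
        Ok(resolved)
    } else {
        Err(io::Error::new(
            io::ErrorKind::PermissionDenied,
            "path escapes the container rootfs",
        ))
    }
}

fn main() {
    // Example only: paths under /tmp resolve; symlink escapes are rejected.
    match secure_join(Path::new("/tmp"), Path::new("foo/../.")) {
        Ok(p) => println!("resolved to {}", p.display()),
        Err(e) => println!("rejected: {}", e),
    }
}
```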
Wainer dos Santos Moschetta
1273e485d8 osbuilder: Fix urls to repositories
Changed the user-visible urls to point to the right Kata Containers
files/repositories.

Fixes #234

Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-01-26 07:51:20 -05:00
Wainer dos Santos Moschetta
ba9fa49a53 osbuilder: Use Fedora and CentOS registries
To avoid hitting the quota limit on docker.io, this changes the Fedora
and CentOS dockerfiles to pull the images from the projects' registries.

Fixes #1324

Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-01-26 07:51:07 -05:00
Bo Chen
c2d14cdeea versions: Update cloud-hypervisor to release v0.12.0
Highlights for cloud-hypervisor version v0.12.0 include: removal of
`vhost-user-net` and `vhost-user-block` self spawning, migration of
`vhost-user-fs` backend, ARM64 enhancements with full support of
`--watchdog` for rebooting, and enhanced `info` HTTP API to include the
details of devices used by the VM including VFIO devices.

Fixes: #1315

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-01-25 10:58:19 -08:00
Snir Sheriber
0e57393fcc shimv2: log a warning and continue on post-start hook failure
According to runtime-spec:
The poststart hooks MUST be invoked by the runtime. If any poststart
hook fails, the runtime MUST log a warning, but the remaining hooks
and lifecycle continue as if the hook had succeeded

Fixes: #1252

Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2021-01-25 16:29:55 +02:00
Snir Sheriber
e7043fe284 shimv2: log a warning and continue on post-stop hook failure
According to runtime-spec:
The poststop hooks MUST be invoked by the runtime. If any
poststop hook fails, the runtime MUST log a warning, but
the remaining hooks and lifecycle continue as if the hook
had succeeded.

Fixes: #1252

Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2021-01-25 16:29:47 +02:00
David Gibson
3718df69c2 osbuilder: Remove leftover pieces related to cmake
The osbuilder scripts include a bunch of code for installing cmake, even
though cmake is never invoked.  versions.yaml claims it's needed to build
grpc-rs, but that doesn't appear to be in our dependency graph.

Presumably, we used to need this, but don't any more.  So, remove all cmake
references.

Fixes #1309

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-01-23 15:24:25 +11:00
Carlos Venegas
d1bf8293e1 kernel: ACPI: Always build evged for stable kernel
Patch required to enable CPU/memory hotplug in cloud-hypervisor.

Signed-off-by: Carlos Venegas <jos.c.venegas.munoz@intel.com>
2021-01-22 20:00:47 +00:00
Carlos Venegas
6f3d591763 clh: Use vanilla kernel.
The QEMU config already uses the vanilla kernel build for virtiofs.

Let's make the kernel usage consistent.

Depends-on: github.com/kata-containers/tests#3172

Fixes: #1302

Signed-off-by: Carlos Venegas <jos.c.venegas.munoz@intel.com>
2021-01-22 20:00:20 +00:00
Fabiano Fidêncio
4de21e3d95 Merge pull request #1311 from bergwolf/update-to-main
branch: change 2.0-dev to main
2021-01-22 12:07:02 +01:00
Peng Tao
fd5592d4d5 branch: change 2.0-dev to main
Change all mentions of 2.0-dev to main so that we can rename the
default branch to main.

Fixes: #1310
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-01-22 15:49:35 +08:00
Fabiano Fidêncio
5aef2faa4d Merge pull request #1304 from devimc/2021-01-20/snap/dontReleaseAlphaRC
snap: Don't release Kata Alpha/RC in snap store
2021-01-22 07:55:35 +01:00
Fupan Li
06d1dd2220 Merge pull request #1297 from mxpv/netlink
Migrate to rtnetlink
2021-01-22 09:39:17 +08:00
Julio Montes
2b880d2808 snap: Don't release Kata Alpha/RC in snap store
Snapcraft's stable and candidate channels are used to release Kata
Containers 1.x and 2.x respectively. Alpha and RC releases shouldn't
be pushed to the snap store because there are not enough channels
for all the releases of Kata Containers 1.x and 2.x.

fixes #1303

Signed-off-by: Julio Montes <julio.montes@intel.com>
2021-01-21 10:34:05 -06:00
Bin Liu
4bb23ed990 Merge pull request #1301 from Tim-Zhang/add-underscore-for-const
agent: Add underscore for constants
2021-01-20 21:43:21 +08:00
Tim Zhang
14a63cce22 agent: Add underscore for constants
To make them follow the Rust convention and be easier to read.

Fixes: #1235

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-01-20 17:12:29 +08:00
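
For readers unfamiliar with the convention the commit above refers to: Rust
ignores underscores inside numeric literals, so they can be used purely as
digit separators. A trivial example with made-up names and values:

```rust
// Without separators the magnitude is easy to misread.
const VSOCK_BUFFER_BYTES_OLD: u64 = 1048576;

// With underscores (the convention the commit adopts) it reads as "1 MiB".
const VSOCK_BUFFER_BYTES: u64 = 1_048_576;

fn main() {
    // Underscores are ignored by the compiler; both spellings are equal.
    assert_eq!(VSOCK_BUFFER_BYTES_OLD, VSOCK_BUFFER_BYTES);
    println!("{}", VSOCK_BUFFER_BYTES);
}
```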
Tim Zhang
c3a4180ae3 Merge pull request #1296 from GabyCT/topic/update20ubuntu
github: Update ubuntu version to 20.04
2021-01-20 14:57:30 +08:00
Maksym Pavlenko
fa93831f66 agent: Address linter and tests
- Fix clippy complaints
- Use #[tokio::test] for async tests
- Improve IPv6 check

Fixes: #1294

Signed-off-by: Maksym Pavlenko <pavlenko.maksym@gmail.com>
2021-01-19 20:23:11 -08:00
Julio Montes
3b6dd7054a Merge pull request #1290 from devimc/2021-01-18/agent/nvdimmDriver
agent: implement NVDIMM/PMEM block driver
2021-01-19 13:43:19 -06:00
Maksym Pavlenko
96762ab7ab agent: Remove old netlink crate
Cleans up unused code.

Fixes: #1294

Signed-off-by: Maksym Pavlenko <pavlenko.maksym@gmail.com>
2021-01-19 09:58:50 -08:00
Gabriela Cervantes
0ea8243a97 github: Update ubuntu version to 20.04
This PR updates the ubuntu version from 18.04 to 20.04 that will be
used for the github actions.

Fixes #1295

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2021-01-19 11:48:55 -06:00
Maksym Pavlenko
33367be4c7 agent: Integrate netlink
This patch integrates the new netlink module routines with the agent
(mainly replacing calls to the old netlink module with the new one).

Fixes: #1294

Signed-off-by: Maksym Pavlenko <pavlenko.maksym@gmail.com>
2021-01-19 09:48:51 -08:00
Maksym Pavlenko
23f3aefa1d agent: Implement new netlink module
This PR adds a new netlink module (based on the `rtnetlink` crate), so we don't
have to write low-level code to interact with netlink sockets, and can use a
high-level API instead.

As a side effect, the `rtnetlink`-based module has full IPv6 support, so it fixes #1171

Fixes: #1294

Signed-off-by: Maksym Pavlenko <pavlenko.maksym@gmail.com>
2021-01-19 09:44:50 -08:00
Julio Montes
12551de8a2 agent: implement NVDIMM/PMEM block driver
Support the pmem-csi[1] k8s plugin. Unlike SCSI and virtio devices,
NVDIMM/PMEM devices support DAX, improving IO read and write
operations.

fixes #1289

Signed-off-by: Julio Montes <julio.montes@intel.com>

[1]: https://github.com/intel/pmem-csi
2021-01-19 09:28:41 -06:00
Tim Zhang
f09128d8c7 Merge pull request #1292 from lifupan/2.0-dev
rustjail: fix the issue of missing destroy container cgroups
2021-01-19 22:33:27 +08:00
fupan.lfp
6abb1be724 rustjail: fix the issue of missing destroy container cgroups
In the container's destroy method, it should destroy
the container's cgroups.

Fixes: #1291

Signed-off-by: fupan.lfp <fupan.lfp@antfin.com>
2021-01-19 16:00:41 +08:00
Bin Liu
2a990a4507 Merge pull request #1210 from mxpv/async
agent: switch to async runtime
2021-01-19 13:52:23 +08:00
Tim Zhang
fe67f57c46 agent: set edition = "2018" in .rustfmt.toml to fix rustfmt about async fn
Got:
find . -type f -name "*.rs"  | egrep -v "target/|grpc-rs/|protocols/" | xargs rustfmt --check

error[E0670]: `async fn` is not permitted in the 2015 edition

This commit fixes this issue.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-01-18 15:38:19 +08:00
Tim Zhang
df68771e77 agent-ctl: Update ttrpc to 0.4.14 for agent-ctl
The ttrpc in agent-ctl should be compatible with protocols crate.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-01-18 15:38:19 +08:00
Maksym Pavlenko
37e285bf7b agent: Make debug console async
Fixes: #1209

Signed-off-by: Maksym Pavlenko <pavlenko.maksym@gmail.com>
2021-01-18 15:38:19 +08:00
Tim Zhang
f3bd439465 agent: fix tests for async functions
Use tokio::test to test async functions.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-01-18 15:38:19 +08:00
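
As a small illustration of the change above (a hypothetical test, not one
from the agent): a plain #[test] cannot await, so async unit tests are
annotated with tokio's test macro, which spins up a runtime and blocks on the
async body. Assumes tokio with its macros and rt features enabled.

```rust
// A trivial async function and an async unit test for it.
async fn double(x: u32) -> u32 {
    x * 2
}

#[tokio::test]
async fn test_double() {
    assert_eq!(double(21).await, 42);
}
```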
Tim Zhang
9f79ddb9df agent: use tokio Notify instead of epoll to fix #1160
Fixes: #1160

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-01-18 15:38:19 +08:00
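
The commit above is terse, so here is a minimal, hypothetical sketch (not the
agent's code) of what replacing an epoll-based wakeup with tokio::sync::Notify
looks like: one task parks on notified() and another wakes it with
notify_one(). Assumes tokio with its default scheduler features.

```rust
use std::sync::Arc;
use tokio::sync::Notify;

// Illustrative only: a task parks on Notify instead of polling an epoll fd,
// and another task wakes it up when there is work (or a shutdown request).
#[tokio::main]
async fn main() {
    let notify = Arc::new(Notify::new());
    let waiter = notify.clone();

    let worker = tokio::spawn(async move {
        // Suspends the task without burning a thread or an epoll loop.
        waiter.notified().await;
        println!("woken up, doing work");
    });

    // ... later, from any other task:
    notify.notify_one();

    worker.await.unwrap();
}
```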
Tim Zhang
332fa4c65f agent: switch to async runtime
Fixes: #1209

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-01-18 15:38:15 +08:00
Eric Ernst
5d2c5ab534 Merge pull request #1280 from egernst/final-fix-probably-2.0-dev
Final fix probably 2.0 dev
2021-01-15 12:45:43 -08:00
Eric Ernst
35ea7ee600 actions: further updates to fix release workflow
There were still issues. Tested in a fork; verified that environment variable
passing works as before now.

Fixes: #1273

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2021-01-15 11:06:17 -08:00
Eric Ernst
fc6ba8f067 Merge pull request #1279 from egernst/fixup-release-2.0-dev
actions: fixup release/main workflow
2021-01-15 08:38:09 -08:00
Eric Ernst
ded8e03f33 actions: fixup release/main workflow
Still need pkg-sha

Fixes: #1273

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2021-01-15 07:45:13 -08:00
Peng Tao
2ff74f53bc Merge pull request #1274 from egernst/fix-release-scripts
Fix release scripts
2021-01-15 14:23:52 +08:00
Eric Ernst
7557a1b60d packaging: should tag/update tests repo when releasing
We should still bump/version the tests repository, just as we do for
1.x.

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2021-01-14 21:01:02 -08:00
Eric Ernst
437b35b7d9 actions: w/a deprecated set-env
Fixes: #1273

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2021-01-14 20:59:51 -08:00
Chelsea Mafrica
b24a2d2e48 Merge pull request #904 from cmaf/tracing-shimv2
shimv2: Add tracing to shimv2
2021-01-14 16:38:28 -08:00
Eric Ernst
7c08ddf5a6 Merge pull request #1269 from egernst/2.1-alpha0-branch-bump
# Kata Containers 2.1-alpha0
2021-01-14 14:28:56 -08:00
Carlos Venegas
9c71092e76 Merge pull request #1268 from devimc/2021-01-14/fixSnap
Fix snap CI
2021-01-14 14:01:30 -06:00
Eric Ernst
383e8e673d release: Kata Containers 2.1-alpha0
- snap: Fix yq error in build
- storage: cleanup and support read only block dev hotplug
- rootfs: Don't fallthrough in the docker_extra_args() switch
- github: Add github actions
- shimv2: Avoid double removing of container from sandbox
- Agent: return error on trying to persist a pid namespace and minor improvements
- rustjail: allow network sysctls
- rustjail: fix the issue of sync read
- rustjail: fix the issue of bind mount /dev
- qemu: no state to save if QEMU isn't running
- packaging/qemu: Build and package completely inside the container
- agent: upgrade cgroups to 0.2.0
- agent: Simplify .or_else() to .or()
- Fix error reporting in listInterfaces() and listRoutes()
- improve rustjail validator
- Add void "install" targets for both "trace-forwarder" and "agent-ctl"
- [forwardport] Add support for Gentoo
- oci: fix a typo in "addtionalGids"
- Don't update cpusets if no CPUs changed closes #1172
- rootfs: reduce size of debian image
- runtime: Allow to overwrite DESTDIR
- snap: fix snap release channel
- Don't leak fd when reseeding rng
- Fixes for make generate-protocols
- docs: Fix docs in docs/architecture.md
- docs: Update the Cloud Hypervisor description in virtualization.md
- agent: exit from exec hangs if background process is present
- [forwardport] install: Improve snap documentation
- handle vcpus properly utilized in the guest
- docs: fix the custom agent binary file path for creating initrd image
- shimv2: handle ctx passed by containerd
- runtime: clh: Enforce to call 'cleanupVM' for 'stopSandbox'
- agent: Adjust OOM Score to avoid agent being killed.
- [forward port]  cli: make check subcommand more tolerant to failures
- docs: add link to VMT on top level README
- rustjail: fork a new child process to change the pid ns
- rustjail: remove the network ns validation against container
- snap: update apps section
- runtime: don't wait the second shim process in shim start
- agent: create pci root Bus Path for arm64
- agent: enable lto flag for Cargo to get better optimized code
- virtcontainers: revert CleanupContainer from PR 1079
- docs: Create hypervisor summary document
- Add hyperlink and fix typo
- versions: Use CRI-O v1.18.4-4-g6dee3891e
- runtime: change configuration key name from EnablePprof to enable_pprof
- runtime: delete sandboxlist.go and sandboxlist_test.go
- versions: Use release-1.18 (commit ee9128444bec10)
- runtime: clh: disable virtiofs DAX when FS cache size is 0
- release: Fix release candidate to major version upgrade check
- runtime: sleep 1 second after GetOOMEvent failed
- Agent: README updates for build on ppc64le
- runtime: clean/refactor code
- Forward port annotation doc
- versions: Update cloud-hypervisor to release v0.11.0
- docs: Add instructions for enabling VM templating
- Revert "version: revert back to crio 1.8.3"
- Dump guest memory when kernel panic for QEMU
- clh: Consolidate the code path for device unplug
- agent: Log ttrpc messages
- annotations: Improve asset annotation handling
- runtime: readonly volume should be bind mounted readonly on the host
- docs: Fix incorrect docs in config file
- CI: Fix incorrect URL
- docs: Update top-level README
- versions: Update crio version
- runtime: cloud-hypervisor: reduce memory footprint
- agent: Improve unit test coverage for src/sandbox.rs
- rustjail: fix the issue of create thread failed causing current thread panic
- Improve unit test coverage for rustjail/container.rs
- agent: Update build instructions
- cli: Provide aliases for kata-* subcommands and options
- runtime: Restore QEMUVIRTIOFSPATH variable in Makefile
- Use apply_patches.sh in qemu and kernel scripts
- clean up agent proto files
- agent: fixes the permissions of PID 1's STDIO
- Feature/1004 add version for kata monitor
- agent: Generate proto files programmatically
- runtime: Fix firecracker config
- docs: remove the 1.x version description about shim and proxy
- arm64: correct bridge type for QEMUVIRT
- snap: add GH actions jobs to release the snap package
- agent: clear clippy warnings
- agent: simplify ttrpc error construction
- Replace @RUNTIME_NAME@ with the target in generated files
- 2.0 update doc for hypervisor related information
- virtcontainers: Append max_ports to virtio-serial device
- snap: install libseccomp-dev
- runtime: set virtio-fs as default fs sharing method
- VirtioFS: backports & default settings to improve performance
- tools: Make agent-ctl support more APIs
- Validate runtime annotations
- kernel: update to 5.4.71
- config: make virtio-fs part of standard kernel
- agent: Optimize error handling
- versions: Update Kubernetes, containerd, cri-o and cri-tools
- agent: fix crashers if API requests empty
- rustjail: add length check for uid_mappings in rootless euid mapping
- kata-monitor: use regexp to check if runtime is kata containers
- docs: update the build kata containers kernel document
- cgroup and cpuset fixes from 1.x
- docs: Update upgrading guide
- agent: fix panic on malformed device resource in container update
- Forward port device conflict fixes from Kata 1 / Go agent
- docs: Add containerd install guide
- agent: simplify codes
- agent: fix erroneous parsing for guest block size
- agent: use macro to simplify parse_cmdline function in config.rs
- fix arm CI
- packaging: fix missing cloud_hypervisor_repo
- docs: Add crictl example json files
- ci: snap: add event filtering
- agent: do not follow link when mounting container proc and sysfs
- agent-ctl: include cargo lock updates
- agent: set init process non-dumpable
- runtime: Clear the VCMock 1.x API Methods from 2.0
- virtiofs: Disable DAX
- docs: Update docs for enabling agent debug console
- Remove compilation warnings
- osbuilder: Create target directory for agent
- versions: add plugins section
- snap: specify python version
- packaging: fix image build script
- Main packaging fixups
- clh: Support VFIO device unplug
- ci: add github action to test the snap
- docs: update networking description
- docs: update dev guide for agent build
- rust-agent: Update README
- docs: update architecture.md
- runtime: add support for SGX
- version: upgrade qemu version to v5.1.0 for arm64
- agent: Fix OCI Windows network shared container name typo
- github: Remove issue template and use central one
- docs: fix broken links
- Packaging: release notes script using error kernel path urls
- rust-agent: Replaces improper use of match for non-constant patterns
- devices: fix go test warning in manager_test.go
- action: Allow long lines if non-alphabetic
- Indicates never return function and remove unreachable code
- agent: propagate the internal detail errors to users
- Update Installation Guide to better reflect the current state of the project
- ci: fix clone_tests_repo function
- agent: Set LIBC=gnu for ppc64le arch by default
- fc: integrate Firecracker's metrics
- Fix to qemu experimental and improvements
- ci: resurrect travis static checkers
- agent: fix UT failures due to chdir
- agent: Only allow proc mount if it is procfs
- kata 2.0: add debug console service
- runtime: Call s.newStore.Destroy if globalSandboxList.addSandbox
- shimv2: add a comment in checkAndMount()
- osbuilder: specify default toolchain version in rust-init
- runtime: Update CLH client pkg to version v0.10.0
- agent/oci: Don't use deprecated Error::description() method
- runtime: Fix linter errors in release files
- packaging: Build from source if the clh release binary is missing
- runtime: add podman configuration to data collection script
- ci: use Travis cache to reduce build time
- agent: update cgroups crate
- docs: Update the reference path of kata-deploy in the packaging
- runtime: make kata-check check for newer release
- how-to: add privileged_without_host_devices to containerd guide
- agent: Unit tests for rustjail/mount.rs
- docs: Fix the kata-pkgsync tool's docs script path
- Fix developer guide
- fix guest panic when running agent as init
- packaging: update version file url for kata 2.0 in Makefile
- Fix release notes

789fd7c1 blk-dev: hotplug readonly if applicable
12777b26 volumes: cleanup / minor refactoring
fbc1d123 vendor: revendor govmm
6cc1920c snap: Fix yq error in build
b329a74f rootfs: Fix indentation inside a switch
8879f9a0 rootfs: apparmor=unconfined is needed for non Red Hat host OSes
bbeebcdb rootfs: Always add SYS_ADMIN, CHROOT, and MKNOD caps to docker cmdline
90ec2fa8 rootfs: Don't fallthrough in the docker_extra_args() switch
ebd9fcc2 actions: Run static checks before make agent
0d3736d5 rustjail: fix the issue of sync read
0dc02f6d rustjail: fix the issue of bind mount /dev
894fa42a rustjail: allow network sysctls
d4cd2554 agent: Avoid container stats panic caused by cgroup controller non-exist
157e055f agent: upgrade crate cgroups to 0.2.0
e3ec1d50 agent: Simplify .or_else() to .or()
14e7042c agent: Clean up commented use declarations
5fe5b321 agent: Fix temp prefix on Namespace::test_setup_persistent_ns
3a891d4e agent: Return error on trying to persist a pid namespace
5c464018 shimv2: Avoid double removing of container from sandbox
b366af93 jail: add more test cases for validator
d38a5d3f jail/validator: introduce helpers to reduce duplicated code
76ad3213 jail/validator: avoid unwrap() for safety
51fd624f rustjail: add more context info for errors
9321e1b2 oci: fix two incompatible issues with OCI spec
406a91ff agent: consume ttrpc crate from crates.io
9a7bcccc qemu: no state to save if QEMU isn't running
6181570c oci: fix a typo in "addtionalGids"
a5372e00 github: Add github actions
4af5beda agent/sandbox:  Don't update cpuset when ncpus = 0
e004616b runtime/network: Fix error reporting in listRoutes()
1ae8e81a runtime/network: Correct error reporting in listInterfaces()
a19263e5 agent/protocols: Remove unneeded import from oci.proto
a19cf28c agent/protocols: Remove some unnecessary include directives from protoc
2b452090 agent/protocols: Remove some unneeded dependencies for protocol generation
b36c9ea3 docs: Fix docs in docs/architecture.md
3db1c805 agent: Don't leak fd when reseeding rng
8ac93f65 rootfs-builder: add support for gentoo
9897238f rootfs: reduce size of debian image
d47122e9 docs: Update the Cloud Hypervisor description in virtualization.md
10e9bfc6 runtime: Allow to overwrite DESTDIR
f740032c packaging/qemu: Delete the temporary container
e5c710e8 packaging/qemu: Build and package completely in the container
4c3377de packaging/qemu: Add QEMU_DESTDIR argument to dockerfiles
faed2369 rootfs-builder: add functions to run before and after the container
8e5603e6 snap: fix snap release channel
8f538935 install: Improve snap documentation
1ca415d8 agent: exit from exec hangs if background process is present
a00f7c34 docs: fix the custom agent binary file path for creating initrd image
0155fe12 shimv2: handle ctx passed by containerd
a793b8d9 agent: update cpuset of container path
705182d0 agent: ignore updating cpuset error when update cgroups
647331ac runtime: clh: Enforce to call 'cleanupVM' for 'stopSandbox'
e684a541 docs: add link to VMT on top level README
68f66c51 agent-ctl: Add void "install" target
5e407758 trace-forwarder: Add void "install" target
70f198d7 cli: check modules and permissions before loading a module
cb684cf8 cli: don't fail if rate limit is exceeded
9216f2ad rustjail: fork a new child process to change the pid ns
3b08376c rustjail: remove the network ns validation against container
c388ec5b runtime: don't wait the second shim process in shim start
d6acc4c0 agent: enable lto flag for Cargo to get better optimized code
13a8e4e3 snap: update apps section
fdbf7d32 virtcontainers: revert CleanupContainer from PR 1079
91a390f0 docs: Create hypervisor summary document
3eeb25a1 docs: Tidied up virtualisation summary table
8ec3cf08 docs: Adding hyperlink to virtio-net in kata documentation 2.0
b5b67db8 docs: Fixing typo in virtualization.md file
4d46d0f0 versions: Use CRI-O v1.18.4-4-g6dee3891e
53b5d063 agent: Adjust OOM Score to avoid agent being killed.
14a21c3a runtime: change configuration key name from EnablePprof to enable_pprof
4e3a8c01 runtime: remove global sandbox variable
29020394 runtime: delete sandboxlist.go and sandboxlist_test.go
9b88a96b versions: Use release-1.18 (commit ee9128444bec10)
36f65ce1 runtime: clh: update cloud-hypervisor
e1396f04 runtime: clh: disable virtiofs DAX when FS cache size is 0
8f38265b release: Fix release candidate to major version upgrade check
2e0bf40a tests: Ensure semver build metadata is ignored
4024a827 release: Make error format string consistent
cb0e6094 runtime: sleep 1 second after GetOOMEvent failed
4c78814b docs: Fix pre-existing spelling mistakes caught by the CI
6c083d94 docs: Add a link to document describing how to use annotations
d67921a2 docs: Document restricted annotations
1fc7b764 docs: Repair inconsistencies between 2.0 and 1.x
21801a11 versions: Revert "version: revert back to crio 1.8.3"
b8414045 runtime: remove nsenter
e3510be8 runtime: use one line if statement to check if err is nil for qemu.go
378308e2 docs: Add instructions for enabling VM templating
92c1c4c6 versions: Update cloud-hypervisor to release v0.11.0
8907a339 agent: Only show ttrpc logs for trace log level
21cd7ad1 agent: Log ttrpc messages
286eebf0 agent: Add env var to set log level
b9c6db4b agent: Add env var tests
705e9955 agent: Add env var comment
5ced96e9 hypervisor: Remove unused methods
e82c9dae annotations: Improve asset annotation handling
0f26f1cd annotations: Add missing hypervisor control annotation
76064e3e asset: Formatting, grammar and whitespace
40418f6d runtime: add guest memory dump
ff13bde3 version: revert back to crio 1.8.3
6c2fc233 agent: create pci root Bus Path for arm64
a958eaa8 runtime: mount shared mountpoint readonly
125e21ce runtime: readonly mounts should be readonly bindmount on the host
5f0abc20 CI: Fix incorrect URL
b6f8a1d5 docs: Fix incorrect docs in config file
93d79625 clh: Consolidate the code path for device unplug
18a22459 Agent: README updates for build on ppc64le
655f2649 Agent: README updates for build on ppc64le
62c7e094 docs: Remove credits
679df0fb docs: Update top-level README
dfe364f8 Agent: README updates for build on ppc64le
77b50969 runtime: cloud-hypervisor: reduce memory footprint
2e1a8f0a agent: Improve unit test coverage for src/sandbox.rs
87848e87 versions: Update crio version
172d015e rustjail: fix the issue of create thread failed causing thread panic
9e93463b agent/rustjail: improve unit test coverage for rustjail/container.rs
ad4f7b86 agent/rustjail: make mount and umount2 public
926a6186 agent/rustjail: fix typo
8130d9b2 agent/rustjail: don't use unwrap in container::oci_state
5d111071 rustjail: add mock implementation for cgroup manager
e3eff0eb agent: Update build instructions
0896ce80 agent: update proto file copyright
6e9ca457 agent: generate proto files properly
837343f0 agent-ctl: update cargo.lock
b3166618 runtime: remove the unused proto files
54e23c83 agent: move gogo.proto out of the github.com namespace
583e6ed3 agent: types.pb.go is not regenerated
bb19fcb9 docs: Update documentation with new subcommand forms
d2fe7091 cli: Use new subcommand forms in kata-manager script
4d9ab0cd cli: Support new subcommand forms in bash completion
c5d355e1 cli: Remove `kata-` prefix from env and check subcommands
f134b4a3 agent: Update build instructions
9e9988df agent/protocols: Move agent.proto out of the mock folder of agent
e90aa7b4 agent: fixes the permissions of PID 1's STDIO
b9b281e7 packaging: Use apply-patches.sh in build-kernel.sh
163e6104 packaging: Make qemu/apply_patches.sh common
d4cf3057 packaging: qemu/apply_patches.sh should sort the patches
5b065eb5 runtime: change govmm package
9cb41507 agent/protocols: Fix copyright header checking
0d58d919 agent/protocols: Stop generate agent proto files in the shellscript
7559382b agent/protocols: Ignore generated files and remove these files from repo
fdc33fb7 agent/protocols: Generate proto files programmatically
f1c3bf6b runtime: let kata-collect-data.sh collect kata-monitor info
993a8da3 kata-monitor: add version subcommand
4ee78120 runtime: Restore QEMUVIRTIOFSPATH variable in Makefile
df4ce9fa ci: add `cargo clippy` for agent
2e138788 agent: clear match_like_matches_macro/vec_resize_to_zero warnings
227edfdc agent: clear module_inception/type_complexity warnings
698d25b7 agent: clear redundant_field_names clippy warning
4dd9bd7a agent: clear clippy `len_zero` warnings
bf7dec5c agent: clear clippy warnings
56f867ee rustjail: clear clippy warnings
16757ad4 oci: clear clippy warnings
f32f49bd logging: clear clippy warnings
5b079a3b snap: add GH actions jobs to release the snap package
2738b18b runtime: Fix firecracker config
e5d4259a runtime: Simplify make variables for clh
9eab3015 arm64: correct bridge type for QEMUVIRT
b88aac04 docs: Update how-to Readme with hypervisor information.
d6464117 docs: Update Readme to remove hypervisor information
b4f9fb51 docs: Remove docs for nemu
96a4ed7d Makefile: Replace @RUNTIME_NAME@ with the target in generated files
7159fc2e agent: simplify ttrpc error construction
0f894986 snap: install libseccomp-dev
9a351509 package: drop qemu-virtiofs shim
6ed669a1 packaging: install virtiofsd for normal qemu build as well
da79b4be virtcontainers: Append max_ports to virtio-serial device
bcf48530 runtime: enable virtiofs by default
e2221d34 tools: Improve agent-ctl README
2d1f2c7b kernel: update to 5.4.71
d3c98620 config: make virtio-fs part of standard kernel
edf02af1 tools: Make agent-ctl support more APIs
56201803 tools: Remove commented out code in agent-ctl
9bac4ee6 tools: Log request in agent-ctl tool if debug enabled
68821f08 tools: Rename agent-ctl command to GetGuestDetails
8553f062 tools: Fix comment in agent-ctl
6ba294a1 agent: remove `unwrap()` for `e.as_errno()`
e77482fe agent: Use `?` instead of `match` when the error returns directly
1b7ed328 kata-monitor: use regexp to check if runtime is kata containers
47ff2fb9 agent: use anyhow `context` to attach context to `Error` instead of `match`
2f690a2b agent: remove useless match
1d8def66 agent: Use `ok_or_else` instead of match for Option -> Result
84953066 agent: Fix crasher if AddARPNeighbors request empty
3d084c7d agent: Fix crasher if UpdateRoutes request empty
5615e5a7 agent: Fix crasher if UpdateInterface request empty
0dce817e agent: replace `match Result` with `or_else`
7bf4073d agent: replace unnecessary `match Result` with `map_err`
7f9e5913 agent: replace check! with map_err for readability
09aca49e agent: remove `check!` in child process because we can't see logs.
a18899f1 agent: refactor namespace::setup to optimize error handling
a3c64e5c agent: replace `if let Err` with `or_else`
6ffa8283 agent: replace `if let Err` with `map_err`
863f918a rustjail: add length check for uid_mappings in rootless euid mapping
720eab78 versions: Update Kubernetes, containerd, cri-o and cri-tools
c5771be2 annotations: Correct unit tests to validate new protections
398d7918 annotations: Split addHypervisorOverrides to reduce complexity
b2b3bc7a annotations: Add unit test for checkPathIsInGlobs
6f52179c annotations: Add unit test for regexpContains function
966bd573 makefile: Add missing generated vars to `USER_VARS`
be6ee255 makefile: Improve names of config entries for annotation checks
b1194274 annotations: Give better names to local variables in search functions
b5db114a annotations: Rename checkPathIsInGlobList with checkPathIsInGlobs
d65a7d10 config: Add better comments in the template files
7c6aede5 config: Whitelist hypervisor annotations by name
f047fced config: Use glob instead of regexp to match paths in annotations
11b9c90c annotations: Fix typo in comment
c16cdcb2 config: Add makefile variables for path lists
4e89b885 config: Protect file_mem_backend against annotation attacks
aae9656d config: Protect vhost_user_store_path against annotation attacks
55881653 config: Add security warning on configuration examples
b21a829c config: Protect ctlpath from annotation attack
27b6620b config: Protect jailer_path annotation
07669017 config: Add examples for path_list configuration
2d431c61 annotations: Simplify negative logic
2ca9ca89 config: Add hypervisor path override through annotations
2e093dfd config: Fix typo in function name
bf13ff0a config: Protect virtio_fs_daemon annotation
8c75de19 config: Add 'List' alternates for hypervisor configuration paths
fc6468ef agent: fix panic on malformed device resource in container update
d8a8fe47 cpuset: don't set cpuset.mems in the guest
88cd7128 sandbox: consider cpusets if quota is not enforced
77a463e5 cpuset: support setting mems for sandbox
2d690536 cpuset: add cpuset pkg
1a9515a9 runtime: Pass `--thread-pool-size=1` to virtiofsd
1c528cd1 packaging: Apply virtiofs performance related fixes to 5.x
5b520003 docs: Update upgrading guide
0e0564a5 docs: update the build kata containers kernel document
ae6b8ec7 agent/device: Check type as well as major:minor when looking up devices
859301b0 agent/device: Index all devices in spec before updating them
2477c355 agent/device: Forward port update_spec_device_list() unit test
08d80c1a agent/device: update_spec_device_list() should error if dev not found
12cc0ee1 sandbox: don't constrain cpus, mem only cpuset, devices
b6cf68a9 cgroups: add ability to update CPUSet
b812d4f7 virtcontainers: add method for calculating cpuset for sandbox
f63f7405 agent: fix erroneous parsing for guest block size
43d70a32 docs: Add containerd install guide
11c1ab8b agent: use ok_or/map_err instead of match
6b9f9915 rustjail: use Iterator to manipulate vector elements
a7251651 docs: remove the 1.x version description about shim and proxy
dc1442c3 rustjail: delete codes commented out
aa04111d rustjail: delete unused test code
eae685dc agent: use chain of Result to avoid early return
5e3d1fb6 agent: add blank lines between methods
980e48ca agent: delete unused field in agentService
52b821fa agent: use no-named closure to reduce codes
82e94501 packaging: fix cloud-hypervisor binary path
b1f95e8d agent: use a local fn to reduce duplicated codes
154a356a packaging: apply qemu v5.1 stable fixes
c781a808 agent: fix aarch64 build
906b3844 agent: update not accurate comments
78318c18 packaging: fix missing cloud_hypervisor_repo
b7309943 agent: use macro to simplify parse_cmdline function in config.rs
9834a766 docs: add namespace key to pod/container config files
37e7de72 ci: snap: add event filtering
9a02e6eb docs: Add crictl example json files
b7147eda agent: do not follow link when mounting container proc and sysfs
15b71563 agent: set init process non-dumpable
00ad3fd3 agent-ctl: include cargo lock updates
8cd62d7b versions: add plugins section
c4472481 virtiofs: Disable DAX
3e56de81 snap: specify python version
e3cdc89b osbuilder: Create target directory for agent
7cad865d packaging: fix image build script
0e898c6b rust-agent: Treat warnings as error
0e4baaab rust-agent: Identify unused results in tests
5b2b5652 rust-agent: Log returned errors rather than ignore them
d617caf1 rust-agent: Remove unused imports
ee739c5d rust-agent: Report errors to caller if possible
d5b492a1 rust-agent: Ignore write errors while writing to the logs
c635c46a rust-agent: Remove unused code that has undefined behavior
ec24f688 rust-agent: Remove 'mut' where not needed
c8f406d4 rust-agent: Remove uses of deprecated functions
f832d8a6 rust-agent: Remove or rename unused parameters
5a1d3311 rust-agent: Remove or rename unused variables
27efe291 rust-agent: Remove unused functions
d76ece0c rust-agent: Remove useless braces
3682812e rust-agent: Remove unused macros
483209bf actions: add kata deploy test
07930024 packaging: cleaning, updating based on new filepaths
f0f205cd packaging: remove obs-packaging
4b1753c5 packaging: pull versions, build-image out from obs dir
3f6cd4d5 packaging: Revert "packaging: Stop providing OBS packages"
c33ee54a clh: Support VFIO device unplug
1f4dfa31 clh: Remove unnecessary VmmPing
cc80ae0a versions: cloud-hypervisor: Bump to version 6d30fe05
0fec7a4d docs: Change kata_tap0 to tap0_kata
3394a6a5 docs: update networking description
2e83f405 dev-guide: update kata-agent install details
ffea705a docs: Update docs for enabling agent debug console
777f3981 docs: update dev guide for agent build
aa8eefd8 ci: add github action to test the snap
ea1cb37b versions: cloud-hypervisor: bump version
0ebffdf2 runtime: cloud-hypervisor: tag openapi-generator-cli container
e51a1ea3 docs: use-cases: Add Intel SGX use case
7d638231 runtime/vendor: add k8s.io/apimachinery/pkg/api/resource
6df165c1 runtime: add support for SGX
a5b3e1cd docs: drop docker installation guide
6c4300c6 docs: fix static check errors in docs/install/README.md
59224a76 docs: update architecture.md
a89deb3e rust-agent: Update README
80c52834 github: Remove issue template and use central one
0ccbca3b agent: Fix OCI Windows network shared container name typo
a6221a74 qemu: upgrade qemu version to 5.1.0 for arm64.
f30b86f1 Packaging: release notes script using error kernel path urls
a7faeaac docs: fix broken links
4501c25a agent: propagate the internal detail errors to users
1984e635 ci: fix clone_tests_repo function
02c1a59f agent: Set LIBC=gnu for ppc64le arch by default
7019e72c agent: remove unreachable code
942999ed agent: Change do_exec return type to ! because it will never return
757dfa70 fc: integrate Firecracker's metrics
b03d958e gitignore: ignore agent service file
64b4f698 agent: fix UT failures due to chdir
85d22301 runtime: fix TestNewConsole UT failure
e90e9a2c travis: skip static checker for ppc64
5611283e runtime: fix golint errors
daf2a54d agent: fix cargo fmt
c05c4ba5 ci: always checkout 2.0-dev of test repository
1569b3b3 docs: fix static check errors
df3119b6 runtime: fix make check
484a595f runtime: add enable_debug_console configuration item for agent
febdf8f6 runtime: add debug console service
07d339c7 devices: fix go test warning in manager_test.go
a4afe3af rust-agent: Replaces improper use of match for non-constant patterns
acaa806c agent: Only allow proc mount if it is procfs
ca501e54 osbuilder: specify default toolchain version in rust-init.
03517327 action: Allow long lines if non-alphabetic
33513fb4 rustjail: make the mount error info much more clear
45b0b4ed agent/oci: Don't use deprecated Error::description() method
a34478ff runtime: Update cloud-hypervisor client pkg to version v0.10.0
ce675075 static-build/qemu-virtiofs: Refactor apply virtiofs patches
512b38cf packaging/qemu: Add common code to apply patches
edce2712 static-build/qemu-virtiofs: Fix to apply QEMU patches
86a864b8 packaging: Build from source if the clh release binary is missing
33585a8e runtime: Fix linter errors in release files
e3a0f9b3 ci: use export command to export envs instead of env config item
36ce7018 agent: update cgroups crate
3523167d runtime: Call s.newStore.Destroy if globalSandboxList.addSandbox
9e5a4b8b ci: use Travis cache to reduce build time
52984b67 docs: Update the reference path of kata-deploy in the packaging
eae21591 runtime: add podman configuration to data collection script
d1277848 how-to: add privileged_without_host_devices to containerd guide
98c4d11b docs: fix k8s containerd howto links
f107b12b docs: fix up developer guide for 2.0
9f2f5201 docs: Fix the kata-pkgsync tool's docs script path
96f8769a travis: enable RUST_BACKTRACE
cda7acf7 agent/rustjail: add more unit tests
98cc979a agent/rustjail: remove makedev function
b99fefad agent/rustjail: add unit tests for ms_move_rootfs and mask_path
d79fad2d agent/rustjail: implement functions to chroot
25c91afb agent/rustjail: add unit test for pivot_rootfs
7cf0fd95 agent/rustjail: implement functions to pivot_root
672da4d0 agent/rustjail: add unit test for mount_cgroups
ab61cf7f agent/rustjail: add unit test for init_rootfs
0a0714c9 agent/rustjail/mount: don't use unwrap
3dc9452b agent/rustjail: add tempfile crate as dependency
d756f52c rustjail: implement functions to mount and umount files
a02d1787 gitignore: ignore agent version.rs
b518ddea agent: fix agent panic running as init
1a77f69e runtime: make kata-check check for newer release
61181b9f packaging: use local version file for kata 2.0 in Makefile
e1c6aa27 docs: fix release process doc
1acfba4d packaging: fix release notes
1839dfd9 runtime: Clear the VCMock 1.x API Methods from 2.0
7225460a shimv2: add a comment in checkAndMount()
22ca2da6 packaging: Stop providing OBS packages
afa88c1b install: Add contacts to the distribution packages
3955cc89 install: Update information about Community Packages
218f77d7 install: Update SUSE information
2a0e76a8 install: Update openSUSE information
691f1364 install: Update RHEL information
270fc4b2 install: Update Fedora information
492b4e90 install: Update CentOS information

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-01-14 10:59:36 -08:00
Julio Montes
5ce74bab41 snap: tag yq version
yq major releases are not backward compatible; install the same
major version used in the CI to avoid conflicts when building the kata
components.
We should update yq when the CI updates it, not before.

fixes #1232

Signed-off-by: Julio Montes <julio.montes@intel.com>
2021-01-14 11:44:39 -06:00
Julio Montes
ef1feaf38f revert: "snap: Fix yq error in build"
This reverts commit 6cc1920c37.

Instead of updating the syntax of yq, let's use yq 3.x; otherwise
yq must be updated in the CI and the syntax updated in all the
tools (osbuilder, packaging).

Signed-off-by: Julio Montes <julio.montes@intel.com>
2021-01-14 11:36:44 -06:00
Julio Montes
74ebd96f99 Merge pull request #1243 from jodh-intel/2.0-dev-fix-snap-build
snap: Fix yq error in build
2021-01-13 14:57:57 -06:00
Eric Ernst
9176df7068 Merge pull request #1247 from egernst/blockro
storage: cleanup and support read only block dev hotplug
2021-01-13 12:47:12 -08:00
Eric Ernst
789fd7c1c6 blk-dev: hotplug readonly if applicable
If a block based volume is read only, let's make sure we add as a RO
device

Fixes: #1246

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2021-01-12 14:50:54 -08:00
Eric Ernst
12777b26e4 volumes: cleanup / minor refactoring
Update some headers, very minor refactoring

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2021-01-12 14:50:47 -08:00
Eric Ernst
fbc1d123e8 vendor: revendor govmm
Update govmm to add RO blk hotplug support.

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2021-01-11 18:11:50 -08:00
Julio Montes
ea069002b7 Merge pull request #1196 from fidencio/wip/fix-docker-extra-args
rootfs: Don't fallthrough in the docker_extra_args() switch
2021-01-11 14:04:56 -06:00
James O. D. Hunt
6cc1920c37 snap: Fix yq error in build
The snap build pulls the latest release of `yq`, but `yq` version 4
changed the CLI syntax for reading a YAML file.

Update the snap config file to use the new `yq` v4 syntax.

Fixes: #1232.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-01-11 11:24:46 +00:00
Maksym Pavlenko
5561755e3c agent: Initial switch to async runtime
This commit includes minimal changes in order to switch to Tokio:
- Update protocol crate to generate async server code
- Adds async entry point to the Agent
- Updates agent services signatures in rpc.rs

Fixes: #1209

Signed-off-by: Maksym Pavlenko <pavlenko.maksym@gmail.com>
2021-01-11 16:50:53 +08:00
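
A minimal, illustrative sketch of the kind of async entry point this commit describes, assuming the `tokio` crate with the "macros" and "rt-multi-thread" features; `start_server` is a hypothetical stand-in for the agent's rpc setup, not the actual code.

```
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Run the async server loop to completion.
    start_server().await
}

async fn start_server() -> Result<(), Box<dyn std::error::Error>> {
    // Placeholder for the async ttrpc service loop described in rpc.rs.
    Ok(())
}
```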
Fabiano Fidêncio
b329a74f18 rootfs: Fix indentation inside a switch
While touching this part of the code, let's help my OCD.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-01-08 22:46:24 +01:00
Fabiano Fidêncio
8879f9a09b rootfs: apparmor=unconfined is needed for non Red Hat host OSes
This is not needed for Fedora, RHEL, and CentOS, but it is required when
using any other host OS.  Having --security-opt apparmor=unconfined used
unconditionally is a no-go, as it'd break podman.

The reason this was only added when building for SUSE (as the target distro)
was that the debian and ubuntu conditions would fall through the switch to
the suse case (which makes me think that the fall-through was not
accidental).

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-01-08 21:35:10 +01:00
Fabiano Fidêncio
bbeebcdbba rootfs: Always add SYS_ADMIN, CHROOT, and MKNOD caps to docker cmdline
We use those, independently of the distro.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-01-08 21:35:10 +01:00
Fabiano Fidêncio
90ec2fa802 rootfs: Don't fallthrough in the docker_extra_args() switch
Falling through the switch cases in docker_extra_args() looks like a
typo and causes issues when building with podman, as `--security-opt
apparmor=unconfined` shouldn't be passed if AppArmor is not enabled on
the system.

Fixes: #1241

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-01-08 21:35:10 +01:00
GabyCT
a6d52d3da1 Merge pull request #1208 from GabyCT/topic/addgithu
github: Add github actions
2021-01-08 14:27:19 -06:00
Archana Shinde
ebd9fcc2c3 actions: Run static checks before make agent
Run static checks prior to building the agent. The checks
fail if run afterwards, since the compilation process
produces new Rust code.

Signed-off-by: Archana Shinde <archana.m.shinde@intel.com>
2021-01-08 11:04:54 -06:00
Fabiano Fidêncio
ce27c00ee2 Merge pull request #1217 from snir911/fix_hanging_pods
shimv2: Avoid double removing of container from sandbox
2021-01-08 15:00:54 +01:00
Fabiano Fidêncio
31519333a0 Merge pull request #1221 from wainersm/agent_improve
Agent: return error on trying to persist a pid namespace and minor improvements
2021-01-08 14:58:15 +01:00
Fabiano Fidêncio
dcfbf03781 Merge pull request #1229 from snir911/sysctl_fix
rustjail: allow network sysctls
2021-01-08 14:56:18 +01:00
Peng Tao
855fe10bfd Merge pull request #1234 from lifupan/2.0-dev-fix-read
rustjail: fix the issue of sync read
2021-01-08 14:03:46 +08:00
Fupan Li
80f561d7e3 Merge pull request #1230 from lifupan/2.0-dev
rustjail: fix the issue of bind mount /dev
2021-01-08 13:40:10 +08:00
fupan.lfp
0d3736d5c5 rustjail: fix the issue of sync read
It should check the read count and return an
error if the read count doesn't match the expected
number.

Fixes: #1233

Signed-off-by: fupan.lfp <fupan.lfp@antfin.com>
2021-01-07 17:21:22 +08:00
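
A minimal sketch of the check described above, using only the standard library; the function name is illustrative, not the rustjail code.

```
use std::io::{self, Read};

// Read into `buf` and fail loudly on a short read instead of silently
// continuing with partial data.
fn read_expected<R: Read>(reader: &mut R, buf: &mut [u8]) -> io::Result<()> {
    let count = reader.read(buf)?;
    if count != buf.len() {
        return Err(io::Error::new(
            io::ErrorKind::UnexpectedEof,
            format!("short read: got {} bytes, expected {}", count, buf.len()),
        ));
    }
    Ok(())
}
```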
fupan.lfp
0dc02f6dd3 rustjail: fix the issue of bind mount /dev
In case the container rootfs's /dev was overridden
by a bind mount from another directory, there's
no need to create the default device nodes and symlinks
in /dev.

Fixes: #692

Signed-off-by: fupan.lfp <fupan.lfp@antfin.com>
2021-01-07 16:12:25 +08:00
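
An illustrative sketch of the guard described above; `Mount` is a simplified stand-in for the OCI mount type, not rustjail's actual definition.

```
// Simplified OCI mount entry (illustrative only).
struct Mount {
    destination: String,
    fs_type: String,
}

// Skip creating the default device nodes and symlinks when the spec
// already bind mounts something over /dev.
fn dev_is_bind_mounted(mounts: &[Mount]) -> bool {
    mounts
        .iter()
        .any(|m| m.destination == "/dev" && m.fs_type == "bind")
}
```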
Eric Ernst
542e93d987 Merge pull request #1180 from egernst/qemu-cleanup-check
qemu: no state to save if QEMU isn't running
2021-01-06 11:17:54 -08:00
Fabiano Fidêncio
f531bab745 Merge pull request #1169 from wainersm/qemu_for_ci-1
packaging/qemu: Build and package completely inside the container
2021-01-05 19:50:32 +01:00
Snir Sheriber
894fa42a57 rustjail: allow network sysctls
The network ns is shared with the guest, so skip looking for it
in the spec.

Fixes: #1228
Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2021-01-05 17:12:42 +02:00
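
A hedged sketch of the relaxed validation described above; the function name and shape are assumptions, not rustjail's actual check.

```
// Network sysctls are exempt from the namespace lookup because the network
// namespace lives in the guest, not in the container spec.
fn sysctl_requires_netns_in_spec(key: &str) -> bool {
    !key.starts_with("net.")
}
```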
Tim Zhang
91c6ba74fa Merge pull request #1225 from Tim-Zhang/update-cgroup-to-0.2.0
agent: upgrade cgroups to 0.2.0
2021-01-05 19:50:05 +08:00
Peng Tao
7f38c200e0 Merge pull request #1202 from dgibson/bug1201
agent: Simplify .or_else() to .or()
2021-01-05 14:42:53 +08:00
Tim Zhang
d4cd255485 agent: Avoid container stats panic caused by a non-existent cgroup controller
Return SingularPtrField::none() instead of panicking when getting stats
from a cgroup fails because the cgroup controller is missing.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-01-05 11:35:41 +08:00
Tim Zhang
157e055fdd agent: upgrade crate cgroups to 0.2.0
Fixes: #1224

35ecd6f (origin/change-name, change-name) Update readme
eb6577e Change package name to cgroups-rs
8f6a7e0 Merge pull request #19 from Tim-Zhang/0.2.0
9baa065 (origin/0.2.0, 0.2.0) release: v0.2.0
e160df0 Make read_i64_from private and merge read_str_from to its caller
e1e05d3 Make new_with_relative_paths=new and load_with_relative_paths=new in v2
a89f4a0 Support set notify_on_release & release_agent
61a0957 Fix set_swappiness in cgroup v2
0592045 Ignore kmem in cgroup v2
c254fff Update readme
438d774 Fix test
42ee1ba Make Cgroup can be stored in struct
b6bb5ae docs: Hide Re-exports
d2882b1 Print cause when println!("{}")
abcb5ed Add more logs for create_dir error in controller.create
1f188be Detect subsystems and get root from /proc/self/mountinfo
fbd7164 Fix warnings in tests
f342254 Remove Box wrap of Cgroup.hire
cd998f3 Do not place cgroup under relative path read from cgroup by default
1ac76b6 Make function find_v1_mount pub
121f78d Expose deletion error
0f76570 Avoid exception caused by cgroup writeback feature
10650e2 Update tests to adapt new type of fields in resource
567cdb4 Use Option as resource fields, remove the update switch: update_values
0c18b08 Support customized attributes for CpuController and MemController
ca610bb add add_task_by_tgid

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-01-05 11:35:34 +08:00
David Gibson
e3ec1d509e agent: Simplify .or_else() to .or()
get_bool_value() in src/agent/src/config.rs includes a Result::or_else()
call with a trivial closure which can be replaced by a Result::or.  This
removes a clippy warning.

fixes #1201

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-01-05 12:54:21 +11:00
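
An illustrative example of the simplification described above (not the exact agent code): a trivial closure passed to `or_else` can become a plain `or`.

```
// Parse a boolean kernel parameter, falling back to `false` on error.
fn get_bool_value(param: &str) -> Result<bool, String> {
    // Before: .or_else(|_| Ok(false)) -- the closure does no real work.
    // After: pass the fallback value directly.
    param.parse::<bool>().map_err(|e| e.to_string()).or(Ok(false))
}
```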
David Gibson
e9e39fd081 Merge pull request #1207 from dgibson/bug1206
Fix error reporting in listInterfaces() and listRoutes()
2021-01-05 12:02:07 +11:00
Wainer dos Santos Moschetta
14e7042cf6 agent: Clean up commented use declarations
There are some commented-out use declarations; remove them all.

Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2020-12-29 09:16:29 -05:00
Wainer dos Santos Moschetta
5fe5b3212f agent: Fix temp prefix on Namespace::test_setup_persistent_ns
Fix the wrong prefix on the temp directory created in test_setup_persistent_ns
for the uts namespace type test.

Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2020-12-29 09:16:29 -05:00
Wainer dos Santos Moschetta
3a891d4e8f agent: Return error on trying to persist a pid namespace
A pid namespace cannot be persisted, so add a check-and-error in
Namespace::setup() to handle that case.

Fixes #1220

Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2020-12-29 09:16:26 -05:00
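
A simplified sketch of the check-and-error described above; the enum and function are illustrative stand-ins for the agent's types.

```
#[derive(Debug, PartialEq)]
enum NsType {
    Pid,
    Uts,
    Ipc,
}

// Reject persisting a pid namespace up front instead of failing later.
fn setup_persistent_ns(ns_type: NsType) -> Result<(), String> {
    if ns_type == NsType::Pid {
        return Err("a pid namespace cannot be persisted".to_string());
    }
    // ... bind mount the namespace so it persists ...
    Ok(())
}
```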
Peng Tao
0e215ece36 Merge pull request #1215 from jiangliu/liujiang/validator
improve rustjail validator
2020-12-28 15:32:23 +08:00
Snir Sheriber
5c464018ed shimv2: Avoid double removing of container from sandbox
RemoveContainerRequest results in a call to deleteContainer. According
to the spec, RemoveContainer is idempotent and "must not return
an error if the container has already been removed"; hence, don't
return an error if the error reports that the container is not found.

Fixes: #836

Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2020-12-27 18:04:06 +02:00
Liu Jiang
b366af9358 jail: add more test cases for validator
Fixes: #1214

Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
2020-12-24 20:17:06 +08:00
Liu Jiang
d38a5d3fcf jail/validator: introduce helpers to reduce duplicated code
Fixes: #1214

Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
2020-12-24 19:02:31 +08:00
Liu Jiang
76ad32136f jail/validator: avoid unwrap() for safety
Explicitly return error codes instead of calling unwrap().

Fixes: #1214

Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
2020-12-24 19:02:13 +08:00
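
An illustrative example of the pattern described above (not the validator's exact code): convert an `Option` into an explicit error instead of calling `unwrap()`.

```
// Before: args.first().unwrap() would panic on an empty slice.
// After: the None case becomes a proper error the caller can handle.
fn first_arg(args: &[String]) -> Result<&String, String> {
    args.first()
        .ok_or_else(|| "no arguments supplied".to_string())
}
```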
Liu Jiang
51fd624f3e rustjail: add more context info for errors
Fixes: #1214

Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
2020-12-24 17:47:58 +08:00
Peng Tao
f1b3f2e178 Merge pull request #1150 from fidencio/wip/make-install-breaks
Add void "install" targets for both "trace-forwarder" and "agent-ctl"
2020-12-23 18:41:42 +08:00
Peng Tao
d6a6f68fd7 Merge pull request #1177 from devimc/2020-12-07/osbuilder/gentoo
[forwardport] Add support for Gentoo
2020-12-23 18:39:12 +08:00
Peng Tao
109ab54d63 Merge pull request #1212 from jiangliu/typo
oci: fix a typo in "addtionalGids"
2020-12-23 18:03:26 +08:00
Bin Liu
8d6096210e Merge pull request #1186 from maruthgoyal/2.0-dev
Don't update cpusets if no CPUs changed closes #1172
2020-12-23 10:05:59 +08:00
Liu Jiang
9321e1b21b oci: fix two incompatible issues with OCI spec
The first incompatible issue is caused by a typo: "swapiness" should
be "swappiness". The second incompatible issue is caused by serde
formatting. The struct LinuxBlockIODevice is introduced for convenience,
but it also changes the serialized data, so "#[serde(flatten)]" should
be used for compatibility with the OCI spec.

Fixes: #1211

Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
2020-12-22 11:16:15 +08:00
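
An illustrative sketch of the `#[serde(flatten)]` fix described above, assuming the serde crate with the derive feature and using simplified field names (not the full OCI types): flattening keeps the embedded device fields at the same JSON level as the outer struct's fields, matching the OCI wire format.

```
use serde::{Deserialize, Serialize};

// Embedded for convenience in the Rust code...
#[derive(Serialize, Deserialize)]
struct LinuxBlockIODevice {
    major: i64,
    minor: i64,
}

// ...but flattened so the serialized JSON stays {"major":..,"minor":..,"weight":..}
// rather than nesting the device fields under an extra key.
#[derive(Serialize, Deserialize)]
struct LinuxWeightDevice {
    #[serde(flatten)]
    blk: LinuxBlockIODevice,
    weight: Option<u16>,
}
```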
Liu Jiang
406a91ffdd agent: consume ttrpc crate from crates.io
The ttrpc v0.3.0 has been published to crates.io, so consume from
crates.io.

Fixes: #1213

Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
2020-12-22 09:46:41 +08:00
Eric Ernst
9a7bcccc8e qemu: no state to save if QEMU isn't running
On pod delete, we were looking to read files that we had just deleted. In particular,
stopSandbox for QEMU was called (we cleaned up the vm path), and then QEMU's
save function was called, which immediately checks for the PID file.

Let's only update the persist store for QEMU if QEMU is actually
running. This avoids error messages being displayed when we are
stopping and deleting a sandbox:

```
level=error msg="Could not read qemu pid file"
```

I reviewed CLH, and it looks like it is already taking appropriate
action, so no changes needed.

Ideally we won't spend much time saving state to persist.json unless
there's an actual error during stop/delete/shutdown path, as the persist will
also be removed after the pod is removed. We may want to optimize this,
as currently we are doing a persist store when deleting each container
(after the sandbox is stopped, VM is killed), and when we stop the sandbox.
This'll require more rework... tracked in:
  https://github.com/kata-containers/kata-containers/issues/1181

Fixes: #1179

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-12-21 11:29:44 -08:00
Liu Jiang
6181570ccc oci: fix a typo in "addtionalGids"
There's a typo in "addtionalGids", which should be "additionalGids".

Fixes: #1211

Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
2020-12-22 00:03:27 +08:00
Gabriela Cervantes
a5372e006f github: Add github actions
This PR adds the github actions for this repository.

Fixes #1205

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2020-12-18 15:19:22 -06:00
Maruth Goyal
4af5beda35 agent/sandbox: Don't update cpuset when ncpus = 0
When receiving an OnlineCpuMemory RPC, if the number of CPUs to be
made available is 0, then updating the cpusets is a redundant operation.

Fixes: #1172

Signed-off-by: Maruth Goyal <maruthgoyal@gmail.com>
2020-12-18 18:11:16 +05:30
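
A minimal sketch of the early return described above; names are illustrative, not the agent's code.

```
// Skip the cpuset update entirely when no CPUs are being onlined.
fn online_cpu_mem(nb_cpus: u32) -> Result<(), String> {
    if nb_cpus == 0 {
        // Nothing to online; touching the cpusets would be redundant.
        return Ok(());
    }
    // ... online the CPUs and update the container cpusets ...
    Ok(())
}
```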
David Gibson
e004616b02 runtime/network: Fix error reporting in listRoutes()
If the upcast from resultingRoutes to *grpc.IRoutes fails, we return
(nil, err), but previous code ensures that err is nil at that point, so we
return no error.

fixes #1206

Forward port of
0ffaeeb5d8

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2020-12-18 14:36:09 +11:00
David Gibson
1ae8e81abb runtime/network: Correct error reporting in listInterfaces()
If the upcast from resultingInterfaces to *grpc.Interfaces fails, we
return (nil, err), but previous code ensures that err is nil at that
point, so we return no error.

Forward port of
b86e904c2d

fixes #1206

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2020-12-18 14:35:50 +11:00
Bin Liu
1b2ccf87f8 Merge pull request #1189 from devimc/2020-12-10/improveDebianImage
rootfs: reduce size of debian image
2020-12-17 22:35:34 +08:00
Bin Liu
caa6965c17 Merge pull request #1183 from wainersm/runtime_destdir
runtime: Allow to overwrite DESTDIR
2020-12-17 14:10:56 +08:00
Bin Liu
7fa3045136 Merge pull request #1175 from devimc/2020-12-07/fixSnapWorkflow
snap: fix snap release channel
2020-12-17 12:51:22 +08:00
Bin Liu
3b87d10d79 Merge pull request #1191 from mxpv/fd
Don't leak fd when reseeding rng
2020-12-17 12:50:55 +08:00
David Gibson
c2da484e23 Merge pull request #1199 from dgibson/proto-regen
Fixes for make generate-protocols
2020-12-17 15:43:49 +11:00
David Gibson
a19263e58d agent/protocols: Remove unneeded import from oci.proto
oci.proto imports "google/protobuf/wrappers.proto", but doesn't appear to
use it, which causes a warning from protoc when we compile it.  Remove the
import to fix the warning.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2020-12-17 13:06:41 +11:00
David Gibson
a19cf28c26 agent/protocols: Remove some unnecessary include directives from protoc
The generate_go_sources() function in update-generate-proto.sh adds a
number of include directives to the protoc command line.  Some of these
don't appear to be necessary to correctly compile the agent's protocol
files, so remove them.

Amongst other things, there were directives pointing at the old Kata1 runtime and
agent repositories.  Those could be actively harmful by causing odd
dependencies of the Kata2 build on the Kata1 repositories.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2020-12-16 12:10:27 +11:00
David Gibson
2b4520904c agent/protocols: Remove some unneeded dependencies for protocol generation
src/agent/protocols/hack/update-generated-proto.sh checks for the presence
of protoc-gen-rust and ttrpc_rust_plugin, but it doesn't actually need
them.  Those tools are needed to generate Rust code from the gRPC proto
files, but that's already handled in src/agent/protocols/build.rs using
Cargo for dependency management.

This script is only needed for the Go code, for which the other tools are
sufficient.

fixes #1198

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2020-12-16 12:10:27 +11:00
Bo Chen
9f317009b0 Merge pull request #1194 from yuchunyu97/patch-1
docs: Fix docs in docs/architecture.md
2020-12-15 07:35:29 -08:00
AIsland
b36c9ea3fa docs: Fix docs in docs/architecture.md
Correct the docs in docs/architecture.md to use virtio-fs as the
default config.

Fixes: #1193

Signed-off-by: AIsland <yuchunyu01@inspur.com>
2020-12-14 09:31:20 +08:00
Maksym Pavlenko
3db1c8059d agent: Don't leak fd when reseeding rng
This PR wraps the raw fd with a File, so it'll be properly closed when it goes out of scope.

Fixes: #1192

Signed-off-by: Maksym Pavlenko <pavlenko.maksym@gmail.com>
2020-12-11 16:18:41 -08:00
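
An illustrative sketch of the pattern described above, using only the standard library; the function name and parameters are assumptions, not the agent's exact code.

```
use std::fs::File;
use std::io::Write;
use std::os::unix::io::{FromRawFd, RawFd};

// Taking ownership of the raw fd with File guarantees it is closed when the
// value is dropped, instead of leaking a descriptor on every call.
fn write_to_fd(fd: RawFd, data: &[u8]) -> std::io::Result<()> {
    // Safety: the caller must pass an open fd that is not owned elsewhere.
    let mut file = unsafe { File::from_raw_fd(fd) };
    file.write_all(data)
    // `file` is dropped here, closing the descriptor.
}
```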
Bo Chen
8edfab73c3 Merge pull request #1170 from likebreath/1203/update_virt_docs
docs: Update the Cloud Hypervisor description in virtualization.md
2020-12-10 09:04:20 -08:00
Julio Montes
8ac93f65fd rootfs-builder: add support for gentoo
Generate images based on gentoo

fixes #1178

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-12-10 09:36:15 -06:00
Julio Montes
9897238f3a rootfs: reduce size of debian image
Improve Kata Containers memory footprint by reducing debian
image size.

Without this change:
Debian image -> 256MB

With this change:
Debian image -> 128MB

Note: this change *will not* impact ubuntu image.

fixes #1188

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-12-10 09:23:48 -06:00
Bo Chen
d47122e959 docs: Update the Cloud Hypervisor description in virtualization.md
The current description of the Cloud Hypervisor support in Kata
Containers was introduced back in Kata 1.10 and is outdated.

Depends-on: github.com/kata-containers/tests#3106

Fixes: #1167

Signed-off-by: Bo Chen <chen.bo@intel.com>
2020-12-10 06:17:53 -08:00
Wainer dos Santos Moschetta
10e9bfc6f7 runtime: Allow to overwrite DESTDIR
In runtime/Makefile the value of DESTDIR is set to "/", unless one
passes that variable as an argument to `make`. This change also
allows it to be overridden if DESTDIR is exported in the
environment.

Fixes #1182

Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2020-12-09 09:04:04 -05:00
Wainer dos Santos Moschetta
f740032c42 packaging/qemu: Delete the temporary container
A temporary container is used to pull the QEMU tarball out
of the build image, but this container is never deleted. This
change ensures it gets deleted after its execution.

Fixes #1168

Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2020-12-08 16:36:16 -05:00
Wainer dos Santos Moschetta
e5c710e833 packaging/qemu: Build and package completely in the container
Currently QEMU is built inside the container, its tarball pulled to
the host, files removed, and then everything packaged again. Instead,
let's run all those steps inside the container so the resulting tarball
is the final version. To that end, the qemu-build-post.sh script is
introduced, which removes the unneeded files and creates the tarball.

The patterns for directories in qemu.blacklist had to be changed
to work properly with `find -path`.

Fixes #1168

Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2020-12-08 16:36:14 -05:00
Wainer dos Santos Moschetta
4c3377de3b packaging/qemu: Add QEMU_DESTDIR argument to dockerfiles
The dockerfiles used to build qemu and qemu-virtiofs have the QEMU destination
path hardcoded, which in turn is also present in the build scripts. This refactors
the dockerfiles to add the QEMU_DESTDIR argument, whose value is passed in by the scripts.

Fixes #1168

Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2020-12-08 16:32:25 -05:00
Julio Montes
faed2369a0 rootfs-builder: add functions to run before and after the container
Define `before_starting_container` and `after_stopping_container`
functions; they run before and after the container that
builds the rootfs, respectively.

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-12-08 10:14:00 -06:00
Peng Tao
e167bf30e3 Merge pull request #1165 from liubin/fix/exec-hang-when-bg-process-running
agent: exit from exec hangs if background process is present
2020-12-08 20:32:23 +08:00
Julio Montes
94b9b812c7 Merge pull request #1173 from devimc/2020-12-07/updateSnapDoc
[forwardport] install: Improve snap documentation
2020-12-07 14:07:25 -06:00
Julio Montes
8e5603e6a8 snap: fix snap release channel
According to the new snap document
`docs/install/snap-installation-guide.md`, Kata Containers 2.x should
be available in the snapcraft `candidate` channel.

fixes #1174

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-12-07 13:37:57 -06:00
Julio Montes
8f53893535 install: Improve snap documentation
Improve the snap documentation: document how to install
Kata 1.x and 2.x, how to configure them, and how to integrate
them with container engines.

fixes #1138

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-12-07 08:56:11 -06:00
bin liu
1ca415d87e agent: exit from exec hangs if background process is present
This is the Rust port of https://github.com/kata-containers/agent/pull/371

`read_stdout`/`read_stderr` are blocking rpc calls. If the exec process has
exited, these calls remain blocked reading the process's term master fd and
never get a chance to break out of the wait.

In this PR, `read_stdout`/`read_stderr` no longer read directly from a
process's term master; instead, they first obtain an fd to read from via the
newly added `epoller.poll()`. `epoller.poll()` may return:

- the term master fd of the exec process, if the process is running.
- a (piped) fd that returns EOF on read, indicating that the process has exited.

Fixes: #1160

Signed-off-by: bin liu <bin@hyper.sh>
2020-12-07 10:52:44 +08:00
Chelsea Mafrica
49e7151d3d shimv2: Add tracing
Add trace calls to shimv2 that create spans for functions in service.go.
Tracing starts in New(), which is forked twice and is followed by either
StartShim() or Create().

Tracing cannot start without Trace being enabled in the
runtime config, so load the config in New(); this results in it being
loaded every time New() is called, in addition to where it is originally
loaded after Create().

Fixes #903

Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2020-12-04 19:38:44 -08:00
Peng Tao
4bca7312c7 Merge pull request #1158 from liubin/fix/1156-fix-cpuset
handle vcpus properly utilized in the guest
2020-12-04 22:32:15 +08:00
Bin Liu
35e79c97cf Merge pull request #920 from YchauWang/update-docs-2.0-5
docs: fix the custom agent binary file path for creating initrd image
2020-12-04 21:41:20 +08:00
Ychau Wang
a00f7c3484 docs: fix the custom agent binary file path for creating initrd image
fix the custom agent binary file path for creating an initrd image in
the Developer-Guide.md file.

Fixes: #919

Signed-off-by: Ychau Wang <wangyongchao.bj@inspur.com>
2020-12-04 17:19:23 +08:00
Fabiano Fidêncio
f7383ef835 Merge pull request #1166 from cmaf/fix-ctx-port
shimv2: handle ctx passed by containerd
2020-12-03 19:45:52 +01:00
Bin Liu
4e0a7e31f9 Merge pull request #1103 from likebreath/1111/clh_fix_cleanupVM
runtime: clh: Enforce to call 'cleanupVM' for 'stopSandbox'
2020-12-03 17:34:26 +08:00
Chelsea Mafrica
0155fe1260 shimv2: handle ctx passed by containerd
Sometimes the shim process cannot be shut down because the container list
is not empty. This container list is written by the shim service while
creating a container. We find that if containerd cancels its Create
Container request due to a timeout, but the runtime doesn't handle it properly
and continues the create action, then this container cannot be deleted at
all. So we should make sure the ctx passed to the Create service rpc call
takes effect.

Fixes #1088

Signed-off-by: Yves Chan <shanks.cyp@gmail.com>
Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2020-12-02 14:28:31 -08:00
Archana Shinde
f96cdc1a67 Merge pull request #1114 from c3d/bug/1111-agent-oom-killer
agent: Adjust OOM Score to avoid agent being killed.
2020-12-02 11:40:35 -08:00
Julio Montes
2c6cee0d28 Merge pull request #1146 from devimc/2020-11-19/fixKataCheck
[forward port]  cli: make check subcommand more tolerant to failures
2020-12-02 13:11:30 -06:00
bin liu
a793b8d90d agent: update cpuset of container path
After CPU hotplug becomes available, the cpuset for containers will be written into
cgroup files recursively; the paths should include the container's cgroup path and
every ancestor up to the root of the cgroup filesystem.

Fixes: #1156, #1159

Signed-off-by: bin liu <bin@hyper.sh>
2020-12-02 10:38:26 +08:00
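
A hedged sketch of the recursive update described above; paths and error handling are simplified and this is not the agent's actual implementation.

```
use std::fs;
use std::path::Path;

// Write the cpuset to the container's cgroup path and every ancestor up to
// the cgroup root, updating parents before children because a child cpuset
// cannot contain CPUs its parent does not have.
fn update_cpuset_up_to_root(root: &Path, container: &Path, cpus: &str) -> std::io::Result<()> {
    let mut chain = Vec::new();
    let mut current = container;
    loop {
        chain.push(current.to_path_buf());
        if current == root {
            break;
        }
        match current.parent() {
            Some(parent) => current = parent,
            None => break,
        }
    }
    // Apply from the root downwards.
    for path in chain.iter().rev() {
        fs::write(path.join("cpuset.cpus"), cpus)?;
    }
    Ok(())
}
```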
bin liu
705182d04e agent: ignore updating cpuset error when update cgroups
The result of `cpuset_controller.set_cpus(&cpu.cpus)` is unwrapped;
this causes container creation to fail if a cpuset is set.

The sandbox's `CreateContainer` sequence is:

c, err := newContainer(s, &contConfig)
err = c.create()
  c.sandbox.agent.createContainer(c.sandbox, c) (1)
err = s.updateResources()
  oldCPUs, newCPUs, err := s.hypervisor.resizeVCPUs(sandboxVCPUs) (2)

The cpuset is only available after `s.hypervisor.resizeVCPUs` has been called at (2),
and only then is the cpuset written to the cgroups file.

Fixes: #1159

Signed-off-by: bin liu <bin@hyper.sh>
2020-12-02 10:38:16 +08:00
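
A small, self-contained sketch of the behavior described above (not the agent's exact code): log the cpuset failure instead of unwrapping it, so container creation can proceed and the cpuset is applied later once the vCPUs have been resized.

```
// `set_cpus` stands in for cpuset_controller.set_cpus(); any closure with
// this shape will do for the sketch.
fn try_update_cpuset<F>(set_cpus: F, cpus: &str)
where
    F: Fn(&str) -> Result<(), String>,
{
    // Before: set_cpus(cpus).unwrap() aborted container creation.
    // After: warn and carry on.
    if let Err(e) = set_cpus(cpus) {
        eprintln!("warning: failed to update cpuset, ignoring: {}", e);
    }
}
```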
Bo Chen
647331ace6 runtime: clh: Enforce to call 'cleanupVM' for 'stopSandbox'
We should always clean up the vm directory when doing `stopSandbox`,
but we are skipping the cleanup process on some error code paths when
using the cloud-hypervisor driver.

Fixes: #1098

Signed-off-by: Bo Chen <chen.bo@intel.com>
2020-12-01 17:27:44 -08:00
Eric Ernst
095dace7d6 Merge pull request #1137 from egernst/vmt-to-top
docs: add link to VMT on top level README
2020-12-01 16:25:22 -08:00
Eric Ernst
2f1cb7995f kata-monitor: allow for building for alpine
- add a reference Dockerfile to tools
- update kata-monitor build to:
  1) utilize the kata buildflags, which were dropped before
  2) disable CGO, so we have the option of building in alpine

From root of the repository, example build:
 $ docker build -f tools/packaging/kata-monitor/Dockerfile -t kata-monitor .

Fixes: #1135

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-12-01 10:28:59 -08:00
Eric Ernst
e684a54163 docs: add link to VMT on top level README
The VMT process is well documented, but users would need to land on
the community repo to find it. Let's make it easier to identify the correct
way to disclose vulnerabilities.

Fixes: #1136

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-11-30 08:12:31 -08:00
Fabiano Fidêncio
68f66c515f agent-ctl: Add void "install" target
Otherwise `make install` run from the top directory would just fail as
the target is not defined.

Fixes: #1149

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2020-11-27 15:26:29 +01:00
Fabiano Fidêncio
5e407758f6 trace-forwarder: Add void "install" target
Otherwise `make install` run from the top directory would just fail as
the target is not defined.

Fixes: #1149

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2020-11-27 15:26:23 +01:00
Julio Montes
70f198d78e cli: check modules and permissions before loading a module
Before loading a module, the check subcommand should check if the
current user can load it.

fixes #3085

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-11-26 11:55:42 -06:00
Julio Montes
cb684cf8ea cli: don't fail if rate limit is exceeded
Don't fail if rate limit is exceeded since this is a
limitation/restriction of Github not a problem in the host.
Print a warning when the rate limit is exceeded.

For more information about Github's rate limit, see
https://developer.github.com/v3/#rate-limiting

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-11-26 11:50:14 -06:00
Bin Liu
b8716d8eec Merge pull request #1141 from lifupan/fix_thread_spwan
rustjail: fork a new child process to change the pid ns
2020-11-25 15:20:36 +08:00
Bin Liu
8d19b8e013 Merge pull request #1139 from lifupan/skip_networkcheck
rustjail: remove the network ns validation against container
2020-11-25 15:03:18 +08:00
fupan.lfp
9216f2ad63 rustjail: fork a new child process to change the pid ns
Once the main process unshares the pid namespace, it
cannot spawn new threads. To avoid this issue,
fork a new child process and do the pid namespace unshare
in that temporary process.

Fixes: #1140

Signed-off-by: fupan.lfp <fupan.lfp@antfin.com>
2020-11-23 17:57:33 +08:00
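
A hedged sketch of the fork-then-unshare pattern described above, assuming the `nix` crate; this is illustrative, not the rustjail implementation.

```
use nix::sched::{unshare, CloneFlags};
use nix::sys::wait::waitpid;
use nix::unistd::{fork, ForkResult};

// Unshare the pid namespace in a short-lived child so the parent keeps its
// ability to spawn threads.
fn unshare_pid_ns_in_child() -> nix::Result<()> {
    match unsafe { fork() }? {
        ForkResult::Child => {
            // The temporary child switches pid namespace; any process it
            // forks from now on is created inside the new namespace.
            let code = match unshare(CloneFlags::CLONE_NEWPID) {
                Ok(()) => 0,
                Err(_) => 1,
            };
            // ... namespace-sensitive work would happen here on success ...
            std::process::exit(code);
        }
        ForkResult::Parent { child } => {
            // Reap the temporary child; the parent is unaffected.
            waitpid(child, None)?;
            Ok(())
        }
    }
}
```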
fupan.lfp
3b08376c4e rustjail: remove the network ns validation against container
Since Kata Containers shares the network ns with
the guest system, there's no need to do the
network ns check.

Fixes: #1047

Signed-off-by: fupan.lfp <fupan.lfp@antfin.com>
2020-11-23 14:41:22 +08:00
Julio Montes
a853e8eaca Merge pull request #1124 from devimc/2020-11-17/snap/updateApps
snap: update apps section
2020-11-18 09:21:35 -06:00
James O. D. Hunt
7c12c5481e Merge pull request #1128 from liubin/fix/1127-delete-wait
runtime: don't wait the second shim process in shim start
2020-11-18 14:19:11 +00:00
Julio Montes
f00655a40f Merge pull request #1060 from jongwu/rootbus
agent: create pci root Bus Path for arm64
2020-11-18 08:13:30 -06:00
Julio Montes
e411ebc779 Merge pull request #1126 from liubin/fix/1125-enable-lto
agent: enable lto flag for Cargo to get better optimized code
2020-11-18 08:07:58 -06:00
bin liu
c388ec5bef runtime: don't wait the second shim process in shim start
The first shim v2 startup (with the `start` command-line option) starts
a second shim v2 process running as the ttrpc server. There is no need to
wait for the second process, because the current shim v2 process will exit immediately.

Fixes: #1127

Signed-off-by: bin liu <bin@hyper.sh>
2020-11-18 17:18:59 +08:00
bin liu
d6acc4c09c agent: enable lto flag for Cargo to get better optimized code
The lto setting controls the -C lto flag which controls LLVM's link time optimizations.
LTO can produce better optimized code, using whole-program analysis,
at the cost of longer linking time.

https://doc.rust-lang.org/cargo/reference/profiles.html#lto

Fixes: #1125

Signed-off-by: bin liu <bin@hyper.sh>
2020-11-18 15:50:27 +08:00
Julio Montes
13a8e4e39e snap: update apps section
Add `kata-runtime` and `kata-collect-data.sh` commands to the apps
section; these two commands will be accessible through the commands
`kata-containers.runtime` and `kata-containers.collect-data`,
respectively.
Henceforth the snap command for `containerd-shim-kata-v2` will be
`kata-containers.shim`.

fixes #1122

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-11-17 11:31:08 -06:00
Julio Montes
1dd77e204f Merge pull request #1120 from liubin/fix/1119-revert-cleanupcontainer-api
virtcontainers: revert CleanupContainer from PR 1079
2020-11-17 09:11:29 -06:00
Tim Zhang
7201745eae Merge pull request #1065 from jodh-intel/2.0-dev-add-hypervisors-doc
docs: Create hypervisor summary document
2020-11-17 16:07:18 +08:00
bin liu
fdbf7d3222 virtcontainers: revert CleanupContainer from PR 1079
In PR 1079, CleanupContainer's sandboxID parameter was changed to a VCSandbox, but at cleanup
time no VCSandbox has been constructed; we should load it from disk via loadSandboxConfig() in
persist.go. This commit reverts parts of #1079.

Fixes: #1119

Signed-off-by: bin liu <bin@hyper.sh>
2020-11-17 10:31:33 +08:00
Bo Chen
960227165c Merge pull request #1121 from c3d/docs/855-hyperlinks
Add hyperlink and fix typo
2020-11-16 15:48:43 -08:00
James O. D. Hunt
91a390f072 docs: Create hypervisor summary document
Split some of the core hypervisor details out of the virtualisation
document and present in a simpler fashion for new users.

Fixes: #1063.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-11-16 11:52:40 +00:00
James O. D. Hunt
3eeb25a169 docs: Tidied up virtualisation summary table
- Removed the `QEMU-virtio-fs` entry from the virtualization doc since
  support is now available upstream and the QEMU virtio-fs-specific
  configuration file has been removed.
- Removed NEMU as this is no longer used.
- Sorted the remaining rows.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-11-16 11:50:43 +00:00
Ariel Adam
8ec3cf08f3 docs: Adding hyperlink to virtio-net in kata documentation 2.0
Link the virtio-net mention in the Kata virtualization
documentation to the published virtio-networking blog series
explaining how it works.

Fixes #612

Signed-off-by: Ariel Adam <aadam@redhat.com>
2020-11-16 10:57:55 +01:00
Ariel Adam
b5b67db8d7 docs: Fixing typo in virtualization.md file
Changing "implementor" to "implementer"

Fixes: #612

Signed-off-by: Ariel Adam <aadam@redhat.com>
Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-11-16 10:57:22 +01:00
Julio Montes
ac6868250a Merge pull request #1117 from fidencio/wip/update-crio-version-to-include-fix-for-k8s-oom
versions: Use CRI-O v1.18.4-4-g6dee3891e
2020-11-13 13:22:58 -06:00
Fabiano Fidêncio
4d46d0f0f5 versions: Use CRI-O v1.18.4-4-g6dee3891e
This (unreleased) version of CRI-O brings in the possibility of enabling
the `k8s-oom.bats` test.

Depends-on: github.com/kata-containers/tests#3060

Fixes: #1116

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2020-11-13 15:56:02 +01:00
Christophe de Dinechin
53b5d063e9 agent: Adjust OOM Score to avoid agent being killed.
Under stress, the agent can be OOM-killed, which exits the sandbox.
One possible hard-to-diagnose manifestation is a virtiofsd crash.

Fixes: #1111

Reported-by: Qian Cai <caiqian@redhat.com>
Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-11-13 11:10:19 +01:00
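
A hedged, minimal sketch of one way to lower a process's OOM score; the exact mechanism and value used by the agent are assumptions here, not confirmed by the commit.

```
use std::fs;

// Writing a negative adjustment to /proc/self/oom_score_adj asks the kernel
// to prefer other victims when memory runs out. -998 is an illustrative
// value, not necessarily what the agent uses.
fn adjust_oom_score(adj: i32) -> std::io::Result<()> {
    fs::write("/proc/self/oom_score_adj", adj.to_string())
}

fn main() -> std::io::Result<()> {
    adjust_oom_score(-998)
}
```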
Tim Zhang
06b9294c7d Merge pull request #1110 from liubin/fix/1109-add-enable_pprof
runtime: change configuration key name from EnablePprof to enable_pprof
2020-11-13 17:44:34 +08:00
Peng Tao
3b5245fc55 Merge pull request #1079 from liubin/fix/1078-delete-sandboxlist
runtime: delete sandboxlist.go and sandboxlist_test.go
2020-11-13 15:02:51 +08:00
bin liu
14a21c3ab1 runtime: change configuration key name from EnablePprof to enable_pprof
The key name in the configuration file is in snake case, not camel case.
Since the key is processed as `enable_pprof` in code, the configuration
template file should replace `EnablePprof` with `enable_pprof`.

Fixes: #1109

Signed-off-by: bin liu <bin@hyper.sh>
2020-11-13 14:52:56 +08:00
bin liu
4e3a8c0124 runtime: remove global sandbox variable
Remove the global sandbox variable, and save *Sandbox in the hypervisor struct,
since the hypervisor may need to use methods from Sandbox.

Signed-off-by: bin liu <bin@hyper.sh>
2020-11-13 09:47:09 +08:00
bin liu
290203943c runtime: delete sandboxlist.go and sandboxlist_test.go
Delete sandboxlist.go and sandboxlist_test.go under virtcontainers package.

Fixes: #1078

Signed-off-by: bin liu <bin@hyper.sh>
2020-11-13 09:47:09 +08:00
Fabiano Fidêncio
61fccef643 Merge pull request #1095 from fidencio/wip/update-crio-version-to-include-fix-for-k8s-copy-file
versions: Use release-1.18 (commit ee9128444bec10)
2020-11-13 01:00:23 +01:00
Bo Chen
258dd55855 Merge pull request #1101 from devimc/2020-11-06/runtime/fixClhDax
runtime: clh: disable virtiofs DAX when FS cache size is 0
2020-11-12 12:26:35 -08:00
Fabiano Fidêncio
9b88a96bc7 versions: Use release-1.18 (commit ee9128444bec10)
Let's update CRI-O version to the commit which introduced the fix for
the "k8s-copy-file" tests.

Depends-on: github.com/kata-containers/tests#3042

Fixes: #1080

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2020-11-12 18:04:47 +01:00
Julio Montes
36f65ce182 runtime: clh: update cloud-hypervisor
Update cloud-hypervisor to commit 2706319.
This fixes a limitation in the OpenAPITools/openapi-generator tool:
it's impossible to send Go zero values, like false and 0, to
cloud-hypervisor because `omitempty` is added if a field is not
required.
See cloud-hypervisor/cloud-hypervisor#1961 for more information.

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-11-12 09:33:56 -06:00
Julio Montes
e1396f0402 runtime: clh: disable virtiofs DAX when FS cache size is 0
The guest consumes 120MB more memory when DAX is enabled and the default
FS cache size (8G) is used. Disable DAX when it is not required,
reducing the guest's memory footprint.

Without this patch:

```
7fdea4000000-7fdee4000000 rw-s 18850589 /memfd:ch_ram (deleted)
Size:            1048576 kB
KernelPageSize:        4 kB
MMUPageSize:           4 kB
Rss:              187876 kB
```

With this patch:

```
7fa970000000-7fa9b0000000 rw-s 612001  /memfd:ch_ram (deleted)
Size:            1048576 kB
KernelPageSize:        4 kB
MMUPageSize:           4 kB
Rss:               57308 kB
Pss:               56722 kB
```

fixes #1100

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-11-12 09:33:56 -06:00
Julio Montes
af04d71feb Merge pull request #1105 from jodh-intel/2.0-dev-fix-kata-check-version-cmp
release: Fix release candidate to major version upgrade check
2020-11-12 09:26:21 -06:00
James O. D. Hunt
8f38265be4 release: Fix release candidate to major version upgrade check
Fix `kata-runtime kata-check`'s network version check which was failing
when the user was running a release candidate build and the latest
release was a major one, two examples of the error being:

- `BUG: unhandled scenario: current version: 1.12.0-rc0, latest version: 1.12.0`
- `BUG: unhandled scenario: current version: 2.0.0-rc0, latest version: 2.0.0`

Fixes: #1104.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-11-12 10:07:18 +00:00
James O. D. Hunt
2e0bf40adb tests: Ensure semver build metadata is ignored
According to the Semantic Versioning specification, build metadata must
be ignored for version comparisons, so add some explicit tests for this
scenario to `TestGetNewReleaseType()`.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-11-12 10:06:15 +00:00
James O. D. Hunt
4024a8274b release: Make error format string consistent
Use `%s` for both semver parameters.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-11-12 10:04:00 +00:00
Fupan Li
671a2be313 Merge pull request #1094 from liubin/fix/991
runtime: sleep 1 second after GetOOMEvent failed
2020-11-11 14:33:57 +08:00
Peng Tao
9dbd1007d7 Merge pull request #1070 from jing-wang4/readme
Agent: README updates for build on ppc64le
2020-11-11 10:15:22 +08:00
Peng Tao
3c88106f65 Merge pull request #1084 from liubin/fix/1081-clean-codes
runtime: clean/refactor code
2020-11-11 10:09:10 +08:00
Christophe de Dinechin
9511b17819 Merge pull request #1045 from c3d/issue/1044-forward-port-annotation-doc
Forward port annotation doc
2020-11-10 11:34:23 +01:00
bin liu
cb0e6094ff runtime: sleep 1 second after GetOOMEvent failed
In some cases, for example when the agent has crashed but is not yet marked dead, GetOOMEvent
will return errors like `connection reset by peer` or `ttrpc: closed`. Sleep
for 1 second (the agent check interval) and let the agent health check do the checking.

Fixes: #991

Signed-off-by: bin liu <bin@hyper.sh>
2020-11-10 12:02:31 +08:00
Bo Chen
359ab16a8f Merge pull request #1090 from likebreath/1106/clh_upgrade_v0.11.0
versions: Update cloud-hypervisor to release v0.11.0
2020-11-09 15:51:09 -08:00
Archana Shinde
5444a31f7b Merge pull request #1092 from yuchunyu97/patch-1
docs: Add instructions for enabling VM templating
2020-11-09 15:50:12 -08:00
Christophe de Dinechin
4c78814bda docs: Fix pre-existing spelling mistakes caught by the CI
The documentation contains existing spelling mistakes that are caught by the CI
and prevent checking in. The errors include:

    INFO: Spell checking file 'docs/how-to/how-to-load-kernel-modules-with-kata.md'
    WARNING: Word 'configurated': did you mean one of the following?: configuration, reconfigured, Confederate, confederate
    WARNING: Word 'cri': did you mean one of the following?: cir, crib, chi, cry, Fri, crier
    ERROR: Spell check failed for file: 'docs/how-to/how-to-load-kernel-modules-with-kata.md'
    INFO: spell check failed for document docs/how-to/how-to-load-kernel-modules-with-kata.md
    INFO: Spell checking file 'docs/how-to/how-to-set-sandbox-config-kata.md'
    INFO: Spell check successful for file: 'docs/how-to/how-to-set-sandbox-config-kata.md'
    ERROR: spell check failed, See https://github.com/kata-containers/documentation/blob/master/Documentation-Requirements.md#spelling for more information.

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-11-09 16:52:33 +01:00
Christophe de Dinechin
6c083d9410 docs: Add a link to document describing how to use annotations
Add a link to the document listing the available annotations

Fixes: #1044
Forward-port-of: https://github.com/kata-containers/documentation/pull/757

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-11-09 16:50:26 +01:00
Christophe de Dinechin
d67921a2af docs: Document restricted annotations
Document restricted annotations, as implemented in
https://github.com/kata-containers/kata-containers/pull/902

Fixes: #1044
Forward-port-of: https://github.com/kata-containers/documentation/pull/755

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-11-09 16:50:26 +01:00
Christophe de Dinechin
1fc7b7641d docs: Repair inconsistencies between 2.0 and 1.x
The documentation `how-to/how-to-set-sandbox-config-kata.md` contains a number
of differences relative to the 1.x variant, which do not seem to correspond to
missing features in the actual code.

Fixes: #1046

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-11-09 16:50:26 +01:00
Peng Tao
2e65a48a11 Merge pull request #1093 from fidencio/wip/reverting-revert-cri-o-bump
Revert "version: revert back to crio 1.8.3"
2020-11-09 22:43:22 +08:00
Fabiano Fidêncio
21801a11a7 versions: Revert "version: revert back to crio 1.8.3"
This reverts commit ff13bde3c1, which
moved CRI-O back to v1.18.3.

That was, IMHO, a little bit premature. We want to know exactly what the
issues on v1.18.4 are, solve those, and be prepared for a v1.18.5 bump
(or even a bump to a specific commit, if needed).

Just for the sake of completeness, v1.18.4 caused a regression in the
"k8s-copy-file" tests, which is tracked on the CRI-O side as
https://github.com/cri-o/cri-o/issues/4353.

Fixes: #1080

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2020-11-09 13:45:01 +01:00
bin liu
b8414045bf runtime: remove nsenter
remove code for nsenter

Fixes: #1081

Signed-off-by: bin liu <bin@hyper.sh>
2020-11-09 11:42:51 +08:00
bin liu
e3510be867 runtime: use one line if statement to check if err is nil for qemu.go
Use `if err := q.qmpSetup(); err != nil` to reduce code and make it easier
to read. Also, when the last function call itself returns an error, return
that call directly instead of checking the error separately.

Fixes: #1081

Signed-off-by: bin liu <bin@hyper.sh>
2020-11-09 11:42:45 +08:00
Fupan Li
d22c7cf00b Merge pull request #1013 from liubin/feature/1012-dump-guest-memroy-on-panic
Dump guest memory when kernel panic for QEMU
2020-11-09 09:46:28 +08:00
AIsland
378308e205 docs: Add instructions for enabling VM templating
Kata 2.0 uses virtio-fs as the shared_fs by default,
but VM templating cannot be used with virtio-fs.

Fixes: #1091

Signed-off-by: AIsland <yuchunyu01@inspur.com>
2020-11-07 14:20:01 +08:00
Bo Chen
92c1c4c690 versions: Update cloud-hypervisor to release v0.11.0
The release v0.11.0 of cloud-hypervisor features the following changes:
1) Improved Linux Boot Time, 2) `SIGTERM/SIGINT` Interrupt Signal
Handling, 3) Default Log Level Changed, 4) `io_uring` support by default
for `virtio-block` (on host kernel version 5.8+), 5) Windows Guest
Support, 6) New `--balloon` Parameter Added, 7) Experimental
`virtio-watchdog` Support, 8) Bug fixes.

Fixes: #1089

Signed-off-by: Bo Chen <chen.bo@intel.com>
2020-11-06 16:19:31 -08:00
Archana Shinde
6160043c01 Merge pull request #1077 from likebreath/1103/clh_refactor_device_unplug
clh: Consolidate the code path for device unplug
2020-11-06 16:00:56 -08:00
James O. D. Hunt
b85914c960 Merge pull request #979 from jodh-intel/2.0-dev-show-ttrpc-logs
agent: Log ttrpc messages
2020-11-06 13:45:48 +00:00
James O. D. Hunt
8907a33907 agent: Only show ttrpc logs for trace log level
Only display the `ttrpc` crate log output when full logging
(trace level) is enabled.

This is a slight abuse of log levels but provides developers and testers
what they need whilst also keeping the logs relatively quiet for the
default info log level (the `ttrpc` crate logging is a bit "chatty").

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-11-06 10:45:05 +00:00
James O. D. Hunt
21cd7ad172 agent: Log ttrpc messages
The `ttrpc` crate uses the `log` crate for logging. But the agent uses
the `slog` crate. This means that currently, all `ttrpc` log messages
are being discarded.

Use the `slog-stdlog` crate to redirect `log` crate logging calls into
`slog` so they are visible in the agent's log output.

Fixes: #978.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-11-06 10:05:02 +00:00
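As a rough illustration of the redirection described above, here is a minimal sketch assuming the slog, slog-term, slog-async, slog-scope, slog-stdlog and log crates and their documented APIs; it is not the agent's actual setup code.

```
use slog::{o, Drain};

fn setup_logging() -> slog_scope::GlobalLoggerGuard {
    // Build a basic terminal drain for the root slog logger.
    let decorator = slog_term::TermDecorator::new().build();
    let drain = slog_term::FullFormat::new(decorator).build().fuse();
    let drain = slog_async::Async::new(drain).build().fuse();
    let root = slog::Logger::root(drain, o!("component" => "agent"));

    // Make the logger available globally, then forward `log` crate records
    // (e.g. from the ttrpc crate) into slog.
    let guard = slog_scope::set_global_logger(root);
    slog_stdlog::init().expect("failed to register log->slog bridge");
    guard
}

fn main() {
    let _guard = setup_logging();
    log::info!("this `log` record now shows up in the slog output");
}
```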
James O. D. Hunt
286eebf087 agent: Add env var to set log level
Add support for a `KATA_AGENT_LOG_LEVEL` environment variable for testing.
This is equivalent to the `agent.log=` kernel command line option.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-11-06 10:05:02 +00:00
James O. D. Hunt
b9c6db4bb8 agent: Add env var tests
Add some tests for the existing `KATA_AGENT_SERVER_ADDR` environment
variable feature.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-11-06 10:05:02 +00:00
James O. D. Hunt
705e995589 agent: Add env var comment
Add a comment stating what the server address environment variable is
for.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-11-06 10:05:02 +00:00
Peng Tao
c7a2b12fab Merge pull request #1086 from jodh-intel/2.0-dev-fix-annotations
annotations: Improve asset annotation handling
2020-11-06 10:29:22 +08:00
Bin Liu
a68e200462 Merge pull request #1062 from bergwolf/ro-volume
runtime: readonly volume should be bind mounted readonly on the host
2020-11-06 10:26:57 +08:00
James O. D. Hunt
5ced96e96d hypervisor: Remove unused methods
Deleted `HypervisorConfig`'s unused  `CustomFirmwareAsset()` and
`JailerAssetPath()` methods.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-11-05 12:15:47 +00:00
James O. D. Hunt
e82c9daec3 annotations: Improve asset annotation handling
Make `asset.go` the arbiter of asset annotations by removing all asset
annotations lists from other parts of the codebase.

This makes the code simpler, easier to maintain, and more robust.

Specifically, the previous behaviour was inconsistent in the following
ways:

- `createAssets()` in `sandbox.go` was not handling the following asset
  annotations:

    - firmware:
      - `io.katacontainers.config.hypervisor.firmware`
      - `io.katacontainers.config.hypervisor.firmware_hash`

    - hypervisor:
      - `io.katacontainers.config.hypervisor.path`
      - `io.katacontainers.config.hypervisor.hypervisor_hash`

    - hypervisor control binary:
      - `io.katacontainers.config.hypervisor.ctlpath`
      - `io.katacontainers.config.hypervisor.hypervisorctl_hash`

    - jailer:
      - `io.katacontainers.config.hypervisor.jailer_path`
      - `io.katacontainers.config.hypervisor.jailer_hash`

- `addAssetAnnotations()` in the `oci` package was not handling the
  following asset annotations:

    - hypervisor:
      - `io.katacontainers.config.hypervisor.path`
      - `io.katacontainers.config.hypervisor.hypervisor_hash`

    - hypervisor control binary:
      - `io.katacontainers.config.hypervisor.ctlpath`
      - `io.katacontainers.config.hypervisor.hypervisorctl_hash`

    - jailer:
      - `io.katacontainers.config.hypervisor.jailer_path`
      - `io.katacontainers.config.hypervisor.jailer_hash`

This change fixes the bug where specifying a custom hypervisor path via an
asset annotation was having no effect.

Fixes: #1085.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-11-05 12:15:42 +00:00
James O. D. Hunt
0f26f1cd6f annotations: Add missing hypervisor control annotation
Add missing annotation definitions for a hypervisor control binary:

- `io.katacontainers.config.hypervisor.ctlpath`
- `io.katacontainers.config.hypervisor.hypervisorctl_hash`

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-11-05 12:12:58 +00:00
James O. D. Hunt
76064e3e2d asset: Formatting, grammar and whitespace
Improve formatting, grammar and whitespace.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-11-05 12:12:51 +00:00
bin liu
40418f6d88 runtime: add guest memory dump
When the guest kernel panics, dump the guest kernel memory to the host filesystem.
The dump also includes:
- hypervisor config
- hypervisor version
- state of the sandbox

Fixes: #1012

Signed-off-by: bin liu <bin@hyper.sh>
2020-11-05 16:04:21 +08:00
Peng Tao
ff13bde3c1 version: revert back to crio 1.8.3
This reverts commit 87848e874e as it is
breaking the k8s configMap test.

Fixes: #1080
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-11-05 14:48:32 +08:00
Jianyong Wu
6c2fc233e2 agent: create pci root Bus Path for arm64
port https://github.com/kata-containers/agent/pull/860 here.

Fixes: #1059
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2020-11-05 12:18:09 +08:00
Peng Tao
a958eaa8d3 runtime: mount shared mountpoint readonly
bindmount remount events are not propagated through mount subtrees,
so we have to remount the shared dir mountpoint directly.

E.g.,
```
mkdir -p source dest foo source/foo

mount -o bind --make-shared source dest

mount -o bind foo source/foo
echo bind mount rw
mount | grep foo
echo remount ro
mount -o remount,bind,ro source/foo
mount | grep foo
```
would result in:
```
bind mount rw
/dev/xvda1 on /home/ubuntu/source/foo type ext4 (rw,relatime,discard,data=ordered)
/dev/xvda1 on /home/ubuntu/dest/foo type ext4 (rw,relatime,discard,data=ordered)
remount ro
/dev/xvda1 on /home/ubuntu/source/foo type ext4 (ro,relatime,discard,data=ordered)
/dev/xvda1 on /home/ubuntu/dest/foo type ext4 (rw,relatime,discard,data=ordered)
```

The reason is that a bind mount creates new mount structs and attaches them to different mount subtrees.
However, MS_REMOUNT only looks for existing mount structs to modify and does not try to propagate the
change to mount structs in other subtrees.

Fixes: #1061
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-11-04 17:51:49 +08:00
Peng Tao
125e21cea3 runtime: readonly mounts should be readonly bindmount on the host
So that we are protected at the VM boundary, not just by the guest kernel.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-11-04 17:51:49 +08:00
Bin Liu
259589ad89 Merge pull request #1051 from yuchunyu97/patch-1
docs: Fix incorrect docs in config file
2020-11-04 17:46:21 +08:00
Bin Liu
045fc2f7a2 Merge pull request #1052 from yuchunyu97/patch-2
CI: Fix incorrect URL
2020-11-04 16:53:56 +08:00
AIsland
5f0abc20f0 CI: Fix incorrect URL
Correct the link in the GitHub action commit message check showing users how to format all commits.

Fixes: #1053

Signed-off-by: AIsland <yuchunyu01@inspur.com>
2020-11-04 10:05:20 +08:00
AIsland
b6f8a1d5af docs: Fix incorrect docs in config file
Correct the default configuration of [hypervisor.qemu] shared_fs in configuration-qemu.toml to virtio-fs in kata 2.0.

Fixes: #1054

Signed-off-by: AIsland <yuchunyu01@inspur.com>
2020-11-04 09:58:02 +08:00
Bo Chen
93d7962510 clh: Consolidate the code path for device unplug
Cloud-hypervisor provides a single unified way of unplugging
devices, i.e. the `/vm.RemoveDevice` HTTP API. Taking advantage of this
API, we can simplify our implementation of `hotplugRemoveDevice` in
`clh.go` by consolidating the similar code paths for different
device unplugs (e.g. there is no need to implement `hotplugRemoveBlockDevice` and
`hotplugRemoveVfioDevice` separately). We only need to retrieve the
right `deviceID` based on the device type and use the single
unified HTTP API for device unplug.

Fixes: #1076

Signed-off-by: Bo Chen <chen.bo@intel.com>
2020-11-03 15:46:38 -08:00
James O. D. Hunt
43ec107d94 Merge pull request #1066 from jodh-intel/2.0-dev-update-readme
docs: Update top-level README
2020-11-03 16:05:55 +00:00
Jing Wang
18a2245986 Agent: README updates for build on ppc64le
README updates for agent build on ppc64le

  Fixes: #1069

Signed-off-by: Jing Wang <jing.wang4@ibm.com>
2020-11-03 15:29:43 +00:00
Jing Wang
655f2649b3 Agent: README updates for build on ppc64le
README updates for agent build on ppc64le

  Fixes: #1069

Signed-off-by: Jing Wang <jing.wang4@ibm.com>
2020-11-03 15:24:08 +00:00
James O. D. Hunt
62c7e09405 docs: Remove credits
Removed the packagecloud credits since we no longer produce distro
packages for Kata 2.x.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-11-03 11:38:59 +00:00
James O. D. Hunt
679df0fb77 docs: Update top-level README
Rework the top-level README to reflect the current use of this
repository.

Fixes: #1064.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-11-03 11:38:57 +00:00
Chelsea Mafrica
32505af7bb Merge pull request #1037 from GabyCT/topic/updatecrio
versions: Update crio version
2020-11-02 16:58:22 -08:00
Jing Wang
dfe364f885 Agent: README updates for build on ppc64le
README updates for agent build on ppc64le

Fixes: #1069

Signed-off-by: Jing Wang <jing.wang4@ibm.com>
2020-11-02 20:26:36 +00:00
Peng Tao
bf57cd844e Merge pull request #1057 from devimc/29-10-2020/clh/improveMemFoot
runtime: cloud-hypervisor: reduce memory footprint
2020-11-02 15:13:06 +08:00
Bin Liu
8823ca31ad Merge pull request #1042 from devimc/2020-10-21/unitests/sandbox.rs
agent: Improve unit test coverage for src/sandbox.rs
2020-10-30 11:26:29 +08:00
Bin Liu
7b9013f047 Merge pull request #1035 from lifupan/fix_thread_panic
rustjail: fix the issue of create thread failed causing current thread panic
2020-10-30 11:25:32 +08:00
Julio Montes
77b50969ea runtime: cloud-hypervisor: reduce memory footprint
Cloud-hypervisor supports DAX; let's enable it to reduce its memory
footprint.

Before this patch:

**19.96M**

```
20448kB -- [/usr/share/kata-containers/kata.img]
```

With this patch:

**10.83M**

```
11100kB -- [/usr/share/kata-containers/kata.img]
```

fixes #1056

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-29 14:21:57 -06:00
Julio Montes
196e8d81cf Merge pull request #1032 from devimc/2020-10-21/unitests/container.rs
Improve unit test coverage for rustjail/container.rs
2020-10-28 16:08:23 -06:00
Julio Montes
2e1a8f0ae9 agent: Improve unit test coverage for src/sandbox.rs
Improve unit test coverage for src/sandbox.rs

fixes #293

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-28 08:10:46 -06:00
Gabriela Cervantes
87848e874e versions: Update crio version
This PR updates the crio version from 1.18.3 to 1.18.4 in order to include
the fix https://github.com/cri-o/cri-o/pull/4284.

Fixes #1036

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2020-10-27 10:50:09 -06:00
fupan.lfp
172d015e1b rustjail: fix the issue of create thread failed causing thread panic
We should catch the error when spawning a new thread fails; otherwise,
it would cause the current thread to panic.

Fixes: #1034

Signed-off-by: fupan.lfp <fupan.lfp@antfin.com>
2020-10-27 23:31:34 +08:00
Julio Montes
9e93463bb6 agent/rustjail: improve unit test coverage for rustjail/container.rs
Improve unit test coverage for rustjail/container.rs

fixes #282

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-27 09:28:38 -06:00
Julio Montes
ad4f7b86f2 agent/rustjail: make mount and umount2 public
Make mount and umount2 public so that they can be
used in other files.

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-27 09:28:38 -06:00
Julio Montes
926a618624 agent/rustjail: fix typo
not suppoerted  ->  not supported

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-27 09:28:38 -06:00
Julio Montes
8130d9b2dd agent/rustjail: don't use unwrap in container::oci_state
Replace unwrap with `match` statements; this way we can write
unit tests that don't panic.

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-27 09:28:38 -06:00
Julio Montes
5d111071be rustjail: add mock implementation for cgroup manager
Only root is able to create and manipulate cgroups, so this mock
implementation of a cgroup manager can be used in unit testing.

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-27 09:28:38 -06:00
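A minimal sketch of the idea behind such a mock, using an illustrative trait rather than the agent's actual cgroup manager interface:

```
// Illustrative trait only; the real cgroup manager interface differs.
trait CgroupManager {
    fn apply(&self, pid: i32) -> Result<(), String>;
    fn set(&self, key: &str, value: &str) -> Result<(), String>;
    fn destroy(&self) -> Result<(), String>;
}

// Mock used in unit tests: it touches no real cgroup files,
// so tests can run without root privileges.
struct MockCgroupManager;

impl CgroupManager for MockCgroupManager {
    fn apply(&self, _pid: i32) -> Result<(), String> { Ok(()) }
    fn set(&self, _key: &str, _value: &str) -> Result<(), String> { Ok(()) }
    fn destroy(&self) -> Result<(), String> { Ok(()) }
}

fn main() {
    let mgr = MockCgroupManager;
    // No root needed: nothing is written to the real cgroup filesystem.
    assert!(mgr.apply(1234).is_ok());
    assert!(mgr.set("cpu.shares", "1024").is_ok());
    assert!(mgr.destroy().is_ok());
}
```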
James O. D. Hunt
157dd041b2 Merge pull request #895 from Apokleos/dev
agent: Update build instructions
2020-10-27 14:54:51 +00:00
James O. D. Hunt
583ed55680 Merge pull request #1027 from knittl/feature/kata-subcommands-aliases
cli: Provide aliases for kata-* subcommands and options
2020-10-27 09:58:05 +00:00
Tim Zhang
d1987f392d Merge pull request #1018 from c3d/bug/1017-missing-makefile-variable
runtime: Restore QEMUVIRTIOFSPATH variable in Makefile
2020-10-27 11:33:05 +08:00
LiYa'nan
e3eff0eb15 agent: Update build instructions
Fix the instructions explaining how to build the agent from source now that make needs to be run to auto-generate some source files.

Fixes: #889.

Signed-off-by: LiYa'nan <oliverliyn@gmail.com>
2020-10-27 01:19:21 +00:00
Peng Tao
f77937de35 Merge pull request #1025 from wainersm/static_build_qemu_patches
Use apply_patches.sh in qemu and kernel scripts
2020-10-26 18:23:41 +08:00
Bin Liu
43770b28da Merge pull request #1029 from bergwolf/agent-proto
clean up agent proto files
2020-10-26 17:06:18 +08:00
Peng Tao
0896ce80a4 agent: update proto file copyright
Now that it is Ant Group...

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-26 16:03:47 +08:00
Peng Tao
6e9ca45732 agent: generate proto files properly
Need to generate all protos.

Depends-on: github.com/kata-containers/tests#3006
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-25 11:27:00 +08:00
Peng Tao
837343f08a agent-ctl: update cargo.lock
Just compiling would show that the cargo.lock file is not updated.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-25 10:58:27 +08:00
Peng Tao
b316661818 runtime: remove the unused proto files
These are moved to the agent and no longer needed.

Fixes: #1028
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-25 10:57:38 +08:00
Peng Tao
54e23c8302 agent: move gogo.proto out of the github.com namespace
To follow the same namespace scope as other proto files.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-25 10:44:53 +08:00
Peng Tao
583e6ed3e5 agent: types.pb.go is not regenerated
When types.proto was relocated, types.pb.go was not regenerated and still
references the old location.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-25 10:35:35 +08:00
Peng Tao
74a155c65b Merge pull request #1023 from lifupan/fix_stdio
agent: fixes the permissions of PID 1's STDIO
2020-10-25 10:21:11 +08:00
Daniel Knittl-Frank
bb19fcb936 docs: Update documentation with new subcommand forms
Remove the old subcommands from the documentation and replace them with
the new form (without the redundant `kata-` prefix).

Signed-off-by: Daniel Knittl-Frank <knittl89+git@googlemail.com>
2020-10-24 15:28:54 +02:00
Daniel Knittl-Frank
d2fe709174 cli: Use new subcommand forms in kata-manager script
Update the `kata-manager` script to call the new subcommand forms
without `kata-` prefix.

Signed-off-by: Daniel Knittl-Frank <knittl89+git@googlemail.com>
2020-10-24 15:27:32 +02:00
Daniel Knittl-Frank
4d9ab0cd21 cli: Support new subcommand forms in bash completion
Support new `check` and `env` subcommands in bash completion.

Signed-off-by: Daniel Knittl-Frank <knittl89+git@googlemail.com>
2020-10-24 15:27:32 +02:00
Daniel Knittl-Frank
c5d355e1ff cli: Remove kata- prefix from env and check subcommands
Provide the subcommands `kata-env` and `kata-check` as `env` and `check`
respectively.

Fixes #1011

Signed-off-by: Daniel Knittl-Frank <knittl89+git@googlemail.com>

fixup! cli: Add aliases to kata-env and kata-check commands
2020-10-24 15:25:17 +02:00
Peng Tao
063e8bd801 Merge pull request #1010 from liubin/feature/1004-add-version-for-kata-monitor
Feature/1004 add version for kata monitor
2020-10-24 11:13:22 +08:00
Peng Tao
fba181088b Merge pull request #1021 from Tim-Zhang/autogen-proto
agent: Generate proto files programmatically
2020-10-24 11:12:24 +08:00
LiYa'nan
f134b4a301 agent: Update build instructions
Fix the instructions explaining how to build the agent from source now that make needs to be run to auto-generate some source files.

Fixes: #889

Signed-off-by: LiYa'nan <oliverliyn@gmail.com>
2020-10-24 03:06:41 +00:00
Archana Shinde
e833e3ba39 Merge pull request #1002 from jodh-intel/2.0-dev-unbreak-fc-config
runtime: Fix firecracker config
2020-10-23 13:34:27 -07:00
Tim Zhang
9e9988df29 agent/protocols: Move agent.proto out of the mock folder of agent
Because the repos have been merged and the agent repo will be removed in the future,
we do not need to mock the file structure any more.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-23 15:19:35 +08:00
fupan.lfp
e90aa7b417 agent: fixes the permissions of PID 1's STDIO
Fix the permissions of PID 1's STDIO within the container so they match
the specified user.

The ownership needs to match because it is created outside of the
container and needs to be localized.

Fixes: #1022

Signed-off-by: fupan.lfp <fupan.lfp@antfin.com>
2020-10-23 11:06:53 +08:00
Archana Shinde
f92a4c288d Merge pull request #913 from YchauWang/update-docs-2.0-4
docs: remove the 1.x version description about shim and proxy
2020-10-22 16:52:06 -07:00
Archana Shinde
5f0b83cc54 Merge pull request #1000 from jongwu/pci
arm64: correct bridge type for QEMUVIRT
2020-10-22 13:53:27 -07:00
Wainer dos Santos Moschetta
b9b281e76d packaging: Use apply-patches.sh in build-kernel.sh
Calls apply-patches.sh in kernel/build-kernel.sh to apply the
kernel patches.

Fixes #1014

Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2020-10-22 13:45:14 -04:00
Wainer dos Santos Moschetta
163e61045a packaging: Make qemu/apply_patches.sh common
Move qemu/apply_patches.sh to the common scripts directory and
refactor it so that it can be used as a generic and consistent way
to apply patches.

Fixes #1014

Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2020-10-22 13:43:44 -04:00
Wainer dos Santos Moschetta
d4cf3057a9 packaging: qemu/apply_patches.sh should sort the patches
Change the apply_patches.sh script so that patches are sorted before
they are applied.

Fixes #1014

Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2020-10-22 13:41:15 -04:00
bin liu
5b065eb599 runtime: change govmm package
Change govmm package name from github.com/intel/govmm
to github.com/kata-containers/govmm

Fixes: #859

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-22 21:27:49 +08:00
Tim Zhang
9cb4150777 agent/protocols: Fix copyright header checking
Caused by: bb718ba1dd

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-22 16:17:24 +08:00
Tim Zhang
0d58d91959 agent/protocols: Stop generating agent proto files in the shell script
The job is now done by build.rs.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-22 16:12:21 +08:00
Tim Zhang
7559382b15 agent/protocols: Ignore generated files and remove these files from repo
Files generated by build.rs do not need to be stored in the repo.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-22 16:12:21 +08:00
Tim Zhang
fdc33fb7bf agent/protocols: Generate proto files programmatically
Build proto with build.rs

Fixes: #1019

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-22 16:12:15 +08:00
Peng Tao
710a4f3b76 Merge pull request #1007 from devimc/2020-10-20/ghactions/snap-release
snap: add GH actions jobs to release the snap package
2020-10-22 11:52:21 +08:00
bin liu
f1c3bf6b58 runtime: let kata-collect-data.sh collect kata-monitor info
Collect kata-monitor version in kata-collect-data.sh

Fixes: #1004

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-22 10:32:55 +08:00
bin liu
993a8da3aa kata-monitor: add version subcommand
This adds a `version` subcommand/option to the `kata-monitor` command.

Fixes: #1004

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-22 10:32:47 +08:00
Fupan Li
074b5332aa Merge pull request #977 from liubin/fix/951-clear-clippy-warnings
agent: clear clippy warnings
2020-10-22 09:36:45 +08:00
Christophe de Dinechin
4ee78120e6 runtime: Restore QEMUVIRTIOFSPATH variable in Makefile
Due to a bad edit / fixup in commit be6ee2550d, the variable
QEMUVIRTIOFSPATH was incorrectly removed from the makefile.

This problem was found by the 1.x CI checks, see
https://github.com/kata-containers/runtime/pull/3005#issuecomment-712887125

Fixes: #1017

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-21 14:35:37 +02:00
bin liu
df4ce9fab7 ci: add cargo clippy for agent
To run `cargo clippy`, this commit includes the following changes:

- add a new Makefile target to run `cargo clippy`
- move `make`/`make check` to the last step, to allow a fast return if `fmt`/`clippy` fails

Fixes: #951

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-21 15:14:17 +08:00
bin liu
2e13878880 agent: clear match_like_matches_macro/vec_resize_to_zero warnings
This commit fixes these warnings for Rust v1.47.0:

- match_like_matches_macro
- vec_resize_to_zero

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-21 15:13:25 +08:00
bin liu
227edfdc9f agent: clear module_inception/type_complexity warnings
To clear these two warnings, this commit makes the following changes:

- add `#![allow(clippy::module_inception)]` to target files
- use type alias for tuple of `(MessageHeader, Vec<u8>)`

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-21 15:03:16 +08:00
bin liu
698d25b7ee agent: clear redundant_field_names clippy warning
Add `#![allow(clippy::redundant_field_names)]` to skip the check for the
`protocols` package, and fix redundant_field_names warnings in the other
packages.

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-21 15:03:16 +08:00
bin liu
4dd9bd7aba agent: clear clippy len_zero warnings
Use `.is_empty()` instead of `.len() == 0`, `.len() > 0`
and `.len() != 0`.

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-21 15:03:16 +08:00
bin liu
bf7dec5c4f agent: clear clippy warnings
This commit clears clippy warnings for the agent package.

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-21 15:03:16 +08:00
bin liu
56f867ee74 rustjail: clear clippy warnings
This commit clears clippy warnings for the rustjail package.

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-21 14:54:51 +08:00
bin liu
16757ad490 oci: clear clippy warnings
This commit clears clippy warnings for the oci package.

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-21 14:54:51 +08:00
bin liu
f32f49bdb7 logging: clear clippy warnings
This commit contains two changes:

- clear clippy warnings
- add pkg/logging/Cargo.lock to .gitignore

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-21 14:54:51 +08:00
Fupan Li
8c3228c286 Merge pull request #976 from Tim-Zhang/simplify-ttrpc-error
agent: simplify ttrpc error construction
2020-10-21 10:31:40 +08:00
Julio Montes
5b079a3ba7 snap: add GH actions jobs to release the snap package
Use Github actions to build and release the snap package automatically
when a new tag is pushed.

fixes #1006

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-20 14:34:59 -05:00
Peng Tao
d80e479ca2 Merge pull request #994 from c3d/bug/993-RUNTIME_NAME
Replace @RUNTIME_NAME@ with the target in generated files
2020-10-20 17:40:45 +08:00
James O. D. Hunt
2738b18b38 runtime: Fix firecracker config
The build was setting a `FCVALIDPATHS` variable for firecracker, but
that was never being used. Conversely, the firecracker configuration
template was expecting a `FCVALIDHYPERVISORPATHS`, but that variable was
never being set.

Resolve by only setting the `FCVALIDHYPERVISORPATHS` variable to ensure
the generated firecracker config is valid once again.

Fixes: #1001.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-20 09:25:31 +01:00
James O. D. Hunt
e5d4259ab4 runtime: Simplify make variables for clh
Simplify definition of the `CLHVALIDHYPERVISORPATHS` build variable to
use the already defined `CLHPATH`.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-20 09:14:09 +01:00
Jianyong Wu
9eab301526 arm64: correct bridge type for QEMUVIRT
Forward-port PR https://github.com/kata-containers/runtime/pull/3017.

Fixes: #3016
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2020-10-20 14:09:03 +08:00
Archana Shinde
b25645d7ae Merge pull request #997 from amshinde/2.0-update-doc
2.0 update doc for hypervisor related information
2020-10-19 16:26:33 -07:00
Archana Shinde
b88aac049d docs: Update how-to Readme with hypervisor information.
While we have setup guides for Firecracker and ACRN because these
need additional configuration, it may confuse users looking
at this guide to find mentions of just these two hypervisors.
Call out all the hypervisors supported by Kata here.

Fixes #996

Signed-off-by: Archana Shinde <archana.m.shinde@intel.com>
2020-10-19 14:03:57 -07:00
Archana Shinde
d64641174e docs: Update Readme to remove hypervisor information
The repo https://github.com/kata-containers/qemu has been
archived. We should remove this reference, as QEMU is not the only
hypervisor we support now.

Signed-off-by: Archana Shinde <archana.m.shinde@intel.com>
2020-10-19 14:03:57 -07:00
Archana Shinde
b4f9fb513e docs: Remove docs for nemu
This hypervisor is no longer supported with Kata.
Remove related docs.

Signed-off-by: Archana Shinde <archana.m.shinde@intel.com>
2020-10-19 13:35:33 -07:00
Christophe de Dinechin
96a4ed7d70 Makefile: Replace @RUNTIME_NAME@ with the target in generated files
In commit 966bd57 for PR #902, the makefile was changed to automate
the replacement of user variables. However, one variable was treated
specially in the original `sed` replacements, namely `RUNTIME_NAME`
which was replaced by `$(TARGET)`.

This commit adds the `RUNTIME_NAME` variable to the makefile in order
to ensure that the replacement works correctly.

Fixes: #993

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-19 18:13:49 +02:00
Julio Montes
f162e7e960 Merge pull request #948 from justin-he/max_ports
virtcontainers: Append max_ports to virtio-serial device
2020-10-19 08:55:06 -05:00
Tim Zhang
7159fc2eda agent: simplify ttrpc error construction
Fixes: #975

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-19 13:50:30 +08:00
Bin Liu
ed711b5ab1 Merge pull request #983 from bergwolf/snap
snap: install libseccomp-dev
2020-10-17 20:58:27 +08:00
Peng Tao
0f8949868c snap: install libseccomp-dev
To build qemu with virtio-fs support.

Fixes: #982
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-17 17:44:56 +08:00
Xu Wang
80cfd01130 Merge pull request #936 from bergwolf/default-virtiofs
runtime: set virtio-fs as default fs sharing method
2020-10-17 17:28:30 +08:00
Peng Tao
9a351509d2 package: drop qemu-virtiofs shim
We have enabled qemu-virtiofs by default.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-17 11:42:56 +08:00
Peng Tao
5f47f224ff Merge pull request #945 from fidencio/wip/virtiofs-performance-improvements-backport-and-default-settings
VirtioFS: backports & default settings to improve performance
2020-10-17 11:13:22 +08:00
Peng Tao
6ed669a17b packaging: install virtiofsd for normal qemu build as well
For experimental-virtiofs, we use it to test virtiofs with DAX. Let's
rename its virtiofsd to virtiofsd-dax.

Depends-on: github.com/kata-containers/tests#2951
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-17 10:58:27 +08:00
Peng Tao
e5262b1c29 Merge pull request #970 from jodh-intel/2.0-dev-agent-ctl-add-missing-apis
tools: Make agent-ctl support more APIs
2020-10-17 10:12:40 +08:00
Jia He
da79b4be67 virtcontainers: Append max_ports to virtio-serial device
Allow API consumers to change the maximum number of ports on the
virtio-serial device; setting a lower number of ports can improve
boot time and reduce the attack surface.

Before this patch on arm64:
[    0.028664] Serial: 8250/16550 driver, 4 ports, IRQ sharing disabled
[    0.055031] printk: console [hvc0] enabled

After this patch on arm64:
[    0.028484] Serial: 8250/16550 driver, 4 ports, IRQ sharing disabled
[    0.031370] printk: console [hvc0] enabled

Fixes: #2676
Signed-off-by: Jia He <justin.he@arm.com>
2020-10-16 23:40:54 +08:00
Peng Tao
bcf4853062 runtime: enable virtiofs by default
We've been shipping it for a long time. It's time to make it the default,
replacing the old, obsolete 9pfs.

Fixes: #935
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-16 15:58:35 +08:00
Peng Tao
0d5d69e8cd Merge pull request #902 from c3d/bug-v2/launchpad-1878234-access
Validate runtime annotations
2020-10-16 15:47:45 +08:00
Peng Tao
e0da3af40b Merge pull request #974 from egernst/kernel-bump
kernel: update to 5.4.71
2020-10-16 15:41:58 +08:00
James O. D. Hunt
e2221d34bf tools: Improve agent-ctl README
Add a summary to help understand how to use the `agent-ctl` tool.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-16 07:43:41 +01:00
Peng Tao
6f46be2f2e Merge pull request #962 from egernst/config-update
config: make virtio-fs part of standard kernel
2020-10-16 10:06:02 +08:00
Fupan Li
cacb27fe4b Merge pull request #942 from Tim-Zhang/optimize-error-handling
agent: Optimize error handling
2020-10-16 09:15:22 +08:00
Eric Ernst
2d1f2c7b95 kernel: update to 5.4.71
vsock fix was backported to 5.4 stable, so we can drop this patch.

Fixes: #973

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-15 16:21:10 -07:00
Salvador Fuentes
18553459d1 Merge pull request #961 from chavafg/topic/update-k8s-1.18
versions: Update Kubernetes, containerd, cri-o and cri-tools
2020-10-15 16:54:21 -05:00
Eric Ernst
d3c9862059 config: make virtio-fs part of standard kernel
Basic virtio-fs support has made it upstream in the Linux kernel, as
well as in QEMU and Cloud Hypervisor. Let's go ahead and add it to the
standard configuration.

Since the device driver / DAX handling is still in progress for
upstream, we will want to still build a seperate experimental kernel for
those who are comfortable trading off bleeding edge stability/kernel
updates for improved FIO numbers.

Fixes: #963

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-15 12:21:41 -07:00
James O. D. Hunt
edf02af1d4 tools: Make agent-ctl support more APIs
Added new `agent-ctl` commands to allow the following agent API calls to
be made:

- `AddARPNeighborsRequest`
- `CloseStdinRequest`
- `CopyFileRequest`
- `GetMetricsRequest`
- `GetOOMEventRequest`
- `MemHotplugByProbeRequest`
- `OnlineCPUMemRequest`
- `ReadStreamRequest`
- `ReseedRandomDevRequest`
- `SetGuestDateTimeRequest`
- `TtyWinResizeRequest`
- `UpdateContainerRequest`
- `WriteStreamRequest`

Fixes: #969.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-15 17:33:25 +01:00
James O. D. Hunt
5620180302 tools: Remove commented out code in agent-ctl
Remove a few lines of commented out code.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-15 17:33:00 +01:00
James O. D. Hunt
9bac4ee651 tools: Log request in agent-ctl tool if debug enabled
Display the API request before making the call so users can see what is
sent to the agent.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-15 17:32:23 +01:00
James O. D. Hunt
68821f0814 tools: Rename agent-ctl command to GetGuestDetails
Rename the `GuestDetails` command to `GetGuestDetails` to match the
actual agent API name.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-15 17:32:20 +01:00
James O. D. Hunt
8553f06298 tools: Fix comment in agent-ctl
Correct a comment in the agent control tool.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-15 16:54:34 +01:00
Julio Montes
238e4562e0 Merge pull request #965 from jodh-intel/2.0-dev-agent-fix-crashers
agent: fix crashers if API requests empty
2020-10-15 09:53:11 -05:00
Peng Tao
6612b0c4bf Merge pull request #953 from liubin/fix/952
rustjail: add length check for uid_mappings in rootless euid mapping
2020-10-15 20:42:09 +08:00
Peng Tao
bcda074e5a Merge pull request #964 from liubin/fix/957-use-regex-to-filter-kata-contaienrs
kata-monitor: use regexp to check if runtime is kata containers
2020-10-15 20:41:54 +08:00
Tim Zhang
6ba294a11e agent: remove unwrap() for e.as_errno()
Use `{:?}` to print `e.as_errno()` instead of using `{}`
to print `e.as_errno().unwrap().desc()`.

This avoids a panic caused merely by the error's content.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-15 19:46:00 +08:00
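A small self-contained sketch of the difference, using a stand-in error type rather than the real nix error:

```
// Stand-in for a nix-style error whose as_errno() may return None.
struct MyErr(Option<i32>);

impl MyErr {
    fn as_errno(&self) -> Option<i32> {
        self.0
    }
}

fn main() {
    let e = MyErr(None);

    // Before (panics when as_errno() is None):
    // let msg = format!("error: {}", e.as_errno().unwrap());

    // After: Debug-print the Option itself, which never panics.
    let msg = format!("error: {:?}", e.as_errno());
    println!("{}", msg); // "error: None"
}
```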
Tim Zhang
e77482fe16 agent: Use ? instead of match when the error returns directly
It's clearer and more readable.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-15 19:45:55 +08:00
bin liu
1b7ed32836 kata-monitor: use regexp to check if runtime is kata containers
To support a few common configurations for Kata, including:

- `io.containerd.kata.v2`
- `io.containerd.kata-qemu.v2`
- `io.containerd.kata-clh.v2`

`kata-monitor` now uses a regexp instead of a direct string comparison.

Fixes: #957

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-15 18:42:44 +08:00
Tim Zhang
47ff2fb9a0 agent: use anyhow context to attach context to Error instead of match
Context is clearer than match for these situations.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-15 18:16:54 +08:00
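For readers unfamiliar with the idiom, a minimal sketch of attaching context with anyhow instead of a match; the config path is just an example:

```
use anyhow::{Context, Result};
use std::fs;

fn read_config(path: &str) -> Result<String> {
    // with_context() attaches a message to any error in one expression,
    // where a match block would otherwise be used to wrap it.
    fs::read_to_string(path).with_context(|| format!("failed to read {}", path))
}

fn main() {
    if let Err(e) = read_config("/nonexistent/agent.toml") {
        // Prints the context plus the underlying I/O error.
        eprintln!("{:#}", e);
    }
}
```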
Tim Zhang
2f690a2bb0 agent: remove useless match
Remove useless match.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-15 18:16:54 +08:00
James O. D. Hunt
cb6231c1bc Merge pull request #930 from YchauWang/update-docs-2.0-6
docs: update the build kata containers kernel document
2020-10-15 11:15:25 +01:00
Tim Zhang
1d8def6663 agent: Use ok_or_else instead of match for Option -> Result
Using ok_or is clearer than match.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-15 18:15:14 +08:00
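A minimal sketch of the Option-to-Result conversion described above, with illustrative names and anyhow for the error type:

```
use anyhow::{anyhow, Result};
use std::collections::HashMap;

fn find_container(containers: &HashMap<String, u32>, id: &str) -> Result<u32> {
    // Before: a verbose match on the Option.
    // let pid = match containers.get(id) {
    //     Some(p) => *p,
    //     None => return Err(anyhow!("invalid container id {}", id)),
    // };

    // After: ok_or_else converts Option -> Result in one expression.
    let pid = containers
        .get(id)
        .copied()
        .ok_or_else(|| anyhow!("invalid container id {}", id))?;
    Ok(pid)
}

fn main() {
    let mut map = HashMap::new();
    map.insert("ctr1".to_string(), 42u32);
    assert_eq!(find_container(&map, "ctr1").unwrap(), 42);
    assert!(find_container(&map, "missing").is_err());
}
```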
James O. D. Hunt
8495306641 agent: Fix crasher if AddARPNeighbors request empty
Check if the ARP neighbours specified in the `AddARPNeighbors` API is
set before using it to avoid crashing the agent.

Fixes: #955.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-15 11:12:40 +01:00
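The general shape of this kind of fix, sketched with a hypothetical request type standing in for the generated protobuf message:

```
use anyhow::{anyhow, Result};

// Hypothetical stand-in for the generated AddARPNeighbors request type.
struct AddArpNeighborsRequest {
    neighbors: Option<Vec<String>>,
}

fn add_arp_neighbors(req: &AddArpNeighborsRequest) -> Result<()> {
    // Validate the optional field instead of unwrapping it, so an empty
    // request yields an error response rather than crashing the agent.
    let neighbors = req
        .neighbors
        .as_ref()
        .ok_or_else(|| anyhow!("empty AddARPNeighbors request"))?;

    for n in neighbors {
        println!("would add neighbor: {}", n);
    }
    Ok(())
}

fn main() {
    let empty = AddArpNeighborsRequest { neighbors: None };
    assert!(add_arp_neighbors(&empty).is_err());
}
```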
James O. D. Hunt
3d084c7d23 agent: Fix crasher if UpdateRoutes request empty
Check if the routes specified in the `UpdateRoutes` API is set before
using it to avoid crashing the agent.

Fixes: #949.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-15 11:12:38 +01:00
James O. D. Hunt
5615e5a7fe agent: Fix crasher if UpdateInterface request empty
Check if the interface specified in the `UpdateInterface` API is set
before using it to avoid crashing the agent.

Fixes: #950.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-15 11:12:35 +01:00
Tim Zhang
0dce817ebb agent: replace match Result with or_else
`or_else` is suitable for more complicated situations.
We can use it to return Ok from the Err-handling path.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-15 17:58:51 +08:00
Tim Zhang
7bf4073d8d agent: replace unnecessary match Result with map_err
Replace `match Result` expressions whose Ok arm is useless.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-15 17:58:43 +08:00
Tim Zhang
7f9e5913e0 agent: replace check! with map_err for readability
Calling a method through a macro is ambiguous and not easy to read.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-15 17:37:33 +08:00
Tim Zhang
09aca49ed7 agent: remove check! in the child process because we can't see its logs.
The check macro logs errors, but logs from the child process can't
be seen, so just ignore the error.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-15 17:33:16 +08:00
Tim Zhang
a18899f1a3 agent: refactor namespace::setup to optimize error handling
- Replace the return value with anyhow::Result.
- Remove if let Err.
- Remove match.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-15 17:28:20 +08:00
Tim Zhang
a3c64e5ce5 agent: replace if let Err with or_else
Fixes #934

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-15 17:27:10 +08:00
Tim Zhang
6ffa8283f0 agent: replace if let Err with map_err
Fixes #934

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-10-15 17:26:40 +08:00
bin liu
863f918a2c rustjail: add length check for uid_mappings in rootless euid mapping
This looks like a copy-paste mistake: gid_mappings is checked twice, and one
of the checks should be for uid_mappings.

Fixes: #952

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-15 16:43:52 +08:00
Salvador Fuentes
720eab78bb versions: Update Kubernetes, containerd, cri-o and cri-tools
Kubernetes: from 1.17.3 to 1.18.9
CRI-O: from 0eec454168e381e460b3d6de07bf50bfd9b0d082 (1.17) to 1.18.3
Containerd: from 3a4acfbc99aa976849f51a8edd4af20ead51d8d7 (1.3.3) to 1.3.7
cri-tools: from 1.17.0 to 1.18.0

Fixes: #960.
Depends-on: github.com/kata-containers/tests#2958

Signed-off-by: Salvador Fuentes <salvador.fuentes@intel.com>
2020-10-14 18:02:37 -04:00
Eric Ernst
71be16c401 Merge pull request #933 from egernst/cgroup-updates
cgroup and cpuset fixes from 1.x
2020-10-14 08:41:52 -07:00
Peng Tao
225ed59202 Merge pull request #941 from jodh-intel/2.0-dev-update-upgrading-doc
docs: Update upgrading guide
2020-10-14 23:29:08 +08:00
Eric Ernst
8132417512 Merge pull request #947 from bergwolf/pod-updates
agent: fix panic on malformed device resource in container update
2020-10-14 08:27:10 -07:00
Christophe de Dinechin
c5771be2de annotations: Correct unit tests to validate new protections
Add the verification of some basic protections, namely that:
- EnableAnnotations is honored
- Dangerous paths cannot be modified if no match
- Errors are returned when expected

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-14 16:10:12 +02:00
Christophe de Dinechin
398d79184c annotations: Split addHypervisorOverrides to reduce complexity
Warning from gocyclo during make check:
 virtcontainers/pkg/oci/utils.go:404:1: cyclomatic complexity 37 of func `addHypervisorConfigOverrides` is high (> 30) (gocyclo)
 func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig, runtime RuntimeConfig) error {
^

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-14 16:10:12 +02:00
Christophe de Dinechin
b2b3bc7ad8 annotations: Add unit test for checkPathIsInGlobs
There are a few interesting corner cases to consider for this
function.

Fixes: #901

Suggested-by: James O.D. Hunt <james.o.hunt@intel.com>
Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-14 16:10:12 +02:00
Christophe de Dinechin
6f52179ce4 annotations: Add unit test for regexpContains function
James O.D Hunt: "But also, regexpContains() and
checkPathIsInGlobList() seem like good candidates for some unit
tests. The "look" obvious, but a few boundary condition tests would be
useful I think (filenames with spaces, backslashes, special
characters, and relative & absolute paths are also an interesting
thought here)."

There aren't that many boundary conditions on a list with regexps,
if you assume the regexp match function itself works. However, the
tests are useful in documenting expectations.

Fixes: #901

Suggested-by: James O.D. Hunt <james.o.hunt@intel.com>
Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-14 16:10:12 +02:00
Christophe de Dinechin
966bd57344 makefile: Add missing generated vars to USER_VARS
This was discovered while checking a massive change in variables.
The root cause for the error is a very long list of manual
replacements, which is best replaced with a $(foreach).

All individual variables in the output configuration files were
checked against the old build using diff.

This is a forward port of a makefile fix included in
PR https://github.com/kata-containers/runtime/issues/3004
for issue https://github.com/kata-containers/runtime/issues/2943

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-14 16:10:12 +02:00
Christophe de Dinechin
be6ee2550d makefile: Improve names of config entries for annotation checks
The entries used to be things like PATH_LIST, which are too generic.
Replace them with more precise names containing a distinguishing keyword,
namely VALID; for example, valid_hypervisor_paths.

Fixes: #901

Suggested-by: James O.D. Hunt <james.o.hunt@intel.com>
Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-14 16:10:12 +02:00
Christophe de Dinechin
b119427405 annotations: Give better names to local variables in search functions
Use more meaningful variable names for clarity.

Fixes: #901

Suggested-by: James O.D. Hunt <james.o.hunt@intel.com>
Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-14 16:10:12 +02:00
Christophe de Dinechin
b5db114aad annotations: Rename checkPathIsInGlobList with checkPathIsInGlobs
The name is shorter and more specific

Fixes: #901

Suggested-by: James O.D. Hunt <james.o.hunt@intel.com>
Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-14 16:10:12 +02:00
Christophe de Dinechin
d65a7d1083 config: Add better comments in the template files
When there is a default value from the code (usually empty) that
differs from a possible suggested value from the distro, then the
wording "default: empty" is confusing.

Fixes: #901

Suggested-by: Julio Montes <julio.montes@intel.com>
Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-14 16:10:12 +02:00
Christophe de Dinechin
7c6aede5d4 config: Whitelist hypervisor annotations by name
Add a field "enable_annotations" to the runtime configuration that can
be used to whitelist annotations using a list of regular expressions,
which are used to match any part of the base annotation name, i.e. the
part after "io.katacontainers.config.hypervisor."

For example, the following configuration will match "virtio_fs_daemon",
"initrd" and "jailer_path", but not "path" nor "firmware":

  enable_annotations = [ "virtio.*", "initrd", "_path" ]

The default is an empty list of enabled annotations, which disables
annotations entirely.

If an annotation is rejected, the message is something like:

  annotation io.katacontainers.config.hypervisor.virtio_fs_daemon is not enabled

Fixes: #901

Suggested-by: Peng Tao <tao.peng@linux.alibaba.com>
Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-14 16:10:12 +02:00
Christophe de Dinechin
f047fced0b config: Use glob instead of regexp to match paths in annotations
When filtering annotations that correspond to paths,
e.g. hypervisor.path, it is better to use a glob syntax than a regexp
syntax, as it is more usual for paths, and prevents classes of matches
that are undesirable in our case, such as matching .. against .*

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-14 16:10:12 +02:00
Christophe de Dinechin
11b9c90cd8 annotations: Fix typo in comment
A comment talking about runtime related annotations describes them as
being related to the agent. A similar comment for the agent
annotations is missing.

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-14 16:10:12 +02:00
Christophe de Dinechin
c16cdcb2a5 config: Add makefile variables for path lists
Add variables to override defaults at build time for the various lists
used to control path annotations.

Fixes: #901

Suggested-by: Fabiano Fidencio <fidencio@redhat.com>
Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-14 16:10:12 +02:00
Christophe de Dinechin
4e89b885d2 config: Protect file_mem_backend against annotation attacks
This one could theoretically be used to overwrite data on the host.
It seems somewhat less risky than the earlier ones for a number
of reasons, but worth protecting a little anyway.

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-14 16:10:12 +02:00
Christophe de Dinechin
aae9656d8b config: Protect vhost_user_store_path against annotation attacks
This path could be used to overwrite data on the host.

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-14 16:10:12 +02:00
Christophe de Dinechin
5588165399 config: Add security warning on configuration examples
Add the following text explaining the risk of using regular
expressions in path lists:

Each member of the list can be a regular expression, but prefer names.
Otherwise, please read and understand the following carefully.
SECURITY WARNING: If you use regular expressions, be mindful that
an attacker could craft an annotation that uses .. to escape the paths
you gave. For example, if your regexp is /bin/qemu.* then if there is
a directory named /bin/qemu.d/, then an attacker can pass an annotation
containing /bin/qemu.d/../put-any-binary-name-here and attack your host.

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-14 16:10:12 +02:00
Christophe de Dinechin
b21a829c61 config: Protect ctlpath from annotation attack
This also adds annotation for ctlpath which were not present
before. It's better to implement the code consistently right now to make
sure that we don't end up with a leaky implementation tacked on later.

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-14 16:10:12 +02:00
Christophe de Dinechin
27b6620b23 config: Protect jailer_path annotation
The jailer_path annotation can be used to execute arbitrary code on
the host. Add a jailer_path_list configuration entry providing a list
of regular expressions that can be used to filter annotations that
represent valid file names.

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-14 16:10:12 +02:00
Christophe de Dinechin
076690179d config: Add examples for path_list configuration
The path_list configuration gives a series of regular expressions that
limit which values are acceptable through annotations in order to
avoid kata launching arbitrary binaries on the host when receiving an
annotation.

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-14 16:10:12 +02:00
Christophe de Dinechin
2d431c61c6 annotations: Simplify negative logic
Replace the strange negative logic (!ok -> continue) with positive
logic (ok -> do it).

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-14 16:10:12 +02:00
Christophe de Dinechin
2ca9ca892d config: Add hypervisor path override through annotations
The annotation is provided, so it should be respected.
Furthermore, it is important to implement it with the appropriate
protections similar to what was done for virtiofsd.

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-14 16:10:12 +02:00
Christophe de Dinechin
2e093dfd8b config: Fix typo in function name
There was an extra 'p' in addHypervisorVirtioFsOverrides.

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-14 16:10:12 +02:00
Christophe de Dinechin
bf13ff0a3a config: Protect virtio_fs_daemon annotation
Sending the virtio_fs_daemon annotation can be used to execute
arbitrary code on the host. In order to prevent this, restrict the
values of the annotation to a list provided by the configuration
file.

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-14 16:10:12 +02:00
Christophe de Dinechin
8c75de1966 config: Add 'List' alternates for hypervisor configuration paths
Paths mentioned in the hypervisor configuration can be overridden
using annotations, which is potentially dangerous. For each path,
add a 'List' variant that specifies the list of acceptable values
from annotations.

Bug: https://bugs.launchpad.net/katacontainers.io/+bug/1878234

Fixes: #901

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-14 16:10:12 +02:00
Peng Tao
fc6468efdb agent: fix panic on malformed device resource in container update
Somehow containerd is sending a malformed device in the update API. While this
should not happen, we should not panic either.

Fixes: #946
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-14 13:27:23 +08:00
Eric Ernst
d8a8fe47fb cpuset: don't set cpuset.mems in the guest
Kata doesn't map any numa topologies in the guest. Let's make sure we
clear the Cpuset fields before passing container updates to the
guest.

Note, in the future we may want to have a vCPU to guest CPU mapping and
still include the cpuset.Cpus. Until we have this support, clear this as
well.

Fixes: #932

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-13 15:54:03 -07:00
Eric Ernst
88cd712876 sandbox: consider cpusets if quota is not enforced
CPUSet cgroup allows for pinning the memory associated with a cpuset to
a given numa node. Similar to cpuset.cpus, we should take cpuset.mems
into account for the sandbox-cgroup that Kata creates.

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-13 15:54:03 -07:00
Eric Ernst
77a463e57a cpuset: support setting mems for sandbox
CPUSet cgroup allows for pinning the memory associated with a cpuset to
a given numa node. Similar to cpuset.cpus, we should take cpuset.mems
into account for the sandbox-cgroup that Kata creates.

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-13 15:54:03 -07:00
Eric Ernst
2d690536b8 cpuset: add cpuset pkg
Pulled from 1.18.4 Kubernetes, adding the cpuset pkg for managing
CPUSet calculations on the host. Go mod'ing the original code from
k8s.io/kubernetes was very painful, and this is very static, so let's
just pull in what we need.

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-13 15:54:03 -07:00
Fabiano Fidêncio
1a9515a998 runtime: Pass --thread-pool-size=1 to virtiofsd
Dave Gilbert brought up that passing --thread-pool-size=1 to virtiofsd
may result in a performance improvement, especially when using
`cache=none`. While our current default is `cache=auto`, Dave mentioned
that he sees no harm in having it set, and he also mentioned that it may
use a lot less stack space on aarch/arm.

Fixes: #943

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2020-10-13 22:33:08 +02:00
Fabiano Fidêncio
1c528cd1cf packaging: Apply virtiofs performance related fixes to 5.x
Vivek Goyal found out that using the "shared" thread pool, instead of
"exclusive", results in better performance.

Knowing that, and with the plan to have virtio-fs as the default fs for
2.0, let's bring this patch in for both 5.0 and 5.1.

Fixes: #944

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2020-10-13 19:06:13 +02:00
James O. D. Hunt
5b5200037a docs: Update upgrading guide
Update the upgrading guide for 2.0.

Fixes: #928.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-13 14:00:13 +01:00
Fupan Li
25cdf2d728 Merge pull request #931 from dgibson/bug703
Forward port device conflict fixes from Kata 1 / Go agent
2020-10-13 15:59:17 +08:00
Ychau Wang
0e0564a55d docs: update the build kata containers kernel document
Update the "build Kata Containers kernel" document for the 2.0 release. Fix
the 1.x release project paths and URLs, using the kata-containers
project file paths and URLs.

Fixes: #929

Signed-off-by: Ychau Wang <wangyongchao.bj@inspur.com>
2020-10-13 15:12:53 +08:00
David Gibson
ae6b8ec747 agent/device: Check type as well as major:minor when looking up devices
To update device resource entries from host to guest, we search for
the right entry by host major:minor numbers, then later update it.
However block and character devices exist in separate major:minor
namespaces so we could have one block and one character device with
matching major:minor and thus incorrectly update both with the details
for whichever device is processed second.

Add a check on device type to prevent this.

Port from the Kata 1 Go agent
https://github.com/kata-containers/agent/commit/27ebdc9d2761

Fixes: #703

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2020-10-13 16:26:52 +11:00
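A compact sketch of the stricter match, using an illustrative struct rather than the OCI spec types:

```
// Illustrative resource entry; the real code walks the OCI spec.
#[derive(Debug)]
struct DeviceResource {
    dev_type: String, // "b" for block, "c" for character
    major: i64,
    minor: i64,
}

// Match on type as well as major:minor, since block and character devices
// have independent major:minor namespaces.
fn matches_host_device(entry: &DeviceResource, host_type: &str, host_major: i64, host_minor: i64) -> bool {
    entry.dev_type == host_type && entry.major == host_major && entry.minor == host_minor
}

fn main() {
    let blk = DeviceResource { dev_type: "b".into(), major: 8, minor: 0 };
    // A character device with the same numbers must not match.
    assert!(!matches_host_device(&blk, "c", 8, 0));
    assert!(matches_host_device(&blk, "b", 8, 0));
}
```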
David Gibson
859301b009 agent/device: Index all devices in spec before updating them
The agent needs to update device entries in the OCI spec so that it
has the correct major:minor numbers for the guest, which may differ
from the host.

Entries in the main device list are looked up by device path, but
entries in the device resources list are looked up by (host)
major:minor.  This is done one device at a time, updating as we go in
update_spec_device_list().

But since the host and guest have different namespaces, one device
might have the same major:minor as a different device on the host.  In
that case we could update one resource entry to the correct guest
values, then mistakenly update it again because it now matches a
different host device.

To avoid this, rather than looking up and updating one by one, we make
all the lookups in advance, creating a map from (host) device path to
the indices in the spec where the device and resource entries can be
found.

Port from the Go agent in Kata 1,
https://github.com/kata-containers/agent/commit/d88d46849130

Fixes: #703

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2020-10-13 16:26:26 +11:00
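A sketch of the index-everything-first approach, using simplified stand-ins for the spec structures:

```
use std::collections::HashMap;

// Simplified stand-ins for the OCI spec entries.
struct SpecDevice { path: String, dev_type: String, major: i64, minor: i64 }
struct ResourceDevice { dev_type: String, major: i64, minor: i64 }

// Build the host-path -> (device index, resource index) map before any
// entry is rewritten, so an already-updated resource entry cannot be
// matched a second time by a different host device that now shares its
// guest major:minor numbers.
fn index_devices(
    devices: &[SpecDevice],
    resources: &[ResourceDevice],
) -> HashMap<String, (usize, Option<usize>)> {
    let mut map = HashMap::new();
    for (di, d) in devices.iter().enumerate() {
        let ri = resources.iter().position(|r| {
            r.dev_type == d.dev_type && r.major == d.major && r.minor == d.minor
        });
        map.insert(d.path.clone(), (di, ri));
    }
    map
}

fn main() {
    let devices = vec![SpecDevice { path: "/dev/vda".into(), dev_type: "b".into(), major: 254, minor: 0 }];
    let resources = vec![ResourceDevice { dev_type: "b".into(), major: 254, minor: 0 }];
    let map = index_devices(&devices, &resources);
    assert_eq!(map["/dev/vda"], (0, Some(0)));
}
```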
David Gibson
2477c355bc agent/device: Forward port update_spec_device_list() unit test
The Kata 1 Go agent included a unit test for updateSpecDeviceList, but no
such unit test exists for the Rust agent's equivalent
update_spec_device_list().  Port the Kata1 test to Rust.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2020-10-13 16:25:58 +11:00
David Gibson
08d80c1aaa agent/device: update_spec_device_list() should error if dev not found
If update_spec_device_list() is given a device that can't be found in the
OCI spec, it currently does nothing, and returns Ok(()).  That doesn't
seem like what we'd expect and is not what the Go agent in Kata 1 does.

Change it to return an error in that case, like Kata 1.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2020-10-13 16:25:36 +11:00
Eric Ernst
12cc0ee168 sandbox: don't constrain cpus, mem only cpuset, devices
Allow for constraining the cpuset as well as the devices whitelist. Revert
sandbox constraints for cpu/memory, as they break the K8s use case. We can
re-add them behind a non-default flag in the future.

The sandbox CPUSet should be updated every time a container is created,
updated, or removed.

To facilitate this without rewriting the 'non constrained cgroup'
handling, let's add to the Sandbox's cgroupsUpdate function.

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-12 21:31:27 -07:00
Eric Ernst
b6cf68a985 cgroups: add ability to update CPUSet
Add function for applying a cpuset change to a cgroup

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-12 21:31:27 -07:00
Eric Ernst
b812d4f7fa virtcontainers: add method for calculating cpuset for sandbox
Calculate sandbox's CPUSet as the union of each of the container's
CPUSets.

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-12 21:31:27 -07:00
Peng Tao
c88820454d Merge pull request #739 from jodh-intel/2.0-dev-containerd-install-docs
docs: Add containerd install guide
2020-10-13 11:48:56 +08:00
Peng Tao
16a6427ca9 Merge pull request #923 from liubin/fix/simplify-codes
agent: simplify codes
2020-10-13 09:54:46 +08:00
Eric Ernst
2e72972cd7 Merge pull request #910 from egernst/fix-parsing
agent: fix erroneous parsing for guest block size
2020-10-12 12:40:02 -07:00
Eric Ernst
f63f740545 agent: fix erroneous parsing for guest block size
We were assuming a base-10 string before, but the block size from sysfs
is actually a hex string. Let's fix that.

Fixes: #908

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-12 11:18:39 -07:00
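An illustrative Rust sketch of the fix (not the agent's exact function): parse the sysfs value with radix 16 instead of `str::parse`, which assumes base 10.

```rust
fn parse_block_size(sysfs_value: &str) -> Result<u64, std::num::ParseIntError> {
    // The sysfs value is hexadecimal; strip any "0x" prefix and parse base 16.
    let trimmed = sysfs_value.trim().trim_start_matches("0x");
    u64::from_str_radix(trimmed, 16)
}

fn main() {
    // "200" read as hex is 512; parsing it as base 10 would silently give 200.
    assert_eq!(parse_block_size("200\n").unwrap(), 512);
    println!("ok");
}
```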
James O. D. Hunt
43d70a32df docs: Add containerd install guide
Create a containerd installation guide and a new `kata-manager` script
for 2.0 that automates the steps outlined in the guide.

Also cleaned up and improved the installation documentation in various
ways, the most significant being:

- Added legacy install link for 1.x installs.
- Official packages section:
  - Removed "Contact" column (since it was empty!)
  - Reworded "Versions" column to clarify the versions are a minimum
    (to reduce maintenance burden).
  - Added a column to show which installation methods receive automatic updates.
  - Modified order of installation options in table and document to
    de-emphasise automatic installation and promote official packages
    and snap more.
- Removed sections no longer relevant for 2.0.

Fixes: #738.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-12 17:54:53 +01:00
Fupan Li
27634982f7 Merge pull request #915 from liubin/fix/914-use-macro-to-simplify-codes
agent: use macro to simplify parse_cmdline function in config.rs
2020-10-12 22:23:30 +08:00
bin liu
11c1ab8bca agent: use ok_or/map_err instead of match
Sometimes `Option::ok_or` and `Result::map_err` are simpler
than a match statement. This is especially true in rpc.rs, where
many `ctr.get_process` and `sandbox.get_container` calls
use `match`.

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-12 16:59:02 +08:00
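For illustration, a minimal sketch of the pattern with hypothetical types (not the real rpc.rs code): `ok_or_else` turns the `Option` into a `Result` without a match.

```rust
use std::collections::HashMap;

struct Sandbox {
    // container id -> pid, purely for the example
    containers: HashMap<String, u32>,
}

impl Sandbox {
    fn get_container(&self, id: &str) -> Option<&u32> {
        self.containers.get(id)
    }
}

fn container_pid(s: &Sandbox, id: &str) -> Result<u32, String> {
    // match version:
    //   match s.get_container(id) {
    //       Some(pid) => Ok(*pid),
    //       None => Err(format!("invalid container id {}", id)),
    //   }
    s.get_container(id)
        .copied()
        .ok_or_else(|| format!("invalid container id {}", id))
}

fn main() {
    let s = Sandbox { containers: HashMap::new() };
    println!("{:?}", container_pid(&s, "abc"));
}
```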
bin liu
6b9f99156e rustjail: use Iterator to manipulate vector elements
Using iterators saves code and makes it more readable.

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-12 14:26:33 +08:00
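A small, self-contained sketch of the idea (the data is made up): iterator adapters replace an index-based loop.

```rust
fn main() {
    let mounts = vec!["/proc", "/sys", "/dev", "/proc/irq"];

    // Index-based version:
    //   let mut proc_mounts = Vec::new();
    //   for i in 0..mounts.len() {
    //       if mounts[i].starts_with("/proc") {
    //           proc_mounts.push(mounts[i]);
    //       }
    //   }

    // Iterator version: shorter, and no index bounds to get wrong.
    let proc_mounts: Vec<&str> = mounts
        .iter()
        .filter(|m| m.starts_with("/proc"))
        .copied()
        .collect();

    println!("{:?}", proc_mounts);
}
```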
Ychau Wang
a7251651f8 docs: remove the 1.x version description about shim and proxy
Remove the built-in shim and proxy design description from the
kata-api-design.md file.

Fixes: #912

Signed-off-by: Ychau Wang <wangyongchao.bj@inspur.com>
2020-10-12 14:11:18 +08:00
bin liu
dc1442c33a rustjail: delete codes commented out
Some use statements, code, and struct fields are commented out and are
unlikely to ever be uncommented, so delete these comments.

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-12 12:29:23 +08:00
bin liu
aa04111d9f rustjail: delete unused test code
The auto-generated test code is meaningless, so delete it.

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-12 10:23:22 +08:00
Fupan Li
7470055cdc Merge pull request #898 from bergwolf/arm-musl
fix arm CI
2020-10-12 10:03:45 +08:00
Bin Liu
490f030e2a Merge pull request #917 from bergwolf/fix-cloud_hypervisor_repo
packaging: fix missing cloud_hypervisor_repo
2020-10-12 09:50:30 +08:00
Peng Tao
4f3206ac7e Merge pull request #888 from liubin/881-add-crictl-examples
docs: Add crictl example json files
2020-10-11 12:07:18 +08:00
bin liu
eae685dc53 agent: use chain of Result to avoid early return
Using Rust `Result`'s `or_else`/`and_then` produces cleaner code
and avoids early returns that check whether the `Result`
is `Ok` or `Err`.

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-10 22:22:54 +08:00
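An illustrative sketch with a made-up helper (not the agent's code) of chaining a `Result` instead of checking `is_ok()`/`is_err()` and returning early:

```rust
fn read_pid(path: &str) -> Result<i32, String> {
    std::fs::read_to_string(path)
        .map_err(|e| format!("read {}: {}", path, e))
        // Runs only if the read succeeded; an earlier Err flows straight through.
        .and_then(|s| s.trim().parse::<i32>().map_err(|e| format!("parse pid: {}", e)))
}

fn main() {
    println!("{:?}", read_pid("/nonexistent/pidfile"));
}
```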
bin liu
5e3d1fb60b agent: add blank lines between methods
In rpc.rs there are no blank lines between methods; this commit
adds blank lines between these methods.

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-10 12:37:34 +00:00
bin liu
980e48ca94 agent: delete unused field in agentService
The field was only used for testing and is not needed now.

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-10 12:23:44 +00:00
bin liu
52b821fa5f agent: use unnamed closures to reduce code
For simple cases, inline unnamed closures reduce the amount of code.

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-10 20:10:16 +08:00
Peng Tao
82e9450124 packaging: fix cloud-hypervisor binary path
1. ensure build-static-clh.sh puts cloud-hypervisor under the ./cloud-hypervisor directory
2. install the cloud-hypervisor/cloud-hypervisor binary

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-10 20:10:03 +08:00
bin liu
b1f95e8d27 agent: use a local fn to reduce duplicated code
The same code is used twice; aggregating it into a function
reduces duplication.

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-10 19:55:05 +08:00
Peng Tao
154a356ad4 packaging: apply qemu v5.1 stable fixes
Qemu v5.1 was released with an offending commit 9b3a35ec82
(virtio: verify that legacy support is not accidentally on).
As a result, it breaks command-line compatibility for old qemu
users. Upstream qemu has fixed it but no release has been put out yet.
Let's apply these fixes by hand for now.

Refs: https://www.mail-archive.com/qemu-devel@nongnu.org/msg729556.html

Depends-on: github.com/kata-containers/tests#2945
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-10 18:29:23 +08:00
Jianyong Wu
c781a80820 agent: fix aarch64 build
aarch64 needs libgcc to resolve some non-builtin symbols.

Fixes: #909
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-10 18:29:23 +08:00
bin liu
906b38441c agent: update inaccurate comments
This commit includes:
- updating comments that did not match the function name
- fixing a file path with a doubled slash

Fixes: #922

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-10 17:57:13 +08:00
Peng Tao
78318c18f3 packaging: fix missing cloud_hypervisor_repo
It is needed in order to build from source.

Fixes: #916
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-10 15:47:41 +08:00
bin liu
b7309943af agent: use macro to simplify parse_cmdline function in config.rs
The parse_cmdline function contains several similar pieces of code; if we
want to add more command-line arguments, the code will grow too long.
Using a macro reduces the code that shares the same logic/processing.

Fixes: #914

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-10 15:20:47 +08:00
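A minimal sketch of the idea with invented field and parameter names (not the actual config.rs): a macro factors out the repeated "key=value" matching so adding a new parameter is one extra line.

```rust
#[derive(Default, Debug)]
struct AgentConfig {
    log_level: String,
    server_addr: String,
}

macro_rules! parse_string_param {
    // If `$param` is "<key>=<value>", write <value> through `$dest`.
    ($param:expr, $key:expr, $dest:expr) => {{
        let prefix = format!("{}=", $key);
        if let Some(v) = $param.strip_prefix(prefix.as_str()) {
            *$dest = v.to_string();
        }
    }};
}

fn main() {
    let cmdline = "console=ttyS0 agent.log=debug agent.server_addr=vsock://-1:1024";
    let mut cfg = AgentConfig::default();
    for param in cmdline.split_whitespace() {
        parse_string_param!(param, "agent.log", &mut cfg.log_level);
        parse_string_param!(param, "agent.server_addr", &mut cfg.server_addr);
    }
    println!("{:?}", cfg);
}
```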
bin liu
9834a766aa docs: add namespace key to pod/container config files
If there is no namespace field in the config files, CRI-O will fail with:
 setting pod sandbox name and id: cannot generate pod name without namespace

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-10 10:02:08 +08:00
Julio Montes
291257c0df Merge pull request #897 from devimc/2020-10-09/snapCI
ci: snap: add event filtering
2020-10-09 15:28:57 -05:00
Julio Montes
37e7de72a5 ci: snap: add event filtering
Running the snap CI on every PR is not needed. Don't run the snap CI
on PRs that don't change the source code (*.go/*.rs), a configuration
file, or a Makefile.

fixes #896

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-09 09:48:28 -05:00
Julio Montes
4f0fe8473b Merge pull request #886 from bergwolf/CVE-2019-19921
agent: do not follow link when mounting container proc and sysfs
2020-10-09 09:47:30 -05:00
bin liu
9a02e6eb88 docs: Add crictl example json files
Add basic sample pod/container config files to show
how to use `crictl` with Kata Containers.

Fixes: #881

Signed-off-by: bin liu <bin@hyper.sh>
2020-10-09 21:03:22 +08:00
Bin Liu
fd7d0ef999 Merge pull request #884 from bergwolf/cargo-lock
agent-ctl: include cargo lock updates
2020-10-09 20:23:19 +08:00
Fupan Li
3a659a6733 Merge pull request #891 from bergwolf/CVE-2016-9962
agent: set init process non-dumpable
2020-10-09 19:03:24 +08:00
Peng Tao
b7147edadb agent: do not follow link when mounting container proc and sysfs
Attackers might use it to explore other containers in the same pod.
While it is still safe to allow it, we can just close the race window
like runc does.

Fixes: #885
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-09 18:54:26 +08:00
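One way to close such a race, roughly following runc's approach (a heavily hedged sketch assuming the `nix` crate, not the agent's exact code): open the destination with `O_NOFOLLOW` and mount through the stable `/proc/self/fd/<fd>` path.

```rust
use nix::fcntl::{open, OFlag};
use nix::mount::{mount, MsFlags};
use nix::sys::stat::Mode;
use nix::unistd::close;

fn mount_proc_nofollow(dest: &str) -> nix::Result<()> {
    // Refuse to follow a symlink planted at the destination by the container.
    let fd = open(dest, OFlag::O_PATH | OFlag::O_NOFOLLOW | OFlag::O_CLOEXEC, Mode::empty())?;
    // Mounting via /proc/self/fd/<fd> pins the target we just opened, so it
    // cannot be swapped for a symlink between the check and the mount.
    let ret = mount(
        Some("proc"),
        format!("/proc/self/fd/{}", fd).as_str(),
        Some("proc"),
        MsFlags::empty(),
        None::<&str>,
    );
    let _ = close(fd);
    ret
}

fn main() {
    // Needs root and an existing target directory; expect an error otherwise.
    println!("{:?}", mount_proc_nofollow("/tmp/kata-sketch-proc"));
}
```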
Bin Liu
43da14e7b3 Merge pull request #752 from YchauWang/clear-moke-code01
runtime: Clear the VCMock 1.x API Methods from 2.0
2020-10-09 17:41:21 +08:00
Peng Tao
15b7156348 agent: set init process non-dumpable
On old kernels (like v4.9), the kernel applies CLOEXEC in the wrong order
w.r.t. the dumpable task flag. As a result, we might leak guest file
descriptors to containers. This is the former runc CVE-2016-9962 and still
applies to the kata agent. Although Kata Containers still protects the
host, we should not leak extra resources to user containers.

This sets the init processes that join and set up the container's
namespaces as non-dumpable before they setns to the container's pid (or
any other) namespace.

This setting is automatically reset to the default after the Exec in
the container so that it does not change functionality for the
applications running inside, just our init processes.

This prevents parent processes, i.e. pid 1 of the container, from
ptracing the init process before it drops caps and sets up other LSMs.

The order during the exec syscall is that the process is set back to
dumpable before O_CLOEXEC flags are processed.

Refs:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=613cc2b6f272c1a8ad33aefa21cad77af23139f7
https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318
opencontainers/runc@50a19c6
https://nvd.nist.gov/vuln/detail/CVE-2016-9962

Fixes: #890
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-09 17:12:06 +08:00
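A minimal sketch of the non-dumpable step using the `libc` crate (an illustration of the mechanism, not the agent's actual code):

```rust
fn set_non_dumpable() -> Result<(), std::io::Error> {
    // SAFETY: PR_SET_DUMPABLE only reads plain integer arguments.
    let ret = unsafe { libc::prctl(libc::PR_SET_DUMPABLE, 0 as libc::c_ulong) };
    if ret != 0 {
        return Err(std::io::Error::last_os_error());
    }
    Ok(())
}

fn main() {
    // Called before setns(); the kernel resets dumpable across exec, so the
    // workload inside the container is unaffected.
    set_non_dumpable().expect("prctl(PR_SET_DUMPABLE, 0)");
    println!("process is now non-dumpable");
}
```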
Peng Tao
3f8e619c2f Merge pull request #876 from jcvenegas/dax-off
virtiofs: Disable DAX
2020-10-09 13:39:42 +08:00
Peng Tao
83d80872cc Merge pull request #546 from amshinde/2.0-agent-debug
docs: Update docs for enabling agent debug console
2020-10-09 10:54:54 +08:00
Peng Tao
00ad3fd308 agent-ctl: include cargo lock updates
Simply running `make` would generate some cargo lock updates for
agent-ctl. Let's include them so that we have fixed dependencies.

Fixes: #883
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-09 10:50:45 +08:00
Peng Tao
367e436ff8 Merge pull request #795 from c3d/bug/750-warnings
Remove compilation warnings
2020-10-09 10:26:08 +08:00
Peng Tao
a82237a810 Merge pull request #874 from rhafer/fix-873
osbuilder: Create target directory for agent
2020-10-09 10:22:55 +08:00
Julio Montes
441026b877 Merge pull request #880 from devimc/2020-10-08/versions/sriov-network-device-plugin
versions: add plugins section
2020-10-08 14:47:10 -05:00
Julio Montes
8cd62d7bdd versions: add plugins section
The plugins section contains the details of plugins required for
the components or for testing.

Add the sriov-network-device-plugin URL and version that are consumed
by the VFIO test in the tests repository.

fixes #879

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-08 13:01:01 -05:00
Julio Montes
2712e31c3e Merge pull request #878 from devimc/2020-10-01/snapCI
snap: specify python version
2020-10-08 12:59:19 -05:00
Jose Carlos Venegas Munoz
c4472481bc virtiofs: Disable DAX
virtiofs DAX support is not stable today; there are
a few corner cases to resolve before making it the default.

Fixes: #862
Fixes: #875

Signed-off-by: Jose Carlos Venegas Munoz <jose.carlos.venegas.munoz@intel.com>
2020-10-08 10:59:10 -05:00
Julio Montes
3e56de8101 snap: specify python version
In order to avoid an `unmet dependencies` error in the CI,
the python version must be specified in the yaml.

fixes #877

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-08 10:53:20 -05:00
Ralf Haferkamp
e3cdc89b6c osbuilder: Create target directory for agent
When building with AGENT_SOURCE_BIN pointing to an already built
kata-agent binary, the target directory needs to be created in the
rootfs tree.

Fixes #873

Signed-off-by: Ralf Haferkamp <rhafer@suse.com>
2020-10-08 17:07:40 +02:00
Bo Chen
b9205cae1c Merge pull request #870 from egernst/fixup
packaging: fix image build script
2020-10-07 13:25:13 -07:00
Eric Ernst
7cad865d13 packaging: fix image build script
There were a couple of issues with the build scripts discovered while
doing a release:
 - Relative paths are error prone. Fix the error.
 - short_commit_length is used to truncate the sha for commits when
 appending the agent version to resulting files. Before, this was
 in pkglib.sh, which is otherwise an unused file from when we
 supported OBS. Add this define to lib.sh, which is sourced by
 the applicable packaging scripts.

There's plenty of room for improvement, but these fixes make the
existing scripts functional again.

Fixes: #871

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-07 09:09:31 -07:00
Christophe de Dinechin
0e898c6bc4 rust-agent: Treat warnings as error
Avoid the accumulation of warnings we had, as reported in #750.

Fixes: #750

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-07 17:30:21 +02:00
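One common way to make warnings fatal in Rust is a crate-level attribute; this is only an illustration of the technique, not necessarily how this commit wires it up (it could equally be done via RUSTFLAGS):

```rust
#![deny(warnings)] // any warning in this crate now fails the build

fn main() {
    println!("builds only while the crate is warning-free");
}
```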
Christophe de Dinechin
0e4baaabcc rust-agent: Identify unused results in tests
Assign unused results to _ in order to silence warnings.

This addresses the following warnings:

    warning: unused `std::result::Result` that must be used
        --> rustjail/src/mount.rs:1182:16
         |
    1182 |         defer!(unistd::chdir(&olddir););
         |                ^^^^^^^^^^^^^^^^^^^^^^^
         |
         = note: `#[warn(unused_must_use)]` on by default
         = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
        --> rustjail/src/mount.rs:1183:9
         |
    1183 |         unistd::chdir(tempdir.path());
         |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
         |
         = note: this `Result` may be an `Err` variant, which should be handled

While in regular code, we want to log possible errors, in test code
it's OK to simply ignore the returned value.

Fixes: #750

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-07 17:30:13 +02:00
Christophe de Dinechin
5b2b565249 rust-agent: Log returned errors rather than ignore them
In a number of cases, we have functions that return a Result<...>
and where the possible error case is simply ignored. This is a bit
unhealthy.

Add a `check!` macro that allows us to not ignore error values
that we want to log, while not interrupting the flow by returning
them. This is useful for low-level functions such as `signal::kill` or
`unistd::close` where an error is probably significant, but should not
necessarily interrupt the flow of the program (i.e. using `call()?` is
not the right answer).

The check! macro is then used on low-level calls; a minimal sketch of the
idea follows this commit entry. This addresses the following warnings
from #750:

    warning: unused `std::result::Result` that must be used
       --> /home/ddd/go/src/github.com/kata-containers-2.0/src/agent/rustjail/src/container.rs:903:17
        |
    903 |                 signal::kill(Pid::from_raw(p.pid), Some(Signal::SIGKILL));
        |                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: `#[warn(unused_must_use)]` on by default
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> /home/ddd/go/src/github.com/kata-containers-2.0/src/agent/rustjail/src/container.rs:916:17
        |
    916 |                 signal::kill(Pid::from_raw(child.id() as i32), Some(Signal::SIGKILL));
        |                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:340:13
        |
    340 |             write_sync(cwfd, SYNC_FAILED, format!("{:?}", e).as_str());
        |             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: `#[warn(unused_must_use)]` on by default
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:554:13
        |
    554 | /             write_sync(
    555 | |                 cwfd,
    556 | |                 SYNC_FAILED,
    557 | |                 format!("setgroups failed: {:?}", e).as_str(),
    558 | |             );
        | |______________^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:340:13
        |
    340 |             write_sync(cwfd, SYNC_FAILED, format!("{:?}", e).as_str());
        |             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:340:13
        |
    340 |             write_sync(cwfd, SYNC_FAILED, format!("{:?}", e).as_str());
        |             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: `#[warn(unused_must_use)]` on by default
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:554:13
        |
    554 | /             write_sync(
    555 | |                 cwfd,
    556 | |                 SYNC_FAILED,
    557 | |                 format!("setgroups failed: {:?}", e).as_str(),
    558 | |             );
        | |______________^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:626:5
        |
    626 |     unistd::close(cfd_log);
        |     ^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: `#[warn(unused_must_use)]` on by default
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:627:5
        |
    627 |     unistd::close(crfd);
        |     ^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:628:5
        |
    628 |     unistd::close(cwfd);
        |     ^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:770:9
        |
    770 |         fcntl::fcntl(pfd_log, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC));
        |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: `#[warn(unused_must_use)]` on by default
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:799:9
        |
    799 |         fcntl::fcntl(prfd, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC));
        |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:800:9
        |
    800 |         fcntl::fcntl(pwfd, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC));
        |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:803:13
        |
    803 |             unistd::close(prfd);
        |             ^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:930:9
        |
    930 |         log_handler.join();
        |         ^^^^^^^^^^^^^^^^^^^
        |
        = note: `#[warn(unused_must_use)]` on by default
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:803:13
        |
    803 |             unistd::close(prfd);
        |             ^^^^^^^^^^^^^^^^^^^^
        |
        = note: `#[warn(unused_must_use)]` on by default
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:804:13
        |
    804 |             unistd::close(pwfd);
        |             ^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:842:13
        |
    842 |             sched::setns(old_pid_ns, CloneFlags::CLONE_NEWPID);
        |             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/container.rs:843:13
        |
    843 |             unistd::close(old_pid_ns);
        |             ^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

Fixes: #844
Fixes: #750

Suggested-by: Tim Zhang <tim@hyper.sh>
Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-07 17:29:46 +02:00
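A minimal sketch of such a `check!`-style macro (the real agent logs through slog; this version uses `eprintln!` so it stays dependency-free):

```rust
macro_rules! check {
    ($call:expr, $what:expr) => {
        if let Err(e) = $call {
            // Record the failure but do not interrupt the flow of the program.
            eprintln!("{} failed: {:?}", $what, e);
        }
    };
}

fn main() {
    // A failing low-level call whose error we want logged, not propagated.
    check!(std::fs::remove_file("/nonexistent-kata-sketch"), "remove_file");
    println!("still running after the failed call");
}
```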
Christophe de Dinechin
d617caf1b5 rust-agent: Remove unused imports
This addresses the following warnings (and similar ones)::

    Compiling rustjail v0.1.0 (/home/ddd/go/src/github.com/kata-containers-2.0/src/agent/rustjail)
    warning: unused import: `debug`
      --> rustjail/src/container.rs:57:12
       |
    57 | use slog::{debug, info, o, Logger};
       |            ^^^^^

    warning: unused imports: `AddressFamily`, `SockFlag`, `SockType`, `self`
      --> rustjail/src/process.rs:18:24
       |
    18 | use nix::sys::socket::{self, AddressFamily, SockFlag, SockType};
       |                        ^^^^  ^^^^^^^^^^^^^  ^^^^^^^^  ^^^^^^^^

    warning: unused import: `nix::Error`
      --> rustjail/src/process.rs:23:5
       |
    23 | use nix::Error;
       |     ^^^^^^^^^^

    warning: unused import: `protobuf::RepeatedField`
      --> rustjail/src/validator.rs:11:5
       |
    11 | use protobuf::RepeatedField;
       |     ^^^^^^^^^^^^^^^^^^^^^^^

Fixes: #750

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-07 17:29:46 +02:00
Christophe de Dinechin
ee739c5d59 rust-agent: Report errors to caller if possible
Various recently added calls can fail, and their errors should be reported
to the caller where possible.

This addresses the following warning:

    warning: unused `std::result::Result` that must be used
      --> rustjail/src/cgroups/fs/mod.rs:93:9
       |
    93 |         cg.add_task(CgroupPid::from(pid as u64));
       |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
       |
       = note: `#[warn(unused_must_use)]` on by default
       = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/cgroups/fs/mod.rs:196:17
        |
    196 |                 freezer_controller.thaw();
        |                 ^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/cgroups/fs/mod.rs:199:17
        |
    199 |                 freezer_controller.freeze();
        |                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/cgroups/fs/mod.rs:365:9
        |
    365 |         cpuset_controller.set_cpus(&cpu.cpus);
        |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/cgroups/fs/mod.rs:369:9
        |
    369 |         cpuset_controller.set_mems(&cpu.mems);
        |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/cgroups/fs/mod.rs:381:13
        |
    381 |             cpu_controller.set_shares(shares);
        |             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/cgroups/fs/mod.rs:385:5
        |
    385 |     cpu_controller.set_cfs_quota_and_period(cpu.quota, cpu.period);
        |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
        = note: this `Result` may be an `Err` variant, which should be handled

    warning: unused `std::result::Result` that must be used
        --> rustjail/src/cgroups/fs/mod.rs:1061:13
         |
    1061 |             cpuset_controller.set_cpus(cpuset_cpus);
         |             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
         |
         = note: this `Result` may be an `Err` variant, which should be handled

The specific case of cpu_controller.set_cfs_quota_and_period is
addressed in a way that changes the logic following a suggestion by
Liu Bin, who had just added the code.

Fixes: #750

Suggested-by: Liu Bin <bin@hyper.sh>
Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-07 17:29:46 +02:00
Christophe de Dinechin
d5b492a1e7 rust-agent: Ignore write errors while writing to the logs
When we are writing to the logs and there is an error doing so, there
is not much we can do. Chances are that a panic would make things
worse. So let it go through.

    warning: unused `std::result::Result` that must be used
       --> rustjail/src/sync.rs:26:9
        |
    26  |         write_count(lfd, log_str.as_bytes(), log_str.len());
        |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |
       ::: rustjail/src/container.rs:339:13
        |
    339 |             log_child!(cfd_log, "child exit: {:?}", e);
        |             ------------------------------------------- in this macro invocation
        |
        = note: this `Result` may be an `Err` variant, which should be handled
        = note: this warning originates in a macro (in Nightly builds, run with -Z macro-backtrace for more info)

Fixes: #750

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-07 17:29:46 +02:00
Christophe de Dinechin
c635c46a4b rust-agent: Remove unused code that has undefined behavior
Some functions have undefined behavior and are not actually used.

This addresses the following warning:
    warning: the type `oci::User` does not permit zero-initialization
      --> rustjail/src/lib.rs:99:18
       |
    99 |         unsafe { MaybeUninit::zeroed().assume_init() }
       |                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
       |                  |
       |                  this code causes undefined behavior when executed
       |                  help: use `MaybeUninit<T>` instead, and only call `assume_init` after initialization is done
       |
       = note: `#[warn(invalid_value)]` on by default
    note: `std::ptr::Unique<u32>` must be non-null (in this struct field)

    warning: the type `protocols::oci::Process` does not permit zero-initialization
       --> rustjail/src/lib.rs:146:14
        |
    146 |     unsafe { MaybeUninit::zeroed().assume_init() }
        |              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        |              |
        |              this code causes undefined behavior when executed
        |              help: use `MaybeUninit<T>` instead, and only call `assume_init` after initialization is done
        |
    note: `std::ptr::Unique<std::string::String>` must be non-null (in this struct field)

Fixes: #750

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-07 17:29:46 +02:00
Christophe de Dinechin
ec24f688ed rust-agent: Remove 'mut' where not needed
Addresses the following warning (and a few similar ones):
    warning: variable does not need to be mutable
       --> rustjail/src/container.rs:369:9
        |
    369 |     let mut oci_process: oci::Process = serde_json::from_str(process_str)?;
        |         ----^^^^^^^^^^^
        |         |
        |         help: remove this `mut`
        |
        = note: `#[warn(unused_mut)]` on by default

Fixes: #750

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-07 17:29:46 +02:00
Christophe de Dinechin
c8f406d4c4 rust-agent: Remove uses of deprecated functions
This addresses the following:

    warning: use of deprecated item 'std::error::Error::description': use the Display impl or to_string()
        --> rustjail/src/container.rs:1598:31
         |
    1598 | ...                   e.description(),
         |                         ^^^^^^^^^^^
         |
         = note: `#[warn(deprecated)]` on by default

Fixes: #750

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-07 17:29:46 +02:00
Christophe de Dinechin
f832d8a651 rust-agent: Remove or rename unused parameters
Parameters that are never used were removed.
Parameters that are unused, but necessary because of some common
interface were renamed with a _ prefix.
In one case, consume the parameter by adding an info! call, and fix a
minor typo in a message in the same function.

This addresses the following warning:

    warning: unused variable: `child`
        --> rustjail/src/container.rs:1128:5
         |
    1128 |     child: &mut Child,
         |     ^^^^^ help: if this is intentional, prefix it with an underscore: `_child`

    warning: unused variable: `logger`
        --> rustjail/src/container.rs:1049:22
         |
    1049 | fn update_namespaces(logger: &Logger, spec: &mut Spec, init_pid: RawFd) -> Result<()> {
         |                      ^^^^^^ help: if this is intentional, prefix it with an underscore: `_logger`

Fixes: #750

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-07 17:29:46 +02:00
Christophe de Dinechin
5a1d331135 rust-agent: Remove or rename unused variables
Remove variables that are simply not used.
Rename as _ variables where only initialization matters.

This addresses the following warnings:

    warning: unused variable: `writer`
       --> src/main.rs:130:9
        |
    130 |     let writer = unsafe { File::from_raw_fd(wfd) };
        |         ^^^^^^ help: if this is intentional, prefix it with an underscore: `_writer`
        |
        = note: `#[warn(unused_variables)]` on by default

    warning: unused variable: `ctx`
       --> src/rpc.rs:782:9
        |
    782 |         ctx: &ttrpc::TtrpcContext,
        |         ^^^ help: if this is intentional, prefix it with an underscore: `_ctx`

    warning: unused variable: `ctx`
       --> src/rpc.rs:808:9
        |
    808 |         ctx: &ttrpc::TtrpcContext,
        |         ^^^ help: if this is intentional, prefix it with an underscore: `_ctx`

    warning: unused variable: `dns_list`
        --> src/rpc.rs:1152:16
         |
    1152 |             Ok(dns_list) => {
         |                ^^^^^^^^ help: if this is intentional, prefix it with an underscore: `_dns_list`

    warning: value assigned to `child_stdin` is never read
       --> rustjail/src/container.rs:807:13
        |
    807 |         let mut child_stdin = std::process::Stdio::null();
        |             ^^^^^^^^^^^^^^^
        |
        = note: `#[warn(unused_assignments)]` on by default
        = help: maybe it is overwritten before being read?

    warning: value assigned to `child_stdout` is never read
       --> rustjail/src/container.rs:808:13
        |
    808 |         let mut child_stdout = std::process::Stdio::null();
        |             ^^^^^^^^^^^^^^^^
        |
        = help: maybe it is overwritten before being read?

    warning: value assigned to `child_stderr` is never read
       --> rustjail/src/container.rs:809:13
        |
    809 |         let mut child_stderr = std::process::Stdio::null();
        |             ^^^^^^^^^^^^^^^^
        |
        = help: maybe it is overwritten before being read?

    warning: value assigned to `stdin` is never read
       --> rustjail/src/container.rs:810:13
        |
    810 |         let mut stdin = -1;
        |             ^^^^^^^^^
        |
        = help: maybe it is overwritten before being read?

    warning: value assigned to `stdout` is never read
       --> rustjail/src/container.rs:811:13
        |
    811 |         let mut stdout = -1;
        |             ^^^^^^^^^^
        |
        = help: maybe it is overwritten before being read?

    warning: value assigned to `stderr` is never read
       --> rustjail/src/container.rs:812:13
        |
    812 |         let mut stderr = -1;
        |             ^^^^^^^^^^
        |
        = help: maybe it is overwritten before being read?

Fixes: #750

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-07 17:29:30 +02:00
Christophe de Dinechin
27efe291c0 rust-agent: Remove unused functions
Fixes the following warning:

   Compiling logging v0.1.0 (/home/ddd/go/src/github.com/kata-containers-2.0/pkg/logging)
   warning: associated function is never used: `set_level`
      --> /home/ddd/go/src/github.com/kata-containers-2.0/pkg/logging/src/lib.rs:186:8
       |
   186 |     fn set_level(&self, level: slog::Level) {
       |        ^^^^^^^^^
       |
       = note: `#[warn(dead_code)]` on by default

Fixes: #750

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-07 13:39:26 +02:00
Christophe de Dinechin
d76ece0cf3 rust-agent: Remove useless braces
This addresses the following warning:

    warning: unnecessary braces around assigned value
        --> src/rpc.rs:1411:26
         |
    1411 |     detail.init_daemon = { unistd::getpid() == Pid::from_raw(1) };
         |                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: remove these braces
         |
         = note: `#[warn(unused_braces)]` on by default

Fixes: #750

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-07 13:39:26 +02:00
Christophe de Dinechin
3682812e57 rust-agent: Remove unused macros
This addresses the following warnings:

   Compiling rustjail v0.1.0 (/home/ddd/go/src/github.com/kata-containers-2.0/src/agent/rustjail)
   warning: unused `#[macro_use]` import
     --> rustjail/src/lib.rs:15:1
      |
   15 | #[macro_use]
      | ^^^^^^^^^^^^
      |
      = note: `#[warn(unused_imports)]` on by default

   warning: unused macro definition
     --> rustjail/src/lib.rs:38:1
      |
   38 | / macro_rules! sl {
   39 | |     () => {
   40 | |         slog_scope::logger().new(o!("subsystem" => "rustjail"))
   41 | |     };
   42 | | }
      | |_^
      |
      = note: `#[warn(unused_macros)]` on by default

Fixes: #750

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-10-07 13:39:26 +02:00
Eric Ernst
cf26ac0d28 Merge pull request #867 from egernst/main-packaging-fixups
Main packaging fixups
2020-10-06 16:22:44 -07:00
Eric Ernst
483209bf49 actions: add kata deploy test
Pull over kata-deploy-test from the 1.x packaging repository. This is
intended to be used for testing any changes to the kata-deploy
scripting, and does not exercise any new source code changes.

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-06 13:44:03 -07:00
Eric Ernst
0793002464 packaging: cleaning, updating based on new filepaths
Update scripts to take into account some files being moved, and some
general cleanup.

Fixes: #866

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-06 13:44:03 -07:00
Eric Ernst
f0f205cd7b packaging: remove obs-packaging
No longer required -- let's remove them.

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-06 13:44:03 -07:00
Eric Ernst
4b1753c53f packaging: pull versions, build-image out from obs dir
These are still required; let's pull them out.

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-06 13:44:03 -07:00
Eric Ernst
3f6cd4d5f7 packaging: Revert "packaging: Stop providing OBS packages"
This reverts commit c0ea910273.

Two scripts are still required for release and testing, and they should
never have been under the obs-packaging dir in the first place.  Let's
revert, move the scripts / update references to them, and then we can
remove the remaining obs-packaging/ tooling.

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-06 13:43:53 -07:00
James O. D. Hunt
13e260a864 Merge pull request #861 from likebreath/clh_vfio_unplug
clh: Support VFIO device unplug
2020-10-06 09:47:19 +01:00
Julio Montes
4657983d80 Merge pull request #839 from devimc/2020-10-01/snapCI
ci: add github action to test the snap
2020-10-05 15:49:30 -05:00
Bo Chen
c33ee54a21 clh: Support VFIO device unplug
This patch adds the support of VFIO device unplug when using
cloud-hypervisor.

Fixes: #860

Signed-off-by: Bo Chen <chen.bo@intel.com>
2020-10-05 12:20:13 -07:00
Bo Chen
1f4dfa3166 clh: Remove unnecessary VmmPing
We can rely on the error handling of the actual HTTP API calls to catch
errors, and don't need to call VmmPing explicitly in advance.

Signed-off-by: Bo Chen <chen.bo@intel.com>
2020-10-05 12:17:45 -07:00
Bo Chen
cc80ae0afb versions: cloud-hypervisor: Bump to version 6d30fe05
The cloud-hypervisor commit `6d30fe05` introduced a fix on its API for
VFIO device hotplug (`VmAddDevice`), which is required for supporting
VFIO unplug through openAPI calls in kata.

Signed-off-by: Bo Chen <chen.bo@intel.com>
2020-10-05 12:13:36 -07:00
Eric Ernst
2929250b61 Merge pull request #798 from cmaf/architecture-tap
docs: update networking description
2020-10-05 08:42:19 -07:00
Chelsea Mafrica
0fec7a4d73 docs: Change kata_tap0 to tap0_kata
The tap device should be tap0_kata in architecture.md

Fixes #797

Signed-off-by: duanquanfeng <duanquanfeng_yewu@cmss.chinamobile.com>
Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2020-10-04 16:55:50 -07:00
Chelsea Mafrica
3394a6a549 docs: update networking description
First, most people don't care about CNM. Move that out of the main doc.

Second, tc-filter is the default. Let's add a bit more background on
our usage of tc-filter (and clarify why we use this instead of macvtap).

Fixes #797

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2020-10-04 16:55:50 -07:00
Eric Ernst
62b0522b33 Merge pull request #852 from egernst/dev-guide
docs: update dev guide for agent build
2020-10-02 17:13:52 -07:00
Eric Ernst
2e83f40576 dev-guide: update kata-agent install details
Install paths were wrong. Updated based on new agent...

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-02 16:17:36 -07:00
Archana Shinde
ffea705a27 docs: Update docs for enabling agent debug console
The systemd method of adding a debug console is not really
user friendly. Since we have added a much more straightforward
method to enable the agent debug console, update the developer guide to
reflect this.

Fixes #834

Signed-off-by: Archana Shinde <archana.m.shinde@intel.com>
2020-10-02 16:09:22 -07:00
Eric Ernst
777f398184 docs: update dev guide for agent build
Include details on setting up rust.

Fixes: #851

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2020-10-02 15:38:07 -07:00
Julio Montes
aa8eefd813 ci: add github action to test the snap
Add a github action to test that the snap package can be generated
correctly; this CI doesn't test the snap, it just builds it.

fixes #838

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-01 14:34:36 -05:00
James O. D. Hunt
1d5eab42d6 Merge pull request #833 from bergwolf/197
rust-agent: Update README
2020-10-01 17:26:21 +01:00
James O. D. Hunt
2133f3b742 Merge pull request #832 from bergwolf/arch
docs: update architecture.md
2020-10-01 17:26:16 +01:00
Julio Montes
c2bf8a0f37 Merge pull request #484 from devimc/2020-07-31/SGX
runtime: add support for SGX
2020-10-01 11:08:11 -05:00
Julio Montes
3fec031100 Merge pull request #817 from edmond-hk/qemu
version: upgrade qemu version to v5.1.0 for arm64
2020-10-01 10:31:19 -05:00
James O. D. Hunt
64c3f8bbe0 Merge pull request #686 from jodh-intel/2.0-dev-fix-windows-network-typo
agent: Fix OCI Windows network shared container name typo
2020-10-01 14:25:49 +01:00
James O. D. Hunt
05286d6b1f Merge pull request #731 from jodh-intel/rm-issue-template
github: Remove issue template and use central one
2020-10-01 14:25:40 +01:00
Julio Montes
ea1cb37b59 versions: cloud-hypervisor: bump version
Use commit c54452c08a467a3e35d8d72f2a91d424e9718c57 as the
version for cloud-hypervisor.
Bring in the openapi fix cloud-hypervisor/cloud-hypervisor#1760 to
support SGX.

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-01 08:24:29 -05:00
Julio Montes
0ebffdf29f runtime: cloud-hypervisor: tag openapi-generator-cli container
Pin the openapi-generator-cli container to v4.3.1, which is the latest
stable version; this way we can have reproducible builds and the same
generated code on all systems.

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-01 08:24:29 -05:00
Julio Montes
e51a1ea346 docs: use-cases: Add Intel SGX use case
Document how to use Intel SGX with Kata.

fixes kata-containers/documentation#697

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-01 08:24:29 -05:00
Julio Montes
7d63823141 runtime/vendor: add k8s.io/apimachinery/pkg/api/resource
k8s.io/apimachinery/pkg/api/resource is a memory quantity parser;
we use it to parse the SGX EPC size defined by the `sgx.intel.com/epc`
annotation.

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-01 08:24:29 -05:00
Julio Montes
6df165c19d runtime: add support for SGX
Support the `sgx.intel.com/epc` annotation that is defined by the Intel
k8s plugin. This annotation enables SGX, i.e. hardware-based isolation
and memory encryption.

For example, use `sgx.intel.com/epc = "64Mi"` to create a container
with one EPC section of pre-allocated memory.

At the time of writing this patch, SGX patches have not landed on the
linux kernel project.
The following github kernel fork contains all the SGX patches for the
host and guest: https://github.com/intel/kvm-sgx

fixes #483

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-10-01 08:24:29 -05:00
Peng Tao
a5b3e1cdfd docs: drop docker installation guide
We have removed cli support, which means docker support is dropped
for now. Also, it doesn't make sense to have so much duplication for each
distribution when we can simply refer to the official docker guide on how
to install docker.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-01 20:52:10 +08:00
Peng Tao
6c4300c65f docs: fix static check errors in docs/install/README.md
It was merged in while the static checker was disabled.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-01 20:52:10 +08:00
Peng Tao
59224a76b4 docs: update architecture.md
To match the current architecture of Kata Containers 2.0.

Fixes: #831
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-01 20:51:55 +08:00
Yang Bo
a89deb3ec1 rust-agent: Update README
The rust agent has not used grpc as a submodule for a while; update the
README to reflect the change.

Fixes: #196
Signed-off-by: Yang Bo <bo@hyper.sh>
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-10-01 20:41:20 +08:00
James O. D. Hunt
80c52834e6 github: Remove issue template and use central one
Remove the GitHub issue template from this repository. We already have a
central set of templates [1] that are being used so the template in this
repository is redundant.

[1] - https://github.com/kata-containers/.github/tree/master/.github/ISSUE_TEMPLATE/

Fixes: #728.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-01 11:19:32 +01:00
James O. D. Hunt
0ccbca3be8 agent: Fix OCI Windows network shared container name typo
Correct the typo which would break the Windows-specific OCI network
shared container name feature.

See:

- https://github.com/opencontainers/runtime-spec/blob/master/config-windows.md#network

Fixes: #685.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-10-01 11:18:21 +01:00
James O. D. Hunt
cce80bf746 Merge pull request #827 from devimc/2020-09-29/fixTravis
docs: fix broken links
2020-10-01 09:50:48 +01:00
Edmond AK Dantes
a6221a74e7 qemu: upgrade qemu version to 5.1.0 for arm64.
The qemu version used on arm is quite old. Some new features have been merged
into current qemu, so it's time to upgrade it. As obs-packaging has been removed,
I put the qemu patch under qemu/patch/5.1.x.
As vxfs has been deprecated in qemu-5.1, it no longer exists in
configure-hypervisor.sh when the qemu version is larger than 5.0.

Fixes: #816
Signed-off-by: Edmond AK Dantes <edmond.dantes.ak47@outlook.com>
2020-09-30 22:40:49 +08:00
Julio Montes
d68d850ac4 Merge pull request #830 from YchauWang/update-docs-2.0-2
Packaging: release notes script uses wrong kernel path URLs
2020-09-30 09:35:23 -05:00
Peng Tao
ec09971d8e Merge pull request #794 from c3d/bug/793-bad-match
rust-agent: Replaces improper use of match for non-constant patterns
2020-09-30 20:39:45 +08:00
Peng Tao
414b906e9c Merge pull request #747 from Kvasscn/fix-warning-in-device-manager-test
devices: fix go test warning in manager_test.go
2020-09-30 20:37:20 +08:00
Bin Liu
ae72ba5cb6 Merge pull request #688 from jodh-intel/2.0-dev-action-improve-line-length-check
action: Allow long lines if non-alphabetic
2020-09-30 15:47:53 +08:00
Ychau Wang
f30b86f131 Packaging: release notes script uses wrong kernel path URLs
The 2.0 packaging runtime-release-notes.sh script is using 1.x packaging
kernel URLs. Fix these URLs to point to the 2.0 branch packaging URLs.

Fixes: #829

Signed-off-by: Ychau Wang <wangyongchao.bj@inspur.com>
2020-09-30 14:47:46 +08:00
Bin Liu
ad7b1ff5ad Merge pull request #820 from Tim-Zhang/mark-unreachable
Indicate never-returning function and remove unreachable code
2020-09-30 13:59:30 +08:00
Julio Montes
a7faeaac51 docs: fix broken links
Some sections and files were removed in a previous commit;
remove all references to those sections and files to fix the
check-markdown test.

fixes #826

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-09-29 14:08:40 -05:00
Julio Montes
c32af06fc6 Merge pull request #825 from lifupan/2.0-dev
agent: propagate the internal detail errors to users
2020-09-29 12:12:34 -05:00
fupan.lfp
4501c25a46 agent: propagate the internal detail errors to users
We should propagate the detailed errors to users when
an rpc call fails.

Fixes: #824

Signed-off-by: fupan.lfp <fupan.lfp@antfin.com>
2020-09-30 00:01:23 +08:00
Fabiano Fidêncio
3360fb343f Merge pull request #628 from fidencio/wip/update-install-information
Update Installation Guide to better reflect the current state of the project
2020-09-29 17:30:47 +02:00
Peng Tao
3dcd611a3b Merge pull request #822 from chavafg/topic/ci-tests-clone-fix
ci: fix clone_tests_repo function
2020-09-29 23:18:54 +08:00
Fabiano Fidêncio
eaf0338b18 Merge pull request #813 from bpradipt/ppc64le-make
agent: Set LIBC=gnu for ppc64le arch by default
2020-09-29 16:28:19 +02:00
Peng Tao
496ac0a4f5 Merge pull request #516 from liubin/feature/472-integrate-fc-metrics
fc: integrate Firecracker's metrics
2020-09-29 21:30:37 +08:00
Salvador Fuentes
1984e635de ci: fix clone_tests_repo function
We should not check out the 2.0-dev branch in the clone_tests_repo
function when running in the Jenkins CI, as it discards changes from
the tests repo.

Fixes: #818.

Signed-off-by: Salvador Fuentes <salvador.fuentes@intel.com>
2020-09-29 08:56:11 -04:00
Pradipta Kr. Banerjee
02c1a59f6d agent: Set LIBC=gnu for ppc64le arch by default
Fixes: #812

Signed-off-by: Pradipta Kr. Banerjee <pradipta.banerjee@gmail.com>
2020-09-29 09:36:45 +00:00
Tim Zhang
7019e72c7b agent: remove unreachable code
The code at the end of init_child is unreachable and needs to be removed.
The code after do_exec is unreachable and needs to be removed.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-09-29 15:51:59 +08:00
Tim Zhang
942999edb9 agent: Change do_exec return type to ! because it will never return
This lets the compiler flag unreachable code.

Fixes #819

Signed-off-by: Tim Zhang <tim@hyper.sh>
2020-09-29 15:50:54 +08:00
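For illustration, a tiny sketch of the never-type change (the real do_exec calls execvp(); this sketch just exits so it stays self-contained):

```rust
fn do_exec_sketch(program: &str) -> ! {
    // A function that ends by exec'ing (or exiting) never returns, so typing
    // it as `!` lets the compiler flag anything placed after the call.
    println!("would exec {}", program);
    std::process::exit(0)
}

fn main() {
    do_exec_sketch("/bin/true");
    // println!("never printed"); // the compiler would flag this as unreachable
}
```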
Peng Tao
9b2eaeeff0 Merge pull request #787 from wainersm/static_build_qemu
Fix to qemu experimental and improvements
2020-09-29 10:26:23 +08:00
Bin Liu
46f420a9dc Merge pull request #811 from bergwolf/runtime-check
ci: resurrect travis static checkers
2020-09-28 15:23:50 +08:00
Bin Liu
7e7d4c8cea Merge pull request #814 from bergwolf/agent-mount
agent: fix UT failures due to chdir
2020-09-28 15:22:35 +08:00
bin liu
757dfa70e6 fc: integrate Firecracker's metrics
Firecracker exposes metrics through a FIFO file
using a JSON format. This PR parses the
Firecracker metrics and converts them to Prometheus metrics.

Fixes: #472

Signed-off-by: bin liu <bin@hyper.sh>
2020-09-28 15:20:02 +08:00
Peng Tao
b03d958e2f gitignore: ignore agent service file
As it is auto-generated.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-09-28 14:16:11 +08:00
Peng Tao
64b4f69847 agent: fix UT failures due to chdir
The current working directory is a process-level resource. We cannot call
chdir in parallel from multiple threads, as that would cause cwd confusion
and result in UT failures.

The agent code itself is correct in that chdir is only called from the
spawned child init process. Well, there is one exception: it is also called
in do_create_container(), but it is safe to assume that containers are
never created in parallel (at least for now).

Fixes: #782
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-09-28 14:09:22 +08:00
Peng Tao
85d22301e9 runtime: fix TestNewConsole UT failure
It needs root.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-09-28 11:02:56 +08:00
Peng Tao
e90e9a2c9b travis: skip static checker for ppc64
As we have already run it on x64.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-09-28 11:01:07 +08:00
Peng Tao
5611283ec5 runtime: fix golint errors
Need to run gofmt -s on them.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-09-28 11:01:07 +08:00
Peng Tao
daf2a54dc8 agent: fix cargo fmt
Otherwise travis fails.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-09-28 11:01:07 +08:00
Peng Tao
c05c4ba5e7 ci: always checkout 2.0-dev of test repository
We use 2.0-dev in the tests repository now. Always make sure
we use the right branch.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-09-28 11:01:07 +08:00
Peng Tao
1569b3b32a docs: fix static check errors
Somehow we have not been running static checks for a long time,
and that ended up with a lot of errors.

* drop the "Ensure debug options are valid" section
* fix snap links
* drop extra CONTRIBUTING.md
* reference kata-pkgsync
* move CODEOWNERS to proper place
* remove extra CODE_OF_CONDUCT.md.
* fix spell checker error on Developer-Guide.md

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-09-28 11:01:03 +08:00
Peng Tao
df3119b679 runtime: fix make check
Need to use the correct script path.

Fixes: #802
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-09-28 10:24:49 +08:00
Hui Zhu
fb12fff4d8 Merge pull request #808 from lifupan/2.0-dev
agent: Only allow proc mount if it is procfs
2020-09-28 10:09:47 +08:00
Peng Tao
5596eaa31d Merge pull request #441 from liubin/feature/245-add-debug-console
kata 2.0: add debug console service
2020-09-28 10:06:13 +08:00
Peng Tao
ac6f020c6c Merge pull request #697 from keloyang/destroy
runtime: Call s.newStore.Destroy if globalSandboxList.addSandbox
2020-09-27 16:30:24 +08:00
Peng Tao
b20ca6334b Merge pull request #733 from cailca/732
shimv2: add a comment in checkAndMount()
2020-09-27 16:29:51 +08:00
bin liu
484a595f1a runtime: add enable_debug_console configuration item for agent
Set enable_debug_console=true in Kata's configuration file and the
runtime will pass `agent.debug_console`
and `agent.debug_console_vport=1026` to the agent.

Fixes: #245

Signed-off-by: bin liu <bin@hyper.sh>
2020-09-27 15:04:59 +08:00
bin liu
febdf8f68c runtime: add debug console service
Add `kata-runtime exec` to enter the guest OS
through a shell started by the agent.

Fixes: #245

Signed-off-by: bin liu <bin@hyper.sh>
2020-09-27 10:57:17 +08:00
zhanghj
07d339c788 devices: fix go test warning in manager_test.go
Create "class" and "config" file in temporary device BDF dir,
and remove dir created  by ioutil.TempDir() when test finished.

fixes: #746

Signed-off-by: zhanghj <zhanghj.lc@inspur.com>
2020-09-26 20:46:12 -04:00
Christophe de Dinechin
a4afe3af33 rust-agent: Replaces improper use of match for non-constant patterns
The code used `match` as a switch with variable patterns `ev_fd` and
`cg_fd`, but the way Rust interprets the code, the first pattern is a
binding that matches all values, so the code does not perform as expected.

This addresses the following warning:

   warning: unreachable pattern
      --> rustjail/src/cgroups/notifier.rs:114:21
       |
   107 |                     ev_fd => {
       |                     ----- matches any value
   ...
   114 |                     cg_fd => {
       |                     ^^^^^ unreachable pattern
       |
       = note: `#[warn(unreachable_patterns)]` on by default

Fixes: #750
Fixes: #793

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2020-09-25 14:48:26 +02:00
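A compact illustration of the pitfall (made-up values): a bare identifier in a `match` arm is a binding pattern, not a comparison, so the first arm matches everything.

```rust
fn dispatch(ready_fd: i32, ev_fd: i32, cg_fd: i32) {
    // Buggy form: `ev_fd` here is a *new* binding that shadows the outer one
    // and matches any value, so the second arm is unreachable.
    //   match ready_fd {
    //       ev_fd => println!("eventfd ready"),
    //       cg_fd => println!("cgroup fd ready"),
    //   }

    // Correct form: compare explicitly.
    if ready_fd == ev_fd {
        println!("eventfd ready");
    } else if ready_fd == cg_fd {
        println!("cgroup fd ready");
    }
}

fn main() {
    dispatch(7, 5, 7); // prints "cgroup fd ready"
}
```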
Bin Liu
740c45ac61 Merge pull request #800 from Kvasscn/rust-version-bump-test
osbuilder: specify default toolchain version in rust-init
2020-09-25 17:49:01 +08:00
fupan.lfp
acaa806cc7 agent: Only allow proc mount if it is procfs
This only allows a whitelist of files to be bind mounted under proc
and prevents other malicious mounts onto procfs.

Fixes: #807

Signed-off-by: fupan.lfp <fupan.lfp@antfin.com>
2020-09-25 15:00:07 +08:00
Peng Tao
594519d883 Merge pull request #790 from likebreath/upgrade_clh_v0.10.0
runtime: Update CLH client pkg to version v0.10.0
2020-09-25 11:51:11 +08:00
zhanghj
ca501e5478 osbuilder: specify default toolchain version in rust-init.
Specify default toolchain version in rust-init.

Fixes: #799

Signed-off-by: zhanghj <zhanghj.lc@inspur.com>
2020-09-24 23:20:43 -04:00
James O. D. Hunt
0351732778 action: Allow long lines if non-alphabetic
Overly long commit lines are annoying. But sometimes,
we need to be able to force the use of long lines
(for example to reference a URL).

Ironically, I can't refer to the URL that explains this
because of ... the long line check! Hence:

```sh
$ cat <<EOT | tr -d '\n'; echo
See: https://github.com/kata-containers/tests/tree/master/
cmd/checkcommits#handling-long-lines
EOT
```

Maximum body length updated to 150 bytes for parity with:

https://github.com/kata-containers/tests/pull/2848

Fixes: #687.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-09-24 15:32:38 +01:00
Christophe de Dinechin
8cdccedfb3 Merge pull request #792 from dgibson/bug791
agent/oci: Don't use deprecated Error::description() method
2020-09-24 11:49:06 +02:00
fupan.lfp
33513fb49b rustjail: make the mount error info much more clear
Make the error info for an invalid mount
destination clearer.

Signed-off-by: fupan.lfp <fupan.lfp@antfin.com>
2020-09-24 17:48:53 +08:00
David Gibson
45b0b4ede0 agent/oci: Don't use deprecated Error::description() method
We shouldn't use it, and we don't need to implement it.

fixes #791

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2020-09-24 18:26:50 +10:00
James O. D. Hunt
ce9a4eeffd Merge pull request #784 from jodh-intel/2.0-dev-fix-linter-errors
runtime: Fix linter errors in release files
2020-09-24 08:40:19 +01:00
Bo Chen
a34478ffa5 runtime: Update cloud-hypervisor client pkg to version v0.10.0
The latest release of cloud-hypervisor v0.10.0 contains the following
updates: 1) `virtio-block` Support for Multiple Descriptors; 2) Memory
Zones; 3) `Seccomp` Sandbox Improvements; 4) Preliminary KVM HyperV
Emulation Control; 5) various bug fixes and refactoring.

Note that this patch updates the client code of clh's HTTP API in kata,
while the 'versions.yaml' file was updated in an earlier PR.

Fixes: #789

Signed-off-by: Bo Chen <chen.bo@intel.com>
2020-09-23 17:37:34 -07:00
Bo Chen
952b9fe856 Merge pull request #762 from likebreath/fix_clh_ci
packaging: Build from source if the clh release binary is missing
2020-09-23 13:46:24 -07:00
Wainer dos Santos Moschetta
ce675075e1 static-build/qemu-virtiofs: Refactor apply virtiofs patches
In static-build/qemu-virtiofs/Dockerfile the code which
applies the virtiofs specific patches is spread in several
RUN instructions. Refactor this code so that it runs in a
single RUN and produce a single overlay image.

Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2020-09-23 15:42:22 -04:00
Wainer dos Santos Moschetta
512b38cf61 packaging/qemu: Add common code to apply patches
The qemu and qemu-virtiofs Dockerfile files repeat the code to apply
patches based on QEMU stable branch being built. Instead, this adds
a common script (qemu/apply_patches.sh) and make it called by the
respective Dockerfile files.

Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2020-09-23 15:42:22 -04:00
Wainer dos Santos Moschetta
edce2712cd static-build/qemu-virtiofs: Fix to apply QEMU patches
Fix a bug in the qemu-virtiofs Dockerfile which ended up not applying
the QEMU patches.

Fixes #786

Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2020-09-23 15:41:45 -04:00
Bo Chen
86a864b8c3 packaging: Build from source if the clh release binary is missing
This patch adds a fall-back code path that builds the cloud-hypervisor
static binary from source when downloading the cloud-hypervisor binary
fails. This is useful when we experience network issues, and also
useful for upgrading clh to a non-released version.

Together with the changes in the tests repo
(https://github.com/kata-containers/tests/pull/2862), the Jenkins config
file is also updated with a new Execute shell script for the clh CI in the
kata-containers repo. Those two changes fix the clh CI regression
here. Please check the details in the issue below.

Fixes: #781
Fixes: https://github.com/kata-containers/tests/issues/2858

Signed-off-by: Bo Chen <chen.bo@intel.com>
2020-09-23 11:41:49 -07:00
Christophe de Dinechin
1c1b4c9c43 Merge pull request #718 from fgiudici/podman_data_collection_script
runtime: add podman configuration to data collection script
2020-09-23 16:01:47 +02:00
Xu Wang
1641655d8f Merge pull request #780 from liubin/feature/748-use-travis-cache-and-rust
ci: use Travis cache to reduce build time
2020-09-23 20:30:59 +08:00
Hui Zhu
f5f29d00e0 Merge pull request #779 from lifupan/2.0-dev
agent: update cgroups crate
2020-09-23 17:27:34 +08:00
James O. D. Hunt
33585a8edc runtime: Fix linter errors in release files
Fix the linter errors caught in the `runtime` repo's `master` branch [1],
but not in the `2.0-dev` branch [2]. See [3] for further details.

[1] - https://github.com/kata-containers/runtime/pull/2976
[2] - https://github.com/kata-containers/kata-containers/pull/735
[3] - https://github.com/kata-containers/tests/issues/2870

Fixes: #783.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-09-23 09:24:54 +01:00
James O. D. Hunt
6e9592e13d Merge pull request #778 from YchauWang/update-docs-2.0-1
docs: Update the reference path of kata-deploy in the packaging
2020-09-23 08:32:48 +01:00
bin liu
e3a0f9b30e ci: use export command to export envs instead of env config item
Config item env is used as a Matrix Expansion key, so these envs
will export to build jobs individually.

Signed-off-by: bin liu <bin@hyper.sh>
2020-09-23 10:26:07 +08:00
James O. D. Hunt
77ebbc5435 Merge pull request #735 from jodh-intel/2.0-dev-kata-check-compare-versions
runtime: make kata-check check for newer release
2020-09-22 17:49:34 +01:00
fupan.lfp
36ce7018e2 agent: update cgroups crate
Update the cgroups crate to fix the build issue
on aarch64.

Fixes: #770

Signed-off-by: fupan.lfp <fupan.lfp@antfin.com>
2020-09-22 23:00:11 +08:00
Shukui Yang
3523167d20 runtime: Call s.newStore.Destroy if globalSandboxList.addSandbox
Fixes: #696

Signed-off-by: Shukui Yang <keloyangsk@gmail.com>
2020-09-22 22:47:57 +08:00
bin liu
9e5a4b8b80 ci: use Travis cache to reduce build time
This PR includes these changes:
- use Rust installed by Travis
- install x86_64-unknown-linux-musl
- install rustfmt
- use Travis cache
- delete ci/install_vc.sh

Fixes: #748

Signed-off-by: bin liu <bin@hyper.sh>
2020-09-22 21:37:56 +08:00
Julio Montes
d4408807b4 Merge pull request #776 from bergwolf/containerd
how-to: add privileged_without_host_devices to containerd guide
2020-09-22 07:20:46 -05:00
Julio Montes
056a509418 Merge pull request #705 from devimc/2020-08-13/unittest/rustjail-mount
agent: Unit tests for rustjail/mount.rs
2020-09-22 07:19:51 -05:00
Fupan Li
6695e4ff0f Merge pull request #774 from YchauWang/update-docs-2.0branch
docs: Fix the kata-pkgsync tool's docs script path
2020-09-22 17:17:50 +08:00
Ychau Wang
52984b6724 docs: Update the reference path of kata-deploy in the packaging
Use the relative path of kata-deploy to replace the 1.x packaging URL in
the kata-deploy/README.md file. This fixes the path issue produced by
creating the new branch.

Fixes: #777

Signed-off-by: Ychau Wang <wangyongchao.bj@inspur.com>
2020-09-22 16:48:51 +08:00
Francesco Giudici
eae2159168 runtime: add podman configuration to data collection script
Be more verbose about podman configuration in the output of the data
collection script: get the system configuration as seen by podman and
dump the configuration files when present.

Fixes: #243
Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2020-09-22 10:35:45 +02:00
James O. D. Hunt
954a2cc813 Merge pull request #768 from bergwolf/links
Fix developer guide
2020-09-22 09:11:01 +01:00
Peng Tao
d12778480b how-to: add privileged_without_host_devices to containerd guide
It should be set by default for Kata containers working with containerd.

Fixes: #775
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-09-22 15:38:53 +08:00
Fupan Li
0b9bf24d05 Merge pull request #772 from bergwolf/agent-panic
fix guest panic when running agent as init
2020-09-22 12:57:54 +08:00
Peng Tao
98c4d11b6a docs: fix k8s containerd howto links
It should point to the internal versions.yaml file.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-09-22 11:13:18 +08:00
Peng Tao
f107b12bec docs: fix up developer guide for 2.0
1. Until we restore docker/moby support, we should use crictl in the
developer examples.
2. Most of the hyperlinks should point to the kata-containers repository.
3. There is no standalone mode any more.

Fixes: #767
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-09-22 11:13:07 +08:00
Ychau Wang
9f2f520130 docs: Fix the kata-pkgsync tool's docs script path
Fix the kata-pkgsync tool's docs: change the download path of the
packaging tool for the 2.0 release.

Fixes: #773

Signed-off-by: Ychau Wang <wangyongchao.bj@inspur.com>
2020-09-22 09:21:02 +08:00
Julio Montes
96f8769a99 travis: enable RUST_BACKTRACE
RUST_BACKTRACE=1 will help us a lot to debug unit tests when
a test fails

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-09-21 15:48:02 -05:00
Julio Montes
cda7acf7da agent/rustjail: add more unit tests
Add unit tests for finish_root, read_only_path and mknod_dev
increasing code coverage of mount.rs

fixes #284

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-09-21 15:24:53 -05:00
Julio Montes
98cc979ae1 agent/rustjail: remove makedev function
remove `makedev` function, use `nix`'s implementation instead

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-09-21 15:24:53 -05:00
Julio Montes
b99fefad7e agent/rustjail: add unit tests for ms_move_rootfs and mask_path
Increase code coverage of mount.rs

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-09-21 15:24:53 -05:00
Julio Montes
d79fad2dd8 agent/rustjail: implement functions to chroot
Use conditional compilation (#[cfg]) to change chroot behaviour
at compilation time. For example, such a function will just return
`Ok(())` when the unit tests are being compiled; otherwise the real
chroot operation is performed.

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-09-21 15:24:53 -05:00
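The `#[cfg]`-based approach used by this commit, and by the pivot_root and mount/umount commits below, can be sketched as follows. This is a simplified illustration, assuming `std`'s `chroot` wrapper rather than whatever the rustjail code actually calls:

```rust
use std::io;
use std::path::Path;

// Normal builds compile the real, privileged implementation.
#[cfg(not(test))]
fn do_chroot(path: &Path) -> io::Result<()> {
    std::os::unix::fs::chroot(path)
}

// `cargo test` compiles this stub instead, so callers can be unit tested
// without root privileges or a prepared rootfs.
#[cfg(test)]
fn do_chroot(_path: &Path) -> io::Result<()> {
    Ok(())
}

fn main() -> io::Result<()> {
    // In the agent this would run while entering the container rootfs
    // (needs root to succeed when built normally).
    do_chroot(Path::new("/"))
}
```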
Julio Montes
25c91afbea agent/rustjail: add unit test for pivot_rootfs
Add unit test for pivot_rootfs increasing the code coverage of
mount.rs

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-09-21 15:24:53 -05:00
Julio Montes
7cf0fd95f1 agent/rustjail: implement functions to pivot_root
Use conditional compilation (#[cfg]) to change pivot_root behaviour
at compilation time. For example, such a function will just return
`Ok(())` when the unit tests are being compiled; otherwise the real
pivot_root operation is performed.

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-09-21 15:24:53 -05:00
Julio Montes
672da4d08c agent/rustjail: add unit test for mount_cgroups
Add a unit test for `mount_cgroups` increasing the code coverage
of mount.rs from 44% to 52%

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-09-21 15:24:53 -05:00
Julio Montes
ab61cf7f9f agent/rustjail: add unit test for init_rootfs
Add a unit test for `init_rootfs` increasing the code coverage
of mount.rs from 0% to 44%.

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-09-21 15:24:53 -05:00
Julio Montes
0a0714c9c3 agent/rustjail/mount: don't use unwrap
Don't use unwrap in `init_rootfs`; instead return an Error, so that
we can write unit tests that don't panic.

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-09-21 15:24:53 -05:00
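A sketch of the change in spirit, using a hypothetical helper and an `anyhow`-style error type (the agent's real error handling differs): returning a `Result` lets a unit test assert on the failure instead of aborting on a panic.

```rust
use std::collections::HashMap;

use anyhow::{anyhow, Result};

// Before: `options.get(key).unwrap()` panics when the key is missing,
// taking the whole test binary down with it.
// After: return an error that the caller, and a unit test, can inspect.
fn get_option<'a>(options: &'a HashMap<String, String>, key: &str) -> Result<&'a str> {
    options
        .get(key)
        .map(String::as_str)
        .ok_or_else(|| anyhow!("missing mount option: {}", key))
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn missing_key_is_an_error_not_a_panic() {
        assert!(get_option(&HashMap::new(), "fstype").is_err());
    }
}
```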
Julio Montes
3dc9452bc6 agent/rustjail: add tempfile crate as dependency
Add the tempfile crate as a dependency; it will be used in the following
commits to create temporary directories for unit testing.

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-09-21 15:24:53 -05:00
Julio Montes
d756f52c73 rustjail: implement functions to mount and umount files
Use conditional compilation (#[cfg]) to change mount and umount
behaviours at compilation time. For example, such functions will just
return `Ok(())` when the unit tests are being compiled; otherwise the
real mount and umount operations are performed.

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-09-21 15:24:53 -05:00
Peng Tao
a02d17870a gitignore: ignore agent version.rs
It is auto-generated.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-09-21 20:58:57 +08:00
Peng Tao
b518ddeac8 agent: fix agent panic running as init
We should mount procfs before trying to parse kernel command lines.

Fixes: #771
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-09-21 20:58:13 +08:00
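A hedged sketch of the ordering fix described above, using the `nix` crate and a hypothetical helper (the agent's actual init path is more involved): procfs must be mounted before the kernel command line can be read.

```rust
use anyhow::Result;
use nix::mount::{mount, MsFlags};
use std::fs;
use std::path::Path;

fn kernel_cmdline() -> Result<String> {
    // When running as PID 1 nothing has mounted /proc yet, so mount procfs
    // first; only then can /proc/cmdline be parsed for agent options.
    if !Path::new("/proc/cmdline").exists() {
        mount(
            Some("proc"),
            "/proc",
            Some("proc"),
            MsFlags::empty(),
            None::<&str>,
        )?;
    }
    Ok(fs::read_to_string("/proc/cmdline")?)
}

fn main() -> Result<()> {
    println!("{}", kernel_cmdline()?);
    Ok(())
}
```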
James O. D. Hunt
1a77f69e15 runtime: make kata-check check for newer release
Update `kata-check` to see if there is a newer version available for
download. Useful for users installing static packages (without a package
manager).

Fixes: #734.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2020-09-21 13:47:29 +01:00
Peng Tao
53b0e085e1 Merge pull request #757 from Kvasscn/update-version-file-url-in-packaging-makefile
packaging: update version file url for kata 2.0 in Makefile
2020-09-21 11:47:36 +08:00
Bin Liu
571699c926 Merge pull request #764 from bergwolf/fix-release-notes
Fix release notes
2020-09-21 09:59:53 +08:00
zhanghj
61181b9f86 packaging: use local version file for kata 2.0 in Makefile
Use local version file instead of downloading from upstream repo.

Fixes: #756

Signed-off-by: zhanghj <zhanghj.lc@inspur.com>
2020-09-20 21:21:47 -04:00
Peng Tao
e1c6aa27b7 docs: fix release process doc
We no longer build OBS packages, and we use
kata-containers/tools/packaging/release to do releases.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-09-19 09:10:14 +08:00
Peng Tao
1acfba4dd7 packaging: fix release notes
Should mention the 2.0 branch docs.

Fixes: #763
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2020-09-19 09:10:14 +08:00
Ychau Wang
1839dfd95a runtime: Clear the VCMock 1.x API Methods from 2.0
Remove the 1.x branch API methods from 2.0. Keep the same methods on
the VC interface, like the VCImpl struct.

Fixes: #751

Signed-off-by: Ychau Wang <wangyongchao.bj@inspur.com>
2020-09-18 16:30:12 +08:00
Qian Cai
7225460a68 shimv2: add a comment in checkAndMount()
In checkAndMount(), it is not clear why we check IsBlockDevice() and if
DisableBlockDeviceUse == false and then only return "false, nil" instead
of "false, err". Adding a comment to make it a bit more readable.

Fixes: #732
Signed-off-by: Qian Cai <cai@redhat.com>
2020-09-15 13:01:03 -04:00
Fabiano Fidêncio
22ca2da6f7 packaging: Stop providing OBS packages
The community has discussed and taken the decision in favour of promoting
kata-deploy as the way of distributing and using Kata for distros that
don't officially maintain the project.

Fixes: #623
Fixes: https://github.com/kata-containers/packaging/issues/1120

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2020-09-15 17:26:17 +02:00
Fabiano Fidêncio
afa88c1bd3 install: Add contacts to the distribution packages
Let's add a new column to the Official packages table, and let the
maintainers of the official distro packages jump in and add their
names there.

This will help us ping the right people and redirect to them any issues
that are reported against the official packages.

Fixes: #623

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2020-09-15 17:24:24 +02:00
Fabiano Fidêncio
3955cc8957 install: Update information about Community Packages
Kata Containers will stop distributing the community packages in favour
of kata-deploy.

Fixes: #623

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2020-09-15 17:24:24 +02:00
Fabiano Fidêncio
218f77d7be install: Update SUSE information
Following up a conversation with Ralf Haferkamp, we can safely drop the
instructions for using Kata Containers on SLES 12 SP3 in favour of using
the official builds provided for SLE 15 SP1, and SLE 15 SP2.

Fixes: #623

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2020-09-15 17:24:24 +02:00
Fabiano Fidêncio
2a0e76a8a9 install: Update openSUSE information
Let's update the openSUSE Installation Guide to reflect the current
information on how to install kata packages provided by the distro
itself.

The official packages are present on Leap 15.2 and Tumbleweed, and can
be just installed. Leap 15.1 is slightly different, as the .repo file
has to be added before the packages can be installed.

Leap 15.0 has been removed as it already reached its EOL.

Fixes: #623

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2020-09-15 17:24:24 +02:00
Fabiano Fidêncio
691f13645b install: Update RHEL information
Although the community packages are present for RHEL, everything about
them is extremely unsupported on the Red Hat side.

Knowing this, we'd be better off simply not mentioning those; if users
really want to try kata-containers on RHEL, they can simply follow the
CentOS installation guide.

In the future, if the Fedora packages make their way to RHEL, we can add
the information here. However, if we're recommending something
unsupported, we'd be better off recommending kata-deploy instead.

Fixes: #623

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2020-09-15 17:24:24 +02:00
Fabiano Fidêncio
270fc4b2d1 install: Update Fedora information
Let's update the Fedora Installation Guide to reflect the current
information on how to install kata packages provided by the distro
itself.

These are official packages and we, as Fedora members, recommend using
kata-containers on Fedora 32 and onwards, as from this version
everything works out-of-the-box. Also, Fedora 31 will reach its EOL as
soon as Fedora 33 is out, which should happen in October.

Fixes: #623

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2020-09-15 17:24:24 +02:00
Fabiano Fidêncio
492b4e906b install: Update CentOS information
Let's update the CentOS Installation Guide to reflect the current
information on how to install kata packages provided by the
Virtualization Special Interest Group.

These are not official CentOS packages, as they do not come from Red
Hat Enterprise Linux. These are the same packages we have on Fedora and
we have decided to keep them up-to-date and synced on both Fedora and
CentOS, so people can also give Kata Containers a try on CentOS.

The nature of these packages makes me think that those are "as official
as they can be", so that's the reason I've decided to add the
instructions to the "official" table.

Together with the change in the Installation Guide, let's also update
the README and reflect the fact we **strongly recommend** using CentOS
8, with the packages provided by the Virtualization Special Interest
Group, instead of using the CentOS 7 with packages built on OBS.

Fixes: #623

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2020-09-15 17:24:24 +02:00
833 changed files with 79323 additions and 37687 deletions

View File

@@ -10,7 +10,7 @@ env:
error_msg: |+
See the document below for help on formatting commits for the project.
https://github.com/kata-containers/community/blob/master/CONTRIBUTING.md#patch-forma
https://github.com/kata-containers/community/blob/master/CONTRIBUTING.md#patch-format
jobs:
commit-message-check:

View File

@@ -1,12 +1,9 @@
on:
issue_comment:
types: [created, edited]
on: issue_comment
name: test-kata-deploy
jobs:
check_comments:
if: ${{ github.event.issue.pull_request }}
types: [created, edited]
runs-on: ubuntu-latest
steps:
- name: Check for Command
@@ -14,7 +11,7 @@ jobs:
uses: kata-containers/slash-command-action@v1
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
command: "test_kata_deploy"
command: "test-kata-deploy"
reaction: "true"
reaction-type: "eyes"
allow-edits: "false"
@@ -22,7 +19,6 @@ jobs:
- name: verify command arg is kata-deploy
run: |
echo "The command was '${{ steps.command.outputs.command-name }}' with arguments '${{ steps.command.outputs.command-arguments }}'"
create-and-test-container:
needs: check_comments
runs-on: ubuntu-latest
@@ -33,26 +29,22 @@ jobs:
ref=$(cat $GITHUB_EVENT_PATH | jq -r '.issue.pull_request.url' | sed 's#^.*\/pulls#refs\/pull#' | sed 's#$#\/merge#')
echo "reference for PR: " ${ref}
echo "##[set-output name=pr-ref;]${ref}"
- name: check out
uses: actions/checkout@v2
- uses: actions/checkout@v2-beta
with:
ref: ${{ steps.get-PR-ref.outputs.pr-ref }}
ref: ${{ steps.get-PR-ref.outputs.pr-ref }}
- name: build-container-image
id: build-container-image
run: |
PR_SHA=$(git log --format=format:%H -n1)
VERSION="2.0.0"
VERSION=$(curl https://raw.githubusercontent.com/kata-containers/kata-containers/main/VERSION)
ARTIFACT_URL="https://github.com/kata-containers/kata-containers/releases/download/${VERSION}/kata-static-${VERSION}-x86_64.tar.xz"
wget "${ARTIFACT_URL}" -O tools/packaging/kata-deploy/kata-static.tar.xz
docker build --build-arg KATA_ARTIFACTS=kata-static.tar.xz -t katadocker/kata-deploy-ci:${PR_SHA} ./tools/packaging/kata-deploy
wget "${ARTIFACT_URL}" -O ./kata-deploy/kata-static.tar.xz
docker build --build-arg KATA_ARTIFACTS=kata-static.tar.xz -t katadocker/kata-deploy-ci:${PR_SHA} ./kata-deploy
docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }}
docker push katadocker/kata-deploy-ci:$PR_SHA
echo "##[set-output name=pr-sha;]${PR_SHA}"
- name: test-kata-deploy-ci-in-aks
uses: ./tools/packaging/kata-deploy/action
uses: ./kata-deploy/action
with:
packaging-sha: ${{ steps.build-container-image.outputs.pr-sha }}
env:

View File

@@ -43,7 +43,7 @@ jobs:
echo "artifact-built=false" >> $GITHUB_ENV
fi
- name: store-artifacts
if: env.artifact-built == 'true'
if: ${{ env.artifact-built }} == 'true'
uses: actions/upload-artifact@master
with:
name: kata-artifacts
@@ -71,7 +71,7 @@ jobs:
echo "artifact-built=false" >> $GITHUB_ENV
fi
- name: store-artifacts
if: env.artifact-built == 'true'
if: ${{ env.artifact-built }} == 'true'
uses: actions/upload-artifact@master
with:
name: kata-artifacts
@@ -97,7 +97,7 @@ jobs:
echo "artifact-built=false" >> $GITHUB_ENV
fi
- name: store-artifacts
if: env.artifact-built == 'true'
if: ${{ env.artifact-built }} == 'true'
uses: actions/upload-artifact@master
with:
name: kata-artifacts
@@ -124,7 +124,7 @@ jobs:
echo "artifact-built=false" >> $GITHUB_ENV
fi
- name: store-artifacts
if: env.artifact-built == 'true'
if: ${{ env.artifact-built }} == 'true'
uses: actions/upload-artifact@master
with:
name: kata-artifacts
@@ -151,7 +151,7 @@ jobs:
echo "artifact-built=false" >> $GITHUB_ENV
fi
- name: store-artifacts
if: env.artifact-built == 'true'
if: ${{ env.artifact-built }} == 'true'
uses: actions/upload-artifact@master
with:
name: kata-artifacts
@@ -178,7 +178,7 @@ jobs:
echo "artifact-built=false" >> $GITHUB_ENV
fi
- name: store-artifacts
if: env.artifact-built == 'true'
if: ${{ env.artifact-built }} == 'true'
uses: actions/upload-artifact@master
with:
name: kata-artifacts
@@ -205,7 +205,7 @@ jobs:
echo "artifact-built=false" >> $GITHUB_ENV
fi
- name: store-artifacts
if: env.artifact-built == 'true'
if: ${{ env.artifact-built }} == 'true'
uses: actions/upload-artifact@master
with:
name: kata-artifacts

View File

@@ -44,7 +44,7 @@ jobs:
echo "artifact-built=false" >> $GITHUB_ENV
fi
- name: store-artifacts
if: env.artifact-built == 'true'
if: ${{ env.artifact-built }} == 'true'
uses: actions/upload-artifact@v2
with:
name: kata-artifacts
@@ -72,7 +72,7 @@ jobs:
echo "artifact-built=false" >> $GITHUB_ENV
fi
- name: store-artifacts
if: env.artifact-built == 'true'
if: ${{ env.artifact-built }} == 'true'
uses: actions/upload-artifact@v2
with:
name: kata-artifacts
@@ -98,7 +98,7 @@ jobs:
echo "artifact-built=false" >> $GITHUB_ENV
fi
- name: store-artifacts
if: env.artifact-built == 'true'
if: ${{ env.artifact-built }} == 'true'
uses: actions/upload-artifact@v2
with:
name: kata-artifacts
@@ -124,7 +124,7 @@ jobs:
echo "artifact-built=false" >> $GITHUB_ENV
fi
- name: store-artifacts
if: env.artifact-built == 'true'
if: ${{ env.artifact-built }} == 'true'
uses: actions/upload-artifact@v2
with:
name: kata-artifacts
@@ -150,7 +150,7 @@ jobs:
echo "artifact-built=false" >> $GITHUB_ENV
fi
- name: store-artifacts
if: env.artifact-built == 'true'
if: ${{ env.artifact-built }} == 'true'
uses: actions/upload-artifact@v2
with:
name: kata-artifacts
@@ -177,7 +177,7 @@ jobs:
echo "artifact-built=false" >> $GITHUB_ENV
fi
- name: store-artifacts
if: env.artifact-built == 'true'
if: ${{ env.artifact-built }} == 'true'
uses: actions/upload-artifact@v2
with:
name: kata-artifacts
@@ -203,7 +203,7 @@ jobs:
echo "artifact-built=false" >> $GITHUB_ENV
fi
- name: store-artifacts
if: env.artifact-built == 'true'
if: ${{ env.artifact-built }} == 'true'
uses: actions/upload-artifact@v2
with:
name: kata-artifacts

View File

@@ -19,7 +19,7 @@ jobs:
run: |
sudo apt-get install -y git git-extras
kata_url="https://github.com/kata-containers/kata-containers"
latest_version=$(git ls-remote --tags ${kata_url} | egrep -o "refs.*" | egrep -o "[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+" | sort -V -r -u | head -1)
latest_version=$(git ls-remote --tags ${kata_url} | egrep -o "refs.*" | egrep -v "\-alpha|\-rc|{}" | egrep -o "[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+" | sort -V -r | head -1)
current_version="$(echo ${GITHUB_REF} | cut -d/ -f3)"
# Check semantic versioning format (x.y.z) and if the current tag is the latest tag
if echo "${current_version}" | grep -q "^[[:digit:]]\+\.[[:digit:]]\+\.[[:digit:]]\+$" && echo -e "$latest_version\n$current_version" | sort -C -V; then

1
.gitignore vendored
View File

@@ -1,4 +1,5 @@
**/*.bk
**/*~
**/*.orig
**/*.rej
**/target

201
README.md
View File

@@ -2,130 +2,143 @@
# Kata Containers
* [Raising issues](#raising-issues)
* [Kata Containers repositories](#kata-containers-repositories)
* [Code Repositories](#code-repositories)
* [Kata Containers-developed components](#kata-containers-developed-components)
* [Agent](#agent)
* [KSM throttler](#ksm-throttler)
* [Runtime](#runtime)
* [Trace forwarder](#trace-forwarder)
* [Additional](#additional)
* [Kernel](#kernel)
* [CI](#ci)
* [Community](#community)
* [Kata Containers](#kata-containers)
* [Introduction](#introduction)
* [Getting started](#getting-started)
* [Documentation](#documentation)
* [Packaging](#packaging)
* [Test code](#test-code)
* [Utilities](#utilities)
* [OS builder](#os-builder)
* [Web content](#web-content)
* [Community](#community)
* [Getting help](#getting-help)
* [Raising issues](#raising-issues)
* [Kata Containers 1.x versions](#kata-containers-1x-versions)
* [Developers](#developers)
* [Components](#components)
* [Kata Containers 1.x components](#kata-containers-1x-components)
* [Common repositories](#common-repositories)
* [Packaging and releases](#packaging-and-releases)
---
Welcome to Kata Containers!
The purpose of this repository is to act as a "top level" site for the project. Specifically it is used:
This repository is the home of the Kata Containers code for the 2.0 and newer
releases.
- To provide a list of the various *other* [Kata Containers repositories](#kata-containers-repositories),
along with a brief explanation of their purpose.
If you want to learn about Kata Containers, visit the main
[Kata Containers website](https://katacontainers.io).
- To provide a general area for [Raising Issues](#raising-issues).
For further details on the older (first generation) Kata Containers 1.x
versions, see the
[Kata Containers 1.x components](#kata-containers-1x-components)
section.
## Raising issues
## Introduction
This repository is used for [raising
issues](https://github.com/kata-containers/kata-containers/issues/new):
Kata Containers is an open source project and community working to build a
standard implementation of lightweight Virtual Machines (VMs) that feel and
perform like containers, but provide the workload isolation and security
advantages of VMs.
- That might affect multiple code repositories.
## Getting started
- Where the raiser is unsure which repositories are affected.
See the [installation documentation](docs/install).
## Documentation
See the [official documentation](docs)
(including [installation guides](docs/install),
[the developer guide](docs/Developer-Guide.md),
[design documents](docs/design) and more).
## Community
To learn more about the project, its community and governance, see the
[community repository](https://github.com/kata-containers/community). This is
the first place to go if you wish to contribute to the project.
## Getting help
See the [community](#community) section for ways to contact us.
### Raising issues
Please raise an issue
[in this repository](https://github.com/kata-containers/kata-containers/issues).
> **Note:**
>
> - If an issue affects only a single component, it should be raised in that
> components repository.
> If you are reporting a security issue, please follow the [vulnerability reporting process](https://github.com/kata-containers/community#vulnerability-handling)
## Kata Containers repositories
#### Kata Containers 1.x versions
### CI
For older Kata Containers 1.x releases, please raise an issue in the
[Kata Containers 1.x component repository](#kata-containers-1x-components)
that seems most appropriate.
The [CI](https://github.com/kata-containers/ci) repository stores the Continuous
Integration (CI) system configuration information.
If in doubt, raise an issue
[in the Kata Containers 1.x runtime repository](https://github.com/kata-containers/runtime/issues).
### Community
## Developers
The [Community](https://github.com/kata-containers/community) repository is
the first place to go if you want to use or contribute to the project.
### Components
### Code Repositories
| Component | Type | Description |
|-|-|-|
| [agent-ctl](tools/agent-ctl) | utility | Tool that provides low-level access for testing the agent. |
| [agent](src/agent) | core | Management process running inside the virtual machine / POD that sets up the container environment. |
| [documentation](docs) | documentation | Documentation common to all components (such as design and install documentation). |
| [osbuilder](tools/osbuilder) | infrastructure | Tool to create "mini O/S" rootfs and initrd images for the hypervisor. |
| [packaging](tools/packaging) | infrastructure | Scripts and metadata for producing packaged binaries<br/>(components, hypervisors, kernel and rootfs). |
| [runtime](src/runtime) | core | Main component run by a container manager and providing a containerd shimv2 runtime implementation. |
| [trace-forwarder](src/trace-forwarder) | utility | Agent tracing helper. |
#### Kata Containers-developed components
#### Kata Containers 1.x components
##### Agent
For the first generation of Kata Containers (1.x versions), each component was
kept in a separate repository.
The [`kata-agent`](src/agent/README.md) runs inside the
virtual machine and sets up the container environment.
For information on the Kata Containers 1.x releases, see the
[Kata Containers 1.x releases page](https://github.com/kata-containers/runtime/releases).
##### KSM throttler
For further information on particular Kata Containers 1.x components, see the
individual component repositories:
The [`kata-ksm-throttler`](https://github.com/kata-containers/ksm-throttler)
is an optional utility that monitors containers and deduplicates memory to
maximize container density on a host.
| Component | Type | Description |
|-|-|-|
| [agent](https://github.com/kata-containers/agent) | core | See [components](#components). |
| [documentation](https://github.com/kata-containers/documentation) | documentation | |
| [KSM throttler](https://github.com/kata-containers/ksm-throttler) | optional core | Daemon that monitors containers and deduplicates memory to maximize container density on the host. |
| [osbuilder](https://github.com/kata-containers/osbuilder) | infrastructure | See [components](#components). |
| [packaging](https://github.com/kata-containers/packaging) | infrastructure | See [components](#components). |
| [proxy](https://github.com/kata-containers/proxy) | core | Multiplexes communications between the shims, agent and runtime. |
| [runtime](https://github.com/kata-containers/runtime) | core | See [components](#components). |
| [shim](https://github.com/kata-containers/shim) | core | Handles standard I/O and signals on behalf of the container process. |
##### Runtime
> **Note:**
>
> - There are more components for the original Kata Containers 1.x implementation.
> - The current implementation simplifies the design significantly:
> compare the [current](docs/design/architecture.md) and
> [previous generation](https://github.com/kata-containers/documentation/blob/master/design/architecture.md)
> designs.
The [`kata-runtime`](src/runtime/README.md) is usually
invoked by a container manager and provides high-level verbs to manage
containers.
### Common repositories
##### Trace forwarder
The following repositories are used by both the current and first generation Kata Containers implementations:
The [`kata-trace-forwarder`](src/trace-forwarder) is a component only used
when tracing the [agent](#agent) process.
| Component | Description | Current | First generation | Notes |
|-|-|-|-|-|
| CI | Continuous Integration configuration files and scripts. | [Kata 2.x](https://github.com/kata-containers/ci/tree/main) | [Kata 1.x](https://github.com/kata-containers/ci/tree/master) | |
| kernel | The Linux kernel used by the hypervisor to boot the guest image. | [Kata 2.x][kernel] | [Kata 1.x][kernel] | Patches are stored in the packaging component. |
| tests | Test code. | [Kata 2.x](https://github.com/kata-containers/tests/tree/main) | [Kata 1.x](https://github.com/kata-containers/tests/tree/master) | Excludes unit tests which live with the main code. |
| www.katacontainers.io | Contains the source for the [main web site](https://www.katacontainers.io). | [Kata 2.x][github-katacontainers.io] | [Kata 1.x][github-katacontainers.io] | | |
#### Additional
### Packaging and releases
##### Kernel
Kata Containers is now
[available natively for most distributions](docs/install/README.md#packaged-installation-methods).
However, packaging scripts and metadata are still used to generate snap and GitHub releases. See
the [components](#components) section for further details.
The hypervisor uses a [Linux\* kernel](https://github.com/kata-containers/linux) to boot the guest image.
---
### Documentation
The [docs](docs/README.md) directory holds documentation common to all code components.
### Packaging
We use the [packaging](tools/packaging/README.md) to create packages for the [system
components](#kata-containers-developed-components) including
[rootfs](#os-builder) and [kernel](#kernel) images.
### Test code
The [tests](https://github.com/kata-containers/tests) repository hosts all
test code except the unit testing code (which is kept in the same repository
as the component it tests).
### Utilities
#### OS builder
The [osbuilder](tools/osbuilder/README.md) tool can create
a rootfs and a "mini O/S" image. This image is used by the hypervisor to setup
the environment before switching to the workload.
#### `kata-agent-ctl`
[`kata-agent-ctl`](tools/agent-ctl) is a low-level test tool for
interacting with the agent.
### Web content
The
[www.katacontainers.io](https://github.com/kata-containers/www.katacontainers.io)
repository contains all sources for the https://www.katacontainers.io site.
## Credits
Kata Containers uses [packagecloud](https://packagecloud.io) for package
hosting.
[kernel]: https://www.kernel.org
[github-katacontainers.io]: https://github.com/kata-containers/www.katacontainers.io

View File

@@ -1 +1 @@
2.0.4
2.1-alpha1

View File

@@ -12,10 +12,11 @@ install_aarch64_musl() {
local musl_tar="${arch}-linux-musl-native.tgz"
local musl_dir="${arch}-linux-musl-native"
pushd /tmp
curl -sLO https://musl.cc/${musl_tar}
tar -zxf ${musl_tar}
mkdir -p /usr/local/musl/
cp -r ${musl_dir}/* /usr/local/musl/
if curl -sLO --fail https://musl.cc/${musl_tar}; then
tar -zxf ${musl_tar}
mkdir -p /usr/local/musl/
cp -r ${musl_dir}/* /usr/local/musl/
fi
popd
fi
}

View File

@@ -56,7 +56,7 @@ function install_yq() {
die "Please install curl"
fi
local yq_version=3.1.0
local yq_version=3.4.1
## NOTE: ${var,,} => gives lowercase value of var
local yq_url="https://${yq_pkg}/releases/download/${yq_version}/yq_${goos,,}_${goarch}"

View File

@@ -5,7 +5,7 @@
export tests_repo="${tests_repo:-github.com/kata-containers/tests}"
export tests_repo_dir="$GOPATH/src/$tests_repo"
export branch="${branch:-$TRAVIS_BRANCH}"
export branch="${branch:-main}"
clone_tests_repo()
{

View File

@@ -1,54 +1,54 @@
- [Warning](#warning)
- [Assumptions](#assumptions)
- [Initial setup](#initial-setup)
- [Requirements to build individual components](#requirements-to-build-individual-components)
- [Build and install the Kata Containers runtime](#build-and-install-the-kata-containers-runtime)
- [Check hardware requirements](#check-hardware-requirements)
- [Configure to use initrd or rootfs image](#configure-to-use-initrd-or-rootfs-image)
- [Enable full debug](#enable-full-debug)
- [debug logs and shimv2](#debug-logs-and-shimv2)
- [Enabling full `containerd` debug](#enabling-full-containerd-debug)
- [Enabling just `containerd shim` debug](#enabling-just-containerd-shim-debug)
- [Enabling `CRI-O` and `shimv2` debug](#enabling-cri-o-and-shimv2-debug)
- [journald rate limiting](#journald-rate-limiting)
- [`systemd-journald` suppressing messages](#systemd-journald-suppressing-messages)
- [Disabling `systemd-journald` rate limiting](#disabling-systemd-journald-rate-limiting)
- [Create and install rootfs and initrd image](#create-and-install-rootfs-and-initrd-image)
- [Build a custom Kata agent - OPTIONAL](#build-a-custom-kata-agent---optional)
- [Get the osbuilder](#get-the-osbuilder)
- [Create a rootfs image](#create-a-rootfs-image)
- [Create a local rootfs](#create-a-local-rootfs)
- [Add a custom agent to the image - OPTIONAL](#add-a-custom-agent-to-the-image---optional)
- [Build a rootfs image](#build-a-rootfs-image)
- [Install the rootfs image](#install-the-rootfs-image)
- [Create an initrd image - OPTIONAL](#create-an-initrd-image---optional)
- [Create a local rootfs for initrd image](#create-a-local-rootfs-for-initrd-image)
- [Build an initrd image](#build-an-initrd-image)
- [Install the initrd image](#install-the-initrd-image)
- [Install guest kernel images](#install-guest-kernel-images)
- [Install a hypervisor](#install-a-hypervisor)
- [Build a custom QEMU](#build-a-custom-qemu)
- [Build a custom QEMU for aarch64/arm64 - REQUIRED](#build-a-custom-qemu-for-aarch64arm64---required)
- [Run Kata Containers with Containerd](#run-kata-containers-with-containerd)
- [Run Kata Containers with Kubernetes](#run-kata-containers-with-kubernetes)
- [Troubleshoot Kata Containers](#troubleshoot-kata-containers)
- [Appendices](#appendices)
- [Checking Docker default runtime](#checking-docker-default-runtime)
- [Set up a debug console](#set-up-a-debug-console)
- [Simple debug console setup](#simple-debug-console-setup)
- [Enable agent debug console](#enable-agent-debug-console)
- [Connect to debug console](#connect-to-debug-console)
- [Traditional debug console setup](#traditional-debug-console-setup)
- [Create a custom image containing a shell](#create-a-custom-image-containing-a-shell)
- [Build the debug image](#build-the-debug-image)
- [Configure runtime for custom debug image](#configure-runtime-for-custom-debug-image)
- [Create a container](#create-a-container)
- [Connect to the virtual machine using the debug console](#connect-to-the-virtual-machine-using-the-debug-console)
- [Enabling debug console for QEMU](#enabling-debug-console-for-qemu)
- [Enabling debug console for cloud-hypervisor / firecracker](#enabling-debug-console-for-cloud-hypervisor--firecracker)
- [Connecting to the debug console](#connecting-to-the-debug-console)
- [Obtain details of the image](#obtain-details-of-the-image)
- [Capturing kernel boot logs](#capturing-kernel-boot-logs)
* [Warning](#warning)
* [Assumptions](#assumptions)
* [Initial setup](#initial-setup)
* [Requirements to build individual components](#requirements-to-build-individual-components)
* [Build and install the Kata Containers runtime](#build-and-install-the-kata-containers-runtime)
* [Check hardware requirements](#check-hardware-requirements)
* [Configure to use initrd or rootfs image](#configure-to-use-initrd-or-rootfs-image)
* [Enable full debug](#enable-full-debug)
* [debug logs and shimv2](#debug-logs-and-shimv2)
* [Enabling full `containerd` debug](#enabling-full-containerd-debug)
* [Enabling just `containerd shim` debug](#enabling-just-containerd-shim-debug)
* [Enabling `CRI-O` and `shimv2` debug](#enabling-cri-o-and-shimv2-debug)
* [journald rate limiting](#journald-rate-limiting)
* [`systemd-journald` suppressing messages](#systemd-journald-suppressing-messages)
* [Disabling `systemd-journald` rate limiting](#disabling-systemd-journald-rate-limiting)
* [Create and install rootfs and initrd image](#create-and-install-rootfs-and-initrd-image)
* [Build a custom Kata agent - OPTIONAL](#build-a-custom-kata-agent---optional)
* [Get the osbuilder](#get-the-osbuilder)
* [Create a rootfs image](#create-a-rootfs-image)
* [Create a local rootfs](#create-a-local-rootfs)
* [Add a custom agent to the image - OPTIONAL](#add-a-custom-agent-to-the-image---optional)
* [Build a rootfs image](#build-a-rootfs-image)
* [Install the rootfs image](#install-the-rootfs-image)
* [Create an initrd image - OPTIONAL](#create-an-initrd-image---optional)
* [Create a local rootfs for initrd image](#create-a-local-rootfs-for-initrd-image)
* [Build an initrd image](#build-an-initrd-image)
* [Install the initrd image](#install-the-initrd-image)
* [Install guest kernel images](#install-guest-kernel-images)
* [Install a hypervisor](#install-a-hypervisor)
* [Build a custom QEMU](#build-a-custom-qemu)
* [Build a custom QEMU for aarch64/arm64 - REQUIRED](#build-a-custom-qemu-for-aarch64arm64---required)
* [Run Kata Containers with Containerd](#run-kata-containers-with-containerd)
* [Run Kata Containers with Kubernetes](#run-kata-containers-with-kubernetes)
* [Troubleshoot Kata Containers](#troubleshoot-kata-containers)
* [Appendices](#appendices)
* [Checking Docker default runtime](#checking-docker-default-runtime)
* [Set up a debug console](#set-up-a-debug-console)
* [Simple debug console setup](#simple-debug-console-setup)
* [Enable agent debug console](#enable-agent-debug-console)
* [Connect to debug console](#connect-to-debug-console)
* [Traditional debug console setup](#traditional-debug-console-setup)
* [Create a custom image containing a shell](#create-a-custom-image-containing-a-shell)
* [Build the debug image](#build-the-debug-image)
* [Configure runtime for custom debug image](#configure-runtime-for-custom-debug-image)
* [Connect to the virtual machine using the debug console](#connect-to-the-virtual-machine-using-the-debug-console)
* [Enabling debug console for QEMU](#enabling-debug-console-for-qemu)
* [Enabling debug console for cloud-hypervisor / firecracker](#enabling-debug-console-for-cloud-hypervisor--firecracker)
* [Create a container](#create-a-container)
* [Connect to the virtual machine using the debug console](#connect-to-the-virtual-machine-using-the-debug-console)
* [Obtain details of the image](#obtain-details-of-the-image)
* [Capturing kernel boot logs](#capturing-kernel-boot-logs)
# Warning
@@ -103,7 +103,7 @@ The build will create the following:
You can check if your system is capable of creating a Kata Container by running the following:
```
$ sudo kata-runtime kata-check
$ sudo kata-runtime check
```
If your system is *not* able to run Kata Containers, the previous command will error out and explain why.
@@ -353,9 +353,12 @@ You MUST choose one of `alpine`, `centos`, `clearlinux`, `euleros`, and `fedora`
>
> - Check the [compatibility matrix](../tools/osbuilder/README.md#platform-distro-compatibility-matrix) before creating rootfs.
Optionally, add your custom agent binary to the rootfs with the following:
Optionally, add your custom agent binary to the rootfs with the following, `LIBC` default is `musl`, if `ARCH` is `ppc64le`, should set the `LIBC=gnu` and `ARCH=powerpc64le`:
```
$ sudo install -o root -g root -m 0550 -T ../../agent/kata-agent ${ROOTFS_DIR}/sbin/init
$ export ARCH=$(shell uname -m)
$ [ ${ARCH} == "ppc64le" ] && export LIBC=gnu || export LIBC=musl
$ [ ${ARCH} == "ppc64le" ] && export ARCH=powerpc64le
$ sudo install -o root -g root -m 0550 -T ../../../src/agent/target/$(ARCH)-unknown-linux-$(LIBC)/release/kata-agent ${ROOTFS_DIR}/sbin/init
```
### Build an initrd image
@@ -381,30 +384,31 @@ You can build and install the guest kernel image as shown [here](../tools/packag
# Install a hypervisor
When setting up Kata using a [packaged installation method](install/README.md#installing-on-a-linux-system), the
`QEMU` VMM is installed automatically. Cloud-Hypervisor and Firecracker VMMs are available from the [release tarballs](https://github.com/kata-containers/kata-containers/releases), as well as through [`kata-deploy`](../tools/packaging/kata-deploy/README.md).
You may choose to manually build your VMM/hypervisor.
When setting up Kata using a [packaged installation method](install/README.md#installing-on-a-linux-system), the `qemu-lite` hypervisor is installed automatically. For other installation methods, you will need to manually install a suitable hypervisor.
## Build a custom QEMU
Kata Containers makes use of upstream QEMU branch. The exact version
and repository utilized can be found by looking at the [versions file](../versions.yaml).
Your QEMU directory need to be prepared with source code. Alternatively, you can use the [Kata containers QEMU](https://github.com/kata-containers/qemu/tree/master) and checkout the recommended branch:
Kata often utilizes patches for not-yet-upstream fixes for components,
including QEMU. These can be found in the [packaging/QEMU directory](../tools/packaging/qemu/patches)
```
$ go get -d github.com/kata-containers/qemu
$ qemu_branch=$(grep qemu-lite- ${GOPATH}/src/github.com/kata-containers/kata-containers/versions.yaml | cut -d '"' -f2)
$ cd ${GOPATH}/src/github.com/kata-containers/qemu
$ git checkout -b $qemu_branch remotes/origin/$qemu_branch
$ your_qemu_directory=${GOPATH}/src/github.com/kata-containers/qemu
```
To build a version of QEMU using the same options as the default `qemu-lite` version , you could use the `configure-hypervisor.sh` script:
To build utilizing the same options as Kata, you should make use of the `configure-hypervisor.sh` script. For example:
```
$ go get -d github.com/kata-containers/kata-containers/tools/packaging
$ cd $your_qemu_directory
$ ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/scripts/configure-hypervisor.sh qemu > kata.cfg
$ ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/scripts/configure-hypervisor.sh kata-qemu > kata.cfg
$ eval ./configure "$(cat kata.cfg)"
$ make -j $(nproc)
$ sudo -E make install
```
See the [static-build script for QEMU](../tools/packaging/static-build/qemu/build-static-qemu.sh) for a reference on how to get, setup, configure and build QEMU for Kata.
### Build a custom QEMU for aarch64/arm64 - REQUIRED
> **Note:**
>
@@ -472,17 +476,6 @@ debug_console_enabled = true
This will pass `agent.debug_console agent.debug_console_vport=1026` to agent as kernel parameters, and sandboxes created using this parameters will start a shell in guest if new connection is accept from VSOCK.
#### Start `kata-monitor`
The `kata-runtime exec` command needs `kata-monitor` to get the sandbox's `vsock` address to connect to, first start `kata-monitor`.
```
$ sudo kata-monitor
```
`kata-monitor` will serve at `localhost:8090` by default.
#### Connect to debug console
Command `kata-runtime exec` is used to connect to the debug console.
@@ -497,6 +490,10 @@ bash-4.2# exit
exit
```
`kata-runtime exec` has a command-line option `runtime-namespace`, which is used to specify under which [runtime namespace](https://github.com/containerd/containerd/blob/master/docs/namespaces.md) the particular pod was created. By default, it is set to `k8s.io` and works for containerd when configured
with Kubernetes. For CRI-O, the namespace should set to `default` explicitly. This should not be confused with [Kubernetes namespaces](https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/).
For other CRI-runtimes and configurations, you may need to set the namespace utilizing the `runtime-namespace` option.
If you want to access guest OS through a traditional way, see [Traditional debug console setup)](#traditional-debug-console-setup).
### Traditional debug console setup
@@ -616,11 +613,8 @@ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.debug_cons
> **Note** Ports 1024 and 1025 are reserved for communication with the agent
> and gathering of agent logs respectively.
##### Connecting to the debug console
Next, connect to the debug console. The VSOCKS paths vary slightly between each
VMM solution.
Next, connect to the debug console. The VSOCKS paths vary slightly between
cloud-hypervisor and firecracker.
In case of cloud-hypervisor, connect to the `vsock` as shown:
```
$ sudo su -c 'cd /var/run/vc/vm/{sandbox_id}/root/ && socat stdin unix-connect:clh.sock'
@@ -637,12 +631,6 @@ CONNECT 1026
**Note**: You need to press the `RETURN` key to see the shell prompt.
For QEMU, connect to the `vsock` as shown:
```
$ sudo su -c 'cd /var/run/vc/vm/{sandbox_id} && socat "stdin,raw,echo=0,escape=0x11" "unix-connect:console.sock"
```
To disconnect from the virtual machine, type `CONTROL+q` (hold down the
`CONTROL` key and press `q`).

View File

@@ -22,4 +22,4 @@ licensing and allows automated tooling to check the license of individual
files.
This SPDX licence identifier requirement is enforced by the
[CI (Continuous Integration) system](https://github.com/kata-containers/tests/blob/master/.ci/static-checks.sh).
[CI (Continuous Integration) system](https://github.com/kata-containers/tests/blob/main/.ci/static-checks.sh).

View File

@@ -19,8 +19,6 @@
* [Support for joining an existing VM network](#support-for-joining-an-existing-vm-network)
* [docker --net=host](#docker---nethost)
* [docker run --link](#docker-run---link)
* [Storage limitations](#storage-limitations)
* [Kubernetes `volumeMounts.subPaths`](#kubernetes-volumemountssubpaths)
* [Host resource sharing](#host-resource-sharing)
* [docker run --privileged](#docker-run---privileged)
* [Miscellaneous](#miscellaneous)
@@ -218,17 +216,6 @@ Equivalent functionality can be achieved with the newer docker networking comman
See more documentation at
[docs.docker.com](https://docs.docker.com/engine/userguide/networking/default_network/dockerlinks/).
## Storage limitations
### Kubernetes `volumeMounts.subPaths`
Kubernetes `volumeMount.subPath` is not supported by Kata Containers at the
moment.
See [this issue](https://github.com/kata-containers/runtime/issues/2812) for more details.
[Another issue](https://github.com/kata-containers/kata-containers/issues/1728) focuses on the case of `emptyDir`.
## Host resource sharing
### docker run --privileged
@@ -237,7 +224,7 @@ Privileged support in Kata is essentially different from `runc` containers.
Kata does support `docker run --privileged` command, but in this case full access
to the guest VM is provided in addition to some host access.
The container runs with elevated capabilities within the guest and is granted
The container runs with elevated capabilities within the guest and is granted
access to guest devices instead of the host devices.
This is also true with using `securityContext privileged=true` with Kubernetes.

View File

@@ -40,6 +40,7 @@ See the [howto documentation](how-to).
* [Intel QAT with Kata](./use-cases/using-Intel-QAT-and-kata.md)
* [VPP with Kata](./use-cases/using-vpp-and-kata.md)
* [SPDK vhost-user with Kata](./use-cases/using-SPDK-vhostuser-and-kata.md)
* [Intel SGX with Kata](./use-cases/using-Intel-SGX-and-kata.md)
## Developer Guide

View File

@@ -48,10 +48,10 @@ Alternatively, if you are using Kata Containers version 1.12.0 or newer, you
can check for newer releases using the command line:
```bash
$ kata-runtime kata-check --check-version-only
$ kata-runtime check --check-version-only
```
There are various other related options. Run `kata-runtime kata-check --help`
There are various other related options. Run `kata-runtime check --help`
for further details.
# Configuration changes

View File

@@ -58,7 +58,7 @@ to go through the VSOCK interface exported by QEMU.
The container workload, that is, the actual OCI bundle rootfs, is exported from the
host to the virtual machine. In the case where a block-based graph driver is
configured, `virtio-scsi` will be used. In all other cases a 9pfs VIRTIO mount point
configured, `virtio-scsi` will be used. In all other cases a `virtio-fs` VIRTIO mount point
will be used. `kata-agent` uses this mount point as the root filesystem for the
container processes.
@@ -137,7 +137,7 @@ The runtime uses a TOML format configuration file called `configuration.toml`. B
The actual configuration file paths can be determined by running:
```
$ kata-runtime --kata-show-default-config-paths
$ kata-runtime --show-default-config-paths
```
Most users will not need to modify the configuration file.

View File

@@ -3,7 +3,6 @@ To fulfill the [Kata design requirements](kata-design-requirements.md), and base
- Sandbox based top API
- Storage and network hotplug API
- Plugin frameworks for external proprietary Kata runtime extensions
- Built-in shim and proxy types and capabilities
## Sandbox Based API
### Sandbox Management API
@@ -57,7 +56,7 @@ To fulfill the [Kata design requirements](kata-design-requirements.md), and base
|Name|Description|
|---|---|
|`sandbox.GetOOMEvent()`| Monitor the OOM events that occur in the sandbox..|
|`sandbox.UpdateRuntimeMetrics()`| Update the shim/`hypervisor`'s metrics of the running sandbox.|
|`sandbox.UpdateRuntimeMetrics()`| Update the `shim/hypervisor` metrics of the running sandbox.|
|`sandbox.GetAgentMetrics()`| Get metrics of the agent and the guest in the running sandbox.|
## Plugin framework for external proprietary Kata runtime extensions
@@ -99,32 +98,3 @@ Built-in implementations include:
### Sandbox Connection Plugin Workflow
![Sandbox Connection Plugin Workflow](https://raw.githubusercontent.com/bergwolf/raw-contents/master/kata/Sandbox-Connection.png "Sandbox Connection Plugin Workflow")
## Built-in Shim and Proxy Types and Capabilities
### Built-in shim/proxy sandbox configurations
- Supported shim configurations:
|Name|Description|
|---|---|
|`noopshim`|Do not start any shim process.|
|`ccshim`| Start the cc-shim binary.|
|`katashim`| Start the `kata-shim` binary.|
|`katashimbuiltin`|No standalone shim process but shim functionality APIs are exported.|
- Supported proxy configurations:
|Name|Description|
|---|---|
|`noopProxy`| a dummy proxy implementation of the proxy interface, only used for testing purpose.|
|`noProxy`|generic implementation for any case where no actual proxy is needed.|
|`ccProxy`|run `ccProxy` to proxy between runtime and agent.|
|`kataProxy`|run `kata-proxy` to translate Yamux connections between runtime and Kata agent. |
|`kataProxyBuiltin`| no standalone proxy process and connect to Kata agent with internal Yamux translation.|
### Built-in Shim Capability
Built-in shim capability is implemented by removing standalone shim process, and
supporting the shim related APIs.
### Built-in Proxy Capability
Built-in proxy capability is achieved by removing standalone proxy process, and
connecting to Kata agent with a custom gRPC dialer that is internal Yamux translation.
The behavior is enabled when proxy is configured as `kataProxyBuiltin`.

View File

@@ -22,10 +22,10 @@ the multiple hypervisors and virtual machine monitors that Kata supports.
## Mapping container concepts to virtual machine technologies
A typical deployment of Kata Containers will be in Kubernetes by way of a Container Runtime Interface (CRI) implementation. On every node,
Kubelet will interact with a CRI implementor (such as containerd or CRI-O), which will in turn interface with Kata Containers (an OCI based runtime).
Kubelet will interact with a CRI implementer (such as containerd or CRI-O), which will in turn interface with Kata Containers (an OCI based runtime).
The CRI API, as defined at the [Kubernetes CRI-API repo](https://github.com/kubernetes/cri-api/), implies a few constructs being supported by the
CRI implementation, and ultimately in Kata Containers. In order to support the full [API](https://github.com/kubernetes/cri-api/blob/a6f63f369f6d50e9d0886f2eda63d585fbd1ab6a/pkg/apis/runtime/v1alpha2/api.proto#L34-L110) with the CRI-implementor, Kata must provide the following constructs:
CRI implementation, and ultimately in Kata Containers. In order to support the full [API](https://github.com/kubernetes/cri-api/blob/a6f63f369f6d50e9d0886f2eda63d585fbd1ab6a/pkg/apis/runtime/v1alpha2/api.proto#L34-L110) with the CRI-implementer, Kata must provide the following constructs:
![API to construct](./arch-images/api-to-construct.png)
@@ -41,14 +41,9 @@ Each hypervisor or VMM varies on how or if it handles each of these.
## Kata Containers Hypervisor and VMM support
Kata Containers is designed to support multiple virtual machine monitors (VMMs) and hypervisors.
Kata Containers supports:
- [ACRN hypervisor](https://projectacrn.org/)
- [Cloud Hypervisor](https://github.com/cloud-hypervisor/cloud-hypervisor)/[KVM](https://www.linux-kvm.org/page/Main_Page)
- [Firecracker](https://github.com/firecracker-microvm/firecracker)/KVM
- [QEMU](http://www.qemu-project.org/)/KVM
Kata Containers [supports multiple hypervisors](../hypervisors.md).
Which configuration to use will depend on the end user's requirements. Details of each solution and a summary are provided below.
Details of each solution and a summary are provided below.
### QEMU/KVM
@@ -62,7 +57,7 @@ be changed by editing the runtime [`configuration`](./architecture.md/#configura
Devices and features used:
- virtio VSOCK or virtio serial
- virtio block or virtio SCSI
- virtio net
- [virtio net](https://www.redhat.com/en/virtio-networking-series)
- virtio fs or virtio 9p (recommend: virtio fs)
- VFIO
- hotplug
@@ -105,25 +100,34 @@ Devices used:
### Cloud Hypervisor/KVM
Cloud Hypervisor, based on [rust-VMM](https://github.com/rust-vmm), is designed to have a lighter footprint and attack surface. For Kata Containers,
relative to Firecracker, the Cloud Hypervisor configuration provides better compatibility at the expense of exposing additional devices: file system
sharing and direct device assignment. As of the 1.10 release of Kata Containers, Cloud Hypervisor does not support device hotplug, and as a result
does not support updating container resources after boot, or utilizing block based volumes. While Cloud Hypervisor does support VFIO, Kata is still adding
this support. As of 1.10, Kata does not support block based volumes or direct device assignment. See [Cloud Hypervisor device support documentation](https://github.com/cloud-hypervisor/cloud-hypervisor/blob/master/docs/device_model.md)
for more details on Cloud Hypervisor.
[Cloud Hypervisor](https://github.com/cloud-hypervisor/cloud-hypervisor), based
on [rust-vmm](https://github.com/rust-vmm), is designed to have a
lighter footprint and smaller attack surface for running modern cloud
workloads. Kata Containers with Cloud
Hypervisor provides mostly complete compatibility with Kubernetes
comparable to the QEMU configuration. As of the 1.12 and 2.0.0 release
of Kata Containers, the Cloud Hypervisor configuration supports both CPU
and memory resize, device hotplug (disk and VFIO), file-system sharing through virtio-fs,
block-based volumes, booting from VM images backed by pmem device, and
fine-grained seccomp filters for each VMM thread (e.g. all virtio
device worker threads). Please check [this GitHub Project](https://github.com/orgs/kata-containers/projects/21)
for details of ongoing integration efforts.
Devices used:
- virtio VSOCK
Devices and features used:
- virtio VSOCK or virtio serial
- virtio block
- virtio net
- virtio fs
- virtio pmem
- VFIO
- hotplug
- seccomp filters
- [HTTP OpenAPI](https://github.com/cloud-hypervisor/cloud-hypervisor/blob/master/vmm/src/api/openapi/cloud-hypervisor.yaml)
### Summary
| Solution | release introduced | brief summary |
|-|-|-|
| QEMU | 1.0 | upstream QEMU, with support for hotplug and filesystem sharing |
| NEMU | 1.4 | Deprecated, removed as of 1.10 release. Slimmed down fork of QEMU, with experimental support of virtio-fs |
| Cloud Hypervisor | 1.10 | upstream Cloud Hypervisor with rich feature support, e.g. hotplug, VFIO and FS sharing|
| Firecracker | 1.5 | upstream Firecracker, rust-VMM based, no VFIO, no FS sharing, no memory/CPU hotplug |
| QEMU-virtio-fs | 1.7 | upstream QEMU with support for virtio-fs. Will be removed once virtio-fs lands in upstream QEMU |
| Cloud Hypervisor | 1.10 | rust-VMM based, includes VFIO and FS sharing through virtio-fs, no hotplug |
| QEMU | 1.0 | upstream QEMU, with support for hotplug and filesystem sharing |

View File

@@ -185,7 +185,7 @@ in Kibana:
![Kata tags in EFK](./images/efk_syslog_entry_detail.png).
We can however further sub-parse the Kata entries using the
[Fluentd plugins](https://docs.fluentbit.io/manual/v/1.3/parser/logfmt) that will parse
[Fluentd plugins](https://docs.fluentbit.io/manual/pipeline/parsers/logfmt) that will parse
`logfmt` formatted data. We can utilise these to parse the sub-fields using a Fluentd filter
section. At the same time, we will prefix the new fields with `kata_` to make it clear where
they have come from:
@@ -222,7 +222,7 @@ test to check the parsing works. The resulting output from Fluentd is:
"_COMM":"kata-runtime",
"_EXE":"/opt/kata/bin/kata-runtime",
"SYSLOG_TIMESTAMP":"Feb 21 10:31:27 ",
"_CMDLINE":"/opt/kata/bin/kata-runtime --kata-config /opt/kata/share/defaults/kata-containers/configuration-qemu.toml --root /run/runc state 7cdd31660d8705facdadeb8598d2c0bd008e8142c54e3b3069abd392c8d58997",
"_CMDLINE":"/opt/kata/bin/kata-runtime --config /opt/kata/share/defaults/kata-containers/configuration-qemu.toml --root /run/runc state 7cdd31660d8705facdadeb8598d2c0bd008e8142c54e3b3069abd392c8d58997",
"SYSLOG_PID":"14314",
"_PID":"14314",
"MESSAGE":"time=\"2020-02-21T10:31:27.810781647Z\" level=info msg=\"release sandbox\" arch=amd64 command=state container=7cdd31660d8705facdadeb8598d2c0bd008e8142c54e3b3069abd392c8d58997 name=kata-runtime pid=14314 sandbox=1c3e77cad66aa2b6d8cc846f818370f79cb0104c0b840f67d0f502fd6562b68c source=virtcontainers subsystem=sandbox",
@@ -281,7 +281,7 @@ own file (rather than into the system journal).
```bash
#!/bin/bash
/opt/kata/bin/kata-runtime --kata-config "/opt/kata/share/defaults/kata-containers/configuration-qemu.toml" --log-format=json --log=/var/log/kata-runtime.log $@
/opt/kata/bin/kata-runtime --config "/opt/kata/share/defaults/kata-containers/configuration-qemu.toml" --log-format=json --log=/var/log/kata-runtime.log $@
```
And then we'll add the Fluentd config section to parse that file. Note, we inform the parser that Kata is

View File

@@ -56,8 +56,9 @@ There are some limitations with this approach:
As was mentioned above, not all containers need the same modules; therefore, using
the configuration file for specifying the list of kernel modules per [POD][3] can
be a pain. Unlike the configuration file, annotations provide a way to specify
custom configurations per POD.
be a pain.
Unlike the configuration file, [annotations](how-to-set-sandbox-config-kata.md)
provide a way to specify custom configurations per POD.
The list of kernel modules and parameters can be set using the annotation
`io.katacontainers.config.agent.kernel_modules` as a semicolon separated
@@ -101,7 +102,7 @@ spec:
tty: true
```
> **Note**: To pass annotations to Kata containers, [`CRI` must be configured correctly](how-to-set-sandbox-config-kata.md#cri-configuration)
> **Note**: To pass annotations to Kata containers, [CRI-O must be configured correctly](how-to-set-sandbox-config-kata.md#cri-o-configuration)
[1]: ../../src/runtime
[2]: ../../src/agent

View File

@@ -34,7 +34,7 @@ Also you should ensure that `kubectl` is working correctly.
Start Prometheus by utilizing our sample manifest:
```
$ kubectl apply -f https://raw.githubusercontent.com/kata-containers/kata-containers/2.0-dev/docs/how-to/data/prometheus.yml
$ kubectl apply -f https://raw.githubusercontent.com/kata-containers/kata-containers/main/docs/how-to/data/prometheus.yml
```
This will create a new namespace, `prometheus`, and create the following resources:
@@ -60,7 +60,7 @@ go_gc_duration_seconds{quantile="0.75"} 0.000229911
`kata-monitor` can be started on the cluster as follows:
```
$ kubectl apply -f https://raw.githubusercontent.com/kata-containers/kata-containers/2.0-dev/docs/how-to/data/kata-monitor-daemonset.yml
$ kubectl apply -f https://raw.githubusercontent.com/kata-containers/kata-containers/main/docs/how-to/data/kata-monitor-daemonset.yml
```
This will create a new namespace `kata-system` and a `daemonset` in it.
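To confirm the `daemonset` pods are up before moving on, a quick check along these lines should work (the `kata-system` namespace name comes from the manifest above; adjust it if you changed the manifest):

```bash
$ kubectl -n kata-system get daemonsets,pods -o wide
```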
@@ -73,7 +73,7 @@ Once the `daemonset` is running, Prometheus should discover `kata-monitor` as a
Run this command to run Grafana in Kubernetes:
```
$ kubectl apply -f https://raw.githubusercontent.com/kata-containers/kata-containers/2.0-dev/docs/how-to/data/grafana.yml
$ kubectl apply -f https://raw.githubusercontent.com/kata-containers/kata-containers/main/docs/how-to/data/grafana.yml
```
This will create deployment and service for Grafana under namespace `prometheus`.
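If you want to reach the Grafana UI from outside the cluster without exposing a service publicly, a port-forward is one option. The service name `grafana` and port `3000` below are assumptions based on a typical Grafana deployment (the `curl` example later in this guide also uses port `3000`); adjust them to match the manifest:

```bash
$ kubectl -n prometheus port-forward service/grafana 3000:3000
```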
@@ -99,7 +99,7 @@ You can import this dashboard using Grafana UI, or using `curl` command in conso
$ curl -XPOST -i localhost:3000/api/dashboards/import \
-u admin:admin \
-H "Content-Type: application/json" \
-d "{\"dashboard\":$(curl -sL https://raw.githubusercontent.com/kata-containers/kata-containers/2.0-dev/docs/how-to/data/dashboard.json )}"
-d "{\"dashboard\":$(curl -sL https://raw.githubusercontent.com/kata-containers/kata-containers/main/docs/how-to/data/dashboard.json )}"
```
## References

View File

@@ -3,6 +3,11 @@
Kata Containers gives users freedom to customize at the per-pod level by setting
a wide range of Kata-specific annotations in the pod specification.
Some annotations may be [restricted](#restricted-annotations) by the
configuration file for security reasons, notably annotations that could lead the
runtime to execute programs on the host. Such annotations are marked with _(R)_ in
the tables below.
# Kata Configuration Annotations
There are several kinds of Kata configurations and they are listed below.
@@ -26,6 +31,7 @@ There are several kinds of Kata configurations and they are listed below.
| Key | Value Type | Comments |
|-------| ----- | ----- |
| `io.katacontainers.config.agent.enable_tracing` | `boolean` | enable tracing for the agent |
| `io.katacontainers.config.agent.container_pipe_size` | uint32 | specify the size of the std(in/out) pipes created for containers |
| `io.katacontainers.config.agent.kernel_modules` | string | the list of kernel modules and their parameters that will be loaded in the guest kernel. Semicolon separated list of kernel modules and their parameters. These modules will be loaded in the guest kernel using `modprobe`(8). E.g., `e1000e InterruptThrottleRate=3000,3000,3000 EEE=1; i915 enable_ppgtt=0` |
| `io.katacontainers.config.agent.trace_mode` | string | the trace mode for the agent |
| `io.katacontainers.config.agent.trace_type` | string | the trace type for the agent |
@@ -38,17 +44,24 @@ There are several kinds of Kata configurations and they are listed below.
| `io.katacontainers.config.hypervisor.block_device_cache_noflush` | `boolean` | Denotes whether flush requests for the device are ignored |
| `io.katacontainers.config.hypervisor.block_device_cache_set` | `boolean` | cache-related options will be set to block devices or not |
| `io.katacontainers.config.hypervisor.block_device_driver` | string | the driver to be used for block device, valid values are `virtio-blk`, `virtio-scsi`, `nvdimm`|
| `io.katacontainers.config.hypervisor.cpu_features` | `string` | Comma-separated list of CPU features to pass to the CPU (QEMU) |
| `io.katacontainers.config.hypervisor.ctlpath` (R) | `string` | Path to the `acrnctl` binary for the ACRN hypervisor |
| `io.katacontainers.config.hypervisor.default_max_vcpus` | uint32| the maximum number of vCPUs allocated for the VM by the hypervisor |
| `io.katacontainers.config.hypervisor.default_memory` | uint32| the memory assigned for a VM by the hypervisor in `MiB` |
| `io.katacontainers.config.hypervisor.default_vcpus` | uint32| the default vCPUs assigned for a VM by the hypervisor |
| `io.katacontainers.config.hypervisor.disable_block_device_use` | `boolean` | disallow a block device from being used |
| `io.katacontainers.config.hypervisor.disable_image_nvdimm` | `boolean` | specify if a `nvdimm` device should be used as rootfs for the guest (QEMU) |
| `io.katacontainers.config.hypervisor.disable_vhost_net` | `boolean` | specify if `vhost-net` is not available on the host |
| `io.katacontainers.config.hypervisor.enable_hugepages` | `boolean` | if the memory should be `pre-allocated` from huge pages |
| `io.katacontainers.config.hypervisor.enable_iommu_platform` | `boolean` | enable `iommu` on CCW devices (QEMU s390x) |
| `io.katacontainers.config.hypervisor.enable_iommu` | `boolean` | enable `iommu` on Q35 (QEMU x86_64) |
| `io.katacontainers.config.hypervisor.enable_iothreads` | `boolean`| enable IO to be processed in a separate thread. Supported currently for virtio-`scsi` driver |
| `io.katacontainers.config.hypervisor.enable_mem_prealloc` | `boolean` | the memory space used for `nvdimm` device by the hypervisor |
| `io.katacontainers.config.hypervisor.enable_swap` | `boolean` | enable swap of VM memory |
| `io.katacontainers.config.hypervisor.enable_vhost_user_store` | `boolean` | enable vhost-user storage device (QEMU) |
| `io.katacontainers.config.hypervisor.enable_virtio_mem` | `boolean` | enable virtio-mem (QEMU) |
| `io.katacontainers.config.hypervisor.entropy_source` | string| the path to a host source of entropy (`/dev/random`, `/dev/urandom` or real hardware RNG device) |
| `io.katacontainers.config.hypervisor.file_mem_backend` | string | file based memory backend root directory |
| `io.katacontainers.config.hypervisor.file_mem_backend` (R) | string | file based memory backend root directory |
| `io.katacontainers.config.hypervisor.firmware_hash` | string | container firmware SHA-512 hash value |
| `io.katacontainers.config.hypervisor.firmware` | string | the guest firmware that will run the container VM |
| `io.katacontainers.config.hypervisor.guest_hook_path` | string | the path within the VM that will be used for drop in hooks |
@@ -59,7 +72,7 @@ There are several kinds of Kata configurations and they are listed below.
| `io.katacontainers.config.hypervisor.initrd_hash` | string | container guest initrd SHA-512 hash value |
| `io.katacontainers.config.hypervisor.initrd` | string | the guest initrd image that will run in the container VM |
| `io.katacontainers.config.hypervisor.jailer_hash` | string | container jailer SHA-512 hash value |
| `io.katacontainers.config.hypervisor.jailer_path` | string | the jailer that will constrain the container VM |
| `io.katacontainers.config.hypervisor.jailer_path` (R) | string | the jailer that will constrain the container VM |
| `io.katacontainers.config.hypervisor.kernel_hash` | string | container kernel image SHA-512 hash value |
| `io.katacontainers.config.hypervisor.kernel_params` | string | additional guest kernel parameters |
| `io.katacontainers.config.hypervisor.kernel` | string | the kernel used to boot the container VM |
@@ -69,19 +82,19 @@ There are several kinds of Kata configurations and they are listed below.
| `io.katacontainers.config.hypervisor.memory_slots` | uint32| the memory slots assigned to the VM by the hypervisor |
| `io.katacontainers.config.hypervisor.msize_9p` | uint32 | the `msize` for 9p shares |
| `io.katacontainers.config.hypervisor.path` | string | the hypervisor that will run the container VM |
| `io.katacontainers.config.hypervisor.pcie_root_port` | uint32 | specify the number of PCIe Root Port devices. The PCIe Root Port device is used to hot-plug a PCIe device (QEMU) |
| `io.katacontainers.config.hypervisor.shared_fs` | string | the shared file system type, either `virtio-9p` or `virtio-fs` |
| `io.katacontainers.config.hypervisor.use_vsock` | `boolean` | specify use of `vsock` for agent communication |
| `io.katacontainers.config.hypervisor.vhost_user_store_path` (R) | `string` | specify the directory path where vhost-user devices related folders, sockets and device nodes should be (QEMU) |
| `io.katacontainers.config.hypervisor.virtio_fs_cache_size` | uint32 | virtio-fs DAX cache size in `MiB` |
| `io.katacontainers.config.hypervisor.virtio_fs_cache` | string | the cache mode for virtio-fs, valid values are `always`, `auto` and `none` |
| `io.katacontainers.config.hypervisor.virtio_fs_daemon` | string | virtio-fs `vhost-user` daemon path |
| `io.katacontainers.config.hypervisor.virtio_fs_extra_args` | string | extra options passed to `virtiofs` daemon |
# CRI Configuration
# CRI-O Configuration
In the case of CRI-O, all annotations specified in the pod spec are passed down to Kata.
# containerd Configuration
For containerd, annotations specified in the pod spec are passed down to Kata
starting with version `1.3.0` of containerd. Additionally, extra configuration is
needed for containerd, by providing a `pod_annotations` field in the containerd config
@@ -94,14 +107,16 @@ for passing annotations to Kata from containerd:
$ cat /etc/containerd/config
....
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.kata]
runtime_type = "io.containerd.kata.v2"
[plugins.cri.containerd.runtimes.kata]
runtime_type = "io.containerd.runc.v1"
pod_annotations = ["io.katacontainers.*"]
[plugins.cri.containerd.runtimes.kata.options]
BinaryName = "/usr/bin/kata-runtime"
....
```
Additional documentation on the above configuration can be found in the
[containerd docs](https://github.com/containerd/cri/blob/8d5a8355d07783ba2f8f451209f6bdcc7c412346/docs/config.md).
# Example - Using annotations
@@ -159,3 +174,31 @@ spec:
stdin: true
tty: true
```
# Restricted annotations
Some annotations are _restricted_, meaning that the configuration file specifies
the acceptable values. Currently, only hypervisor annotations are restricted,
for security reasons, with the intent to control which binaries the Kata
Containers runtime will launch on your behalf.
The configuration file validates the annotation _name_ as well as the annotation
_value_.
The acceptable annotation names are defined by the `enable_annotations` entry in
the configuration file.
For restricted annotations, an additional configuration entry provides a list of
acceptable values. Since most restricted annotations are intended to control
which binaries the runtime can execute, the valid value is generally provided by
a shell pattern, as defined by `glob(3)`. The table below provides the name of
the configuration entry, and an illustrative snippet follows the table:
| Key | Config file entry | Comments |
|-------| ----- | ----- |
| `ctlpath` | `valid_ctlpaths` | Valid paths for `acrnctl` binary |
| `file_mem_backend` | `valid_file_mem_backends` | Valid locations for the file-based memory backend root directory |
| `jailer_path` | `valid_jailer_paths`| Valid paths for the jailer constraining the container VM (Firecracker) |
| `path` | `valid_hypervisor_paths` | Valid hypervisors to run the container VM |
| `vhost_user_store_path` | `valid_vhost_user_store_paths` | Valid paths for vhost-user related files|
| `virtio_fs_daemon` | `valid_virtio_fs_daemon_paths` | Valid paths for the `virtiofsd` daemon |
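As an illustration only, enabling the `virtio_fs_daemon` annotation and constraining it to a couple of example paths could look like the following in the hypervisor section of the configuration file (the paths are assumptions; use values that match your installation):

```toml
# Illustrative values only; these entries belong under the [hypervisor.*]
# section for the VMM you are using.
enable_annotations = ["virtio_fs_daemon"]
valid_virtio_fs_daemon_paths = ["/usr/libexec/virtiofsd", "/opt/kata/libexec/virtiofsd"]
```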

View File

@@ -7,10 +7,9 @@
* [Configure Kubelet to use containerd](#configure-kubelet-to-use-containerd)
* [Configure HTTP proxy - OPTIONAL](#configure-http-proxy---optional)
* [Start Kubernetes](#start-kubernetes)
* [Configure Pod Network](#configure-pod-network)
* [Install a Pod Network](#install-a-pod-network)
* [Allow pods to run in the master node](#allow-pods-to-run-in-the-master-node)
* [Create runtime class for Kata Containers](#create-runtime-class-for-kata-containers)
* [Run pod in Kata Containers](#run-pod-in-kata-containers)
* [Create an untrusted pod using Kata Containers](#create-an-untrusted-pod-using-kata-containers)
* [Delete created pod](#delete-created-pod)
This document describes how to set up a single-machine Kubernetes (k8s) cluster.
@@ -19,6 +18,9 @@ The Kubernetes cluster will use the
[CRI containerd plugin](https://github.com/containerd/cri) and
[Kata Containers](https://katacontainers.io) to launch untrusted workloads.
For Kata Containers 1.5.0-rc2 and above, we will use `containerd-shim-kata-v2` (`shimv2` for short in this documentation)
to launch Kata Containers. For previous versions of Kata Containers, Pods are launched with `kata-runtime`.
## Requirements
- Kubernetes, Kubelet, `kubeadm`
@@ -123,33 +125,43 @@ $ sudo systemctl daemon-reload
$ sudo -E kubectl get pods
```
## Configure Pod Network
## Install a Pod Network
A pod network plugin is needed to allow pods to communicate with each other.
You can find more about CNI plugins from the [Creating a cluster with `kubeadm`](https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/#instructions) guide.
By default the CNI plugin binaries are installed under `/opt/cni/bin` (in the `kubernetes-cni` package), so you only need to create a configuration file for the CNI plugin.
- Install the `flannel` plugin by following the
[Using `kubeadm` to Create a Cluster](https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/#instructions)
guide, starting from the **Installing a pod network** section.
- Create a pod network using flannel
> **Note:** There is no known way to determine programmatically the best version (commit) to use.
> See https://github.com/coreos/flannel/issues/995.
```bash
$ sudo -E mkdir -p /etc/cni/net.d
$ sudo -E kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
```
$ sudo -E cat > /etc/cni/net.d/10-mynet.conf <<EOF
{
"cniVersion": "0.2.0",
"name": "mynet",
"type": "bridge",
"bridge": "cni0",
"isGateway": true,
"ipMasq": true,
"ipam": {
"type": "host-local",
"subnet": "172.19.0.0/24",
"routes": [
{ "dst": "0.0.0.0/0" }
]
}
}
EOF
- Wait for the pod network to become available
```bash
# number of seconds to wait for pod network to become available
$ timeout_dns=420
$ while [ "$timeout_dns" -gt 0 ]; do
if sudo -E kubectl get pods --all-namespaces | grep dns | grep Running; then
break
fi
sleep 1s
((timeout_dns--))
done
```
- Check the pod network is running
```bash
$ sudo -E kubectl get pods --all-namespaces | grep dns | grep Running && echo "OK" || ( echo "FAIL" && false )
```
## Allow pods to run in the master node
@@ -160,38 +172,24 @@ By default, the cluster will not schedule pods in the master node. To enable mas
$ sudo -E kubectl taint nodes --all node-role.kubernetes.io/master-
```
## Create runtime class for Kata Containers
## Create an untrusted pod using Kata Containers
By default, all pods are created with the default runtime configured in CRI containerd plugin.
From Kubernetes v1.12, users can use [`RuntimeClass`](https://kubernetes.io/docs/concepts/containers/runtime-class/#runtime-class) to specify a different runtime for Pods.
```bash
$ cat > runtime.yaml <<EOF
apiVersion: node.k8s.io/v1beta1
kind: RuntimeClass
metadata:
name: kata
handler: kata
EOF
$ sudo -E kubectl apply -f runtime.yaml
```
## Run pod in Kata Containers
If a pod has the `runtimeClassName` set to `kata`, the CRI plugin runs the pod with the
If a pod has the `io.kubernetes.cri.untrusted-workload` annotation set to `"true"`, the CRI plugin runs the pod with the
[Kata Containers runtime](../../src/runtime/README.md).
- Create a pod configuration that uses the Kata Containers runtime
- Create an untrusted pod configuration
```bash
$ cat << EOT | tee nginx-kata.yaml
$ cat << EOT | tee nginx-untrusted.yaml
apiVersion: v1
kind: Pod
metadata:
name: nginx-kata
name: nginx-untrusted
annotations:
io.kubernetes.cri.untrusted-workload: "true"
spec:
runtimeClassName: kata
containers:
- name: nginx
image: nginx
@@ -199,9 +197,9 @@ If a pod has the `runtimeClassName` set to `kata`, the CRI plugin runs the pod w
EOT
```
- Create the pod
- Create an untrusted pod
```bash
$ sudo -E kubectl apply -f nginx-kata.yaml
$ sudo -E kubectl apply -f nginx-untrusted.yaml
```
- Check pod is running
@@ -218,5 +216,5 @@ If a pod has the `runtimeClassName` set to `kata`, the CRI plugin runs the pod w
## Delete created pod
```bash
$ sudo -E kubectl delete -f nginx-kata.yaml
$ sudo -E kubectl delete -f nginx-untrusted.yaml
```

View File

@@ -91,7 +91,7 @@ To configure Kata Containers with ACRN, copy the generated `configuration-acrn.t
The following command shows full paths to the `configuration.toml` files that the runtime loads. It will use the first path that exists. (Please make sure the kernel and image paths are set correctly in the `configuration.toml` file)
```bash
$ sudo kata-runtime --kata-show-default-config-paths
$ sudo kata-runtime --show-default-config-paths
```
>**Warning:** Please offline CPUs using [this](offline_cpu.sh) script, else VM launches will fail.

View File

@@ -46,6 +46,7 @@ overridden by `/etc/kata-containers/configuration.toml` if provided) such that:
- `enable_template = true`
- `initrd =` is set
- `image =` option is commented out or removed
- `shared_fs` should not be `virtio-fs`
Then you can create a VM template for later use by calling
```

68
docs/hypervisors.md Normal file
View File

@@ -0,0 +1,68 @@
# Hypervisors
* [Hypervisors](#hypervisors)
* [Introduction](#introduction)
* [Types](#types)
* [Determine currently configured hypervisor](#determine-currently-configured-hypervisor)
* [Choose a Hypervisor](#choose-a-hypervisor)
## Introduction
Kata Containers supports multiple hypervisors. This document provides a very
high level overview of the available hypervisors, giving suggestions as to
which hypervisors you may wish to investigate further.
> **Note:**
>
> This document is not prescriptive or authoritative:
>
> - It is up to you to decide which hypervisors may be most appropriate for
> your use-case.
> - Refer to the official documentation for each hypervisor for further details.
## Types
Since each hypervisor offers different features and options, Kata Containers
provides a separate
[configuration file](/src/runtime/README.md#configuration)
for each. The configuration files contain comments explaining which options
are available, their default values and how each setting can be used.
> **Note:**
>
> The simplest way to switch between hypervisors is to create a symbolic link
> to the appropriate hypervisor-specific configuration file.
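For example, a minimal sketch of switching to the Cloud Hypervisor configuration might look like this; the exact source and destination paths are assumptions and depend on how Kata Containers was installed (distribution packages, `kata-deploy`, and so on):

```bash
$ sudo ln -sf /opt/kata/share/defaults/kata-containers/configuration-clh.toml \
    /etc/kata-containers/configuration.toml
```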
| Hypervisor | Written in | Architectures | Type | Configuration file |
|-|-|-|-|-|
[ACRN] | C | `x86_64` | Type 1 (bare metal) | `configuration-acrn.toml` |
[Cloud Hypervisor] | rust | `aarch64`, `x86_64` | Type 2 ([KVM]) | `configuration-clh.toml` |
[Firecracker] | rust | `aarch64`, `x86_64` | Type 2 ([KVM]) | `configuration-fc.toml` |
[QEMU] | C | all | Type 2 ([KVM]) | `configuration-qemu.toml` |
## Determine currently configured hypervisor
```bash
$ kata-runtime kata-env | awk -v RS= '/\[Hypervisor\]/' | grep Path
```
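If the output is not what you expect, another way to see which configuration file is being picked up is to list the configuration paths the runtime searches, a command also used elsewhere in these docs:

```bash
$ sudo kata-runtime --show-default-config-paths
```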
## Choose a Hypervisor
The table below provides a brief summary of some of the differences between
the hypervisors:
| Hypervisor | Summary | Features | Limitations | Container Creation speed | Memory density | Use cases | Comment |
|-|-|-|-|-|-|-|-|
[ACRN] | Safety critical and real-time workloads | | | excellent | excellent | Embedded and IOT systems | For advanced users |
[Cloud Hypervisor] | Low latency, small memory footprint, small attack surface | Minimal | | excellent | excellent | High performance modern cloud workloads | |
[Firecracker] | Very slimline | Extremely minimal | Doesn't support all device types | excellent | excellent | Serverless / FaaS | |
[QEMU] | Lots of features | Lots | | good | good | Good option for most users | All users |
For further details, see the [Virtualization in Kata Containers](design/virtualization.md) document and the official documentation for each hypervisor.
[ACRN]: https://projectacrn.org
[Cloud Hypervisor]: https://github.com/cloud-hypervisor/cloud-hypervisor
[Firecracker]: https://github.com/firecracker-microvm/firecracker
[KVM]: https://en.wikipedia.org/wiki/Kernel-based_Virtual_Machine
[QEMU]: http://www.qemu-project.org

View File

@@ -51,7 +51,6 @@ Kata packages are provided by official distribution repositories for:
|----------------------------------------------------------|--------------------------------------------------------------------------------|
| [CentOS](centos-installation-guide.md) | 8 |
| [Fedora](fedora-installation-guide.md) | 32, Rawhide |
| [openSUSE](opensuse-installation-guide.md) | [Leap 15.1](opensuse-leap-15.1-installation-guide.md)<br>Leap 15.2, Tumbleweed |
> **Note:**
>

View File

@@ -3,9 +3,15 @@
1. Install the Kata Containers components with the following commands:
```bash
$ sudo -E dnf install -y centos-release-advanced-virtualization
$ sudo -E dnf module disable -y virt:rhel
$ source /etc/os-release
$ cat <<EOF | sudo -E tee /etc/yum.repos.d/advanced-virt.repo
[advanced-virt]
name=Advanced Virtualization
baseurl=http://mirror.centos.org/\$contentdir/\$releasever/virt/\$basearch/advanced-virtualization
enabled=1
gpgcheck=1
skip_if_unavailable=1
EOF
$ cat <<EOF | sudo -E tee /etc/yum.repos.d/kata-containers.repo
[kata-containers]
name=Kata Containers
@@ -14,7 +20,8 @@
gpgcheck=1
skip_if_unavailable=1
EOF
$ sudo -E dnf install -y kata-containers
$ sudo -E dnf module disable -y virt:rhel
$ sudo -E dnf install -y kata-runtime
```
2. Decide which container manager to use and select the corresponding link that follows:

View File

@@ -18,7 +18,7 @@
>
> - If you decide to proceed and install a Kata Containers release, you can
> still check for the latest version of Kata Containers by running
> `kata-runtime kata-check --only-list-releases`.
> `kata-runtime check --only-list-releases`.
>
> - These instructions will not work for Fedora 31 and higher since those
> distribution versions only support cgroups version 2 by default. However,

View File

@@ -3,7 +3,7 @@
1. Install the Kata Containers components with the following commands:
```bash
$ sudo -E dnf -y install kata-containers
$ sudo -E dnf -y install kata-runtime
```
2. Decide which container manager to use and select the corresponding link that follows:

View File

@@ -6,7 +6,7 @@
* [Install Kata](#install-kata)
* [Create a Kata-enabled Image](#create-a-kata-enabled-image)
Kata Containers on Google Compute Engine (GCE) makes use of [nested virtualization](https://cloud.google.com/compute/docs/instances/enable-nested-virtualization-vm-instances). Most of the installation procedure is identical to that for Kata on your preferred distribution, but enabling nested virtualization currently requires extra steps on GCE. This guide walks you through creating an image and instance with nested virtualization enabled. Note that `kata-runtime kata-check` checks for nested virtualization, but does not fail if support is not found.
Kata Containers on Google Compute Engine (GCE) makes use of [nested virtualization](https://cloud.google.com/compute/docs/instances/enable-nested-virtualization-vm-instances). Most of the installation procedure is identical to that for Kata on your preferred distribution, but enabling nested virtualization currently requires extra steps on GCE. This guide walks you through creating an image and instance with nested virtualization enabled. Note that `kata-runtime check` checks for nested virtualization, but does not fail if support is not found.
As a prerequisite, this guide assumes an installed and configured instance of the [Google Cloud SDK](https://cloud.google.com/sdk/downloads). For a zero-configuration option, all of the commands below have been tested under [Google Cloud Shell](https://cloud.google.com/shell/) (as of Jun 2018). Verify your `gcloud` installation and configuration:

View File

@@ -54,7 +54,7 @@ to enable nested virtualization can be found on the
[KVM Nested Guests page](https://www.linux-kvm.org/page/Nested_Guests)
Alternatively, and for other architectures, the Kata Containers built in
[`kata-check`](../../src/runtime/README.md#hardware-requirements)
[`check`](../../src/runtime/README.md#hardware-requirements)
command can be used *inside Minikube* once Kata has been installed, to check for compatibility.
## Setting up Minikube

View File

@@ -1,10 +0,0 @@
# Install Kata Containers on openSUSE
1. Install the Kata Containers components with the following commands:
```bash
$ sudo -E zypper -n install katacontainers
```
2. Decide which container manager to use and select the corresponding link that follows:
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)

View File

@@ -1,11 +0,0 @@
# Install Kata Containers on openSUSE Leap 15.1
1. Install the Kata Containers components with the following commands:
```bash
$ sudo -E zypper addrepo --refresh "https://download.opensuse.org/repositories/devel:/kubic/openSUSE_Leap_15.1/devel:kubic.repo"
$ sudo -E zypper -n --gpg-auto-import-keys install katacontainers
```
2. Decide which container manager to use and select the corresponding link that follows:
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)

View File

@@ -2,6 +2,9 @@
* [Install Kata Containers](#install-kata-containers)
* [Configure Kata Containers](#configure-kata-containers)
* [Integration with non-compatible shim v2 Container Engines](#integration-with-non-compatible-shim-v2-container-engines)
* [Integration with Docker](#integration-with-docker)
* [Integration with Podman](#integration-with-podman)
* [Integration with shim v2 Container Engines](#integration-with-shim-v2-container-engines)
* [Remove Kata Containers snap package](#remove-kata-containers-snap-package)
@@ -11,7 +14,20 @@
Kata Containers can be installed in any Linux distribution that supports
[snapd](https://docs.snapcraft.io/installing-snapd).
Run the following command to install **Kata Containers**:
> NOTE: From Kata Containers 2.x, only the [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2)
> is supported; note that some container engines (`docker`, `podman`, etc.) may not
> be able to run Kata Containers 2.x.
Kata Containers 1.x is released through the *stable* channel while Kata Containers
2.x is available in the *candidate* channel.
Run the following command to install **Kata Containers 1.x**:
```sh
$ sudo snap install kata-containers --classic
```
Run the following command to install **Kata Containers 2.x**:
```sh
$ sudo snap install kata-containers --candidate --classic
@@ -30,6 +46,55 @@ $ sudo cp /snap/kata-containers/current/usr/share/defaults/kata-containers/confi
$ $EDITOR /etc/kata-containers/configuration.toml
```
## Integration with non-compatible shim v2 Container Engines
At the time of writing this document, `docker` and `podman` **do not support Kata
Containers 2.x; therefore, Kata Containers 1.x must be used instead.**
The path to the runtime provided by the Kata Containers 1.x snap package is
`/snap/bin/kata-containers.runtime`; use it to run Kata Containers 1.x.
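Before wiring the snap-provided runtime into a container engine, a quick sanity check is to ask it for its version; this assumes the snap was installed as shown above:

```bash
$ /snap/bin/kata-containers.runtime --version
```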
### Integration with Docker
`/etc/docker/daemon.json` is the configuration file for `docker`. Use the
following configuration to add a new runtime (`kata`) to `docker`.
```json
{
"runtimes": {
"kata": {
"path": "/snap/bin/kata-containers.runtime"
}
}
}
```
Once the above configuration has been applied, use the
following commands to restart `docker` and run Kata Containers 1.x.
```sh
$ sudo systemctl restart docker
$ docker run -ti --runtime kata busybox sh
```
### Integration with Podman
`/usr/share/containers/containers.conf` is the configuration file for `podman`.
Add the following configuration in the `[engine.runtimes]` section.
```toml
kata = [
"/snap/bin/kata-containers.runtime"
]
```
Once the above configuration has been applied, use the following command to run
Kata Containers 1.x with `podman`:
```sh
$ sudo podman run -ti --runtime kata docker.io/library/busybox sh
```
## Integration with shim v2 Container Engines
The Container engine daemon (`cri-o`, `containerd`, etc) needs to be able to find the

View File

@@ -0,0 +1,15 @@
# Install Kata Containers on Ubuntu
1. Install the Kata Containers components with the following commands:
```bash
$ ARCH=$(arch)
$ BRANCH="${BRANCH:-master}"
$ sudo sh -c "echo 'deb http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/xUbuntu_$(lsb_release -rs)/ /' > /etc/apt/sources.list.d/kata-containers.list"
$ curl -sL http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/xUbuntu_$(lsb_release -rs)/Release.key | sudo apt-key add -
$ sudo -E apt-get update
$ sudo -E apt-get -y install kata-runtime kata-proxy kata-shim
```
2. Decide which container manager to use and select the corresponding link that follows:
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)

View File

@@ -1,56 +1,62 @@
# Table of Contents
* [Table of Contents](#table-of-contents)
* [Introduction](#introduction)
* [Helpful Links before starting](#helpful-links-before-starting)
* [Steps to enable Intel QAT in Kata Containers](#steps-to-enable-intel-qat-in-kata-containers)
* [Script variables](#script-variables)
* [Set environment variables (Every Reboot)](#set-environment-variables-every-reboot)
* [Prepare the Clear Linux Host](#prepare-the-clear-linux-host)
* [Identify which PCI Bus the Intel QAT card is on](#identify-which-pci-bus-the-intel-qat-card-is-on)
* [Install necessary bundles for Clear Linux](#install-necessary-bundles-for-clear-linux)
* [Download Intel QAT drivers](#download-intel-qat-drivers)
* [Copy Intel QAT configuration files and enable Virtual Functions](#copy-intel-qat-configuration-files-and-enable-virtual-functions)
* [Expose and Bind Intel QAT virtual functions to VFIO-PCI (Every reboot)](#expose-and-bind-intel-qat-virtual-functions-to-vfio-pci-every-reboot)
* [Check Intel QAT virtual functions are enabled](#check-intel-qat-virtual-functions-are-enabled)
* [Prepare Kata Containers](#prepare-kata-containers)
* [Download Kata kernel Source](#download-kata-kernel-source)
* [Build Kata kernel](#build-kata-kernel)
* [Copy Kata kernel](#copy-kata-kernel)
* [Prepare Kata root filesystem](#prepare-kata-root-filesystem)
* [Compile Intel QAT drivers for Kata Containers kernel and add to Kata Containers rootfs](#compile-intel-qat-drivers-for-kata-containers-kernel-and-add-to-kata-containers-rootfs)
* [Copy Kata rootfs](#copy-kata-rootfs)
* [Update Kata configuration to point to custom kernel and rootfs](#update-kata-configuration-to-point-to-custom-kernel-and-rootfs)
* [Verify Intel QAT works in a Docker Kata Containers container](#verify-intel-qat-works-in-a-docker-kata-containers-container)
* [Build OpenSSL Intel QAT engine container](#build-openssl-intel-qat-engine-container)
* [Test Intel QAT in Docker](#test-intel-qat-in-docker)
* [Troubleshooting](#troubleshooting)
* [Optional Scripts](#optional-scripts)
* [Verify Intel QAT card counters are incremented](#verify-intel-qat-card-counters-are-incremented)
- [Table of Contents](#table-of-contents)
- [Introduction](#introduction)
- [Helpful Links before starting](#helpful-links-before-starting)
- [Steps to enable Intel® QAT in Kata Containers](#steps-to-enable-intel-qat-in-kata-containers)
- [Script variables](#script-variables)
- [Set environment variables (Every Reboot)](#set-environment-variables-every-reboot)
- [Prepare the Ubuntu Host](#prepare-the-ubuntu-host)
- [Identify which PCI Bus the Intel® QAT card is on](#identify-which-pci-bus-the-intel-qat-card-is-on)
- [Install necessary packages for Ubuntu](#install-necessary-packages-for-ubuntu)
- [Download Intel® QAT drivers](#download-intel-qat-drivers)
- [Copy Intel® QAT configuration files and enable virtual functions](#copy-intel-qat-configuration-files-and-enable-virtual-functions)
- [Expose and Bind Intel® QAT virtual functions to VFIO-PCI (Every reboot)](#expose-and-bind-intel-qat-virtual-functions-to-vfio-pci-every-reboot)
- [Check Intel® QAT virtual functions are enabled](#check-intel-qat-virtual-functions-are-enabled)
- [Prepare Kata Containers](#prepare-kata-containers)
- [Download Kata kernel Source](#download-kata-kernel-source)
- [Build Kata kernel](#build-kata-kernel)
- [Copy Kata kernel](#copy-kata-kernel)
- [Prepare Kata root filesystem](#prepare-kata-root-filesystem)
- [Compile Intel® QAT drivers for Kata Containers kernel and add to Kata Containers rootfs](#compile-intel-qat-drivers-for-kata-containers-kernel-and-add-to-kata-containers-rootfs)
- [Copy Kata rootfs](#copy-kata-rootfs)
- [Verify Intel® QAT works in a container](#verify-intel-qat-works-in-a-container)
- [Build OpenSSL Intel® QAT engine container](#build-openssl-intel-qat-engine-container)
- [Test Intel® QAT with the ctr tool](#test-intel-qat-with-the-ctr-tool)
- [Test Intel® QAT in Kubernetes](#test-intel-qat-in-kubernetes)
- [Troubleshooting](#troubleshooting)
- [Optional Scripts](#optional-scripts)
- [Verify Intel® QAT card counters are incremented](#verify-intel-qat-card-counters-are-incremented)
# Introduction
Intel QuickAssist Technology (Intel QAT) provides hardware acceleration
Intel® QuickAssist Technology (QAT) provides hardware acceleration
for security (cryptography) and compression. These instructions cover the
steps for [Clear Linux](https://clearlinux.org) but can be adapted to any
Linux distribution. Your distribution may already have the Intel QAT
drivers, but it is likely they do not contain the necessary user space
components. These instructions guide the user on how to download the kernel
sources, compile kernel driver modules against those sources, and load them
onto the host as well as preparing a specially built Kata Containers kernel
and custom Kata Containers rootfs.
steps for the latest [Ubuntu LTS release](https://ubuntu.com/download/desktop),
which already includes the QAT host driver. These instructions can be adapted to
any Linux distribution. These instructions guide the user on how to download
the kernel sources, compile kernel driver modules against those sources, and
load them onto the host as well as preparing a specially built Kata Containers
kernel and custom Kata Containers rootfs.
* Download kernel sources
* Compile Kata kernel
* Compile kernel driver modules against those sources
* Download rootfs
* Add driver modules to rootfs
* Build rootfs image
## Helpful Links before starting
[Intel QAT Engine](https://github.com/intel/QAT_Engine)
[Intel® QuickAssist Technology at `01.org`](https://01.org/intel-quickassist-technology)
[Intel QuickAssist Technology at `01.org`](https://01.org/intel-quickassist-technology)
[Intel® QuickAssist Technology Engine for OpenSSL](https://github.com/intel/QAT_Engine)
[Intel Device Plugin for Kubernetes](https://github.com/intel/intel-device-plugins-for-kubernetes)
[Intel QuickAssist Crypto Poll Mode Driver](https://dpdk-docs.readthedocs.io/en/latest/cryptodevs/qat.html)
[Intel® QuickAssist Technology for Crypto Poll Mode Driver](https://dpdk-docs.readthedocs.io/en/latest/cryptodevs/qat.html)
## Steps to enable Intel QAT in Kata Containers
## Steps to enable Intel® QAT in Kata Containers
There are some steps to complete only once, some steps to complete with every
reboot, and some steps to complete when the host kernel changes.
@@ -67,91 +73,95 @@ needed to point to updated drivers or different install locations.
Make sure to check [`01.org`](https://01.org/intel-quickassist-technology) for
the latest driver.
```sh
$ export QAT_DRIVER_VER=qat1.7.l.4.8.0-00005.tar.gz
$ export QAT_DRIVER_URL=https://01.org/sites/default/files/downloads/${QAT_DRIVER_VER}
```bash
$ export QAT_DRIVER_VER=qat1.7.l.4.12.0-00011.tar.gz
$ export QAT_DRIVER_URL=https://downloadmirror.intel.com/30178/eng/${QAT_DRIVER_VER}
$ export QAT_CONF_LOCATION=~/QAT_conf
$ export QAT_DOCKERFILE=https://raw.githubusercontent.com/intel/intel-device-plugins-for-kubernetes/master/demo/openssl-qat-engine/Dockerfile
$ export QAT_SRC=~/src/QAT
$ export GOPATH=~/src/go
$ export OSBUILDER=~/src/osbuilder
$ export KATA_KERNEL_LOCATION=~/kata
$ export KATA_ROOTFS_LOCATION=~/kata
```
## Prepare the Clear Linux Host
## Prepare the Ubuntu Host
The host could be a bare metal instance or a virtual machine. If using a
virtual machine, make sure that KVM nesting is enabled. The following
instructions reference an Intel QAT. Some of the instructions must be
modified if using a different Intel QAT device. You can identify the Intel QAT
chipset by executing the following.
instructions reference an Intel® C62X chipset. Some of the instructions must be
modified if using a different Intel® QAT device. The Intel® QAT chipset can be
identified by executing the following.
### Identify which PCI Bus the Intel QAT card is on
### Identify which PCI Bus the Intel® QAT card is on
```sh
```bash
$ for i in 0434 0435 37c8 1f18 1f19; do lspci -d 8086:$i; done
```
### Install necessary bundles for Clear Linux
### Install necessary packages for Ubuntu
Clear Linux version 30780 (Released August 13, 2019) includes a
`linux-firmware-qat` bundle that has the necessary QAT firmware along with a
functional QAT host driver that works with Kata Containers.
These packages are necessary to compile the Kata kernel and the Intel® QAT driver, and to
prepare the rootfs for Kata. [Docker](https://docs.docker.com/engine/install/ubuntu/)
also needs to be installed to be able to build the rootfs. To test that
everything works, a Kubernetes pod is started requesting Intel® QAT resources. For
passthrough of the virtual functions, the kernel boot parameter needs to include
`intel_iommu=on`.
```sh
$ sudo swupd bundle-add network-basic linux-firmware-qat make c-basic go-basic containers-virt dev-utils devpkg-elfutils devpkg-systemd devpkg-ssl
$ sudo clr-boot-manager update
$ sudo systemctl enable --now docker
```bash
$ sudo apt update
$ sudo apt install -y golang-go build-essential python pkg-config zlib1g-dev libudev-dev bison libelf-dev flex libtool automake autotools-dev autoconf bc libpixman-1-dev coreutils libssl-dev
$ sudo sed -i 's/GRUB_CMDLINE_LINUX_DEFAULT=""/GRUB_CMDLINE_LINUX_DEFAULT="intel_iommu=on"/' /etc/default/grub
$ sudo update-grub
$ sudo reboot
```
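After the reboot, it may be worth confirming that the IOMMU parameter made it onto the kernel command line (checking `/proc/cmdline` is also suggested later in this guide):

```bash
$ grep -o intel_iommu=on /proc/cmdline
```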
### Download Intel QAT drivers
### Download Intel® QAT drivers
This will download the Intel QAT drivers from [`01.org`](https://01.org/intel-quickassist-technology).
This will download the [Intel® QAT drivers](https://01.org/intel-quickassist-technology).
Make sure to check the website for the latest version.
```sh
```bash
$ mkdir -p $QAT_SRC
$ cd $QAT_SRC
$ curl -L $QAT_DRIVER_URL | tar zx
```
### Copy Intel QAT configuration files and enable Virtual Functions
### Copy Intel® QAT configuration files and enable virtual functions
Modify the instructions below as necessary if using a different QAT hardware
Modify the instructions below as necessary if using a different Intel® QAT hardware
platform. You can learn more about customizing configuration files at the
[Intel QAT Engine repository](https://github.com/intel/QAT_Engine/#copy-the-correct-intel-quickassist-technology-driver-config-files)
[Intel® QAT Engine repository](https://github.com/intel/QAT_Engine/#copy-the-correct-intel-quickassist-technology-driver-config-files)
This section starts from a base config file and changes the `SSL` section to
`SHIM` to support the OpenSSL engine. There are more tweaks that you can make
depending on the use case and how many Intel QAT engines should be run. You
depending on the use case and how many Intel® QAT engines should be run. You
can find more information about how to customize in the
[Intel® QuickAssist Technology Software for Linux* - Programmer's Guide.](https://01.org/sites/default/files/downloads/336210qatswprogrammersguiderev006.pdf)
> **Note: This section assumes that a QAT `c6xx` platform is used.**
> **Note: This section assumes that an Intel® QAT `c6xx` platform is used.**
```sh
```bash
$ mkdir -p $QAT_CONF_LOCATION
$ cp $QAT_SRC/quickassist/utilities/adf_ctl/conf_files/c6xxvf_dev0.conf.vm $QAT_CONF_LOCATION/c6xxvf_dev0.conf
$ sed -i 's/\[SSL\]/\[SHIM\]/g' $QAT_CONF_LOCATION/c6xxvf_dev0.conf
```
### Expose and Bind Intel QAT virtual functions to VFIO-PCI (Every reboot)
### Expose and Bind Intel® QAT virtual functions to VFIO-PCI (Every reboot)
To enable virtual functions, the host OS should have IOMMU groups enabled. In
the UEFI Firmware Intel Virtualization Technology for Directed I/O
(Intel VT-d) must be enabled. Also, the kernel boot parameter should be
`intel_iommu=on` or `intel_iommu=igfx_off`. The default in Clear Linux currently
is `intel_iommu=igfx_off` which should work with the Intel QAT device. The
following commands assume you installed an Intel QAT card, IOMMU is on, and
the UEFI Firmware Intel® Virtualization Technology for Directed I/O
(Intel® VT-d) must be enabled. Also, the kernel boot parameter should be
`intel_iommu=on` or `intel_iommu=igfx_off`. This should have been set from
the instructions above. Check the output of `/proc/cmdline` to confirm. The
following commands assume you installed an Intel® QAT card, IOMMU is on, and
VT-d is enabled. The vendor and device ID are added to the `VFIO-PCI` driver so that
each exposed virtual function can be bound to the `VFIO-PCI` driver. Once
complete, each virtual function passes into a Kata Containers container using
the PCIe device passthrough feature. For Kubernetes, the Intel device plugin
for Kubernetes handles the binding of the driver but the VFs still must be
the PCIe device passthrough feature. For Kubernetes, the
[Intel device plugin](https://github.com/intel/intel-device-plugins-for-kubernetes)
for Kubernetes handles the binding of the driver, but the VFs still must be
enabled.
```sh
```bash
$ sudo modprobe vfio-pci
$ QAT_PCI_BUS_PF_NUMBERS=$((lspci -d :435 && lspci -d :37c8 && lspci -d :19e2 && lspci -d :6f54) | cut -d ' ' -f 1)
$ QAT_PCI_BUS_PF_1=$(echo $QAT_PCI_BUS_PF_NUMBERS | cut -d ' ' -f 1)
@@ -160,8 +170,10 @@ $ QAT_PCI_ID_VF=$(cat /sys/bus/pci/devices/0000:${QAT_PCI_BUS_PF_1}/virtfn0/ueve
$ QAT_VENDOR_AND_ID_VF=$(echo ${QAT_PCI_ID_VF/PCI_ID=} | sed 's/:/ /')
$ echo $QAT_VENDOR_AND_ID_VF | sudo tee --append /sys/bus/pci/drivers/vfio-pci/new_id
```
Loop through all the virtual functions and bind to the VFIO driver
```sh
```bash
$ for f in /sys/bus/pci/devices/0000:$QAT_PCI_BUS_PF_1/virtfn*
do QAT_PCI_BUS_VF=$(basename $(readlink $f))
echo $QAT_PCI_BUS_VF | sudo tee --append /sys/bus/pci/drivers/c6xxvf/unbind
@@ -169,22 +181,23 @@ $ for f in /sys/bus/pci/devices/0000:$QAT_PCI_BUS_PF_1/virtfn*
done
```
### Check Intel QAT virtual functions are enabled
### Check Intel® QAT virtual functions are enabled
If the following command returns empty, then the virtual functions are not
properly enabled. This command checks the enumerated device IDs for just the
virtual functions. Using the Intel QAT as an example, the physical device ID
virtual functions. Using the Intel® QAT as an example, the physical device ID
is `37c8` and virtual function device ID is `37c9`. The following command checks
if VF's are enabled for any of the currently known Intel QAT device ID's. The
if VFs are enabled for any of the currently known Intel® QAT device IDs. The
following `ls` command should show the 16 VFs bound to `VFIO-PCI`.
```sh
```bash
$ for i in 0442 0443 37c9 19e3; do lspci -d 8086:$i; done
```
Another way to check is to see what PCI devices that `VFIO-PCI` is mapped to.
It should match the device ID's of the VF's.
```sh
```bash
$ ls -la /sys/bus/pci/drivers/vfio-pci
```
@@ -201,16 +214,16 @@ There are some patches that must be installed as well, which the
`build-kernel.sh` script should automatically apply. If you are using a
different kernel version, then you might need to manually apply them. Since
the Kata Containers kernel has a minimal set of kernel flags set, you must
create a QAT kernel fragment with the necessary `CONFIG_CRYPTO_*` options set.
create an Intel® QAT kernel fragment with the necessary `CONFIG_CRYPTO_*` options set.
Update the config to set some of the `CRYPTO` flags to enabled. This might
change with different kernel versions. We tested the following instructions
with kernel `v4.19.28-41`.
change with different kernel versions. The following instructions were tested
with kernel `v5.4.0-64-generic`.
```sh
```bash
$ mkdir -p $GOPATH
$ cd $GOPATH
$ go get -v github.com/kata-containers/packaging
$ cat << EOF > $GOPATH/src/github.com/kata-containers/packaging/kernel/configs/fragments/common/qat.conf
$ go get -v github.com/kata-containers/kata-containers
$ cat << EOF > $GOPATH/src/github.com/kata-containers/kata-containers/tools/packaging/kernel/configs/fragments/common/qat.conf
CONFIG_PCIEAER=y
CONFIG_UIO=y
CONFIG_CRYPTO_HW=y
@@ -221,61 +234,70 @@ CONFIG_MODULE_SIG=y
CONFIG_CRYPTO_AUTHENC=y
CONFIG_CRYPTO_DH=y
EOF
$ $GOPATH/src/github.com/kata-containers/packaging/kernel/build-kernel.sh setup
$ $GOPATH/src/github.com/kata-containers/kata-containers/tools/packaging/kernel/build-kernel.sh setup
```
### Build Kata kernel
```sh
$ export LINUX_VER=$(ls -d kata*)
```bash
$ cd $GOPATH
$ export LINUX_VER=$(ls -d kata-linux-*)
$ sed -i 's/EXTRAVERSION =/EXTRAVERSION = .qat.container/' $LINUX_VER/Makefile
$ $GOPATH/src/github.com/kata-containers/packaging/kernel/build-kernel.sh build
$ $GOPATH/src/github.com/kata-containers/kata-containers/tools/packaging/kernel/build-kernel.sh build
```
### Copy Kata kernel
```sh
```bash
$ export KATA_KERNEL_NAME=vmlinux-${LINUX_VER}_qat
$ mkdir -p $KATA_KERNEL_LOCATION
$ cp $LINUX_VER/arch/x86/boot/bzImage $KATA_KERNEL_LOCATION/vmlinuz-${LINUX_VER}_qat
$ cp ${GOPATH}/${LINUX_VER}/vmlinux ${KATA_KERNEL_LOCATION}/${KATA_KERNEL_NAME}
```
### Prepare Kata root filesystem
These instructions build upon the OS builder instructions located in the
[Developer Guide](../Developer-Guide.md). The following instructions use Clear
Linux (Kata Containers default) as the root filesystem with systemd as the
init and will add in the `kmod` binary, which is not a standard binary in a
Kata rootfs image. The `kmod` binary is necessary to load the QAT kernel
modules when the virtual machine rootfs boots. You should install Docker on
your system before running the following commands. If you need to use a custom
`kata-agent`, then refer to the previous link on how to add it in.
[Developer Guide](../Developer-Guide.md). At this point it is recommended that
[Docker](https://docs.docker.com/engine/install/ubuntu/) is installed first, and
then [Kata-deploy](https://github.com/kata-containers/kata-containers/tree/main/tools/packaging/kata-deploy)
is used to install Kata. This will make sure that the correct `agent` version
is installed into the rootfs in the steps below.
```sh
$ mkdir -p $OSBUILDER
$ cd $OSBUILDER
$ git clone https://github.com/kata-containers/osbuilder.git
$ export ROOTFS_DIR=${OSBUILDER}/osbuilder/rootfs-builder/rootfs
The following instructions use Debian as the root filesystem with systemd as
the init and will add in the `kmod` binary, which is not a standard binary in
a Kata rootfs image. The `kmod` binary is necessary to load the Intel® QAT
kernel modules when the virtual machine rootfs boots.
```bash
$ export OSBUILDER=$GOPATH/src/github.com/kata-containers/kata-containers/tools/osbuilder
$ export ROOTFS_DIR=${OSBUILDER}/rootfs-builder/rootfs
$ export EXTRA_PKGS='kmod'
```
Make sure that the `kata-agent` version matches the installed `kata-runtime`
version.
```sh
version. Also make sure the `kata-runtime` install location is in your `PATH`
variable. The following `AGENT_VERSION` can be set manually to match
the `kata-runtime` version if the following commands don't work.
```bash
$ export PATH=$PATH:/opt/kata/bin
$ cd $GOPATH
$ export AGENT_VERSION=$(kata-runtime version | head -n 1 | grep -o "[0-9.]\+")
$ cd ${OSBUILDER}/osbuilder/rootfs-builder
$ cd ${OSBUILDER}/rootfs-builder
$ sudo rm -rf ${ROOTFS_DIR}
$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true SECCOMP=no ./rootfs.sh clearlinux'
$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true SECCOMP=no ./rootfs.sh debian'
```
### Compile Intel QAT drivers for Kata Containers kernel and add to Kata Containers rootfs
### Compile Intel® QAT drivers for Kata Containers kernel and add to Kata Containers rootfs
After the Kata Containers kernel builds with the proper configuration flags,
you must build the Intel QAT drivers against that Kata Containers kernel
you must build the Intel® QAT drivers against that Kata Containers kernel
version in a similar way they were previously built for the host OS. You must
set the `KERNEL_SOURCE_ROOT` variable to the Kata Containers kernel source
directory and build the Intel QAT drivers again.
directory and build the Intel® QAT drivers again. The `make` command will
install the Intel® QAT modules into the Kata rootfs.
```sh
```bash
$ cd $GOPATH
$ export LINUX_VER=$(ls -d kata*)
$ export KERNEL_MAJOR_VERSION=$(awk '/^VERSION =/{print $NF}' $GOPATH/$LINUX_VER/Makefile)
@@ -284,16 +306,18 @@ $ export KERNEL_SUBLEVEL=$(awk '/^SUBLEVEL =/{print $NF}' $GOPATH/$LINUX_VER/Mak
$ export KERNEL_EXTRAVERSION=$(awk '/^EXTRAVERSION =/{print $NF}' $GOPATH/$LINUX_VER/Makefile)
$ export KERNEL_ROOTFS_DIR=${KERNEL_MAJOR_VERSION}.${KERNEL_PATHLEVEL}.${KERNEL_SUBLEVEL}${KERNEL_EXTRAVERSION}
$ cd $QAT_SRC
$ KERNEL_SOURCE_ROOT=$GOPATH/$LINUX_VER ./configure --disable-qat-lkcf --enable-icp-sriov=guest
$ KERNEL_SOURCE_ROOT=$GOPATH/$LINUX_VER ./configure --enable-icp-sriov=guest
$ sudo -E make all -j$(nproc)
$ sudo -E make INSTALL_MOD_PATH=$ROOTFS_DIR qat-driver-install -j$(nproc)
```
The `usdm_drv` module also needs to be copied into the rootfs modules path and
`depmod` should be run.
```sh
$ sudo cp $QAT_SRC/build/usdm_drv.ko $ROOTFS_DIR/usr/lib/modules/${KERNEL_ROOTFS_DIR}/updates/drivers
```bash
$ sudo cp $QAT_SRC/build/usdm_drv.ko $ROOTFS_DIR/lib/modules/${KERNEL_ROOTFS_DIR}/updates/drivers
$ sudo depmod -a -b ${ROOTFS_DIR} ${KERNEL_ROOTFS_DIR}
$ cd ${OSBUILDER}/osbuilder/image-builder
$ cd ${OSBUILDER}/image-builder
$ script -fec 'sudo -E USE_DOCKER=true ./image_builder.sh ${ROOTFS_DIR}'
```
@@ -302,84 +326,225 @@ $ script -fec 'sudo -E USE_DOCKER=true ./image_builder.sh ${ROOTFS_DIR}'
### Copy Kata rootfs
```sh
```bash
$ mkdir -p $KATA_ROOTFS_LOCATION
$ cp ${OSBUILDER}/osbuilder/image-builder/kata-containers.img $KATA_ROOTFS_LOCATION
$ cp ${OSBUILDER}/image-builder/kata-containers.img $KATA_ROOTFS_LOCATION
```
### Update Kata configuration to point to custom kernel and rootfs
## Verify Intel® QAT works in a container
You must update the `configuration.toml` for Kata Containers to point to the
custom kernel, custom rootfs, and to specify which modules to load when the
virtual machine is booted when a container is run. The following example
assumes you installed an Intel QAT card and need to load those modules.
```sh
$ sudo mkdir -p /etc/kata-containers
$ sudo cp /usr/share/defaults/kata-containers/configuration-qemu.toml /etc/kata-containers/configuration.toml
$ sudo sed -i "s|kernel_params = \"\"|kernel_params = \"modules-load=usdm_drv,qat_c62xvf\"|g" /etc/kata-containers/configuration.toml
$ sudo sed -i "s|\/usr\/share\/kata-containers\/kata-containers.img|${KATA_KERNEL_LOCATION}\/kata-containers.img|g" /etc/kata-containers/configuration.toml
$ sudo sed -i "s|\/usr\/share\/kata-containers\/vmlinuz.container|${KATA_ROOTFS_LOCATION}\/vmlinuz-${LINUX_VER}_qat|g" /etc/kata-containers/configuration.toml
```
## Verify Intel QAT works in a Docker Kata Containers container
The following instructions leverage an OpenSSL Dockerfile that builds the
Intel QAT engine to allow OpenSSL to offload crypto functions. It is a
convenient way to test that VFIO device passthrough for the Intel QAT VFs are
The following instructions use an OpenSSL Dockerfile that builds the
Intel® QAT engine to allow OpenSSL to offload crypto functions. It is a
convenient way to test that VFIO device passthrough for the Intel® QAT VFs is
working properly with the Kata Containers VM.
## Build OpenSSL Intel QAT engine container
### Build OpenSSL Intel® QAT engine container
Use the OpenSSL Intel QAT [Dockerfile](https://github.com/intel/intel-device-plugins-for-kubernetes/tree/master/demo/openssl-qat-engine)
Use the OpenSSL Intel® QAT [Dockerfile](https://github.com/intel/intel-device-plugins-for-kubernetes/tree/master/demo/openssl-qat-engine)
to build a container image with an optimized OpenSSL engine for
Intel QAT. Using `docker build` with the Kata Containers runtime can sometimes
have issues. Therefore, we recommended you change the default runtime to
`runc` before doing a build. Instructions for this are below.
Intel® QAT. Using `docker build` with the Kata Containers runtime can sometimes
have issues. Therefore, make sure that `runc` is the default Docker container
runtime.
```sh
```bash
$ cd $QAT_SRC
$ curl -O $QAT_DOCKERFILE
$ sudo sed -i 's/kata-runtime/runc/g' /etc/systemd/system/docker.service.d/50-runtime.conf
$ sudo systemctl daemon-reload && sudo systemctl restart docker
$ sudo docker build -t openssl-qat-engine .
```
> **Note: The Intel QAT driver version in this container might not match the
> Intel QAT driver compiled and loaded on the host when compiling.**
> **Note: The Intel® QAT driver version in this container might not match the
> Intel® QAT driver compiled and loaded on the host when compiling.**
### Test Intel QAT in Docker
### Test Intel® QAT with the ctr tool
The host should already be setup with 16 virtual functions of the Intel QAT
card bound to `VFIO-PCI`. Verify this by looking in `/dev/vfio` for a listing
of devices. Replace the number 90 with one of the VFs exposed in `/dev/vfio`.
It might require you to add an `IPC_LOCK` capability to your Docker runtime
depending on which rootfs you use.
The `ctr` tool can be used to interact with the containerd daemon. It may be
more convenient to use this tool to verify the kernel and image instead of
setting up a Kubernetes cluster. The correct Kata runtimes need to be added
to the containerd `config.toml`. Below is a sample snippet that can be added
to allow QEMU and Cloud Hypervisor (CLH) to work with `ctr`.
```sh
$ sudo docker run -it --runtime=kata-runtime --cap-add=IPC_LOCK --cap-add=SYS_ADMIN --device=/dev/vfio/90 -v /dev:/dev -v ${QAT_CONF_LOCATION}:/etc openssl-qat-engine bash
```
[plugins.cri.containerd.runtimes.kata-qemu]
runtime_type = "io.containerd.kata-qemu.v2"
privileged_without_host_devices = true
pod_annotations = ["io.katacontainers.*"]
[plugins.cri.containerd.runtimes.kata-qemu.options]
ConfigPath = "/opt/kata/share/defaults/kata-containers/configuration-qemu.toml"
[plugins.cri.containerd.runtimes.kata-clh]
runtime_type = "io.containerd.kata-clh.v2"
privileged_without_host_devices = true
pod_annotations = ["io.katacontainers.*"]
[plugins.cri.containerd.runtimes.kata-clh.options]
ConfigPath = "/opt/kata/share/defaults/kata-containers/configuration-clh.toml"
```
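containerd only reads its configuration at startup, so restart the service after adding these runtime entries, for example:
```bash
$ sudo systemctl restart containerd
```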
Below are some commands to run in the container image to verify Intel QAT is
In addition, containerd expects the shim binary to be in `/usr/local/bin`, so add
the following small wrapper scripts that redirect to the Kata shim, allowing either
QEMU or Cloud Hypervisor to be used with Kata.
```bash
$ echo '#!/bin/bash' | sudo tee /usr/local/bin/containerd-shim-kata-qemu-v2
$ echo 'KATA_CONF_FILE=/opt/kata/share/defaults/kata-containers/configuration-qemu.toml /opt/kata/bin/containerd-shim-kata-v2 $@' | sudo tee -a /usr/local/bin/containerd-shim-kata-qemu-v2
$ sudo chmod +x /usr/local/bin/containerd-shim-kata-qemu-v2
$ echo '#!/bin/bash' | sudo tee /usr/local/bin/containerd-shim-kata-clh-v2
$ echo 'KATA_CONF_FILE=/opt/kata/share/defaults/kata-containers/configuration-clh.toml /opt/kata/bin/containerd-shim-kata-v2 $@' | sudo tee -a /usr/local/bin/containerd-shim-kata-clh-v2
$ sudo chmod +x /usr/local/bin/containerd-shim-kata-clh-v2
```
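As a quick sanity check, confirm the wrapper scripts exist and are executable:
```bash
$ ls -l /usr/local/bin/containerd-shim-kata-{qemu,clh}-v2
```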
After the OpenSSL image is built and imported into containerd, an Intel® QAT
virtual function exposed in the step above can be added to the `ctr` command.
Make sure to change the `/dev/vfio` number to one that actually exists on the
host system. When using the `ctr` tool, the `configuration.toml` for Kata needs
to point to the custom Kata kernel and rootfs built above, and the Intel® QAT
modules in the Kata rootfs need to load at boot. The following steps assume that
`kata-deploy` was used to install Kata and that QEMU is being tested. If you use a
different hypervisor, a different install method for Kata, or a different
Intel® QAT chipset, the commands will need to be modified accordingly.
> **Note: The following was tested with
[containerd v1.3.9](https://github.com/containerd/containerd/releases/tag/v1.3.9).**
```bash
$ config_file="/opt/kata/share/defaults/kata-containers/configuration-qemu.toml"
$ sudo sed -i "/kernel =/c kernel = "\"${KATA_ROOTFS_LOCATION}/${KATA_KERNEL_NAME}\""" $config_file
$ sudo sed -i "/image =/c image = "\"${KATA_KERNEL_LOCATION}/kata-containers.img\""" $config_file
$ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 modules-load=usdm_drv,qat_c62xvf"/g' $config_file
$ sudo docker save -o openssl-qat-engine.tar openssl-qat-engine:latest
$ sudo ctr images import openssl-qat-engine.tar
$ sudo ctr run --runtime io.containerd.run.kata-qemu.v2 --privileged -t --rm --device=/dev/vfio/180 --mount type=bind,src=/dev,dst=/dev,options=rbind:rw --mount type=bind,src=${QAT_CONF_LOCATION}/c6xxvf_dev0.conf,dst=/etc/c6xxvf_dev0.conf,options=rbind:rw docker.io/library/openssl-qat-engine:latest bash
```
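To list the VFIO group numbers that actually exist on the host before picking one for `--device`:
```bash
$ ls /dev/vfio
```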
Below are some commands to run in the container image to verify Intel® QAT is
working
```sh
bash-5.0# cat /proc/modules
bash-5.0# adf_ctl restart
bash-5.0# adf_ctl status
bash-5.0# openssl engine -c -t qat
root@67561dc2757a/ # cat /proc/modules
qat_c62xvf 16384 - - Live 0xffffffffc00d9000 (OE)
usdm_drv 86016 - - Live 0xffffffffc00e8000 (OE)
intel_qat 249856 - - Live 0xffffffffc009b000 (OE)
root@67561dc2757a/ # adf_ctl restart
Restarting all devices.
Processing /etc/c6xxvf_dev0.conf
root@67561dc2757a/ # adf_ctl status
Checking status of all devices.
There is 1 QAT acceleration device(s) in the system:
qat_dev0 - type: c6xxvf, inst_id: 0, node_id: 0, bsf: 0000:01:01.0, #accel: 1 #engines: 1 state: up
root@67561dc2757a/ # openssl engine -c -t qat-hw
(qat-hw) Reference implementation of QAT crypto engine v0.6.1
[RSA, DSA, DH, AES-128-CBC-HMAC-SHA1, AES-128-CBC-HMAC-SHA256, AES-256-CBC-HMAC-SHA1, AES-256-CBC-HMAC-SHA256, TLS1-PRF, HKDF, X25519, X448]
[ available ]
```
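To compare hardware-offloaded and software-only throughput inside the container, run the `openssl speed` test with and without the engine (the engine id shown above is `qat-hw`; older engine releases use `qat`):
```sh
bash-5.0# openssl speed -engine qat-hw -elapsed -async_jobs 72 rsa2048
bash-5.0# openssl speed -elapsed rsa2048
```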
Test with Intel QAT card acceleration
### Test Intel® QAT in Kubernetes
```sh
bash-5.0# openssl speed -engine qat -elapsed -async_jobs 72 rsa2048
Start a Kubernetes cluster with containerd as the CRI. The host should
already be set up with 16 virtual functions of the Intel® QAT card bound to
`VFIO-PCI`. Verify this by looking in `/dev/vfio` for a listing of devices.
You might need to disable Docker before initializing Kubernetes. Be aware
that the OpenSSL container image built above will need to be exported from
Docker and imported into containerd.
If Kata is installed through [`kata-deploy`](https://github.com/kata-containers/kata-containers/blob/stable-2.0/tools/packaging/kata-deploy/README.md),
there will be multiple `configuration.toml` files associated with different
hypervisors. Rather than adding the custom Kata kernel, Kata rootfs, and
kernel modules to each `configuration.toml` as defaults, use
[annotations](https://github.com/kata-containers/kata-containers/blob/stable-2.0/docs/how-to/how-to-load-kernel-modules-with-kata.md)
in the Kubernetes YAML file to tell Kata which kernel and rootfs to use. The
easiest way to do this is to use `kata-deploy`, which installs the Kata binaries
to `/opt` and properly configures `/etc/containerd/config.toml` with annotation
support. However, the `configuration.toml` needs to enable support for
annotations as well. The following enables annotations in both the QEMU and Cloud
Hypervisor `configuration.toml` files that ship with Kata Containers
versions 2.0 and higher.
```bash
$ sudo sed -i 's/enable_annotations\s=\s\[\]/enable_annotations = [".*"]/' /opt/kata/share/defaults/kata-containers/configuration-qemu.toml
$ sudo sed -i 's/enable_annotations\s=\s\[\]/enable_annotations = [".*"]/' /opt/kata/share/defaults/kata-containers/configuration-clh.toml
```
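To confirm the change took effect:
```bash
$ grep enable_annotations /opt/kata/share/defaults/kata-containers/configuration-qemu.toml
```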
Test with CPU acceleration
Export the OpenSSL image from Docker and import into containerd.
```bash
$ sudo docker save -o openssl-qat-engine.tar openssl-qat-engine:latest
$ sudo ctr -n=k8s.io images import openssl-qat-engine.tar
```
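To confirm the image is now visible to containerd in the `k8s.io` namespace:
```bash
$ sudo ctr -n=k8s.io images ls | grep openssl-qat-engine
```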
The [Intel® QAT Plugin](https://github.com/intel/intel-device-plugins-for-kubernetes/blob/master/cmd/qat_plugin/README.md)
needs to be started so that the virtual functions can be discovered and
used by Kubernetes.
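Once the plugin is running, the virtual functions should be advertised as allocatable `qat.intel.com/generic` resources (the resource name requested in the pod spec below). A quick way to check:
```bash
$ kubectl describe nodes | grep qat.intel.com/generic
```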
The following YAML file can be used to start a Kata container with Intel® QAT
support. If Kata is installed with `kata-deploy`, then the containerd
`config.toml` should already have all of the Kata runtime classes
populated and annotations supported. To use an Intel® QAT virtual function, the
Intel® QAT plugin needs to be started after the VFs are bound to `VFIO-PCI` as
described [above](#expose-and-bind-intel-qat-virtual-functions-to-vfio-pci-every-reboot).
Edit the following to point to the correct Kata kernel and rootfs location
built with Intel® QAT support.
```bash
$ cat << EOF > kata-openssl-qat.yaml
apiVersion: v1
kind: Pod
metadata:
name: kata-openssl-qat
labels:
app: kata-openssl-qat
annotations:
io.katacontainers.config.hypervisor.kernel: "$KATA_KERNEL_LOCATION/$KATA_KERNEL_NAME"
io.katacontainers.config.hypervisor.image: "$KATA_ROOTFS_LOCATION/kata-containers.img"
io.katacontainers.config.hypervisor.kernel_params: "modules-load=usdm_drv,qat_c62xvf"
spec:
runtimeClassName: kata-qemu
containers:
- name: kata-openssl-qat
image: docker.io/library/openssl-qat-engine:latest
imagePullPolicy: IfNotPresent
resources:
limits:
qat.intel.com/generic: 1
cpu: 1
securityContext:
capabilities:
add: ["IPC_LOCK", "SYS_ADMIN"]
volumeMounts:
- mountPath: /etc/c6xxvf_dev0.conf
name: etc-mount
- mountPath: /dev
name: dev-mount
volumes:
- name: dev-mount
hostPath:
path: /dev
- name: etc-mount
hostPath:
path: $QAT_CONF_LOCATION/c6xxvf_dev0.conf
EOF
```
Use `kubectl` to start the pod. Verify that Intel® QAT card acceleration is
working with the Intel® QAT engine.
```bash
$ kubectl apply -f kata-openssl-qat.yaml
```
```sh
bash-5.0# openssl speed -elapsed rsa2048
$ kubectl exec -it kata-openssl-qat -- adf_ctl restart
Restarting all devices.
Processing /etc/c6xxvf_dev0.conf
$ kubectl exec -it kata-openssl-qat -- adf_ctl status
Checking status of all devices.
There is 1 QAT acceleration device(s) in the system:
qat_dev0 - type: c6xxvf, inst_id: 0, node_id: 0, bsf: 0000:01:01.0, #accel: 1 #engines: 1 state: up
$ kubectl exec -it kata-openssl-qat -- openssl engine -c -t qat-hw
(qat-hw) Reference implementation of QAT crypto engine v0.6.1
[RSA, DSA, DH, AES-128-CBC-HMAC-SHA1, AES-128-CBC-HMAC-SHA256, AES-256-CBC-HMAC-SHA1, AES-256-CBC-HMAC-SHA256, TLS1-PRF, HKDF, X25519, X448]
[ available ]
```
### Troubleshooting
@@ -412,9 +577,9 @@ c6xxvf_dev10.conf c6xxvf_dev13.conf c6xxvf_dev2.conf c6xxvf_dev5.conf c6xxvf
```
* Check `dmesg` inside the container to see if there are any issues with the
Intel QAT driver.
Intel® QAT driver.
* If there are issues building the OpenSSL Intel QAT container image, then
* If there are issues building the OpenSSL Intel® QAT container image, then
check to make sure that `runc` is the default runtime used to build containers.
```sh
@@ -425,17 +590,18 @@ Environment="DOCKER_DEFAULT_RUNTIME=--default-runtime runc"
## Optional Scripts
### Verify Intel QAT card counters are incremented
### Verify Intel® QAT card counters are incremented
Use the `lspci` command to figure out which PCI bus the Intel QAT accelerators
are on. The counters will increase when the accelerator is actively being
used. To verify QAT is actively accelerating the containerized application,
use the following instructions to check if any of the counters are
incrementing. You will have to change the PCI device ID to match your system.
To check the built-in firmware counters, the Intel® QAT driver has to be compiled
and installed on the host; you cannot rely on the in-tree host driver for this. The
counters increase when the accelerator is actively being used. To verify
Intel® QAT is actively accelerating the containerized application, use the
following instructions to check whether any of the counters increment. Make
sure to change the PCI device ID to match what is in the system.
```sh
```bash
$ for i in 0434 0435 37c8 1f18 1f19; do lspci -d 8086:$i; done
$ sudo watch cat /sys/kernel/debug/qat_c6xx_0000\:b1\:00.0/fw_counters
$ sudo watch cat /sys/kernel/debug/qat_c6xx_0000\:b3\:00.0/fw_counters
$ sudo watch cat /sys/kernel/debug/qat_c6xx_0000\:b5\:00.0/fw_counters
```
```
View File
@@ -0,0 +1,112 @@
# Kata Containers with SGX
- [Check if SGX is enabled](#check-if-sgx-is-enabled)
- [Install Host kernel with SGX support](#install-host-kernel-with-sgx-support)
- [Install Guest kernel with SGX support](#install-guest-kernel-with-sgx-support)
- [Run Kata Containers with SGX enabled](#run-kata-containers-with-sgx-enabled)
Intel® Software Guard Extensions (SGX) is a set of instructions that increases the security
of application code and data, giving them more protection from disclosure or modification.
> **Note:** At the time of writing this document, the SGX patches have not landed in the upstream Linux kernel,
> so specific guest and host kernel versions must be installed to enable SGX.
## Check if SGX is enabled
Run the following command to check if your host supports SGX.
```sh
$ grep -o sgx /proc/cpuinfo
```
If the output of the above command is empty, continue to the following section;
otherwise, skip ahead to [Install Guest kernel with SGX support](#install-guest-kernel-with-sgx-support).
## Install Host kernel with SGX support
The following commands were tested on Fedora 32; they might work on other distributions too.
```sh
$ git clone --depth=1 https://github.com/intel/kvm-sgx
$ pushd kvm-sgx
$ cp /boot/config-$(uname -r) .config
$ yes "" | make oldconfig
$ # In the following step, enable: INTEL_SGX and INTEL_SGX_VIRTUALIZATION
$ make menuconfig
$ make -j$(($(nproc)-1)) bzImage
$ make -j$(($(nproc)-1)) modules
$ sudo make modules_install
$ sudo make install
$ popd
$ sudo reboot
```
> **Notes:**
> * Run: `mokutil --sb-state` to check whether secure boot is enabled, if so, you will need to sign the kernel.
> * You'll lose SGX support when a new distro kernel is installed and the system rebooted.
Once you have restarted your system with the brand new Linux kernel with SGX support, run
the following command to make sure it's enabled. If the output is empty, go to the BIOS
setup and enable SGX manually.
```sh
$ grep -o sgx /proc/cpuinfo
```
## Install Guest kernel with SGX support
Install the guest kernel in the Kata Containers directory so that it can be used to run
Kata Containers.
```sh
$ curl -LOk https://github.com/devimc/kvm-sgx/releases/download/v0.0.1/kata-virtiofs-sgx.tar.gz
$ sudo tar -xf kata-virtiofs-sgx.tar.gz -C /usr/share/kata-containers/
$ sudo sed -i 's|^kernel =.*|kernel = "/usr/share/kata-containers/vmlinux-virtiofs-sgx.container"|g' \
/usr/share/defaults/kata-containers/configuration.toml
```
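To confirm the `configuration.toml` now points to the SGX-enabled guest kernel:
```bash
$ grep '^kernel =' /usr/share/defaults/kata-containers/configuration.toml
```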
## Run Kata Containers with SGX enabled
Before running a Kata Container, make sure that your version of `crio` or `containerd`
supports annotations.
For `containerd`, check in `/etc/containerd/config.toml` that the list of `pod_annotations` passed
to the `sandbox` includes `["io.katacontainers.*", "sgx.intel.com/epc"]`.
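For example, a quick way to inspect the current setting (the exact section layout depends on your containerd version):
```bash
$ grep -B 2 -A 2 pod_annotations /etc/containerd/config.toml
```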
> `sgx.yaml`
```yaml
apiVersion: v1
kind: Pod
metadata:
name: sgx
annotations:
sgx.intel.com/epc: "32Mi"
spec:
terminationGracePeriodSeconds: 0
runtimeClassName: kata
containers:
- name: c1
image: busybox
command:
- sh
stdin: true
tty: true
volumeMounts:
- mountPath: /dev/sgx/
name: test-volume
volumes:
- name: test-volume
hostPath:
path: /dev/sgx/
type: Directory
```
```sh
$ kubectl apply -f sgx.yaml
$ kubectl exec -ti sgx -- ls /dev/sgx/
enclave provision
```
The output of the last command shouldn't be empty; otherwise, check
your system environment to make sure SGX is fully supported.
[1]: github.com/cloud-hypervisor/cloud-hypervisor/
View File
@@ -10,6 +10,9 @@ Currently, the instructions are based on the following links:
- https://docs.openstack.org/zun/latest/admin/clear-containers.html
- ../install/ubuntu-installation-guide.md
## Install Git to use with DevStack
```sh
@@ -51,7 +54,7 @@ $ zun delete test
## Install Kata Containers
Follow [these instructions](../install/README.md)
Follow [these instructions](../install/ubuntu-installation-guide.md)
to install the Kata Containers components.
## Update Docker with new Kata Containers runtime
View File
@@ -21,7 +21,12 @@ const LOG_LEVELS: &[(&str, slog::Level)] = &[
];
// XXX: 'writer' param used to make testing possible.
pub fn create_logger<W>(name: &str, source: &str, level: slog::Level, writer: W) -> slog::Logger
pub fn create_logger<W>(
name: &str,
source: &str,
level: slog::Level,
writer: W,
) -> (slog::Logger, slog_async::AsyncGuard)
where
W: Write + Send + Sync + 'static,
{
@@ -37,17 +42,21 @@ where
let filter_drain = RuntimeLevelFilter::new(unique_drain, level).fuse();
// Ensure the logger is thread-safe
let async_drain = slog_async::Async::new(filter_drain).build().fuse();
let (async_drain, guard) = slog_async::Async::new(filter_drain)
.thread_name("slog-async-logger".into())
.build_with_guard();
// Add some "standard" fields
slog::Logger::root(
let logger = slog::Logger::root(
async_drain.fuse(),
o!("version" => env!("CARGO_PKG_VERSION"),
"subsystem" => "root",
"pid" => process::id().to_string(),
"name" => name.to_string(),
"source" => source.to_string()),
)
);
(logger, guard)
}
pub fn get_log_levels() -> Vec<&'static str> {
View File
@@ -228,6 +228,7 @@ parts:
- libffi-dev
- libmount-dev
- libselinux1-dev
- ninja-build
override-build: |
yq=${SNAPCRAFT_STAGE}/yq
export GOPATH=${SNAPCRAFT_STAGE}/gopath
@@ -244,10 +245,11 @@ parts:
;;
*)
branch="$(${yq} r ${versions_file} assets.hypervisor.qemu.tag)"
branch="$(${yq} r ${versions_file} assets.hypervisor.qemu.version)"
url="$(${yq} r ${versions_file} assets.hypervisor.qemu.url)"
commit=""
patches_dir="${kata_dir}/tools/packaging/qemu/patches/$(echo ${branch} | sed -e 's/.[[:digit:]]*$//' -e 's/^v//').x"
patches_version_dir="${kata_dir}/tools/packaging/qemu/patches/tag_patches/${branch}"
;;
esac
@@ -260,31 +262,23 @@ parts:
[ -n "$(ls -A ui/keycodemapdb)" ] || git clone https://github.com/qemu/keycodemapdb ui/keycodemapdb/
[ -n "$(ls -A capstone)" ] || git clone https://github.com/qemu/capstone capstone
# Apply patches
for patch in ${patches_dir}/*.patch; do
echo "Applying $(basename "$patch") ..."
patch \
--batch \
--forward \
--strip 1 \
--input "$patch"
done
# Apply branch patches
${kata_dir}/tools/packaging/scripts/apply_patches.sh "${patches_dir}"
${kata_dir}/tools/packaging/scripts/apply_patches.sh "${patches_version_dir}"
# Only x86_64 supports libpmem
[ "$(uname -m)" = "x86_64" ] && sudo apt-get --no-install-recommends install -y apt-utils ca-certificates libpmem-dev libseccomp-dev
configure_hypervisor=${kata_dir}/tools/packaging/scripts/configure-hypervisor.sh
chmod +x ${configure_hypervisor}
# static build
echo "$(${configure_hypervisor} -s qemu) \
--disable-rbd
--prefix=/snap/${SNAPCRAFT_PROJECT_NAME}/current/usr \
--datadir=/snap/${SNAPCRAFT_PROJECT_NAME}/current/usr/share \
--libexecdir=/snap/${SNAPCRAFT_PROJECT_NAME}/current/usr/libexec/qemu" \
# static build. The --prefix, --libdir, --libexecdir, --datadir arguments are
# based on PREFIX and set by configure-hypervisor.sh
echo "$(PREFIX=/snap/${SNAPCRAFT_PROJECT_NAME}/current/usr ${configure_hypervisor} -s kata-qemu) \
--disable-rbd " \
| xargs ./configure
# Copy QEMU configurations (Kconfigs)
cp -a ${kata_dir}/tools/packaging/qemu/default-configs/* default-configs/
cp -a ${kata_dir}/tools/packaging/qemu/default-configs/* default-configs/devices/
# build and install
make -j $(($(nproc)-1))
@@ -295,7 +289,6 @@ parts:
- -usr/bin/qemu-pr-helper
- -usr/bin/virtfs-proxy-helper
- -usr/include/
- -usr/libexec/
- -usr/share/applications/
- -usr/share/icons/
- -usr/var/
@@ -307,4 +300,8 @@ parts:
apps:
runtime:
command: usr/bin/kata-runtime
shim:
command: usr/bin/containerd-shim-kata-v2
collect-data:
command: usr/bin/kata-collect-data.sh
src/agent/Cargo.lock generated
File diff suppressed because it is too large
View File
@@ -9,35 +9,50 @@ oci = { path = "oci" }
logging = { path = "../../pkg/logging" }
rustjail = { path = "rustjail" }
protocols = { path = "protocols" }
netlink = { path = "netlink", features = ["with-log", "with-agent-handler"] }
lazy_static = "1.3.0"
ttrpc = "0.3.0"
ttrpc = { version = "0.5.0", features = ["async", "protobuf-codec"], default-features = false }
protobuf = "=2.14.0"
libc = "0.2.58"
nix = "0.17.0"
prctl = "1.0.0"
serde_json = "1.0.39"
signal-hook = "0.1.9"
scan_fmt = "0.2.3"
scopeguard = "1.0.0"
regex = "1"
async-trait = "0.1.42"
tokio = { version = "1.2.0", features = ["rt", "rt-multi-thread", "sync", "macros", "io-util", "time", "signal", "io-std", "process", "fs"] }
futures = "0.3.12"
netlink-sys = { version = "0.6.0", features = ["tokio_socket",]}
tokio-vsock = "0.3.0"
rtnetlink = "0.7.0"
netlink-packet-utils = "0.4.0"
ipnetwork = "0.17.0"
# slog:
# - Dynamic keys required to allow HashMap keys to be slog::Serialized.
# - The 'max_*' features allow changing the log level at runtime
# (by stopping the compiler from removing log calls).
slog = { version = "2.5.2", features = ["dynamic-keys", "max_level_trace", "release_max_level_info"] }
slog-scope = "4.1.2"
# Redirect ttrpc log calls
slog-stdlog = "4.0.0"
log = "0.4.11"
# for testing
tempfile = "3.1.0"
prometheus = { version = "0.9.0", features = ["process"] }
procfs = "0.7.9"
anyhow = "1.0.32"
cgroups = { git = "https://github.com/kata-containers/cgroups-rs", branch = "stable-0.1.1"}
cgroups = { package = "cgroups-rs", version = "0.2.2" }
[workspace]
members = [
"netlink",
"oci",
"protocols",
"rustjail",
]
[profile.release]
lto = true
View File
@@ -21,7 +21,6 @@ SOURCES := \
VERSION_FILE := ./VERSION
VERSION := $(shell grep -v ^\# $(VERSION_FILE))
COMMIT_NO := $(shell git rev-parse HEAD 2>/dev/null || true)
COMMIT_NO_SHORT := $(shell git rev-parse --short HEAD 2>/dev/null || true)
COMMIT := $(if $(shell git status --porcelain --untracked-files=no 2>/dev/null || true),${COMMIT_NO}-dirty,${COMMIT_NO})
COMMIT_MSG = $(if $(COMMIT),$(COMMIT),unknown)
@@ -133,6 +132,7 @@ $(GENERATED_FILES): %: %.in
optimize: $(SOURCES) | show-summary show-header
@RUSTFLAGS="-C link-arg=-s $(EXTRA_RUSTFLAGS) --deny-warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE)
##TARGET clippy: run clippy linter
clippy: $(GENERATED_CODE)
cargo clippy --all-targets --all-features --release \
View File
@@ -39,11 +39,27 @@ After that, we drafted the initial code here, and any contributions are welcome.
## Getting Started
### Build from Source
The rust-agent need to be built with rust newer than 1.37, and static linked with `musl`.
The rust-agent needs to be built statically and linked with `musl`.
> **Note:** skip this step for ppc64le, the build scripts explicitly use gnu for ppc64le.
```bash
rustup target add x86_64-unknown-linux-musl
sudo ln -s /usr/bin/g++ /bin/musl-g++
cargo build --target x86_64-unknown-linux-musl --release
$ arch=$(uname -m)
$ rustup target add "${arch}-unknown-linux-musl"
$ sudo ln -s /usr/bin/g++ /bin/musl-g++
```
ppc64le-only: Manually install `protoc`, e.g.
```bash
$ sudo dnf install protobuf-compiler
```
Download the source files in the Kata containers repository and build the agent:
```bash
$ GOPATH="${GOPATH:-$HOME/go}"
$ dir="$GOPATH/src/github.com/kata-containers"
$ git -C ${dir} clone --depth 1 https://github.com/kata-containers/kata-containers
$ make -C ${dir}/kata-containers/src/agent
```
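Assuming the default build layout, the resulting static binary ends up under `target/<arch>-unknown-linux-musl/release/`; for example, on x86_64 it can be inspected with:
```bash
$ file ${dir}/kata-containers/src/agent/target/x86_64-unknown-linux-musl/release/kata-agent
```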
## Run Kata CI with rust-agent
View File
@@ -1 +0,0 @@
../../VERSION
src/agent/VERSION Normal file
View File
@@ -0,0 +1 @@
2.0.0
View File
@@ -20,3 +20,5 @@ LimitNOFILE=infinity
# the runtime handles shutting down the VM.
ExecStop=/bin/sync ; /usr/bin/systemctl --force poweroff
FailureAction=poweroff
# Discourage OOM-killer from touching the agent
OOMScoreAdjust=-997
View File
@@ -1,20 +0,0 @@
[package]
name = "netlink"
version = "0.1.0"
authors = ["The Kata Containers community <kata-dev@lists.katacontainers.io>"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
libc = "0.2.58"
nix = "0.17.0"
protobuf = { version = "=2.14.0", optional = true }
protocols = { path = "../protocols", optional = true }
slog = { version = "2.5.2", features = ["dynamic-keys", "max_level_trace", "release_max_level_info"], optional = true }
slog-scope = { version = "4.1.2", optional = true }
[features]
with-log = ["slog", "slog-scope"]
with-agent-handler = ["protobuf", "protocols"]
View File
@@ -1,572 +0,0 @@
// Copyright (c) 2020 Ant Financial
// Copyright (C) 2020 Alibaba Cloud. All rights reserved.
//
// SPDX-License-Identifier: Apache-2.0
//
//! Dedicated Netlink interfaces for Kata agent protocol handler.
use std::convert::TryFrom;
use protobuf::RepeatedField;
use protocols::types::{ARPNeighbor, IPAddress, IPFamily, Interface, Route};
use super::*;
#[cfg(feature = "with-log")]
// Convenience macro to obtain the scope logger
macro_rules! sl {
() => {
slog_scope::logger().new(o!("subsystem" => "netlink"))
};
}
impl super::RtnlHandle {
pub fn update_interface(&mut self, iface: &Interface) -> Result<Interface> {
// the reliable way to find link is using hardware address
// as filter. However, hardware filter might not be supported
// by netlink, we may have to dump the link list and then find the
// target link. filter using name or family is supported, but
// we cannot use that to find target link.
// let's try if hardware address filter works. -_-
let ifinfo = self.find_link_by_hwaddr(iface.hwAddr.as_str())?;
// bring down interface if it is up
if ifinfo.ifi_flags & libc::IFF_UP as u32 != 0 {
self.set_link_status(&ifinfo, false)?;
}
// delete all addresses associated with the link
let del_addrs: Vec<RtIPAddr> = self.get_link_addresses(&ifinfo)?;
self.delete_all_addrs(&ifinfo, del_addrs.as_ref())?;
// add new ip addresses in request
for grpc_addr in &iface.IPAddresses {
let rtip = RtIPAddr::try_from(grpc_addr.clone())?;
self.add_one_address(&ifinfo, &rtip)?;
}
let mut v: Vec<u8> = vec![0; DEFAULT_NETLINK_BUF_SIZE];
// Safe because we have allocated enough buffer space.
let nlh = unsafe { &mut *(v.as_mut_ptr() as *mut nlmsghdr) };
let ifi = unsafe { &mut *(NLMSG_DATA!(nlh) as *mut ifinfomsg) };
// set name, set mtu, IFF_NOARP. in one rtnl_talk.
nlh.nlmsg_len = NLMSG_LENGTH!(mem::size_of::<ifinfomsg>() as u32) as __u32;
nlh.nlmsg_type = RTM_NEWLINK;
nlh.nlmsg_flags = NLM_F_REQUEST;
self.assign_seqnum(nlh);
ifi.ifi_family = ifinfo.ifi_family;
ifi.ifi_type = ifinfo.ifi_type;
ifi.ifi_index = ifinfo.ifi_index;
if iface.raw_flags & libc::IFF_NOARP as u32 != 0 {
ifi.ifi_change |= libc::IFF_NOARP as u32;
ifi.ifi_flags |= libc::IFF_NOARP as u32;
}
// Safe because we have allocated enough buffer space.
unsafe {
nlh.addattr32(IFLA_MTU, iface.mtu as u32);
// if str is null terminated, use addattr_var.
// otherwise, use addattr_str
nlh.addattr_var(IFLA_IFNAME, iface.name.as_ref());
}
self.rtnl_talk(v.as_mut_slice(), false)?;
// TODO: why the result is ignored here?
let _ = self.set_link_status(&ifinfo, true);
Ok(iface.clone())
}
/// Delete this interface/link per request
pub fn remove_interface(&mut self, iface: &Interface) -> Result<Interface> {
let ifinfo = self.find_link_by_hwaddr(iface.hwAddr.as_str())?;
self.set_link_status(&ifinfo, false)?;
let mut v: Vec<u8> = vec![0; DEFAULT_NETLINK_BUF_SIZE];
// Safe because we have allocated enough buffer space.
let nlh = unsafe { &mut *(v.as_mut_ptr() as *mut nlmsghdr) };
let ifi = unsafe { &mut *(NLMSG_DATA!(nlh) as *mut ifinfomsg) };
// No attributes needed?
nlh.nlmsg_len = NLMSG_LENGTH!(mem::size_of::<ifinfomsg>()) as __u32;
nlh.nlmsg_type = RTM_DELLINK;
nlh.nlmsg_flags = NLM_F_REQUEST;
self.assign_seqnum(nlh);
ifi.ifi_family = ifinfo.ifi_family;
ifi.ifi_index = ifinfo.ifi_index;
ifi.ifi_type = ifinfo.ifi_type;
self.rtnl_talk(v.as_mut_slice(), false)?;
Ok(iface.clone())
}
pub fn list_interfaces(&mut self) -> Result<Vec<Interface>> {
let mut ifaces: Vec<Interface> = Vec::new();
let (_slv, lv) = self.dump_all_links()?;
let (_sav, av) = self.dump_all_addresses(0)?;
for link in &lv {
// Safe because dump_all_links() returns valid pointers.
let nlh = unsafe { &**link };
if nlh.nlmsg_type != RTM_NEWLINK && nlh.nlmsg_type != RTM_DELLINK {
continue;
}
if nlh.nlmsg_len < NLMSG_SPACE!(mem::size_of::<ifinfomsg>()) {
info!(
sl!(),
"invalid nlmsg! nlmsg_len: {}, nlmsg_space: {}",
nlh.nlmsg_len,
NLMSG_SPACE!(mem::size_of::<ifinfomsg>())
);
break;
}
// Safe because we have just validated available buffer space above.
let ifi = unsafe { &*(NLMSG_DATA!(nlh) as *const ifinfomsg) };
let rta: *mut rtattr = IFLA_RTA!(ifi as *const ifinfomsg) as *mut rtattr;
let rtalen = IFLA_PAYLOAD!(nlh) as u32;
let attrs = unsafe { parse_attrs(rta, rtalen, (IFLA_MAX + 1) as usize)? };
// fill out some fields of Interface,
let mut iface: Interface = Interface::default();
// Safe because parse_attrs() returns valid pointers.
unsafe {
if !attrs[IFLA_IFNAME as usize].is_null() {
let t = attrs[IFLA_IFNAME as usize];
iface.name = String::from_utf8(getattr_var(t as *const rtattr))?;
}
if !attrs[IFLA_MTU as usize].is_null() {
let t = attrs[IFLA_MTU as usize];
iface.mtu = getattr32(t) as u64;
}
if !attrs[IFLA_ADDRESS as usize].is_null() {
let alen = RTA_PAYLOAD!(attrs[IFLA_ADDRESS as usize]);
let a: *const u8 = RTA_DATA!(attrs[IFLA_ADDRESS as usize]) as *const u8;
iface.hwAddr = parser::format_address(a, alen as u32)?;
}
}
// get ip address info from av
let mut ads: Vec<IPAddress> = Vec::new();
for address in &av {
// Safe because dump_all_addresses() returns valid pointers.
let alh = unsafe { &**address };
if alh.nlmsg_type != RTM_NEWADDR {
continue;
}
let tlen = NLMSG_SPACE!(mem::size_of::<ifaddrmsg>());
if alh.nlmsg_len < tlen {
info!(
sl!(),
"invalid nlmsg! nlmsg_len: {}, nlmsg_space: {}", alh.nlmsg_len, tlen
);
break;
}
// Safe because we have checked available buffer space by NLMSG_SPACE above.
let ifa = unsafe { &*(NLMSG_DATA!(alh) as *const ifaddrmsg) };
let arta: *mut rtattr = IFA_RTA!(ifa) as *mut rtattr;
let artalen = IFA_PAYLOAD!(alh) as u32;
if ifa.ifa_index as u32 == ifi.ifi_index as u32 {
// found target addresses, parse attributes and fill out Interface
let addrs = unsafe { parse_attrs(arta, artalen, (IFA_MAX + 1) as usize)? };
// fill address field of Interface
let mut one: IPAddress = IPAddress::default();
let tattr: *const rtattr = if !addrs[IFA_ADDRESS as usize].is_null() {
addrs[IFA_ADDRESS as usize]
} else {
addrs[IFA_LOCAL as usize]
};
one.mask = format!("{}", ifa.ifa_prefixlen);
one.family = IPFamily::v4;
if ifa.ifa_family == libc::AF_INET6 as u8 {
one.family = IPFamily::v6;
}
// Safe because parse_attrs() returns valid pointers.
unsafe {
let a: *const u8 = RTA_DATA!(tattr) as *const u8;
let alen = RTA_PAYLOAD!(tattr);
one.address = parser::format_address(a, alen as u32)?;
}
ads.push(one);
}
}
iface.IPAddresses = RepeatedField::from_vec(ads);
ifaces.push(iface);
}
Ok(ifaces)
}
pub fn update_routes(&mut self, rt: &[Route]) -> Result<Vec<Route>> {
let rs = self.get_all_routes()?;
self.delete_all_routes(&rs)?;
for grpcroute in rt {
if grpcroute.gateway.as_str() == "" {
let r = RtRoute::try_from(grpcroute.clone())?;
if r.index == -1 {
continue;
}
self.add_one_route(&r)?;
}
}
for grpcroute in rt {
if grpcroute.gateway.as_str() != "" {
let r = RtRoute::try_from(grpcroute.clone())?;
if r.index == -1 {
continue;
}
self.add_one_route(&r)?;
}
}
Ok(rt.to_owned())
}
pub fn list_routes(&mut self) -> Result<Vec<Route>> {
// currently, only dump routes from main table for ipv4
// ie, rtmsg.rtmsg_family = AF_INET, set RT_TABLE_MAIN
// attribute in dump request
// Fix Me: think about other tables, ipv6..
let mut rs: Vec<Route> = Vec::new();
let (_srv, rv) = self.dump_all_routes()?;
// parse out routes and store in rs
for r in &rv {
// Safe because dump_all_routes() returns valid pointers.
let nlh = unsafe { &**r };
if nlh.nlmsg_type != RTM_NEWROUTE && nlh.nlmsg_type != RTM_DELROUTE {
info!(sl!(), "not route message!");
continue;
}
let tlen = NLMSG_SPACE!(mem::size_of::<rtmsg>());
if nlh.nlmsg_len < tlen {
info!(
sl!(),
"invalid nlmsg! nlmsg_len: {}, nlmsg_spae: {}", nlh.nlmsg_len, tlen
);
break;
}
// Safe because we have just validated available buffer space above.
let rtm = unsafe { &mut *(NLMSG_DATA!(nlh) as *mut rtmsg) };
if rtm.rtm_table != RT_TABLE_MAIN as u8 {
continue;
}
let rta: *mut rtattr = RTM_RTA!(rtm) as *mut rtattr;
let rtalen = RTM_PAYLOAD!(nlh) as u32;
let attrs = unsafe { parse_attrs(rta, rtalen, (RTA_MAX + 1) as usize)? };
let t = attrs[RTA_TABLE as usize];
if !t.is_null() {
// Safe because parse_attrs() returns valid pointers
let table = unsafe { getattr32(t) };
if table != RT_TABLE_MAIN {
continue;
}
}
// find source, destination, gateway, scope, and device name
let mut t = attrs[RTA_DST as usize];
let mut rte: Route = Route::default();
// Safe because parse_attrs() returns valid pointers
unsafe {
// destination
if !t.is_null() {
let data: *const u8 = RTA_DATA!(t) as *const u8;
let len = RTA_PAYLOAD!(t) as u32;
rte.dest =
format!("{}/{}", parser::format_address(data, len)?, rtm.rtm_dst_len);
}
// gateway
t = attrs[RTA_GATEWAY as usize];
if !t.is_null() {
let data: *const u8 = RTA_DATA!(t) as *const u8;
let len = RTA_PAYLOAD!(t) as u32;
rte.gateway = parser::format_address(data, len)?;
// for gateway, destination is 0.0.0.0
rte.dest = "0.0.0.0".to_string();
}
// source
t = attrs[RTA_SRC as usize];
if t.is_null() {
t = attrs[RTA_PREFSRC as usize];
}
if !t.is_null() {
let data: *const u8 = RTA_DATA!(t) as *const u8;
let len = RTA_PAYLOAD!(t) as u32;
rte.source = parser::format_address(data, len)?;
if rtm.rtm_src_len != 0 {
rte.source = format!("{}/{}", rte.source.as_str(), rtm.rtm_src_len);
}
}
// scope
rte.scope = rtm.rtm_scope as u32;
// oif
t = attrs[RTA_OIF as usize];
if !t.is_null() {
let data = &*(RTA_DATA!(t) as *const i32);
assert_eq!(RTA_PAYLOAD!(t), 4);
rte.device = self
.get_name_by_index(*data)
.unwrap_or_else(|_| "unknown".to_string());
}
}
rs.push(rte);
}
Ok(rs)
}
pub fn add_arp_neighbors(&mut self, neighs: &[ARPNeighbor]) -> Result<()> {
for neigh in neighs {
self.add_one_arp_neighbor(&neigh)?;
}
Ok(())
}
pub fn add_one_arp_neighbor(&mut self, neigh: &ARPNeighbor) -> Result<()> {
let to_ip = match neigh.toIPAddress.as_ref() {
None => return nix_errno(Errno::EINVAL),
Some(v) => {
if v.address.is_empty() {
return nix_errno(Errno::EINVAL);
}
v.address.as_ref()
}
};
let dev = self.find_link_by_name(&neigh.device)?;
let mut v: Vec<u8> = vec![0; DEFAULT_NETLINK_BUF_SIZE];
// Safe because we have allocated enough buffer space.
let nlh = unsafe { &mut *(v.as_mut_ptr() as *mut nlmsghdr) };
let ndm = unsafe { &mut *(NLMSG_DATA!(nlh) as *mut ndmsg) };
nlh.nlmsg_len = NLMSG_LENGTH!(std::mem::size_of::<ndmsg>()) as u32;
nlh.nlmsg_type = RTM_NEWNEIGH;
nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
self.assign_seqnum(nlh);
ndm.ndm_family = libc::AF_UNSPEC as __u8;
ndm.ndm_state = IFA_F_PERMANENT as __u16;
// process lladdr
if neigh.lladdr != "" {
let llabuf = parser::parse_mac_addr(&neigh.lladdr)?;
// Safe because we have allocated enough buffer space.
unsafe { nlh.addattr_var(NDA_LLADDR, llabuf.as_ref()) };
}
let (family, ip_data) = parser::parse_ip_addr_with_family(&to_ip)?;
ndm.ndm_family = family;
// Safe because we have allocated enough buffer space.
unsafe { nlh.addattr_var(NDA_DST, ip_data.as_ref()) };
// process state
if neigh.state != 0 {
ndm.ndm_state = neigh.state as __u16;
}
// process flags
ndm.ndm_flags = (*ndm).ndm_flags | neigh.flags as __u8;
// process dev
ndm.ndm_ifindex = dev.ifi_index;
// send
self.rtnl_talk(v.as_mut_slice(), false)?;
Ok(())
}
}
impl TryFrom<IPAddress> for RtIPAddr {
type Error = nix::Error;
fn try_from(ipi: IPAddress) -> std::result::Result<Self, Self::Error> {
let ip_family = if ipi.family == IPFamily::v4 {
libc::AF_INET
} else {
libc::AF_INET6
} as __u8;
let ip_mask = parser::parse_u8(ipi.mask.as_str(), 10)?;
let addr = parser::parse_ip_addr(ipi.address.as_ref())?;
Ok(Self {
ip_family,
ip_mask,
addr,
})
}
}
impl TryFrom<Route> for RtRoute {
type Error = nix::Error;
fn try_from(r: Route) -> std::result::Result<Self, Self::Error> {
// only handle ipv4
let index = {
let mut rh = RtnlHandle::new(NETLINK_ROUTE, 0)?;
match rh.find_link_by_name(r.device.as_str()) {
Ok(ifi) => ifi.ifi_index,
Err(_) => -1,
}
};
let (dest, dst_len) = if r.dest.is_empty() {
(Some(vec![0 as u8; 4]), 0)
} else {
let (dst, mask) = parser::parse_cidr(r.dest.as_str())?;
(Some(dst), mask)
};
let (source, src_len) = if r.source.is_empty() {
(None, 0)
} else {
let (src, mask) = parser::parse_cidr(r.source.as_str())?;
(Some(src), mask)
};
let gateway = if r.gateway.is_empty() {
None
} else {
Some(parser::parse_ip_addr(r.gateway.as_str())?)
};
Ok(Self {
dest,
source,
src_len,
dst_len,
index,
gateway,
scope: r.scope as u8,
protocol: RTPROTO_UNSPEC,
})
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{RtnlHandle, NETLINK_ROUTE};
use protocols::types::IPAddress;
use std::process::Command;
fn clean_env_for_test_add_one_arp_neighbor(dummy_name: &str, ip: &str) {
// ip link delete dummy
Command::new("ip")
.args(&["link", "delete", dummy_name])
.output()
.expect("prepare: failed to delete dummy");
// ip neigh del dev dummy ip
Command::new("ip")
.args(&["neigh", "del", dummy_name, ip])
.output()
.expect("prepare: failed to delete neigh");
}
fn prepare_env_for_test_add_one_arp_neighbor(dummy_name: &str, ip: &str) {
clean_env_for_test_add_one_arp_neighbor(dummy_name, ip);
// modprobe dummy
Command::new("modprobe")
.arg("dummy")
.output()
.expect("failed to run modprobe dummy");
// ip link add dummy type dummy
Command::new("ip")
.args(&["link", "add", dummy_name, "type", "dummy"])
.output()
.expect("failed to add dummy interface");
// ip addr add 192.168.0.2/16 dev dummy
Command::new("ip")
.args(&["addr", "add", "192.168.0.2/16", "dev", dummy_name])
.output()
.expect("failed to add ip for dummy");
// ip link set dummy up;
Command::new("ip")
.args(&["link", "set", dummy_name, "up"])
.output()
.expect("failed to up dummy");
}
#[test]
fn test_add_one_arp_neighbor() {
// skip_if_not_root
if !nix::unistd::Uid::effective().is_root() {
println!("INFO: skipping {} which needs root", module_path!());
return;
}
let mac = "6a:92:3a:59:70:aa";
let to_ip = "169.254.1.1";
let dummy_name = "dummy_for_arp";
prepare_env_for_test_add_one_arp_neighbor(dummy_name, to_ip);
let mut ip_address = IPAddress::new();
ip_address.set_address(to_ip.to_string());
let mut neigh = ARPNeighbor::new();
neigh.set_toIPAddress(ip_address);
neigh.set_device(dummy_name.to_string());
neigh.set_lladdr(mac.to_string());
neigh.set_state(0x80);
let mut rtnl = RtnlHandle::new(NETLINK_ROUTE, 0).unwrap();
rtnl.add_one_arp_neighbor(&neigh).unwrap();
// ip neigh show dev dummy ip
let stdout = Command::new("ip")
.args(&["neigh", "show", "dev", dummy_name, to_ip])
.output()
.expect("failed to show neigh")
.stdout;
let stdout = std::str::from_utf8(&stdout).expect("failed to convert stdout");
assert_eq!(stdout, format!("{} lladdr {} PERMANENT\n", to_ip, mac));
clean_env_for_test_add_one_arp_neighbor(dummy_name, to_ip);
}
}
File diff suppressed because it is too large
View File
@@ -1,201 +0,0 @@
// Copyright (c) 2019 Ant Financial
//
// SPDX-License-Identifier: Apache-2.0
//! Parser for IPv4/IPv6/MAC addresses.
use std::net::{Ipv4Addr, Ipv6Addr};
use std::str::FromStr;
use super::{Errno, Result, __u8, nix_errno};
#[inline]
pub(crate) fn parse_u8(s: &str, radix: u32) -> Result<u8> {
if radix >= 2 && radix <= 36 {
u8::from_str_radix(s, radix).map_err(|_| nix::Error::Sys(Errno::EINVAL))
} else {
u8::from_str(s).map_err(|_| nix::Error::Sys(Errno::EINVAL))
}
}
pub fn parse_ipv4_addr(s: &str) -> Result<Vec<u8>> {
match Ipv4Addr::from_str(s) {
Ok(v) => Ok(Vec::from(v.octets().as_ref())),
Err(_e) => nix_errno(Errno::EINVAL),
}
}
pub fn parse_ip_addr(s: &str) -> Result<Vec<u8>> {
if let Ok(v6) = Ipv6Addr::from_str(s) {
Ok(Vec::from(v6.octets().as_ref()))
} else {
parse_ipv4_addr(s)
}
}
pub fn parse_ip_addr_with_family(ip_address: &str) -> Result<(__u8, Vec<u8>)> {
if let Ok(v6) = Ipv6Addr::from_str(ip_address) {
Ok((libc::AF_INET6 as __u8, Vec::from(v6.octets().as_ref())))
} else {
parse_ipv4_addr(ip_address).map(|v| (libc::AF_INET as __u8, v))
}
}
pub fn parse_ipv4_cidr(s: &str) -> Result<(Vec<u8>, u8)> {
let fields: Vec<&str> = s.split('/').collect();
if fields.len() != 2 {
nix_errno(Errno::EINVAL)
} else {
Ok((parse_ipv4_addr(fields[0])?, parse_u8(fields[1], 10)?))
}
}
pub fn parse_cidr(s: &str) -> Result<(Vec<u8>, u8)> {
let fields: Vec<&str> = s.split('/').collect();
if fields.len() != 2 {
nix_errno(Errno::EINVAL)
} else {
Ok((parse_ip_addr(fields[0])?, parse_u8(fields[1], 10)?))
}
}
pub fn parse_mac_addr(hwaddr: &str) -> Result<Vec<u8>> {
let fields: Vec<&str> = hwaddr.split(':').collect();
if fields.len() != 6 {
nix_errno(Errno::EINVAL)
} else {
Ok(vec![
parse_u8(fields[0], 16)?,
parse_u8(fields[1], 16)?,
parse_u8(fields[2], 16)?,
parse_u8(fields[3], 16)?,
parse_u8(fields[4], 16)?,
parse_u8(fields[5], 16)?,
])
}
}
/// Format an IPv4/IPv6/MAC address.
///
/// # Safety
/// Caller needs to ensure that addr and len are valid.
pub unsafe fn format_address(addr: *const u8, len: u32) -> Result<String> {
let mut a: String;
if len == 4 {
// ipv4
let mut i = 1;
let mut p = addr as i64;
a = format!("{}", *(p as *const u8));
while i < len {
p += 1;
i += 1;
a.push_str(format!(".{}", *(p as *const u8)).as_str());
}
return Ok(a);
}
if len == 6 {
// hwaddr
let mut i = 1;
let mut p = addr as i64;
a = format!("{:0>2X}", *(p as *const u8));
while i < len {
p += 1;
i += 1;
a.push_str(format!(":{:0>2X}", *(p as *const u8)).as_str());
}
return Ok(a);
}
if len == 16 {
// ipv6
let p = addr as *const u8 as *const libc::c_void;
let mut ar: [u8; 16] = [0; 16];
let mut v: Vec<u8> = vec![0; 16];
let dp: *mut libc::c_void = v.as_mut_ptr() as *mut libc::c_void;
libc::memcpy(dp, p, 16);
ar.copy_from_slice(v.as_slice());
return Ok(Ipv6Addr::from(ar).to_string());
}
nix_errno(Errno::EINVAL)
}
#[cfg(test)]
mod tests {
use super::*;
use libc;
#[test]
fn test_ip_addr() {
let ip = parse_ipv4_addr("1.2.3.4").unwrap();
assert_eq!(ip, vec![0x1u8, 0x2u8, 0x3u8, 0x4u8]);
parse_ipv4_addr("1.2.3.4.5").unwrap_err();
parse_ipv4_addr("1.2.3-4").unwrap_err();
parse_ipv4_addr("1.2.3.a").unwrap_err();
parse_ipv4_addr("1.2.3.x").unwrap_err();
parse_ipv4_addr("-1.2.3.4").unwrap_err();
parse_ipv4_addr("+1.2.3.4").unwrap_err();
let (family, _) = parse_ip_addr_with_family("192.168.1.1").unwrap();
assert_eq!(family, libc::AF_INET as __u8);
let (family, ip) =
parse_ip_addr_with_family("2001:0db8:85a3:0000:0000:8a2e:0370:7334").unwrap();
assert_eq!(family, libc::AF_INET6 as __u8);
assert_eq!(ip.len(), 16);
parse_ip_addr_with_family("2001:0db8:85a3:0000:0000:8a2e:0370:73345").unwrap_err();
let ip = parse_ip_addr("::1").unwrap();
assert_eq!(ip[0], 0x0);
assert_eq!(ip[15], 0x1);
}
#[test]
fn test_parse_cidr() {
let (_, mask) = parse_ipv4_cidr("1.2.3.4/31").unwrap();
assert_eq!(mask, 31);
parse_ipv4_cidr("1.2.3/4/31").unwrap_err();
parse_ipv4_cidr("1.2.3.4/f").unwrap_err();
parse_ipv4_cidr("1.2.3/8").unwrap_err();
parse_ipv4_cidr("1.2.3.4.8").unwrap_err();
let (ip, mask) = parse_cidr("2001:db8:a::123/64").unwrap();
assert_eq!(mask, 64);
assert_eq!(ip[0], 0x20);
assert_eq!(ip[15], 0x23);
}
#[test]
fn test_parse_mac_addr() {
let mac = parse_mac_addr("FF:FF:FF:FF:FF:FE").unwrap();
assert_eq!(mac.len(), 6);
assert_eq!(mac[0], 0xff);
assert_eq!(mac[5], 0xfe);
parse_mac_addr("FF:FF:FF:FF:FF:FE:A0").unwrap_err();
parse_mac_addr("FF:FF:FF:FF:FF:FX").unwrap_err();
parse_mac_addr("FF:FF:FF:FF:FF").unwrap_err();
}
#[test]
fn test_format_address() {
let buf = [1u8, 2u8, 3u8, 4u8];
let addr = unsafe { format_address(&buf as *const u8, 4).unwrap() };
assert_eq!(addr, "1.2.3.4");
let buf = [1u8, 2u8, 3u8, 4u8, 5u8, 6u8];
let addr = unsafe { format_address(&buf as *const u8, 6).unwrap() };
assert_eq!(addr, "01:02:03:04:05:06");
}
}
View File
@@ -5,9 +5,9 @@ authors = ["The Kata Containers community <kata-dev@lists.katacontainers.io>"]
edition = "2018"
[dependencies]
ttrpc = "0.3.0"
ttrpc = { version = "0.5.0", features = ["async"] }
async-trait = "0.1.42"
protobuf = "=2.14.0"
futures = "0.1.27"
[build-dependencies]
ttrpc-codegen = "0.1.2"
ttrpc-codegen = "0.2.0"
View File
@@ -3,8 +3,8 @@
// SPDX-License-Identifier: Apache-2.0
//
use std::fs::File;
use std::io::{Read, Write};
use std::fs;
use ttrpc_codegen::{Codegen, Customize};
fn main() {
let protos = vec![
@@ -15,16 +15,15 @@ fn main() {
"protos/oci.proto",
];
// Tell Cargo that if the .proto files changed, to rerun this build script.
protos
.iter()
.for_each(|p| println!("cargo:rerun-if-changed={}", &p));
ttrpc_codegen::Codegen::new()
Codegen::new()
.out_dir("src")
.inputs(&protos)
.include("protos")
.rust_protobuf()
.customize(Customize {
async_server: true,
..Default::default()
})
.run()
.expect("Gen codes failed.");
@@ -40,15 +39,6 @@ fn main() {
}
fn replace_text_in_file(file_name: &str, from: &str, to: &str) -> Result<(), std::io::Error> {
let mut src = File::open(file_name)?;
let mut contents = String::new();
src.read_to_string(&mut contents).unwrap();
drop(src);
let new_contents = contents.replace(from, to);
let mut dst = File::create(&file_name)?;
dst.write_all(new_contents.as_bytes())?;
Ok(())
let new_contents = fs::read_to_string(file_name)?.replace(from, to);
fs::write(&file_name, new_contents.as_bytes())
}
View File
@@ -47,7 +47,7 @@ show_usage() {
}
generate_go_sources() {
local cmd="protoc -I$GOPATH/src/github.com/kata-containers/agent/vendor/github.com/gogo/protobuf:$GOPATH/src/github.com/kata-containers/agent/vendor:$GOPATH/src/github.com/gogo/protobuf:$GOPATH/src/github.com/gogo/googleapis:$GOPATH/src:$GOPATH/src/github.com/kata-containers/kata-containers/src/agent/protocols/protos \
local cmd="protoc -I$GOPATH/src:$GOPATH/src/github.com/kata-containers/kata-containers/src/agent/protocols/protos \
--gogottrpc_out=plugins=ttrpc+fieldpath,\
import_path=github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/grpc,\
\
@@ -80,12 +80,6 @@ fi;
which protoc
[ $? -eq 0 ] || die "Please install protoc from github.com/protocolbuffers/protobuf"
which protoc-gen-rust
[ $? -eq 0 ] || die "Please install protobuf-codegen from github.com/pingcap/grpc-rs"
which ttrpc_rust_plugin
[ $? -eq 0 ] || die "Please install ttrpc_rust_plugin from https://github.com/containerd/ttrpc-rust"
which protoc-gen-gogottrpc
[ $? -eq 0 ] || die "Please install protoc-gen-gogottrpc from https://github.com/containerd/ttrpc"
View File
@@ -12,7 +12,6 @@ option go_package = "github.com/kata-containers/kata-containers/src/runtime/virt
package grpc;
import "gogo/protobuf/gogoproto/gogo.proto";
import "google/protobuf/wrappers.proto";
option (gogoproto.equal_all) = true;
option (gogoproto.populate_all) = true;
View File
@@ -29,10 +29,8 @@ message Interface {
uint64 mtu = 4;
string hwAddr = 5;
// pciAddr is the PCI address in the format "bridgeAddr/deviceAddr".
// Here, bridgeAddr is the address at which the bridge is attached on the root bus,
// while deviceAddr is the address at which the network device is attached on the bridge.
string pciAddr = 6;
// PCI path for the device (see the pci::Path (Rust) or types.PciPath (Go) type for format details)
string pciPath = 6;
// Type defines the type of interface described by this structure.
// The expected values are the one that are defined by the netlink
View File
@@ -10,22 +10,27 @@ serde_json = "1.0.39"
serde_derive = "1.0.91"
oci = { path = "../oci" }
protocols = { path ="../protocols" }
caps = "0.3.0"
caps = "0.5.0"
nix = "0.17.0"
scopeguard = "1.0.0"
prctl = "1.0.0"
lazy_static = "1.3.0"
libc = "0.2.58"
protobuf = "2.8.1"
protobuf = "=2.14.0"
slog = "2.5.2"
slog-scope = "4.1.2"
scan_fmt = "0.2"
regex = "1.1"
path-absolutize = "1.2.0"
dirs = "3.0.1"
anyhow = "1.0.32"
cgroups = { git = "https://github.com/kata-containers/cgroups-rs", branch = "stable-0.1.1"}
cgroups = { package = "cgroups-rs", version = "0.2.1" }
tempfile = "3.1.0"
rlimit = "0.5.3"
tokio = { version = "1.2.0", features = ["sync", "io-util", "process", "time", "macros"] }
futures = "0.3"
async-trait = "0.1.31"
inotify = "0.9.2"
[dev-dependencies]
serial_test = "0.5.0"
View File
@@ -9,97 +9,44 @@
use crate::log_child;
use crate::sync::write_count;
use anyhow::{anyhow, Result};
use caps::{self, CapSet, Capability, CapsHashSet};
use caps::{self, runtime, CapSet, Capability, CapsHashSet};
use oci::LinuxCapabilities;
use std::collections::HashMap;
use std::os::unix::io::RawFd;
lazy_static! {
pub static ref CAPSMAP: HashMap<String, Capability> = {
let mut m = HashMap::new();
m.insert("CAP_CHOWN".to_string(), Capability::CAP_CHOWN);
m.insert("CAP_DAC_OVERRIDE".to_string(), Capability::CAP_DAC_OVERRIDE);
m.insert(
"CAP_DAC_READ_SEARCH".to_string(),
Capability::CAP_DAC_READ_SEARCH,
);
m.insert("CAP_FOWNER".to_string(), Capability::CAP_FOWNER);
m.insert("CAP_FSETID".to_string(), Capability::CAP_FSETID);
m.insert("CAP_KILL".to_string(), Capability::CAP_KILL);
m.insert("CAP_SETGID".to_string(), Capability::CAP_SETGID);
m.insert("CAP_SETUID".to_string(), Capability::CAP_SETUID);
m.insert("CAP_SETPCAP".to_string(), Capability::CAP_SETPCAP);
m.insert(
"CAP_LINUX_IMMUTABLE".to_string(),
Capability::CAP_LINUX_IMMUTABLE,
);
m.insert(
"CAP_NET_BIND_SERVICE".to_string(),
Capability::CAP_NET_BIND_SERVICE,
);
m.insert(
"CAP_NET_BROADCAST".to_string(),
Capability::CAP_NET_BROADCAST,
);
m.insert("CAP_NET_ADMIN".to_string(), Capability::CAP_NET_ADMIN);
m.insert("CAP_NET_RAW".to_string(), Capability::CAP_NET_RAW);
m.insert("CAP_IPC_LOCK".to_string(), Capability::CAP_IPC_LOCK);
m.insert("CAP_IPC_OWNER".to_string(), Capability::CAP_IPC_OWNER);
m.insert("CAP_SYS_MODULE".to_string(), Capability::CAP_SYS_MODULE);
m.insert("CAP_SYS_RAWIO".to_string(), Capability::CAP_SYS_RAWIO);
m.insert("CAP_SYS_CHROOT".to_string(), Capability::CAP_SYS_CHROOT);
m.insert("CAP_SYS_PTRACE".to_string(), Capability::CAP_SYS_PTRACE);
m.insert("CAP_SYS_PACCT".to_string(), Capability::CAP_SYS_PACCT);
m.insert("CAP_SYS_ADMIN".to_string(), Capability::CAP_SYS_ADMIN);
m.insert("CAP_SYS_BOOT".to_string(), Capability::CAP_SYS_BOOT);
m.insert("CAP_SYS_NICE".to_string(), Capability::CAP_SYS_NICE);
m.insert("CAP_SYS_RESOURCE".to_string(), Capability::CAP_SYS_RESOURCE);
m.insert("CAP_SYS_TIME".to_string(), Capability::CAP_SYS_TIME);
m.insert(
"CAP_SYS_TTY_CONFIG".to_string(),
Capability::CAP_SYS_TTY_CONFIG,
);
m.insert("CAP_MKNOD".to_string(), Capability::CAP_MKNOD);
m.insert("CAP_LEASE".to_string(), Capability::CAP_LEASE);
m.insert("CAP_AUDIT_WRITE".to_string(), Capability::CAP_AUDIT_WRITE);
m.insert("CAP_AUDIT_CONTROL".to_string(), Capability::CAP_AUDIT_WRITE);
m.insert("CAP_SETFCAP".to_string(), Capability::CAP_SETFCAP);
m.insert("CAP_MAC_OVERRIDE".to_string(), Capability::CAP_MAC_OVERRIDE);
m.insert("CAP_SYSLOG".to_string(), Capability::CAP_SYSLOG);
m.insert("CAP_WAKE_ALARM".to_string(), Capability::CAP_WAKE_ALARM);
m.insert(
"CAP_BLOCK_SUSPEND".to_string(),
Capability::CAP_BLOCK_SUSPEND,
);
m.insert("CAP_AUDIT_READ".to_string(), Capability::CAP_AUDIT_READ);
m
};
}
use std::str::FromStr;
fn to_capshashset(cfd_log: RawFd, caps: &[String]) -> CapsHashSet {
let mut r = CapsHashSet::new();
for cap in caps.iter() {
let c = CAPSMAP.get(cap);
if c.is_none() {
log_child!(cfd_log, "{} is not a cap", cap);
continue;
}
r.insert(*c.unwrap());
match Capability::from_str(cap) {
Err(_) => {
log_child!(cfd_log, "{} is not a cap", cap);
continue;
}
Ok(c) => r.insert(c),
};
}
r
}
pub fn get_all_caps() -> CapsHashSet {
let mut caps_set =
runtime::procfs_all_supported(None).unwrap_or_else(|_| runtime::thread_all_supported());
if caps_set.is_empty() {
caps_set = caps::all();
}
caps_set
}
pub fn reset_effective() -> Result<()> {
caps::set(None, CapSet::Effective, caps::all()).map_err(|e| anyhow!(e.to_string()))?;
let all = get_all_caps();
caps::set(None, CapSet::Effective, &all).map_err(|e| anyhow!(e.to_string()))?;
Ok(())
}
pub fn drop_privileges(cfd_log: RawFd, caps: &LinuxCapabilities) -> Result<()> {
let all = caps::all();
let all = get_all_caps();
for c in all.difference(&to_capshashset(cfd_log, caps.bounding.as_ref())) {
caps::drop(None, CapSet::Bounding, *c).map_err(|e| anyhow!(e.to_string()))?;
@@ -108,26 +55,26 @@ pub fn drop_privileges(cfd_log: RawFd, caps: &LinuxCapabilities) -> Result<()> {
caps::set(
None,
CapSet::Effective,
to_capshashset(cfd_log, caps.effective.as_ref()),
&to_capshashset(cfd_log, caps.effective.as_ref()),
)
.map_err(|e| anyhow!(e.to_string()))?;
caps::set(
None,
CapSet::Permitted,
to_capshashset(cfd_log, caps.permitted.as_ref()),
&to_capshashset(cfd_log, caps.permitted.as_ref()),
)
.map_err(|e| anyhow!(e.to_string()))?;
caps::set(
None,
CapSet::Inheritable,
to_capshashset(cfd_log, caps.inheritable.as_ref()),
&to_capshashset(cfd_log, caps.inheritable.as_ref()),
)
.map_err(|e| anyhow!(e.to_string()))?;
let _ = caps::set(
None,
CapSet::Ambient,
to_capshashset(cfd_log, caps.ambient.as_ref()),
&to_capshashset(cfd_log, caps.ambient.as_ref()),
)
.map_err(|_| log_child!(cfd_log, "failed to set ambient capability"));
View File
@@ -37,6 +37,8 @@ use std::collections::HashMap;
use std::fs;
use std::path::Path;
const GUEST_CPUS_PATH: &str = "/sys/devices/system/cpu/online";
// Convenience macro to obtain the scope logger
macro_rules! sl {
() => {
@@ -44,29 +46,6 @@ macro_rules! sl {
};
}
pub fn load_or_create<'a>(h: Box<&'a dyn cgroups::Hierarchy>, path: &str) -> Cgroup<'a> {
let valid_path = path.trim_start_matches('/').to_string();
let cg = load(h.clone(), &valid_path);
match cg {
Some(cg) => cg,
None => {
info!(sl!(), "create new cgroup: {}", &valid_path);
cgroups::Cgroup::new(h, valid_path.as_str())
}
}
}
pub fn load<'a>(h: Box<&'a dyn cgroups::Hierarchy>, path: &str) -> Option<Cgroup<'a>> {
let valid_path = path.trim_start_matches('/').to_string();
let cg = cgroups::Cgroup::load(h, valid_path.as_str());
let cpu_controller: &CpuController = cg.controller_of().unwrap();
if cpu_controller.exists() {
Some(cg)
} else {
None
}
}
macro_rules! get_controller_or_return_singular_none {
($cg:ident) => {
match $cg.controller_of() {
@@ -80,8 +59,9 @@ macro_rules! get_controller_or_return_singular_none {
pub struct Manager {
pub paths: HashMap<String, String>,
pub mounts: HashMap<String, String>,
// pub rels: HashMap<String, String>,
pub cpath: String,
#[serde(skip)]
cgroup: cgroups::Cgroup,
}
// set_resource is used to set resources by cgroup controller.
@@ -96,17 +76,11 @@ macro_rules! set_resource {
impl CgroupManager for Manager {
fn apply(&self, pid: pid_t) -> Result<()> {
let h = cgroups::hierarchies::auto();
let h = Box::new(&*h);
let cg = load_or_create(h, &self.cpath);
cg.add_task(CgroupPid::from(pid as u64))?;
self.cgroup.add_task(CgroupPid::from(pid as u64))?;
Ok(())
}
fn set(&self, r: &LinuxResources, update: bool) -> Result<()> {
let h = cgroups::hierarchies::auto();
let h = Box::new(&*h);
let cg = load_or_create(h, &self.cpath);
info!(
sl!(),
"cgroup manager set resources for container. Resources input {:?}", r
@@ -116,53 +90,49 @@ impl CgroupManager for Manager {
// set cpuset and cpu resources
if let Some(cpu) = &r.cpu {
set_cpu_resources(&cg, cpu)?;
set_cpu_resources(&self.cgroup, cpu)?;
}
// set memory resources
if let Some(memory) = &r.memory {
set_memory_resources(&cg, memory, update)?;
set_memory_resources(&self.cgroup, memory, update)?;
}
// set pids resources
if let Some(pids_resources) = &r.pids {
set_pids_resources(&cg, pids_resources)?;
set_pids_resources(&self.cgroup, pids_resources)?;
}
// set block_io resources
if let Some(blkio) = &r.block_io {
set_block_io_resources(&cg, blkio, res);
set_block_io_resources(&self.cgroup, blkio, res);
}
// set hugepages resources
if !r.hugepage_limits.is_empty() {
set_hugepages_resources(&cg, &r.hugepage_limits, res);
set_hugepages_resources(&self.cgroup, &r.hugepage_limits, res);
}
// set network resources
if let Some(network) = &r.network {
set_network_resources(&cg, network, res);
set_network_resources(&self.cgroup, network, res);
}
// set devices resources
set_devices_resources(&cg, &r.devices, res);
set_devices_resources(&self.cgroup, &r.devices, res);
info!(sl!(), "resources after processed {:?}", res);
// apply resources
cg.apply(res)?;
self.cgroup.apply(res)?;
Ok(())
}
fn get_stats(&self) -> Result<CgroupStats> {
let h = cgroups::hierarchies::auto();
let h = Box::new(&*h);
let cg = load_or_create(h, &self.cpath);
// CpuStats
let cpu_usage = get_cpuacct_stats(&cg);
let cpu_usage = get_cpuacct_stats(&self.cgroup);
let throttling_data = get_cpu_stats(&cg);
let throttling_data = get_cpu_stats(&self.cgroup);
let cpu_stats = SingularPtrField::some(CpuStats {
cpu_usage,
@@ -172,17 +142,17 @@ impl CgroupManager for Manager {
});
// Memorystats
let memory_stats = get_memory_stats(&cg);
let memory_stats = get_memory_stats(&self.cgroup);
// PidsStats
let pids_stats = get_pids_stats(&cg);
let pids_stats = get_pids_stats(&self.cgroup);
// BlkioStats
// note that virtiofs has no blkio stats
let blkio_stats = get_blkio_stats(&cg);
let blkio_stats = get_blkio_stats(&self.cgroup);
// HugetlbStats
let hugetlb_stats = get_hugetlb_stats(&cg);
let hugetlb_stats = get_hugetlb_stats(&self.cgroup);
Ok(CgroupStats {
cpu_stats,
@@ -196,10 +166,7 @@ impl CgroupManager for Manager {
}
fn freeze(&self, state: FreezerState) -> Result<()> {
let h = cgroups::hierarchies::auto();
let h = Box::new(&*h);
let cg = load_or_create(h, &self.cpath);
let freezer_controller: &FreezerController = cg.controller_of().unwrap();
let freezer_controller: &FreezerController = self.cgroup.controller_of().unwrap();
match state {
FreezerState::Thawed => {
freezer_controller.thaw()?;
@@ -216,20 +183,12 @@ impl CgroupManager for Manager {
}
fn destroy(&mut self) -> Result<()> {
let h = cgroups::hierarchies::auto();
let h = Box::new(&*h);
let cg = load(h, &self.cpath);
if let Some(cg) = cg {
cg.delete();
}
let _ = self.cgroup.delete();
Ok(())
}
fn get_pids(&self) -> Result<Vec<pid_t>> {
let h = cgroups::hierarchies::auto();
let h = Box::new(&*h);
let cg = load_or_create(h, &self.cpath);
let mem_controller: &MemController = cg.controller_of().unwrap();
let mem_controller: &MemController = self.cgroup.controller_of().unwrap();
let pids = mem_controller.tasks();
let result = pids.iter().map(|x| x.pid as i32).collect::<Vec<i32>>();
@@ -248,7 +207,7 @@ fn set_network_resources(
// description can be found at https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v1/net_cls.html
let class_id = network.class_id.unwrap_or(0) as u64;
if class_id != 0 {
res.network.class_id = class_id;
res.network.class_id = Some(class_id);
}
// set network priorities
@@ -261,7 +220,6 @@ fn set_network_resources(
});
}
res.network.update_values = true;
res.network.priorities = priorities;
}
@@ -291,7 +249,6 @@ fn set_devices_resources(
}
}
res.devices.update_values = true;
res.devices.devices = devices;
}
@@ -301,7 +258,6 @@ fn set_hugepages_resources(
res: &mut cgroups::Resources,
) {
info!(sl!(), "cgroup manager set hugepage");
res.hugepages.update_values = true;
let mut limits = vec![];
for l in hugepage_limits.iter() {
@@ -320,7 +276,6 @@ fn set_block_io_resources(
res: &mut cgroups::Resources,
) {
info!(sl!(), "cgroup manager set block io");
res.blkio.update_values = true;
res.blkio.weight = blkio.weight;
res.blkio.leaf_weight = blkio.leaf_weight;
@@ -353,7 +308,9 @@ fn set_cpu_resources(cg: &cgroups::Cgroup, cpu: &LinuxCPU) -> Result<()> {
let cpuset_controller: &CpuSetController = cg.controller_of().unwrap();
if !cpu.cpus.is_empty() {
cpuset_controller.set_cpus(&cpu.cpus)?;
if let Err(e) = cpuset_controller.set_cpus(&cpu.cpus) {
warn!(sl!(), "write cpuset failed: {:?}", e);
}
}
if !cpu.mems.is_empty() {
@@ -969,6 +926,11 @@ pub fn get_mounts() -> Result<HashMap<String, String>> {
Ok(m)
}
fn new_cgroup(h: Box<dyn cgroups::Hierarchy>, path: &str) -> Cgroup {
let valid_path = path.trim_start_matches('/').to_string();
cgroups::Cgroup::new(h, valid_path.as_str())
}
impl Manager {
pub fn new(cpath: &str) -> Result<Self> {
let mut m = HashMap::new();
@@ -976,18 +938,14 @@ impl Manager {
let paths = get_paths()?;
let mounts = get_mounts()?;
for (key, value) in &paths {
for key in paths.keys() {
let mnt = mounts.get(key);
if mnt.is_none() {
continue;
}
let p = if value == "/" {
format!("{}/{}", mnt.unwrap(), cpath)
} else {
format!("{}{}/{}", mnt.unwrap(), value, cpath)
};
let p = format!("{}/{}", mnt.unwrap(), cpath);
m.insert(key.to_string(), p);
}
@@ -997,6 +955,7 @@ impl Manager {
mounts,
// rels: paths,
cpath: cpath.to_string(),
cgroup: new_cgroup(cgroups::hierarchies::auto(), cpath),
})
}
@@ -1007,18 +966,14 @@ impl Manager {
info!(sl!(), "update_cpuset_path to: {}", guest_cpuset);
let h = cgroups::hierarchies::auto();
let h = Box::new(&*h);
let root_cg = load_or_create(h, "");
let root_cg = h.root_control_group();
let root_cpuset_controller: &CpuSetController = root_cg.controller_of().unwrap();
let path = root_cpuset_controller.path();
let root_path = Path::new(path);
info!(sl!(), "root cpuset path: {:?}", &path);
let h = cgroups::hierarchies::auto();
let h = Box::new(&*h);
let cg = load_or_create(h, &self.cpath);
let container_cpuset_controller: &CpuSetController = cg.controller_of().unwrap();
let container_cpuset_controller: &CpuSetController = self.cgroup.controller_of().unwrap();
let path = container_cpuset_controller.path();
let container_path = Path::new(path);
info!(sl!(), "container cpuset path: {:?}", &path);
@@ -1038,8 +993,6 @@ impl Manager {
break;
}
i -= 1;
let h = cgroups::hierarchies::auto();
let h = Box::new(&*h);
// remove cgroup root from path
let r_path = &paths[i]
@@ -1047,7 +1000,7 @@ impl Manager {
.unwrap()
.trim_start_matches(root_path.to_str().unwrap());
info!(sl!(), "updating cpuset for parent path {:?}", &r_path);
let cg = load_or_create(h, &r_path);
let cg = new_cgroup(cgroups::hierarchies::auto(), &r_path);
let cpuset_controller: &CpuSetController = cg.controller_of().unwrap();
cpuset_controller.set_cpus(guest_cpuset)?;
}
@@ -1076,23 +1029,10 @@ impl Manager {
}
}
// get the guest's online cpus.
pub fn get_guest_cpuset() -> Result<String> {
// for cgroup v2
if cgroups::hierarchies::is_cgroup2_unified_mode() {
let c = fs::read_to_string("/sys/fs/cgroup/cpuset.cpus.effective")?;
return Ok(c);
}
// for cgroup v1
let m = get_mounts()?;
if m.get("cpuset").is_none() {
warn!(sl!(), "no cpuset cgroup!");
return Err(nix::Error::Sys(Errno::ENOENT).into());
}
let p = format!("{}/cpuset.cpus", m.get("cpuset").unwrap());
let c = fs::read_to_string(p.as_str())?;
Ok(c)
let c = fs::read_to_string(GUEST_CPUS_PATH)?;
Ok(c.trim().to_string())
}
// Since the OCI spec is designed for cgroup v1, in some cases
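Taken together, the hunks above replace the per-call load_or_create() pattern with a Cgroup handle built once in Manager::new() and reused by apply(), set(), get_stats(), freeze() and friends. A minimal sketch of that pattern, using only the cgroups-crate calls already visible in the diff (hierarchies::auto(), Cgroup::new(), add_task()); the CachedCgroup struct is illustrative, not the upstream Manager:
use anyhow::Result;
use cgroups::{Cgroup, CgroupPid};
// Illustrative only: build the Cgroup once and keep it, instead of
// re-resolving the hierarchy on every operation.
struct CachedCgroup {
    cgroup: Cgroup,
}
impl CachedCgroup {
    fn new(cpath: &str) -> Self {
        // Same construction as new_cgroup() in the diff: strip the leading
        // '/' and create the group under the auto-detected hierarchy.
        let valid_path = cpath.trim_start_matches('/');
        Self {
            cgroup: Cgroup::new(cgroups::hierarchies::auto(), valid_path),
        }
    }
    fn apply(&self, pid: u64) -> Result<()> {
        // Every later call reuses the cached handle.
        self.cgroup.add_task(CgroupPid::from(pid))?;
        Ok(())
    }
}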


@@ -10,6 +10,7 @@ use protocols::agent::CgroupStats;
use cgroups::freezer::FreezerState;
pub mod fs;
pub mod mock;
pub mod notifier;
pub mod systemd;


@@ -3,16 +3,18 @@
// SPDX-License-Identifier: Apache-2.0
//
use anyhow::{anyhow, Result};
use anyhow::{anyhow, Context, Result};
use eventfd::{eventfd, EfdFlags};
use nix::sys::eventfd;
use nix::sys::inotify::{AddWatchFlags, InitFlags, Inotify};
use std::fs::{self, File};
use std::io::Read;
use std::os::unix::io::{AsRawFd, FromRawFd};
use std::path::{Path, PathBuf};
use std::sync::mpsc::{self, Receiver};
use std::thread;
use crate::pipestream::PipeStream;
use futures::StreamExt as _;
use inotify::{Inotify, WatchMask};
use tokio::io::AsyncReadExt;
use tokio::sync::mpsc::{channel, Receiver};
// Convenience macro to obtain the scope logger
macro_rules! sl {
@@ -21,11 +23,11 @@ macro_rules! sl {
};
}
pub fn notify_oom(cid: &str, cg_dir: String) -> Result<Receiver<String>> {
pub async fn notify_oom(cid: &str, cg_dir: String) -> Result<Receiver<String>> {
if cgroups::hierarchies::is_cgroup2_unified_mode() {
return notify_on_oom_v2(cid, cg_dir);
return notify_on_oom_v2(cid, cg_dir).await;
}
notify_on_oom(cid, cg_dir)
notify_on_oom(cid, cg_dir).await
}
// get_value_from_cgroup parse cgroup file with `Flat keyed`
@@ -52,11 +54,11 @@ fn get_value_from_cgroup(path: &PathBuf, key: &str) -> Result<i64> {
// notify_on_oom returns a channel on which you can expect an OOM event;
// if the process dies without an OOM, the channel will be closed.
pub fn notify_on_oom_v2(containere_id: &str, cg_dir: String) -> Result<Receiver<String>> {
register_memory_event_v2(containere_id, cg_dir, "memory.events", "cgroup.events")
pub async fn notify_on_oom_v2(containere_id: &str, cg_dir: String) -> Result<Receiver<String>> {
register_memory_event_v2(containere_id, cg_dir, "memory.events", "cgroup.events").await
}
fn register_memory_event_v2(
async fn register_memory_event_v2(
containere_id: &str,
cg_dir: String,
memory_event_name: &str,
@@ -73,54 +75,54 @@ fn register_memory_event_v2(
"register_memory_event_v2 cgroup_event_control_path: {:?}", &cgroup_event_control_path
);
let fd = Inotify::init(InitFlags::empty()).unwrap();
let mut inotify = Inotify::init().context("Failed to initialize inotify")?;
// watching oom kill
let ev_fd = fd
.add_watch(&event_control_path, AddWatchFlags::IN_MODIFY)
.unwrap();
let ev_wd = inotify.add_watch(&event_control_path, WatchMask::MODIFY)?;
// The cgroup filesystem emits no `unix.IN_DELETE|unix.IN_DELETE_SELF` events, so instead watch for all processes having exited
let cg_fd = fd
.add_watch(&cgroup_event_control_path, AddWatchFlags::IN_MODIFY)
.unwrap();
info!(sl!(), "ev_fd: {:?}", ev_fd);
info!(sl!(), "cg_fd: {:?}", cg_fd);
let cg_wd = inotify.add_watch(&cgroup_event_control_path, WatchMask::MODIFY)?;
let (sender, receiver) = mpsc::channel();
info!(sl!(), "ev_wd: {:?}", ev_wd);
info!(sl!(), "cg_wd: {:?}", cg_wd);
let (sender, receiver) = channel(100);
let containere_id = containere_id.to_string();
thread::spawn(move || {
loop {
let events = fd.read_events().unwrap();
tokio::spawn(async move {
let mut buffer = [0; 32];
let mut stream = inotify
.event_stream(&mut buffer)
.expect("create inotify event stream failed");
while let Some(event_or_error) = stream.next().await {
let event = event_or_error.unwrap();
info!(
sl!(),
"container[{}] get events for container: {:?}", &containere_id, &events
"container[{}] get event for container: {:?}", &containere_id, &event
);
// info!("is1: {}", event.wd == wd1);
info!(sl!(), "event.wd: {:?}", event.wd);
for event in events {
if event.mask & AddWatchFlags::IN_MODIFY != AddWatchFlags::IN_MODIFY {
continue;
if event.wd == ev_wd {
let oom = get_value_from_cgroup(&event_control_path, "oom_kill");
if oom.unwrap_or(0) > 0 {
let _ = sender.send(containere_id.clone()).await.map_err(|e| {
error!(sl!(), "send containere_id failed, error: {:?}", e);
});
return;
}
info!(sl!(), "event.wd: {:?}", event.wd);
if event.wd == ev_fd {
let oom = get_value_from_cgroup(&event_control_path, "oom_kill");
if oom.unwrap_or(0) > 0 {
sender.send(containere_id.clone()).unwrap();
return;
}
} else if event.wd == cg_fd {
let pids = get_value_from_cgroup(&cgroup_event_control_path, "populated");
if pids.unwrap_or(-1) == 0 {
return;
}
} else if event.wd == cg_wd {
let pids = get_value_from_cgroup(&cgroup_event_control_path, "populated");
if pids.unwrap_or(-1) == 0 {
return;
}
}
// When a cgroup is destroyed, an event is sent to eventfd.
// So if the control path is gone, return instead of notifying.
if !Path::new(&event_control_path).exists() {
return;
}
}
// When a cgroup is destroyed, an event is sent to eventfd.
// So if the control path is gone, return instead of notifying.
if !Path::new(&event_control_path).exists() {
return;
}
});
@@ -129,16 +131,16 @@ fn register_memory_event_v2(
// notify_on_oom returns a channel on which you can expect an OOM event;
// if the process dies without an OOM, the channel will be closed.
fn notify_on_oom(cid: &str, dir: String) -> Result<Receiver<String>> {
async fn notify_on_oom(cid: &str, dir: String) -> Result<Receiver<String>> {
if dir.is_empty() {
return Err(anyhow!("memory controller missing"));
}
register_memory_event(cid, dir, "memory.oom_control", "")
register_memory_event(cid, dir, "memory.oom_control", "").await
}
// level is one of "low", "medium", or "critical"
fn notify_memory_pressure(cid: &str, dir: String, level: &str) -> Result<Receiver<String>> {
async fn notify_memory_pressure(cid: &str, dir: String, level: &str) -> Result<Receiver<String>> {
if dir.is_empty() {
return Err(anyhow!("memory controller missing"));
}
@@ -147,10 +149,10 @@ fn notify_memory_pressure(cid: &str, dir: String, level: &str) -> Result<Receive
return Err(anyhow!("invalid pressure level {}", level));
}
register_memory_event(cid, dir, "memory.pressure_level", level)
register_memory_event(cid, dir, "memory.pressure_level", level).await
}
fn register_memory_event(
async fn register_memory_event(
cid: &str,
cg_dir: String,
event_name: &str,
@@ -171,15 +173,16 @@ fn register_memory_event(
fs::write(&event_control_path, data)?;
let mut eventfd_file = unsafe { File::from_raw_fd(eventfd) };
let mut eventfd_stream = unsafe { PipeStream::from_raw_fd(eventfd) };
let (sender, receiver) = mpsc::channel();
let (sender, receiver) = tokio::sync::mpsc::channel(100);
let containere_id = cid.to_string();
thread::spawn(move || {
tokio::spawn(async move {
loop {
let mut buf = [0; 8];
match eventfd_file.read(&mut buf) {
let sender = sender.clone();
let mut buf = [0u8; 8];
match eventfd_stream.read(&mut buf).await {
Err(err) => {
warn!(sl!(), "failed to read from eventfd: {:?}", err);
return;
@@ -188,7 +191,10 @@ fn register_memory_event(
let content = fs::read_to_string(path.clone());
info!(
sl!(),
"OOM event for container: {}, content: {:?}", &containere_id, content
"cgroup event for container: {}, path: {:?}, content: {:?}",
&containere_id,
&path,
content
);
}
}
@@ -198,7 +204,10 @@ fn register_memory_event(
if !Path::new(&event_control_path).exists() {
return;
}
sender.send(containere_id.clone()).unwrap();
let _ = sender.send(containere_id.clone()).await.map_err(|e| {
error!(sl!(), "send containere_id failed, error: {:?}", e);
});
}
});
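For context, a minimal sketch of how a caller might consume the receiver returned by the reworked notifier (assuming a tokio runtime; notify_oom is the function shown above and the cgroup directory is illustrative):
async fn watch_oom(cid: &str, cgroup_dir: String) -> anyhow::Result<()> {
    // notify_oom() picks the v1 (eventfd) or v2 (inotify) implementation and
    // returns a tokio mpsc receiver of container IDs.
    let mut rx = notify_oom(cid, cgroup_dir).await?;
    // recv() yields Some(container_id) for each OOM kill and None once the
    // sender task returns, e.g. because the cgroup was destroyed.
    while let Some(container_id) = rx.recv().await {
        println!("OOM event for container {}", container_id);
    }
    Ok(())
}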

File diff suppressed because it is too large


@@ -40,11 +40,13 @@ pub mod capabilities;
pub mod cgroups;
pub mod container;
pub mod mount;
pub mod pipestream;
pub mod process;
pub mod specconv;
pub mod sync;
pub mod sync_with_async;
pub mod utils;
pub mod validator;
// pub mod factory;
//pub mod configs;
// pub mod devices;


@@ -112,7 +112,12 @@ lazy_static! {
#[inline(always)]
#[allow(unused_variables)]
fn mount<P1: ?Sized + NixPath, P2: ?Sized + NixPath, P3: ?Sized + NixPath, P4: ?Sized + NixPath>(
pub fn mount<
P1: ?Sized + NixPath,
P2: ?Sized + NixPath,
P3: ?Sized + NixPath,
P4: ?Sized + NixPath,
>(
source: Option<&P1>,
target: &P2,
fstype: Option<&P3>,
@@ -127,7 +132,7 @@ fn mount<P1: ?Sized + NixPath, P2: ?Sized + NixPath, P3: ?Sized + NixPath, P4: ?
#[inline(always)]
#[allow(unused_variables)]
fn umount2<P: ?Sized + NixPath>(
pub fn umount2<P: ?Sized + NixPath>(
target: &P,
flags: MntFlags,
) -> std::result::Result<(), nix::Error> {
@@ -731,10 +736,10 @@ fn mount_from(
let src = if m.r#type.as_str() == "bind" {
let src = fs::canonicalize(m.source.as_str())?;
let dir = if src.is_file() {
Path::new(&dest).parent().unwrap()
} else {
let dir = if src.is_dir() {
Path::new(&dest)
} else {
Path::new(&dest).parent().unwrap()
};
let _ = fs::create_dir_all(&dir).map_err(|e| {
@@ -747,7 +752,7 @@ fn mount_from(
});
// make sure file exists so we can bind over it
if src.is_file() {
if !src.is_dir() {
let _ = OpenOptions::new().create(true).write(true).open(&dest);
}
src.to_str().unwrap().to_string()


@@ -0,0 +1,170 @@
// Copyright (c) 2020 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
//! Async support for pipes or anything else that has a file descriptor
use nix::unistd;
use std::{
fmt, io,
io::{Read, Result, Write},
mem,
os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, RawFd},
pin::Pin,
task::{Context, Poll},
};
use futures::ready;
use tokio::io::{unix::AsyncFd, AsyncRead, AsyncWrite, ReadBuf};
fn set_nonblocking(fd: RawFd) {
unsafe {
libc::fcntl(fd, libc::F_SETFL, libc::O_NONBLOCK);
}
}
struct StreamFd(RawFd);
impl io::Read for &StreamFd {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
match unistd::read(self.0, buf) {
Ok(l) => Ok(l),
Err(e) => Err(e.as_errno().unwrap().into()),
}
}
}
impl io::Write for &StreamFd {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
match unistd::write(self.0, buf) {
Ok(l) => Ok(l),
Err(e) => Err(e.as_errno().unwrap().into()),
}
}
fn flush(&mut self) -> io::Result<()> {
Ok(())
}
}
impl StreamFd {
fn close(&mut self) -> io::Result<()> {
match unistd::close(self.0) {
Ok(()) => Ok(()),
Err(e) => Err(e.as_errno().unwrap().into()),
}
}
}
impl Drop for StreamFd {
fn drop(&mut self) {
self.close().ok();
}
}
impl AsRawFd for StreamFd {
fn as_raw_fd(&self) -> RawFd {
self.0
}
}
pub struct PipeStream(AsyncFd<StreamFd>);
impl PipeStream {
pub fn new(fd: RawFd) -> Result<Self> {
set_nonblocking(fd);
Ok(Self(AsyncFd::new(StreamFd(fd))?))
}
pub fn shutdown(&mut self) -> io::Result<()> {
self.0.get_mut().close()
}
pub fn from_fd(fd: RawFd) -> Self {
unsafe { Self::from_raw_fd(fd) }
}
}
impl AsRawFd for PipeStream {
fn as_raw_fd(&self) -> RawFd {
self.0.as_raw_fd()
}
}
impl IntoRawFd for PipeStream {
fn into_raw_fd(self) -> RawFd {
let fd = self.as_raw_fd();
mem::forget(self);
fd
}
}
impl FromRawFd for PipeStream {
unsafe fn from_raw_fd(fd: RawFd) -> Self {
Self::new(fd).unwrap()
}
}
impl fmt::Debug for PipeStream {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "PipeStream({})", self.as_raw_fd())
}
}
impl AsyncRead for PipeStream {
fn poll_read(
self: Pin<&mut Self>,
cx: &mut Context<'_>,
buf: &mut ReadBuf<'_>,
) -> Poll<Result<()>> {
let b;
unsafe {
b = &mut *(buf.unfilled_mut() as *mut [mem::MaybeUninit<u8>] as *mut [u8]);
};
loop {
let mut guard = ready!(self.0.poll_read_ready(cx))?;
match guard.try_io(|inner| inner.get_ref().read(b)) {
Ok(Ok(n)) => {
unsafe {
buf.assume_init(n);
}
buf.advance(n);
return Ok(()).into();
}
Ok(Err(e)) => return Err(e).into(),
Err(_would_block) => {
continue;
}
}
}
}
}
impl AsyncWrite for PipeStream {
fn poll_write(
self: Pin<&mut Self>,
cx: &mut Context<'_>,
buf: &[u8],
) -> Poll<io::Result<usize>> {
loop {
let mut guard = ready!(self.0.poll_write_ready(cx))?;
match guard.try_io(|inner| inner.get_ref().write(buf)) {
Ok(result) => return Poll::Ready(result),
Err(_would_block) => continue,
}
}
}
fn poll_flush(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<io::Result<()>> {
Poll::Ready(Ok(()))
}
fn poll_shutdown(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<io::Result<()>> {
self.get_mut().shutdown()?;
Poll::Ready(Ok(()))
}
}
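A minimal usage sketch for the new PipeStream type: wrap both ends of an ordinary pipe(2) and drive them with tokio's AsyncReadExt/AsyncWriteExt. The pipe setup and error handling are illustrative, and nix::unistd::pipe() returning raw fds is an assumption about the nix version in use:
use nix::unistd::pipe;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
// Must run inside a tokio runtime, e.g. under #[tokio::main].
async fn pipe_roundtrip() -> anyhow::Result<()> {
    let (read_fd, write_fd) = pipe()?;
    let mut reader = PipeStream::new(read_fd)?;
    let mut writer = PipeStream::new(write_fd)?;
    writer.write_all(b"hello").await?;
    writer.shutdown()?; // close the write end so the reader sees EOF
    let mut buf = Vec::new();
    reader.read_to_end(&mut buf).await?;
    assert_eq!(buf, b"hello".to_vec());
    Ok(())
}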


@@ -6,7 +6,7 @@
use libc::pid_t;
use std::fs::File;
use std::os::unix::io::RawFd;
use std::sync::mpsc::Sender;
use tokio::sync::mpsc::Sender;
use nix::fcntl::{fcntl, FcntlArg, OFlag};
use nix::sys::signal::{self, Signal};
@@ -17,6 +17,28 @@ use nix::Result;
use oci::Process as OCIProcess;
use slog::Logger;
use crate::pipestream::PipeStream;
use std::collections::HashMap;
use std::sync::Arc;
use tokio::io::{split, ReadHalf, WriteHalf};
use tokio::sync::Mutex;
use tokio::sync::Notify;
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub enum StreamType {
Stdin,
Stdout,
Stderr,
ExitPipeR,
TermMaster,
ParentStdin,
ParentStdout,
ParentStderr,
}
type Reader = Arc<Mutex<ReadHalf<PipeStream>>>;
type Writer = Arc<Mutex<WriteHalf<PipeStream>>>;
#[derive(Debug)]
pub struct Process {
pub exec_id: String,
@@ -40,6 +62,10 @@ pub struct Process {
pub exit_watchers: Vec<Sender<i32>>,
pub oci: OCIProcess,
pub logger: Logger,
pub term_exit_notifier: Arc<Notify>,
readers: HashMap<StreamType, Reader>,
writers: HashMap<StreamType, Writer>,
}
pub trait ProcessOperations {
@@ -91,6 +117,9 @@ impl Process {
exit_watchers: Vec::new(),
oci: ocip.clone(),
logger: logger.clone(),
term_exit_notifier: Arc::new(Notify::new()),
readers: HashMap::new(),
writers: HashMap::new(),
};
info!(logger, "before create console socket!");
@@ -112,6 +141,61 @@ impl Process {
}
Ok(p)
}
pub fn notify_term_close(&mut self) {
let notify = self.term_exit_notifier.clone();
notify.notify_one();
}
fn get_fd(&self, stream_type: &StreamType) -> Option<RawFd> {
match stream_type {
StreamType::Stdin => self.stdin,
StreamType::Stdout => self.stdout,
StreamType::Stderr => self.stderr,
StreamType::ExitPipeR => self.exit_pipe_r,
StreamType::TermMaster => self.term_master,
StreamType::ParentStdin => self.parent_stdin,
StreamType::ParentStdout => self.parent_stdout,
StreamType::ParentStderr => self.parent_stderr,
}
}
fn get_stream_and_store(&mut self, stream_type: StreamType) -> Option<(Reader, Writer)> {
let fd = self.get_fd(&stream_type)?;
let stream = PipeStream::from_fd(fd);
let (reader, writer) = split(stream);
let reader = Arc::new(Mutex::new(reader));
let writer = Arc::new(Mutex::new(writer));
self.readers.insert(stream_type.clone(), reader.clone());
self.writers.insert(stream_type, writer.clone());
Some((reader, writer))
}
pub fn get_reader(&mut self, stream_type: StreamType) -> Option<Reader> {
if let Some(reader) = self.readers.get(&stream_type) {
return Some(reader.clone());
}
let (reader, _) = self.get_stream_and_store(stream_type)?;
Some(reader)
}
pub fn get_writer(&mut self, stream_type: StreamType) -> Option<Writer> {
if let Some(writer) = self.writers.get(&stream_type) {
return Some(writer.clone());
}
let (_, writer) = self.get_stream_and_store(stream_type)?;
Some(writer)
}
pub fn close_stream(&mut self, stream_type: StreamType) {
let _ = self.readers.remove(&stream_type);
let _ = self.writers.remove(&stream_type);
}
}
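A small sketch of how these helpers might be used by a caller that writes to the container's stdin and reads its stdout (assuming a Process value whose parent fds are already set up; the stream handles are the Arc<Mutex<...>> wrappers defined above):
use tokio::io::{AsyncReadExt, AsyncWriteExt};
async fn pump_stdio(p: &mut Process) -> anyhow::Result<()> {
    // get_writer()/get_reader() lazily split the fd's PipeStream and cache
    // the halves, so repeated calls hand back clones of the same handles.
    if let Some(writer) = p.get_writer(StreamType::ParentStdin) {
        writer.lock().await.write_all(b"echo hi\n").await?;
    }
    if let Some(reader) = p.get_reader(StreamType::ParentStdout) {
        let mut buf = vec![0u8; 1024];
        let n = reader.lock().await.read(&mut buf).await?;
        println!("stdout: {}", String::from_utf8_lossy(&buf[..n]));
    }
    // Drop the cached halves once the stream is no longer needed.
    p.close_stream(StreamType::ParentStdin);
    Ok(())
}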
fn create_extended_pipe(flags: OFlag, pipe_size: i32) -> Result<(RawFd, RawFd)> {
@@ -169,7 +253,6 @@ mod tests {
// -1 by default
assert_eq!(process.pid, -1);
assert!(process.wait().is_err());
// signal to every process in the process
// group of the calling process.
process.pid = 0;


@@ -14,8 +14,8 @@ pub const SYNC_SUCCESS: i32 = 1;
pub const SYNC_FAILED: i32 = 2;
pub const SYNC_DATA: i32 = 3;
const DATA_SIZE: usize = 100;
const MSG_SIZE: usize = mem::size_of::<i32>();
pub const DATA_SIZE: usize = 100;
pub const MSG_SIZE: usize = mem::size_of::<i32>();
#[macro_export]
macro_rules! log_child {


@@ -0,0 +1,148 @@
// Copyright (c) 2020 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
//! The async version of the sync module, used for IPC
use crate::pipestream::PipeStream;
use anyhow::{anyhow, Result};
use nix::errno::Errno;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use crate::sync::{DATA_SIZE, MSG_SIZE, SYNC_DATA, SYNC_FAILED, SYNC_SUCCESS};
async fn write_count(pipe_w: &mut PipeStream, buf: &[u8], count: usize) -> Result<usize> {
let mut len = 0;
loop {
match pipe_w.write(&buf[len..]).await {
Ok(l) => {
len += l;
if len == count {
break;
}
}
Err(e) => {
if e.raw_os_error().unwrap() != Errno::EINTR as i32 {
return Err(e.into());
}
}
}
}
Ok(len)
}
async fn read_count(pipe_r: &mut PipeStream, count: usize) -> Result<Vec<u8>> {
let mut v: Vec<u8> = vec![0; count];
let mut len = 0;
loop {
match pipe_r.read(&mut v[len..]).await {
Ok(l) => {
len += l;
if len == count || l == 0 {
break;
}
}
Err(e) => {
if e.raw_os_error().unwrap() != Errno::EINTR as i32 {
return Err(e.into());
}
}
}
}
Ok(v[0..len].to_vec())
}
pub async fn read_async(pipe_r: &mut PipeStream) -> Result<Vec<u8>> {
let buf = read_count(pipe_r, MSG_SIZE).await?;
if buf.len() != MSG_SIZE {
return Err(anyhow!(
"process: {} failed to receive async message from peer: got msg length: {}, expected: {}",
std::process::id(),
buf.len(),
MSG_SIZE
));
}
let buf_array: [u8; MSG_SIZE] = [buf[0], buf[1], buf[2], buf[3]];
let msg: i32 = i32::from_be_bytes(buf_array);
match msg {
SYNC_SUCCESS => Ok(Vec::new()),
SYNC_DATA => {
let buf = read_count(pipe_r, MSG_SIZE).await?;
let buf_array: [u8; MSG_SIZE] = [buf[0], buf[1], buf[2], buf[3]];
let msg_length: i32 = i32::from_be_bytes(buf_array);
let data_buf = read_count(pipe_r, msg_length as usize).await?;
Ok(data_buf)
}
SYNC_FAILED => {
let mut error_buf = vec![];
loop {
let buf = read_count(pipe_r, DATA_SIZE).await?;
error_buf.extend(&buf);
if DATA_SIZE == buf.len() {
continue;
} else {
break;
}
}
let error_str = match std::str::from_utf8(&error_buf) {
Ok(v) => String::from(v),
Err(e) => {
return Err(
anyhow!(e).context("receive error message from child process failed")
);
}
};
Err(anyhow!(error_str))
}
_ => Err(anyhow!("error in receive sync message")),
}
}
pub async fn write_async(pipe_w: &mut PipeStream, msg_type: i32, data_str: &str) -> Result<()> {
let buf = msg_type.to_be_bytes();
let count = write_count(pipe_w, &buf, MSG_SIZE).await?;
if count != MSG_SIZE {
return Err(anyhow!("error in send sync message"));
}
match msg_type {
SYNC_FAILED => match write_count(pipe_w, data_str.as_bytes(), data_str.len()).await {
Ok(_) => pipe_w.shutdown()?,
Err(e) => {
pipe_w.shutdown()?;
return Err(anyhow!(e).context("error in send message to process"));
}
},
SYNC_DATA => {
let length: i32 = data_str.len() as i32;
write_count(pipe_w, &length.to_be_bytes(), MSG_SIZE)
.await
.or_else(|e| {
pipe_w.shutdown()?;
Err(anyhow!(e).context("error in send message to process"))
})?;
write_count(pipe_w, data_str.as_bytes(), data_str.len())
.await
.or_else(|e| {
pipe_w.shutdown()?;
Err(anyhow!(e).context("error in send message to process"))
})?;
}
_ => (),
};
Ok(())
}
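A minimal sketch of the helpers above in action over a pipe pair (the pipe setup is illustrative; in the agent these fds come from the parent/child split, and PipeStream/SYNC_DATA are the items defined in the sibling modules):
use crate::pipestream::PipeStream;
use crate::sync::SYNC_DATA;
use nix::unistd::pipe;
async fn handshake_example() -> anyhow::Result<()> {
    let (rfd, wfd) = pipe()?;
    let mut reader = PipeStream::new(rfd)?;
    let mut writer = PipeStream::new(wfd)?;
    // Sender side: announce a payload with the SYNC_DATA message type.
    write_async(&mut writer, SYNC_DATA, "ready").await?;
    // Receiver side: read_async returns the payload for SYNC_DATA, an empty
    // Vec for SYNC_SUCCESS, and an error carrying the message for SYNC_FAILED.
    let payload = read_async(&mut reader).await?;
    assert_eq!(payload, b"ready".to_vec());
    Ok(())
}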


@@ -0,0 +1,119 @@
// Copyright (c) 2021 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use anyhow::{anyhow, Context, Result};
use libc::gid_t;
use libc::uid_t;
use std::fs::File;
use std::io::{BufRead, BufReader};
const PASSWD_FILE: &str = "/etc/passwd";
// An entry from /etc/passwd
#[derive(Debug, PartialEq, PartialOrd)]
pub struct PasswdEntry {
// username
pub name: String,
// user password
pub passwd: String,
// user id
pub uid: uid_t,
// group id
pub gid: gid_t,
// user Information
pub gecos: String,
// home directory
pub dir: String,
// User's Shell
pub shell: String,
}
// get an entry for a given `uid` from `/etc/passwd`
fn get_entry_by_uid(uid: uid_t, path: &str) -> Result<PasswdEntry> {
let file = File::open(path).with_context(|| format!("open file {}", path))?;
let mut reader = BufReader::new(file);
let mut line = String::new();
loop {
line.clear();
match reader.read_line(&mut line) {
Ok(0) => return Err(anyhow!(format!("file {} is empty", path))),
Ok(_) => (),
Err(e) => {
return Err(anyhow!(format!(
"failed to read file {} with {:?}",
path, e
)))
}
}
if line.starts_with('#') {
continue;
}
let parts: Vec<&str> = line.split(':').map(|part| part.trim()).collect();
if parts.len() != 7 {
continue;
}
match parts[2].parse() {
Err(_e) => continue,
Ok(new_uid) => {
if uid != new_uid {
continue;
}
let entry = PasswdEntry {
name: parts[0].to_string(),
passwd: parts[1].to_string(),
uid: new_uid,
gid: parts[3].parse().unwrap_or(0),
gecos: parts[4].to_string(),
dir: parts[5].to_string(),
shell: parts[6].to_string(),
};
return Ok(entry);
}
}
}
}
pub fn home_dir(uid: uid_t) -> Result<String> {
get_entry_by_uid(uid, PASSWD_FILE).map(|entry| entry.dir)
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
use tempfile::Builder;
#[test]
fn test_get_entry_by_uid() {
let tmpdir = Builder::new().tempdir().unwrap();
let tmpdir_path = tmpdir.path().to_str().unwrap();
let temp_passwd = format!("{}/passwd", tmpdir_path);
let mut tempf = File::create(temp_passwd.as_str()).unwrap();
writeln!(tempf, "root:x:0:0:root:/root0:/bin/bash").unwrap();
writeln!(tempf, "root:x:1:0:root:/root1:/bin/bash").unwrap();
writeln!(tempf, "#root:x:1:0:root:/rootx:/bin/bash").unwrap();
writeln!(tempf, "root:x:2:0:root:/root2:/bin/bash").unwrap();
writeln!(tempf, "root:x:3:0:root:/root3").unwrap();
writeln!(tempf, "root:x:3:0:root:/root3:/bin/bash").unwrap();
let entry = get_entry_by_uid(0, temp_passwd.as_str()).unwrap();
assert_eq!(entry.dir.as_str(), "/root0");
let entry = get_entry_by_uid(1, temp_passwd.as_str()).unwrap();
assert_eq!(entry.dir.as_str(), "/root1");
let entry = get_entry_by_uid(2, temp_passwd.as_str()).unwrap();
assert_eq!(entry.dir.as_str(), "/root2");
let entry = get_entry_by_uid(3, temp_passwd.as_str()).unwrap();
assert_eq!(entry.dir.as_str(), "/root3");
}
}
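And a short usage sketch for the helper (the uid is illustrative):
use libc::uid_t;
fn print_home(uid: uid_t) {
    // home_dir() resolves the uid against /etc/passwd and returns the
    // user's home directory field.
    match home_dir(uid) {
        Ok(dir) => println!("home directory for uid {}: {}", uid, dir),
        Err(e) => println!("failed to resolve uid {}: {:?}", uid, e),
    }
}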


@@ -4,12 +4,20 @@
//
use crate::container::Config;
use anyhow::{anyhow, Context, Result};
use anyhow::{anyhow, Context, Error, Result};
use nix::errno::Errno;
use oci::{LinuxIDMapping, LinuxNamespace, Spec};
use oci::{Linux, LinuxIDMapping, LinuxNamespace, Spec};
use std::collections::HashMap;
use std::path::{Component, PathBuf};
fn einval() -> Error {
anyhow!(nix::Error::from_errno(Errno::EINVAL))
}
fn get_linux(oci: &Spec) -> Result<&Linux> {
oci.linux.as_ref().ok_or_else(einval)
}
fn contain_namespace(nses: &[LinuxNamespace], key: &str) -> bool {
for ns in nses {
if ns.r#type.as_str() == key {
@@ -27,14 +35,14 @@ fn get_namespace_path(nses: &[LinuxNamespace], key: &str) -> Result<String> {
}
}
Err(anyhow!(nix::Error::from_errno(Errno::EINVAL)))
Err(einval())
}
fn rootfs(root: &str) -> Result<()> {
let path = PathBuf::from(root);
// path is not absolute or does not exist
if !path.exists() || !path.is_absolute() {
return Err(anyhow!(nix::Error::from_errno(Errno::EINVAL)));
return Err(einval());
}
// symbolic link? ..?
@@ -52,7 +60,7 @@ fn rootfs(root: &str) -> Result<()> {
if let Some(v) = c.as_os_str().to_str() {
stack.push(v.to_string());
} else {
return Err(anyhow!(nix::Error::from_errno(Errno::EINVAL)));
return Err(einval());
}
}
@@ -64,7 +72,7 @@ fn rootfs(root: &str) -> Result<()> {
let canon = path.canonicalize().context("canonicalize")?;
if cleaned != canon {
// There is a symbolic link in the path
return Err(anyhow!(nix::Error::from_errno(Errno::EINVAL)));
return Err(einval());
}
Ok(())
@@ -75,28 +83,23 @@ fn hostname(oci: &Spec) -> Result<()> {
return Ok(());
}
let linux = oci
.linux
.as_ref()
.ok_or(anyhow!(nix::Error::from_errno(Errno::EINVAL)))?;
let linux = get_linux(oci)?;
if !contain_namespace(&linux.namespaces, "uts") {
return Err(anyhow!(nix::Error::from_errno(Errno::EINVAL)));
return Err(einval());
}
Ok(())
}
fn security(oci: &Spec) -> Result<()> {
let linux = oci
.linux
.as_ref()
.ok_or(anyhow!(nix::Error::from_errno(Errno::EINVAL)))?;
let linux = get_linux(oci)?;
if linux.masked_paths.is_empty() && linux.readonly_paths.is_empty() {
return Ok(());
}
if !contain_namespace(&linux.namespaces, "mount") {
return Err(anyhow!(nix::Error::from_errno(Errno::EINVAL)));
return Err(einval());
}
// don't care about selinux at present
@@ -111,14 +114,12 @@ fn idmapping(maps: &[LinuxIDMapping]) -> Result<()> {
}
}
Err(anyhow!(nix::Error::from_errno(Errno::EINVAL)))
Err(einval())
}
fn usernamespace(oci: &Spec) -> Result<()> {
let linux = oci
.linux
.as_ref()
.ok_or(anyhow!(nix::Error::from_errno(Errno::EINVAL)))?;
let linux = get_linux(oci)?;
if contain_namespace(&linux.namespaces, "user") {
let user_ns = PathBuf::from("/proc/self/ns/user");
if !user_ns.exists() {
@@ -131,7 +132,7 @@ fn usernamespace(oci: &Spec) -> Result<()> {
} else {
// no user namespace, but ID mappings are present
if !linux.uid_mappings.is_empty() || !linux.gid_mappings.is_empty() {
return Err(anyhow!(nix::Error::from_errno(Errno::EINVAL)));
return Err(einval());
}
}
@@ -139,10 +140,8 @@ fn usernamespace(oci: &Spec) -> Result<()> {
}
fn cgroupnamespace(oci: &Spec) -> Result<()> {
let linux = oci
.linux
.as_ref()
.ok_or(anyhow!(nix::Error::from_errno(Errno::EINVAL)))?;
let linux = get_linux(oci)?;
if contain_namespace(&linux.namespaces, "cgroup") {
let path = PathBuf::from("/proc/self/ns/cgroup");
if !path.exists() {
@@ -186,23 +185,21 @@ fn check_host_ns(path: &str) -> Result<()> {
.read_link()
.context(format!("read link {:?}", cpath))?;
if real_cpath == real_hpath {
return Err(anyhow!(nix::Error::from_errno(Errno::EINVAL)));
return Err(einval());
}
Ok(())
}
fn sysctl(oci: &Spec) -> Result<()> {
let linux = oci
.linux
.as_ref()
.ok_or(anyhow!(nix::Error::from_errno(Errno::EINVAL)))?;
let linux = get_linux(oci)?;
for (key, _) in linux.sysctl.iter() {
if SYSCTLS.contains_key(key.as_str()) || key.starts_with("fs.mqueue.") {
if contain_namespace(&linux.namespaces, "ipc") {
continue;
} else {
return Err(anyhow!(nix::Error::from_errno(Errno::EINVAL)));
return Err(einval());
}
}
@@ -217,27 +214,25 @@ fn sysctl(oci: &Spec) -> Result<()> {
}
if key == "kernel.hostname" {
return Err(anyhow!(nix::Error::from_errno(Errno::EINVAL)));
return Err(einval());
}
}
return Err(anyhow!(nix::Error::from_errno(Errno::EINVAL)));
return Err(einval());
}
Ok(())
}
fn rootless_euid_mapping(oci: &Spec) -> Result<()> {
let linux = oci
.linux
.as_ref()
.ok_or(anyhow!(nix::Error::from_errno(Errno::EINVAL)))?;
let linux = get_linux(oci)?;
if !contain_namespace(&linux.namespaces, "user") {
return Err(anyhow!(nix::Error::from_errno(Errno::EINVAL)));
return Err(einval());
}
if linux.uid_mappings.is_empty() || linux.gid_mappings.is_empty() {
// rootless containers require at least one UID/GID mapping
return Err(anyhow!(nix::Error::from_errno(Errno::EINVAL)));
return Err(einval());
}
Ok(())
@@ -253,10 +248,7 @@ fn has_idmapping(maps: &[LinuxIDMapping], id: u32) -> bool {
}
fn rootless_euid_mount(oci: &Spec) -> Result<()> {
let linux = oci
.linux
.as_ref()
.ok_or(anyhow!(nix::Error::from_errno(Errno::EINVAL)))?;
let linux = get_linux(oci)?;
for mnt in oci.mounts.iter() {
for opt in mnt.options.iter() {
@@ -264,7 +256,7 @@ fn rootless_euid_mount(oci: &Spec) -> Result<()> {
let fields: Vec<&str> = opt.split('=').collect();
if fields.len() != 2 {
return Err(anyhow!(nix::Error::from_errno(Errno::EINVAL)));
return Err(einval());
}
let id = fields[1]
@@ -273,11 +265,11 @@ fn rootless_euid_mount(oci: &Spec) -> Result<()> {
.context(format!("parse field {}", &fields[1]))?;
if opt.starts_with("uid=") && !has_idmapping(&linux.uid_mappings, id) {
return Err(anyhow!(nix::Error::from_errno(Errno::EINVAL)));
return Err(einval());
}
if opt.starts_with("gid=") && !has_idmapping(&linux.gid_mappings, id) {
return Err(anyhow!(nix::Error::from_errno(Errno::EINVAL)));
return Err(einval());
}
}
}
@@ -293,18 +285,15 @@ fn rootless_euid(oci: &Spec) -> Result<()> {
pub fn validate(conf: &Config) -> Result<()> {
lazy_static::initialize(&SYSCTLS);
let oci = conf
.spec
.as_ref()
.ok_or(anyhow!(nix::Error::from_errno(Errno::EINVAL)))?;
let oci = conf.spec.as_ref().ok_or_else(einval)?;
if oci.linux.is_none() {
return Err(anyhow!(nix::Error::from_errno(Errno::EINVAL)));
return Err(einval());
}
let root = match oci.root.as_ref() {
Some(v) => v.path.as_str(),
None => return Err(anyhow!(nix::Error::from_errno(Errno::EINVAL))),
None => return Err(einval()),
};
rootfs(root).context("rootfs")?;
@@ -320,3 +309,274 @@ pub fn validate(conf: &Config) -> Result<()> {
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use oci::Mount;
#[test]
fn test_namespace() {
let namespaces = [
LinuxNamespace {
r#type: "net".to_owned(),
path: "/sys/cgroups/net".to_owned(),
},
LinuxNamespace {
r#type: "uts".to_owned(),
path: "/sys/cgroups/uts".to_owned(),
},
];
assert_eq!(contain_namespace(&namespaces, "net"), true);
assert_eq!(contain_namespace(&namespaces, "uts"), true);
assert_eq!(contain_namespace(&namespaces, ""), false);
assert_eq!(contain_namespace(&namespaces, "Net"), false);
assert_eq!(contain_namespace(&namespaces, "ipc"), false);
assert_eq!(
get_namespace_path(&namespaces, "net").unwrap(),
"/sys/cgroups/net"
);
assert_eq!(
get_namespace_path(&namespaces, "uts").unwrap(),
"/sys/cgroups/uts"
);
get_namespace_path(&namespaces, "").unwrap_err();
get_namespace_path(&namespaces, "Uts").unwrap_err();
get_namespace_path(&namespaces, "ipc").unwrap_err();
}
#[test]
fn test_rootfs() {
rootfs("/_no_exit_fs_xxxxxxxxxxx").unwrap_err();
rootfs("sys").unwrap_err();
rootfs("/proc/self/root").unwrap_err();
rootfs("/proc/self/root/sys").unwrap_err();
rootfs("/proc/self").unwrap_err();
rootfs("/./proc/self").unwrap_err();
rootfs("/proc/././self").unwrap_err();
rootfs("/proc/.././self").unwrap_err();
rootfs("/proc/uptime").unwrap();
rootfs("/../proc/uptime").unwrap();
rootfs("/../../proc/uptime").unwrap();
rootfs("/proc/../proc/uptime").unwrap();
rootfs("/proc/../../proc/uptime").unwrap();
}
#[test]
fn test_hostname() {
let mut spec = Spec::default();
hostname(&spec).unwrap();
spec.hostname = "a.test.com".to_owned();
hostname(&spec).unwrap_err();
let mut linux = Linux::default();
linux.namespaces = vec![
LinuxNamespace {
r#type: "net".to_owned(),
path: "/sys/cgroups/net".to_owned(),
},
LinuxNamespace {
r#type: "uts".to_owned(),
path: "/sys/cgroups/uts".to_owned(),
},
];
spec.linux = Some(linux);
hostname(&spec).unwrap();
}
#[test]
fn test_security() {
let mut spec = Spec::default();
let linux = Linux::default();
spec.linux = Some(linux);
security(&spec).unwrap();
let mut linux = Linux::default();
linux.masked_paths.push("/test".to_owned());
linux.namespaces = vec![
LinuxNamespace {
r#type: "net".to_owned(),
path: "/sys/cgroups/net".to_owned(),
},
LinuxNamespace {
r#type: "uts".to_owned(),
path: "/sys/cgroups/uts".to_owned(),
},
];
spec.linux = Some(linux);
security(&spec).unwrap_err();
let mut linux = Linux::default();
linux.masked_paths.push("/test".to_owned());
linux.namespaces = vec![
LinuxNamespace {
r#type: "net".to_owned(),
path: "/sys/cgroups/net".to_owned(),
},
LinuxNamespace {
r#type: "mount".to_owned(),
path: "/sys/cgroups/mount".to_owned(),
},
];
spec.linux = Some(linux);
security(&spec).unwrap();
}
#[test]
fn test_usernamespace() {
let mut spec = Spec::default();
usernamespace(&spec).unwrap_err();
let linux = Linux::default();
spec.linux = Some(linux);
usernamespace(&spec).unwrap();
let mut linux = Linux::default();
linux.uid_mappings = vec![LinuxIDMapping {
container_id: 0,
host_id: 1000,
size: 0,
}];
spec.linux = Some(linux);
usernamespace(&spec).unwrap_err();
let mut linux = Linux::default();
linux.uid_mappings = vec![LinuxIDMapping {
container_id: 0,
host_id: 1000,
size: 100,
}];
spec.linux = Some(linux);
usernamespace(&spec).unwrap_err();
}
#[test]
fn test_rootless_euid() {
let mut spec = Spec::default();
// Test case: without linux
rootless_euid_mapping(&spec).unwrap_err();
rootless_euid_mount(&spec).unwrap_err();
// Test case: without user namespace
let linux = Linux::default();
spec.linux = Some(linux);
rootless_euid_mapping(&spec).unwrap_err();
// Test case: without user namespace
let linux = spec.linux.as_mut().unwrap();
linux.namespaces = vec![
LinuxNamespace {
r#type: "net".to_owned(),
path: "/sys/cgroups/net".to_owned(),
},
LinuxNamespace {
r#type: "uts".to_owned(),
path: "/sys/cgroups/uts".to_owned(),
},
];
rootless_euid_mapping(&spec).unwrap_err();
let linux = spec.linux.as_mut().unwrap();
linux.namespaces = vec![
LinuxNamespace {
r#type: "net".to_owned(),
path: "/sys/cgroups/net".to_owned(),
},
LinuxNamespace {
r#type: "user".to_owned(),
path: "/sys/cgroups/user".to_owned(),
},
];
linux.uid_mappings = vec![LinuxIDMapping {
container_id: 0,
host_id: 1000,
size: 1000,
}];
linux.gid_mappings = vec![LinuxIDMapping {
container_id: 0,
host_id: 1000,
size: 1000,
}];
rootless_euid_mapping(&spec).unwrap();
spec.mounts.push(Mount {
destination: "/app".to_owned(),
r#type: "tmpfs".to_owned(),
source: "".to_owned(),
options: vec!["uid=10000".to_owned()],
});
rootless_euid_mount(&spec).unwrap_err();
spec.mounts = vec![
(Mount {
destination: "/app".to_owned(),
r#type: "tmpfs".to_owned(),
source: "".to_owned(),
options: vec!["uid=500".to_owned(), "gid=500".to_owned()],
}),
];
rootless_euid(&spec).unwrap();
}
#[test]
fn test_check_host_ns() {
check_host_ns("/proc/self/ns/net").unwrap_err();
check_host_ns("/proc/sys/net/ipv4/tcp_sack").unwrap();
}
#[test]
fn test_sysctl() {
let mut spec = Spec::default();
let mut linux = Linux::default();
linux.namespaces = vec![LinuxNamespace {
r#type: "net".to_owned(),
path: "/sys/cgroups/net".to_owned(),
}];
linux
.sysctl
.insert("kernel.domainname".to_owned(), "test.com".to_owned());
spec.linux = Some(linux);
sysctl(&spec).unwrap_err();
spec.linux
.as_mut()
.unwrap()
.namespaces
.push(LinuxNamespace {
r#type: "uts".to_owned(),
path: "/sys/cgroups/uts".to_owned(),
});
sysctl(&spec).unwrap();
}
#[test]
fn test_validate() {
let spec = Spec::default();
let mut config = Config {
cgroup_name: "container1".to_owned(),
use_systemd_cgroup: false,
no_pivot_root: true,
no_new_keyring: true,
rootless_euid: false,
rootless_cgroup: false,
spec: Some(spec),
};
validate(&config).unwrap_err();
let linux = Linux::default();
config.spec.as_mut().unwrap().linux = Some(linux);
validate(&config).unwrap_err();
}
}


@@ -10,6 +10,7 @@ use std::time;
const DEBUG_CONSOLE_FLAG: &str = "agent.debug_console";
const DEV_MODE_FLAG: &str = "agent.devmode";
const LOG_LEVEL_OPTION: &str = "agent.log";
const SERVER_ADDR_OPTION: &str = "agent.server_addr";
const HOTPLUG_TIMOUT_OPTION: &str = "agent.hotplug_timeout";
const DEBUG_CONSOLE_VPORT_OPTION: &str = "agent.debug_console_vport";
const LOG_VPORT_OPTION: &str = "agent.log_vport";
@@ -21,14 +22,29 @@ const DEFAULT_HOTPLUG_TIMEOUT: time::Duration = time::Duration::from_secs(3);
const DEFAULT_CONTAINER_PIPE_SIZE: i32 = 0;
const VSOCK_ADDR: &str = "vsock://-1";
const VSOCK_PORT: u16 = 1024;
const SERVER_ADDR_ENV_VAR: &str = "KATA_AGENT_SERVER_ADDR";
// FIXME: unused
const TRACE_MODE_FLAG: &str = "agent.trace";
const USE_VSOCK_FLAG: &str = "agent.use_vsock";
// Environment variables used for development and testing
const SERVER_ADDR_ENV_VAR: &str = "KATA_AGENT_SERVER_ADDR";
const LOG_LEVEL_ENV_VAR: &str = "KATA_AGENT_LOG_LEVEL";
const ERR_INVALID_LOG_LEVEL: &str = "invalid log level";
const ERR_INVALID_LOG_LEVEL_PARAM: &str = "invalid log level parameter";
const ERR_INVALID_GET_VALUE_PARAM: &str = "expected name=value";
const ERR_INVALID_GET_VALUE_NO_NAME: &str = "name=value parameter missing name";
const ERR_INVALID_GET_VALUE_NO_VALUE: &str = "name=value parameter missing value";
const ERR_INVALID_LOG_LEVEL_KEY: &str = "invalid log level key name";
const ERR_INVALID_HOTPLUG_TIMEOUT: &str = "invalid hotplug timeout parameter";
const ERR_INVALID_HOTPLUG_TIMEOUT_PARAM: &str = "unable to parse hotplug timeout";
const ERR_INVALID_HOTPLUG_TIMEOUT_KEY: &str = "invalid hotplug timeout key name";
const ERR_INVALID_CONTAINER_PIPE_SIZE: &str = "invalid container pipe size parameter";
const ERR_INVALID_CONTAINER_PIPE_SIZE_PARAM: &str = "unable to parse container pipe size";
const ERR_INVALID_CONTAINER_PIPE_SIZE_KEY: &str = "invalid container pipe size key name";
const ERR_INVALID_CONTAINER_PIPE_NEGATIVE: &str = "container pipe size should not be negative";
#[derive(Debug)]
pub struct agentConfig {
pub struct AgentConfig {
pub debug_console: bool,
pub dev_mode: bool,
pub log_level: slog::Level,
@@ -70,9 +86,9 @@ macro_rules! parse_cmdline_param {
};
}
impl agentConfig {
pub fn new() -> agentConfig {
agentConfig {
impl AgentConfig {
pub fn new() -> AgentConfig {
AgentConfig {
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
@@ -95,6 +111,12 @@ impl agentConfig {
// parse cmdline options
parse_cmdline_param!(param, LOG_LEVEL_OPTION, self.log_level, get_log_level);
parse_cmdline_param!(
param,
SERVER_ADDR_OPTION,
self.server_addr,
get_string_value
);
// ensure the timeout is a positive value
parse_cmdline_param!(
@@ -102,7 +124,7 @@ impl agentConfig {
HOTPLUG_TIMOUT_OPTION,
self.hotplug_timeout,
get_hotplug_timeout,
|hotplugTimeout: time::Duration| hotplugTimeout.as_secs() > 0
|hotplug_timeout: time::Duration| hotplug_timeout.as_secs() > 0
);
// vsock port should be positive values
@@ -139,6 +161,12 @@ impl agentConfig {
self.server_addr = addr;
}
if let Ok(addr) = env::var(LOG_LEVEL_ENV_VAR) {
if let Ok(level) = logrus_to_slog_level(&addr) {
self.log_level = level;
}
}
Ok(())
}
}
@@ -172,7 +200,7 @@ fn logrus_to_slog_level(logrus_level: &str) -> Result<slog::Level> {
"trace" => slog::Level::Trace,
_ => {
return Err(anyhow!("invalid log level"));
return Err(anyhow!(ERR_INVALID_LOG_LEVEL));
}
};
@@ -183,11 +211,11 @@ fn get_log_level(param: &str) -> Result<slog::Level> {
let fields: Vec<&str> = param.split('=').collect();
if fields.len() != 2 {
return Err(anyhow!("invalid log level parameter"));
return Err(anyhow!(ERR_INVALID_LOG_LEVEL_PARAM));
}
if fields[0] != LOG_LEVEL_OPTION {
Err(anyhow!("invalid log level key name"))
Err(anyhow!(ERR_INVALID_LOG_LEVEL_KEY))
} else {
Ok(logrus_to_slog_level(fields[1])?)
}
@@ -197,17 +225,17 @@ fn get_hotplug_timeout(param: &str) -> Result<time::Duration> {
let fields: Vec<&str> = param.split('=').collect();
if fields.len() != 2 {
return Err(anyhow!("invalid hotplug timeout parameter"));
return Err(anyhow!(ERR_INVALID_HOTPLUG_TIMEOUT));
}
let key = fields[0];
if key != HOTPLUG_TIMOUT_OPTION {
return Err(anyhow!("invalid hotplug timeout key name"));
return Err(anyhow!(ERR_INVALID_HOTPLUG_TIMEOUT_KEY));
}
let value = fields[1].parse::<u64>();
if value.is_err() {
return Err(anyhow!("unable to parse hotplug timeout"));
return Err(anyhow!(ERR_INVALID_HOTPLUG_TIMEOUT_PARAM));
}
Ok(time::Duration::from_secs(value.unwrap()))
@@ -225,32 +253,58 @@ fn get_bool_value(param: &str) -> Result<bool> {
// first try to parse as bool value
v.parse::<bool>().or_else(|_err1| {
// then try to parse as integer value
v.parse::<u64>()
.or_else(|_err2| Ok(0))
.map(|v| !matches!(v, 0))
v.parse::<u64>().or(Ok(0)).map(|v| !matches!(v, 0))
})
}
// Return the value from a "name=value" string.
//
// Note:
//
// - A name *and* a value is required.
// - A value can contain any number of equal signs.
// - We could/should maybe check if the name is pure whitespace
// since this is considered to be invalid.
fn get_string_value(param: &str) -> Result<String> {
let fields: Vec<&str> = param.split('=').collect();
if fields.len() < 2 {
return Err(anyhow!(ERR_INVALID_GET_VALUE_PARAM));
}
// We need name (but the value can be blank)
if fields[0] == "" {
return Err(anyhow!(ERR_INVALID_GET_VALUE_NO_NAME));
}
let value = fields[1..].join("=");
if value == "" {
return Err(anyhow!(ERR_INVALID_GET_VALUE_NO_VALUE));
}
Ok(value)
}
fn get_container_pipe_size(param: &str) -> Result<i32> {
let fields: Vec<&str> = param.split('=').collect();
if fields.len() != 2 {
return Err(anyhow!("invalid container pipe size parameter"));
return Err(anyhow!(ERR_INVALID_CONTAINER_PIPE_SIZE));
}
let key = fields[0];
if key != CONTAINER_PIPE_SIZE_OPTION {
return Err(anyhow!("invalid container pipe size key name"));
return Err(anyhow!(ERR_INVALID_CONTAINER_PIPE_SIZE_KEY));
}
let res = fields[1].parse::<i32>();
if res.is_err() {
return Err(anyhow!("unable to parse container pipe size"));
return Err(anyhow!(ERR_INVALID_CONTAINER_PIPE_SIZE_PARAM));
}
let value = res.unwrap();
if value < 0 {
return Err(anyhow!("container pipe size should not be negative"));
return Err(anyhow!(ERR_INVALID_CONTAINER_PIPE_NEGATIVE));
}
Ok(value)
@@ -265,19 +319,6 @@ mod tests {
use std::time;
use tempfile::tempdir;
const ERR_INVALID_LOG_LEVEL: &str = "invalid log level";
const ERR_INVALID_LOG_LEVEL_PARAM: &str = "invalid log level parameter";
const ERR_INVALID_LOG_LEVEL_KEY: &str = "invalid log level key name";
const ERR_INVALID_HOTPLUG_TIMEOUT: &str = "invalid hotplug timeout parameter";
const ERR_INVALID_HOTPLUG_TIMEOUT_PARAM: &str = "unable to parse hotplug timeout";
const ERR_INVALID_HOTPLUG_TIMEOUT_KEY: &str = "invalid hotplug timeout key name";
const ERR_INVALID_CONTAINER_PIPE_SIZE: &str = "invalid container pipe size parameter";
const ERR_INVALID_CONTAINER_PIPE_SIZE_PARAM: &str = "unable to parse container pipe size";
const ERR_INVALID_CONTAINER_PIPE_SIZE_KEY: &str = "invalid container pipe size key name";
const ERR_INVALID_CONTAINER_PIPE_NEGATIVE: &str = "container pipe size should not be negative";
// helper function to make errors less crazy-long
fn make_err(desc: &str) -> Error {
anyhow!(desc.to_string())
@@ -296,19 +337,22 @@ mod tests {
assert!(*expected_level == actual_level, $msg);
} else {
let expected_error = $expected_result.as_ref().unwrap_err();
let actual_error = $actual_result.unwrap_err();
let expected_error_msg = format!("{:?}", expected_error);
let actual_error_msg = format!("{:?}", actual_error);
assert!(expected_error_msg == actual_error_msg, $msg);
if let Err(actual_error) = $actual_result {
let actual_error_msg = format!("{:?}", actual_error);
assert!(expected_error_msg == actual_error_msg, $msg);
} else {
assert!(expected_error_msg == "expected error, got OK", $msg);
}
}
};
}
#[test]
fn test_new() {
let config = agentConfig::new();
let config = AgentConfig::new();
assert_eq!(config.debug_console, false);
assert_eq!(config.dev_mode, false);
assert_eq!(config.log_level, DEFAULT_LOG_LEVEL);
@@ -317,297 +361,550 @@ mod tests {
#[test]
fn test_parse_cmdline() {
const TEST_SERVER_ADDR: &str = "vsock://-1:1024";
#[derive(Debug)]
struct TestData<'a> {
contents: &'a str,
env_vars: Vec<&'a str>,
debug_console: bool,
dev_mode: bool,
log_level: slog::Level,
hotplug_timeout: time::Duration,
container_pipe_size: i32,
server_addr: &'a str,
unified_cgroup_hierarchy: bool,
}
let tests = &[
TestData {
contents: "agent.debug_consolex agent.devmode",
env_vars: Vec::new(),
debug_console: false,
dev_mode: true,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "agent.debug_console agent.devmodex",
env_vars: Vec::new(),
debug_console: true,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "agent.logx=debug",
env_vars: Vec::new(),
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "agent.log=debug",
env_vars: Vec::new(),
debug_console: false,
dev_mode: false,
log_level: slog::Level::Debug,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "agent.log=debug",
env_vars: vec!["KATA_AGENT_LOG_LEVEL=trace"],
debug_console: false,
dev_mode: false,
log_level: slog::Level::Trace,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "",
env_vars: Vec::new(),
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "foo",
env_vars: Vec::new(),
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "foo bar",
env_vars: Vec::new(),
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "foo bar",
env_vars: Vec::new(),
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "foo agent bar",
env_vars: Vec::new(),
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "foo debug_console agent bar devmode",
env_vars: Vec::new(),
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "agent.debug_console",
env_vars: Vec::new(),
debug_console: true,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: " agent.debug_console ",
env_vars: Vec::new(),
debug_console: true,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "agent.debug_console foo",
env_vars: Vec::new(),
debug_console: true,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: " agent.debug_console foo",
env_vars: Vec::new(),
debug_console: true,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "foo agent.debug_console bar",
env_vars: Vec::new(),
debug_console: true,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "foo agent.debug_console",
env_vars: Vec::new(),
debug_console: true,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "foo agent.debug_console ",
env_vars: Vec::new(),
debug_console: true,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "agent.devmode",
env_vars: Vec::new(),
debug_console: false,
dev_mode: true,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: " agent.devmode ",
env_vars: Vec::new(),
debug_console: false,
dev_mode: true,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "agent.devmode foo",
env_vars: Vec::new(),
debug_console: false,
dev_mode: true,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: " agent.devmode foo",
env_vars: Vec::new(),
debug_console: false,
dev_mode: true,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "foo agent.devmode bar",
env_vars: Vec::new(),
debug_console: false,
dev_mode: true,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "foo agent.devmode",
env_vars: Vec::new(),
debug_console: false,
dev_mode: true,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "foo agent.devmode ",
env_vars: Vec::new(),
debug_console: false,
dev_mode: true,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "agent.devmode agent.debug_console",
env_vars: Vec::new(),
debug_console: true,
dev_mode: true,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "agent.devmode agent.debug_console agent.hotplug_timeout=100 agent.unified_cgroup_hierarchy=a",
env_vars: Vec::new(),
debug_console: true,
dev_mode: true,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: time::Duration::from_secs(100),
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "agent.devmode agent.debug_console agent.hotplug_timeout=0 agent.unified_cgroup_hierarchy=11",
env_vars: Vec::new(),
debug_console: true,
dev_mode: true,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: true,
},
TestData {
contents: "agent.devmode agent.debug_console agent.container_pipe_size=2097152 agent.unified_cgroup_hierarchy=false",
env_vars: Vec::new(),
debug_console: true,
dev_mode: true,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: 2097152,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "agent.devmode agent.debug_console agent.container_pipe_size=100 agent.unified_cgroup_hierarchy=true",
env_vars: Vec::new(),
debug_console: true,
dev_mode: true,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: 100,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: true,
},
TestData {
contents: "agent.devmode agent.debug_console agent.container_pipe_size=0 agent.unified_cgroup_hierarchy=0",
env_vars: Vec::new(),
debug_console: true,
dev_mode: true,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "agent.devmode agent.debug_console agent.container_pip_siz=100 agent.unified_cgroup_hierarchy=1",
env_vars: Vec::new(),
debug_console: true,
dev_mode: true,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: true,
},
TestData {
contents: "",
env_vars: Vec::new(),
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "",
env_vars: vec!["KATA_AGENT_SERVER_ADDR=foo"],
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: "foo",
unified_cgroup_hierarchy: false,
},
TestData {
contents: "",
env_vars: vec!["KATA_AGENT_SERVER_ADDR=="],
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: "=",
unified_cgroup_hierarchy: false,
},
TestData {
contents: "",
env_vars: vec!["KATA_AGENT_SERVER_ADDR==foo"],
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: "=foo",
unified_cgroup_hierarchy: false,
},
TestData {
contents: "",
env_vars: vec!["KATA_AGENT_SERVER_ADDR=foo=bar=baz="],
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: "foo=bar=baz=",
unified_cgroup_hierarchy: false,
},
TestData {
contents: "",
env_vars: vec!["KATA_AGENT_SERVER_ADDR=unix:///tmp/foo.socket"],
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: "unix:///tmp/foo.socket",
unified_cgroup_hierarchy: false,
},
TestData {
contents: "",
env_vars: vec!["KATA_AGENT_SERVER_ADDR=unix://@/tmp/foo.socket"],
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: "unix://@/tmp/foo.socket",
unified_cgroup_hierarchy: false,
},
TestData {
contents: "",
env_vars: vec!["KATA_AGENT_LOG_LEVEL="],
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "",
env_vars: vec!["KATA_AGENT_LOG_LEVEL=invalid"],
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "",
env_vars: vec!["KATA_AGENT_LOG_LEVEL=debug"],
debug_console: false,
dev_mode: false,
log_level: slog::Level::Debug,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "",
env_vars: vec!["KATA_AGENT_LOG_LEVEL=debugger"],
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "server_addr=unix:///tmp/foo.socket",
env_vars: Vec::new(),
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "agent.server_address=unix:///tmp/foo.socket",
env_vars: Vec::new(),
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
},
TestData {
contents: "agent.server_addr=unix:///tmp/foo.socket",
env_vars: Vec::new(),
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: "unix:///tmp/foo.socket",
unified_cgroup_hierarchy: false,
},
TestData {
contents: " agent.server_addr=unix:///tmp/foo.socket",
env_vars: Vec::new(),
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: "unix:///tmp/foo.socket",
unified_cgroup_hierarchy: false,
},
TestData {
contents: " agent.server_addr=unix:///tmp/foo.socket a",
env_vars: Vec::new(),
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: "unix:///tmp/foo.socket",
unified_cgroup_hierarchy: false,
},
];
let dir = tempdir().expect("failed to create tmpdir");
@@ -617,11 +914,12 @@ mod tests {
let filename = file_path.to_str().expect("failed to create filename");
let mut config = agentConfig::new();
let mut config = AgentConfig::new();
let result = config.parse_cmdline(&filename.to_owned());
assert!(result.is_err());
// Now, test various combinations of file contents
// Now, test various combinations of file contents and environment
// variables.
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
@@ -635,7 +933,20 @@ mod tests {
file.write_all(d.contents.as_bytes())
.unwrap_or_else(|_| panic!("{}: failed to write file contents", msg));
let mut config = agentConfig::new();
let mut vars_to_unset = Vec::new();
for v in &d.env_vars {
let fields: Vec<&str> = v.split('=').collect();
let name = fields[0];
let value = fields[1..].join("=");
env::set_var(name, value);
vars_to_unset.push(name);
}
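// Note: splitting on '=' and re-joining the tail preserves values that
// themselves contain '=', e.g. "KATA_AGENT_SERVER_ADDR=foo=bar=baz=" sets
// the variable to "foo=bar=baz=" (matching the test case above).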
let mut config = AgentConfig::new();
assert_eq!(config.debug_console, false, "{}", msg);
assert_eq!(config.dev_mode, false, "{}", msg);
assert_eq!(config.unified_cgroup_hierarchy, false, "{}", msg);
@@ -646,6 +957,7 @@ mod tests {
msg
);
assert_eq!(config.container_pipe_size, 0, "{}", msg);
assert_eq!(config.server_addr, TEST_SERVER_ADDR, "{}", msg);
let result = config.parse_cmdline(filename);
assert!(result.is_ok(), "{}", msg);
@@ -660,6 +972,11 @@ mod tests {
assert_eq!(d.log_level, config.log_level, "{}", msg);
assert_eq!(d.hotplug_timeout, config.hotplug_timeout, "{}", msg);
assert_eq!(d.container_pipe_size, config.container_pipe_size, "{}", msg);
assert_eq!(d.server_addr, config.server_addr, "{}", msg);
for v in vars_to_unset {
env::remove_var(v);
}
}
}
@@ -974,4 +1291,82 @@ mod tests {
assert_result!(d.result, result, msg);
}
}
#[test]
fn test_get_string_value() {
#[derive(Debug)]
struct TestData<'a> {
param: &'a str,
result: Result<String>,
}
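// Expected semantics encoded by the cases below: the parameter must
// contain '=', the name before the first '=' must be non-empty, a value
// must follow, and the value is everything after that first '=' taken
// verbatim (it may itself contain '=' or spaces).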
let tests = &[
TestData {
param: "",
result: Err(make_err(ERR_INVALID_GET_VALUE_PARAM)),
},
TestData {
param: "=",
result: Err(make_err(ERR_INVALID_GET_VALUE_NO_NAME)),
},
TestData {
param: "==",
result: Err(make_err(ERR_INVALID_GET_VALUE_NO_NAME)),
},
TestData {
param: "x=",
result: Err(make_err(ERR_INVALID_GET_VALUE_NO_VALUE)),
},
TestData {
param: "x==",
result: Ok("=".into()),
},
TestData {
param: "x===",
result: Ok("==".into()),
},
TestData {
param: "x==x",
result: Ok("=x".into()),
},
TestData {
param: "x=x",
result: Ok("x".into()),
},
TestData {
param: "x=x=",
result: Ok("x=".into()),
},
TestData {
param: "x=x=x",
result: Ok("x=x".into()),
},
TestData {
param: "foo=bar",
result: Ok("bar".into()),
},
TestData {
param: "x= =",
result: Ok(" =".into()),
},
TestData {
param: "x= =",
result: Ok(" =".into()),
},
TestData {
param: "x= = ",
result: Ok(" = ".into()),
},
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
let result = get_string_value(d.param);
let msg = format!("{}: result: {:?}", msg, result);
assert_result!(d.result, result, msg);
}
}
}


@@ -9,10 +9,13 @@ use std::collections::HashMap;
use std::fs;
use std::os::unix::fs::MetadataExt;
use std::path::Path;
use std::sync::{mpsc, Arc, Mutex};
use std::str::FromStr;
use std::sync::Arc;
use tokio::sync::Mutex;
use crate::linux_abi::*;
use crate::mount::{DRIVERBLKTYPE, DRIVERMMIOBLKTYPE, DRIVERNVDIMMTYPE, DRIVERSCSITYPE};
use crate::mount::{DRIVER_BLK_TYPE, DRIVER_MMIO_BLK_TYPE, DRIVER_NVDIMM_TYPE, DRIVER_SCSI_TYPE};
use crate::pci;
use crate::sandbox::Sandbox;
use crate::{AGENT_CONFIG, GLOBAL_DEVICE_WATCHER};
use anyhow::{anyhow, Result};
@@ -35,22 +38,6 @@ struct DevIndexEntry {
struct DevIndex(HashMap<String, DevIndexEntry>);
// DeviceHandler is the type of callback to be defined to handle every type of device driver.
type DeviceHandler = fn(&Device, &mut Spec, &Arc<Mutex<Sandbox>>, &DevIndex) -> Result<()>;
// DEVICEHANDLERLIST lists the supported drivers.
#[rustfmt::skip]
lazy_static! {
static ref DEVICEHANDLERLIST: HashMap<&'static str, DeviceHandler> = {
let mut m: HashMap<&'static str, DeviceHandler> = HashMap::new();
m.insert(DRIVERBLKTYPE, virtio_blk_device_handler);
m.insert(DRIVERMMIOBLKTYPE, virtiommio_blk_device_handler);
m.insert(DRIVERNVDIMMTYPE, virtio_nvdimm_device_handler);
m.insert(DRIVERSCSITYPE, virtio_scsi_device_handler);
m
};
}
pub fn rescan_pci_bus() -> Result<()> {
online_device(SYSFS_PCI_BUS_RESCAN_FILE)
}
@@ -60,64 +47,50 @@ pub fn online_device(path: &str) -> Result<()> {
Ok(())
}
// get_pci_device_address fetches the complete PCI address in sysfs, based on the PCI
// identifier provided. This should be in the format: "bridgeAddr/deviceAddr".
// Here, bridgeAddr is the address at which the bridge is attached on the root bus,
// while deviceAddr is the address at which the device is attached on the bridge.
fn get_pci_device_address(pci_id: &str) -> Result<String> {
let tokens: Vec<&str> = pci_id.split('/').collect();
// pcipath_to_sysfs resolves the given PCI path to its sysfs path,
// relative to the sysfs path of the PCI host bridge.
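// Illustrative example (mirroring the unit test below): with a bridge at
// 0000:00:02.0 exposing bus 0000:01, the PCI path "02/03" resolves to the
// sysfs-relative path "/0000:00:02.0/0000:01:03.0".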
fn pcipath_to_sysfs(root_bus_sysfs: &str, pcipath: &pci::Path) -> Result<String> {
let mut bus = "0000:00".to_string();
let mut relpath = String::new();
if tokens.len() != 2 {
return Err(anyhow!(
"PCI Identifier for device should be of format [bridgeAddr/deviceAddr], got {}",
pci_id
));
for i in 0..pcipath.len() {
let bdf = format!("{}:{}.0", bus, pcipath[i]);
relpath = format!("{}/{}", relpath, bdf);
if i == pcipath.len() - 1 {
// Final device need not be a bridge
break;
}
// Find out the bus exposed by bridge
let bridgebuspath = format!("{}{}/pci_bus", root_bus_sysfs, relpath);
let mut files: Vec<_> = fs::read_dir(&bridgebuspath)?.collect();
if files.len() != 1 {
return Err(anyhow!(
"Expected exactly one PCI bus in {}, got {} instead",
bridgebuspath,
files.len()
));
}
// unwrap is safe, because of the length test above
let busfile = files.pop().unwrap()?;
bus = busfile
.file_name()
.into_string()
.map_err(|e| anyhow!("Bad filename under {}: {:?}", &bridgebuspath, e))?;
}
let bridge_id = tokens[0];
let device_id = tokens[1];
// Deduce the complete bridge address based on the bridge address identifier passed
// and the fact that bridges are attached on the main bus with function 0.
let pci_bridge_addr = format!("0000:00:{}.0", bridge_id);
// Find out the bus exposed by bridge
let bridge_bus_path = format!("{}/{}/pci_bus/", SYSFS_PCI_BUS_PREFIX, pci_bridge_addr);
let files_slice: Vec<_> = fs::read_dir(&bridge_bus_path)
.unwrap()
.map(|res| res.unwrap().path())
.collect();
let bus_num = files_slice.len();
if bus_num != 1 {
return Err(anyhow!(
"Expected an entry for bus in {}, got {} entries instead",
bridge_bus_path,
bus_num
));
}
let bus = files_slice[0].file_name().unwrap().to_str().unwrap();
// Device address is based on the bus of the bridge to which it is attached.
// We do not pass devices as multifunction, hence the trailing 0 in the address.
let pci_device_addr = format!("{}:{}.0", bus, device_id);
let bridge_device_pci_addr = format!("{}/{}", pci_bridge_addr, pci_device_addr);
info!(
sl!(),
"Fetched PCI address for device PCIAddr:{}\n", bridge_device_pci_addr
);
Ok(bridge_device_pci_addr)
Ok(relpath)
}
fn get_device_name(sandbox: &Arc<Mutex<Sandbox>>, dev_addr: &str) -> Result<String> {
async fn get_device_name(sandbox: &Arc<Mutex<Sandbox>>, dev_addr: &str) -> Result<String> {
// Keep the same lock order as uevent::handle_block_add_event(), otherwise it may cause deadlock.
let mut w = GLOBAL_DEVICE_WATCHER.lock().unwrap();
let sb = sandbox.lock().unwrap();
let mut w = GLOBAL_DEVICE_WATCHER.lock().await;
let sb = sandbox.lock().await;
for (key, value) in sb.pci_device_map.iter() {
if key.contains(dev_addr) {
info!(sl!(), "Device {} found in pci device map", dev_addr);
@@ -131,36 +104,58 @@ fn get_device_name(sandbox: &Arc<Mutex<Sandbox>>, dev_addr: &str) -> Result<Stri
// The key of the watchers map is the device we are interested in.
// Note this is done inside the lock, not to miss any events from the
// global udev listener.
let (tx, rx) = mpsc::channel::<String>();
w.insert(dev_addr.to_string(), tx);
let (tx, rx) = tokio::sync::oneshot::channel::<String>();
w.insert(dev_addr.to_string(), Some(tx));
drop(w);
info!(sl!(), "Waiting on channel for device notification\n");
let hotplug_timeout = AGENT_CONFIG.read().unwrap().hotplug_timeout;
let dev_name = rx.recv_timeout(hotplug_timeout).map_err(|_| {
GLOBAL_DEVICE_WATCHER.lock().unwrap().remove_entry(dev_addr);
anyhow!(
"Timeout reached after {:?} waiting for device {}",
hotplug_timeout,
dev_addr
)
})?;
let hotplug_timeout = AGENT_CONFIG.read().await.hotplug_timeout;
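// The match below races the oneshot receiver against the configured
// hotplug timeout; if the timer wins, the stale watcher entry is removed
// before the timeout error is returned.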
let dev_name = match tokio::time::timeout(hotplug_timeout, rx).await {
Ok(v) => v?,
Err(_) => {
let watcher = GLOBAL_DEVICE_WATCHER.clone();
let mut w = watcher.lock().await;
w.remove_entry(dev_addr);
return Err(anyhow!(
"Timeout reached after {:?} waiting for device {}",
hotplug_timeout,
dev_addr
));
}
};
Ok(format!("{}/{}", SYSTEM_DEV_PATH, &dev_name))
}
pub fn get_scsi_device_name(sandbox: &Arc<Mutex<Sandbox>>, scsi_addr: &str) -> Result<String> {
pub async fn get_scsi_device_name(
sandbox: &Arc<Mutex<Sandbox>>,
scsi_addr: &str,
) -> Result<String> {
let dev_sub_path = format!("{}{}/{}", SCSI_HOST_CHANNEL, scsi_addr, SCSI_BLOCK_SUFFIX);
scan_scsi_bus(scsi_addr)?;
get_device_name(sandbox, &dev_sub_path)
get_device_name(sandbox, &dev_sub_path).await
}
pub fn get_pci_device_name(sandbox: &Arc<Mutex<Sandbox>>, pci_id: &str) -> Result<String> {
let pci_addr = get_pci_device_address(pci_id)?;
pub async fn get_pci_device_name(
sandbox: &Arc<Mutex<Sandbox>>,
pcipath: &pci::Path,
) -> Result<String> {
let root_bus_sysfs = format!("{}{}", SYSFS_DIR, create_pci_root_bus_path());
let sysfs_rel_path = pcipath_to_sysfs(&root_bus_sysfs, pcipath)?;
rescan_pci_bus()?;
get_device_name(sandbox, &pci_addr)
get_device_name(sandbox, &sysfs_rel_path).await
}
pub async fn get_pmem_device_name(
sandbox: &Arc<Mutex<Sandbox>>,
pmem_devname: &str,
) -> Result<String> {
let dev_sub_path = format!("/{}/{}", SCSI_BLOCK_SUFFIX, pmem_devname);
get_device_name(sandbox, &dev_sub_path).await
}
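// Illustrative (assuming SCSI_BLOCK_SUFFIX is "block" and SYSTEM_DEV_PATH is
// "/dev"): a pmem_devname of "pmem0" waits on the "/block/pmem0" key and
// resolves to "/dev/pmem0".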
/// Scan the SCSI bus for the given SCSI address (SCSI-Id and LUN)
@@ -274,7 +269,7 @@ fn update_spec_device_list(device: &Device, spec: &mut Spec, devidx: &DevIndex)
// device.Id should be the predicted device name (vda, vdb, ...)
// device.VmPath already provides a way to send it in
fn virtiommio_blk_device_handler(
async fn virtiommio_blk_device_handler(
device: &Device,
spec: &mut Spec,
_sandbox: &Arc<Mutex<Sandbox>>,
@@ -287,10 +282,8 @@ fn virtiommio_blk_device_handler(
update_spec_device_list(device, spec, devidx)
}
// device.Id should be the PCI address in the format "bridgeAddr/deviceAddr".
// Here, bridgeAddr is the address at which the bridge is attached on the root bus,
// while deviceAddr is the address at which the device is attached on the bridge.
fn virtio_blk_device_handler(
// device.Id should be a PCI path string
async fn virtio_blk_device_handler(
device: &Device,
spec: &mut Spec,
sandbox: &Arc<Mutex<Sandbox>>,
@@ -298,28 +291,30 @@ fn virtio_blk_device_handler(
) -> Result<()> {
let mut dev = device.clone();
// When "Id (PCIAddr)" is not set, we allow to use the predicted "VmPath" passed from kata-runtime
// Note this is a special code path for cloud-hypervisor when BDF information is not available
// When "Id (PCI path)" is not set, we allow to use the predicted
// "VmPath" passed from kata-runtime Note this is a special code
// path for cloud-hypervisor when BDF information is not available
if !device.id.is_empty() {
dev.vm_path = get_pci_device_name(sandbox, &device.id)?;
let pcipath = pci::Path::from_str(&device.id)?;
dev.vm_path = get_pci_device_name(sandbox, &pcipath).await?;
}
update_spec_device_list(&dev, spec, devidx)
}
// device.Id should be the SCSI address of the disk in the format "scsiID:lunID"
fn virtio_scsi_device_handler(
async fn virtio_scsi_device_handler(
device: &Device,
spec: &mut Spec,
sandbox: &Arc<Mutex<Sandbox>>,
devidx: &DevIndex,
) -> Result<()> {
let mut dev = device.clone();
dev.vm_path = get_scsi_device_name(sandbox, &device.id)?;
dev.vm_path = get_scsi_device_name(sandbox, &device.id).await?;
update_spec_device_list(&dev, spec, devidx)
}
fn virtio_nvdimm_device_handler(
async fn virtio_nvdimm_device_handler(
device: &Device,
spec: &mut Spec,
_sandbox: &Arc<Mutex<Sandbox>>,
@@ -357,7 +352,7 @@ impl DevIndex {
}
}
pub fn add_devices(
pub async fn add_devices(
devices: &[Device],
spec: &mut Spec,
sandbox: &Arc<Mutex<Sandbox>>,
@@ -365,13 +360,13 @@ pub fn add_devices(
let devidx = DevIndex::new(spec);
for device in devices.iter() {
add_device(device, spec, sandbox, &devidx)?;
add_device(device, spec, sandbox, &devidx).await?;
}
Ok(())
}
fn add_device(
async fn add_device(
device: &Device,
spec: &mut Spec,
sandbox: &Arc<Mutex<Sandbox>>,
@@ -393,9 +388,12 @@ fn add_device(
return Err(anyhow!("invalid container path for device {:?}", device));
}
match DEVICEHANDLERLIST.get(device.field_type.as_str()) {
None => Err(anyhow!("Unknown device type {}", device.field_type)),
Some(dev_handler) => dev_handler(device, spec, sandbox, devidx),
match device.field_type.as_str() {
DRIVER_BLK_TYPE => virtio_blk_device_handler(device, spec, sandbox, devidx).await,
DRIVER_MMIO_BLK_TYPE => virtiommio_blk_device_handler(device, spec, sandbox, devidx).await,
DRIVER_NVDIMM_TYPE => virtio_nvdimm_device_handler(device, spec, sandbox, devidx).await,
DRIVER_SCSI_TYPE => virtio_scsi_device_handler(device, spec, sandbox, devidx).await,
_ => Err(anyhow!("Unknown device type {}", device.field_type)),
}
}
@@ -433,6 +431,7 @@ pub fn update_device_cgroup(spec: &mut Spec) -> Result<()> {
mod tests {
use super::*;
use oci::Linux;
use tempfile::tempdir;
#[test]
fn test_update_device_cgroup() {
@@ -713,4 +712,68 @@ mod tests {
assert_eq!(Some(host_major), specresources.devices[1].major);
assert_eq!(Some(host_minor), specresources.devices[1].minor);
}
#[test]
fn test_pcipath_to_sysfs() {
let testdir = tempdir().expect("failed to create tmpdir");
let rootbuspath = testdir.path().to_str().unwrap();
let path2 = pci::Path::from_str("02").unwrap();
let path23 = pci::Path::from_str("02/03").unwrap();
let path234 = pci::Path::from_str("02/03/04").unwrap();
let relpath = pcipath_to_sysfs(rootbuspath, &path2);
assert_eq!(relpath.unwrap(), "/0000:00:02.0");
let relpath = pcipath_to_sysfs(rootbuspath, &path23);
assert!(relpath.is_err());
let relpath = pcipath_to_sysfs(rootbuspath, &path234);
assert!(relpath.is_err());
// Create mock sysfs files for the device at 0000:00:02.0
let bridge2path = format!("{}{}", rootbuspath, "/0000:00:02.0");
fs::create_dir_all(&bridge2path).unwrap();
let relpath = pcipath_to_sysfs(rootbuspath, &path2);
assert_eq!(relpath.unwrap(), "/0000:00:02.0");
let relpath = pcipath_to_sysfs(rootbuspath, &path23);
assert!(relpath.is_err());
let relpath = pcipath_to_sysfs(rootbuspath, &path234);
assert!(relpath.is_err());
// Create mock sysfs files to indicate that 0000:00:02.0 is a bridge to bus 01
let bridge2bus = "0000:01";
let bus2path = format!("{}/pci_bus/{}", bridge2path, bridge2bus);
fs::create_dir_all(bus2path).unwrap();
let relpath = pcipath_to_sysfs(rootbuspath, &path2);
assert_eq!(relpath.unwrap(), "/0000:00:02.0");
let relpath = pcipath_to_sysfs(rootbuspath, &path23);
assert_eq!(relpath.unwrap(), "/0000:00:02.0/0000:01:03.0");
let relpath = pcipath_to_sysfs(rootbuspath, &path234);
assert!(relpath.is_err());
// Create mock sysfs files for a bridge at 0000:01:03.0 to bus 02
let bridge3path = format!("{}/0000:01:03.0", bridge2path);
let bridge3bus = "0000:02";
let bus3path = format!("{}/pci_bus/{}", bridge3path, bridge3bus);
fs::create_dir_all(bus3path).unwrap();
let relpath = pcipath_to_sysfs(rootbuspath, &path2);
assert_eq!(relpath.unwrap(), "/0000:00:02.0");
let relpath = pcipath_to_sysfs(rootbuspath, &path23);
assert_eq!(relpath.unwrap(), "/0000:00:02.0/0000:01:03.0");
let relpath = pcipath_to_sysfs(rootbuspath, &path234);
assert_eq!(relpath.unwrap(), "/0000:00:02.0/0000:01:03.0/0000:02:04.0");
}
}


@@ -5,9 +5,10 @@
/// Linux ABI related constants.
pub const SYSFS_DIR: &str = "/sys";
#[cfg(target_arch = "aarch64")]
use std::fs;
pub const SYSFS_PCI_BUS_PREFIX: &str = "/sys/bus/pci/devices";
pub const SYSFS_DIR: &str = "/sys";
pub const SYSFS_PCI_BUS_RESCAN_FILE: &str = "/sys/bus/pci/rescan";
#[cfg(any(
target_arch = "powerpc64",
@@ -15,9 +16,61 @@ pub const SYSFS_PCI_BUS_RESCAN_FILE: &str = "/sys/bus/pci/rescan";
target_arch = "x86_64",
target_arch = "x86"
))]
pub const PCI_ROOT_BUS_PATH: &str = "/devices/pci0000:00";
pub fn create_pci_root_bus_path() -> String {
String::from("/devices/pci0000:00")
}
#[cfg(target_arch = "aarch64")]
pub const PCI_ROOT_BUS_PATH: &str = "/devices/platform/4010000000.pcie/pci0000:00";
pub fn create_pci_root_bus_path() -> String {
let ret = String::from("/devices/platform/4010000000.pcie/pci0000:00");
let acpi_root_bus_path = String::from("/devices/pci0000:00");
let mut acpi_sysfs_dir = String::from(SYSFS_DIR);
let mut sysfs_dir = String::from(SYSFS_DIR);
let mut start_root_bus_path = String::from("/devices/platform/");
let end_root_bus_path = String::from("/pci0000:00");
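// Illustrative outcomes: if "/sys/devices/pci0000:00" exists (ACPI), this
// returns "/devices/pci0000:00"; otherwise a platform host bridge directory
// ending in ".pcie" (e.g. "4010000000.pcie") yields
// "/devices/platform/<bridge>/pci0000:00", falling back to the default above.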
// check if there is pci bus path for acpi
acpi_sysfs_dir.push_str(&acpi_root_bus_path);
if let Ok(_) = fs::metadata(&acpi_sysfs_dir) {
return acpi_root_bus_path;
}
sysfs_dir.push_str(&start_root_bus_path);
let entries = match fs::read_dir(sysfs_dir) {
Ok(e) => e,
Err(_) => return ret,
};
for entry in entries {
let pathname = match entry {
Ok(p) => p.path(),
Err(_) => return ret,
};
let dir_name = match pathname.file_name() {
Some(p) => p.to_str(),
None => return ret,
};
let dir_name = match dir_name {
Some(p) => p,
None => return ret,
};
let dir_name = String::from(dir_name);
if dir_name.ends_with(".pcie") {
start_root_bus_path.push_str(&dir_name);
start_root_bus_path.push_str(&end_root_bus_path);
return start_root_bus_path;
}
}
ret
}
// From https://www.kernel.org/doc/Documentation/acpi/namespace.txt
// The Linux kernel's core ACPI subsystem creates struct acpi_device
// objects for ACPI namespace objects representing devices, power resources,
// processors, and thermal zones. Those objects are exported to user space via
// sysfs as directories in the subtree under /sys/devices/LNXSYSTM:00
pub const ACPI_DEV_PATH: &str = "/devices/LNXSYSTM";
pub const SYSFS_CPU_ONLINE_PATH: &str = "/sys/devices/system/cpu";


@@ -3,11 +3,6 @@
// SPDX-License-Identifier: Apache-2.0
//
#![allow(non_camel_case_types)]
#![allow(unused_parens)]
#![allow(unused_unsafe)]
#![allow(dead_code)]
#![allow(non_snake_case)]
#[macro_use]
extern crate lazy_static;
extern crate oci;
@@ -15,19 +10,15 @@ extern crate prctl;
extern crate prometheus;
extern crate protocols;
extern crate regex;
extern crate rustjail;
extern crate scan_fmt;
extern crate serde_json;
extern crate signal_hook;
#[macro_use]
extern crate scopeguard;
#[macro_use]
extern crate slog;
extern crate netlink;
use crate::netlink::{RtnlHandle, NETLINK_ROUTE};
use anyhow::{anyhow, Context, Result};
use nix::fcntl::{self, OFlag};
use nix::fcntl::{FcntlArg, FdFlag};
@@ -35,10 +26,8 @@ use nix::libc::{STDERR_FILENO, STDIN_FILENO, STDOUT_FILENO};
use nix::pty;
use nix::sys::select::{select, FdSet};
use nix::sys::socket::{self, AddressFamily, SockAddr, SockFlag, SockType};
use nix::sys::wait::{self, WaitStatus};
use nix::sys::wait;
use nix::unistd::{self, close, dup, dup2, fork, setsid, ForkResult};
use prctl::set_child_subreaper;
use signal_hook::{iterator::Signals, SIGCHLD};
use std::collections::HashMap;
use std::env;
use std::ffi::{CStr, CString, OsStr};
@@ -48,9 +37,7 @@ use std::os::unix::ffi::OsStrExt;
use std::os::unix::fs as unixfs;
use std::os::unix::io::AsRawFd;
use std::path::Path;
use std::sync::mpsc::{self, Sender};
use std::sync::{Arc, Mutex, RwLock};
use std::{io, thread, thread::JoinHandle};
use std::sync::Arc;
use unistd::Pid;
mod config;
@@ -59,19 +46,40 @@ mod linux_abi;
mod metrics;
mod mount;
mod namespace;
mod netlink;
mod network;
mod pci;
pub mod random;
mod sandbox;
mod signal;
#[cfg(test)]
mod test_utils;
mod uevent;
mod util;
mod version;
use mount::{cgroups_mount, general_mount};
use sandbox::Sandbox;
use signal::setup_signal_handler;
use slog::Logger;
use uevent::watch_uevents;
use std::sync::Mutex as SyncMutex;
use futures::future::join_all;
use futures::StreamExt as _;
use rustjail::pipestream::PipeStream;
use tokio::{
io::AsyncWrite,
sync::{
oneshot::Sender,
watch::{channel, Receiver},
Mutex, RwLock,
},
task::JoinHandle,
};
use tokio_vsock::{Incoming, VsockListener, VsockStream};
mod rpc;
const NAME: &str = "kata-agent";
@@ -81,13 +89,13 @@ const CONSOLE_PATH: &str = "/dev/console";
const DEFAULT_BUF_SIZE: usize = 8 * 1024;
lazy_static! {
static ref GLOBAL_DEVICE_WATCHER: Arc<Mutex<HashMap<String, Sender<String>>>> =
static ref GLOBAL_DEVICE_WATCHER: Arc<Mutex<HashMap<String, Option<Sender<String>>>>> =
Arc::new(Mutex::new(HashMap::new()));
static ref AGENT_CONFIG: Arc<RwLock<agentConfig>> =
Arc::new(RwLock::new(config::agentConfig::new()));
static ref AGENT_CONFIG: Arc<RwLock<AgentConfig>> =
Arc::new(RwLock::new(config::AgentConfig::new()));
}
fn announce(logger: &Logger, config: &agentConfig) {
fn announce(logger: &Logger, config: &AgentConfig) {
info!(logger, "announce";
"agent-commit" => version::VERSION_COMMIT,
@@ -100,7 +108,168 @@ fn announce(logger: &Logger, config: &agentConfig) {
);
}
fn main() -> Result<()> {
fn set_fd_close_exec(fd: RawFd) -> Result<RawFd> {
if let Err(e) = fcntl::fcntl(fd, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC)) {
return Err(anyhow!("failed to set fd: {} as close-on-exec: {}", fd, e));
}
Ok(fd)
}
fn get_vsock_incoming(fd: RawFd) -> Incoming {
let incoming;
unsafe {
incoming = VsockListener::from_raw_fd(fd).incoming();
}
incoming
}
async fn get_vsock_stream(fd: RawFd) -> Result<VsockStream> {
let stream = get_vsock_incoming(fd).next().await.unwrap().unwrap();
set_fd_close_exec(stream.as_raw_fd())?;
Ok(stream)
}
// Create a task to handle reading from the logger pipe. The task writes
// its output to the specified vsock port, or to stdout.
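// Illustrative: with vsock_port == 0 the task simply copies everything read
// from `rfd` to the agent's stdout until the shutdown channel fires.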
async fn create_logger_task(rfd: RawFd, vsock_port: u32, shutdown: Receiver<bool>) -> Result<()> {
let mut reader = PipeStream::from_fd(rfd);
let mut writer: Box<dyn AsyncWrite + Unpin + Send>;
if vsock_port > 0 {
let listenfd = socket::socket(
AddressFamily::Vsock,
SockType::Stream,
SockFlag::SOCK_CLOEXEC,
None,
)?;
let addr = SockAddr::new_vsock(libc::VMADDR_CID_ANY, vsock_port);
socket::bind(listenfd, &addr).unwrap();
socket::listen(listenfd, 1).unwrap();
writer = Box::new(get_vsock_stream(listenfd).await.unwrap());
} else {
writer = Box::new(tokio::io::stdout());
}
let _ = util::interruptable_io_copier(&mut reader, &mut writer, shutdown).await;
Ok(())
}
async fn real_main() -> std::result::Result<(), Box<dyn std::error::Error>> {
env::set_var("RUST_BACKTRACE", "full");
// List of tasks that need to be stopped for a clean shutdown
let mut tasks: Vec<JoinHandle<Result<()>>> = vec![];
lazy_static::initialize(&SHELLS);
lazy_static::initialize(&AGENT_CONFIG);
// support vsock log
let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC)?;
let (shutdown_tx, shutdown_rx) = channel(true);
let agent_config = AGENT_CONFIG.clone();
let init_mode = unistd::getpid() == Pid::from_raw(1);
if init_mode {
// dup a new file descriptor for this temporary logger writer,
// since this logger would be dropped and its writer would
// be closed when execution leaves this code block.
let newwfd = dup(wfd)?;
let writer = unsafe { File::from_raw_fd(newwfd) };
// Init a temporary logger used by the agent when running as the init
// process, since before the base mounts are done it cannot access
// "/proc/cmdline" to get the customized debug level.
let (logger, logger_async_guard) =
logging::create_logger(NAME, "agent", slog::Level::Debug, writer);
// Must mount proc fs before parsing kernel command line
general_mount(&logger).map_err(|e| {
error!(logger, "fail general mount: {}", e);
e
})?;
let mut config = agent_config.write().await;
config.parse_cmdline(KERNEL_CMDLINE_FILE)?;
init_agent_as_init(&logger, config.unified_cgroup_hierarchy)?;
drop(logger_async_guard);
} else {
// Once the cmdline is parsed and the config is set, release the write
// lock as soon as possible so that other tasks can take the read lock
// on it.
let mut config = agent_config.write().await;
config.parse_cmdline(KERNEL_CMDLINE_FILE)?;
}
let config = agent_config.read().await;
let log_vport = config.log_vport as u32;
let log_handle = tokio::spawn(create_logger_task(rfd, log_vport, shutdown_rx.clone()));
tasks.push(log_handle);
let writer = unsafe { File::from_raw_fd(wfd) };
// Recreate a logger with the log level get from "/proc/cmdline".
let (logger, logger_async_guard) =
logging::create_logger(NAME, "agent", config.log_level, writer);
announce(&logger, &config);
// This variable is required as it enables the global (and crucially static) logger,
// which is required to satisfy the lifetime constraints of the auto-generated gRPC code.
let global_logger = slog_scope::set_global_logger(logger.new(o!("subsystem" => "rpc")));
// Allow the global logger to be modified later (for shutdown)
global_logger.cancel_reset();
let mut ttrpc_log_guard: Result<(), log::SetLoggerError> = Ok(());
if config.log_level == slog::Level::Trace {
// Redirect ttrpc log calls to slog iff full debug requested
ttrpc_log_guard = Ok(slog_stdlog::init().map_err(|e| e)?);
}
// Start the sandbox and wait for its ttRPC server to end
start_sandbox(&logger, &config, init_mode, &mut tasks, shutdown_rx.clone()).await?;
// Install a NOP logger for the remainder of the shutdown sequence
// to ensure any log calls made by local crates using the scope logger
// don't fail.
let global_logger_guard2 =
slog_scope::set_global_logger(slog::Logger::root(slog::Discard, o!()));
global_logger_guard2.cancel_reset();
drop(logger_async_guard);
drop(ttrpc_log_guard);
// Trigger a controlled shutdown
shutdown_tx
.send(true)
.map_err(|e| anyhow!(e).context("failed to request shutdown"))?;
// Wait for all tasks to finish
let results = join_all(tasks).await;
for result in results {
if let Err(e) = result {
return Err(anyhow!(e).into());
}
}
eprintln!("{} shutdown complete", NAME);
Ok(())
}
fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
let args: Vec<String> = env::args().collect();
if args.len() == 2 && args[1] == "--version" {
@@ -120,228 +289,78 @@ fn main() -> Result<()> {
exit(0);
}
env::set_var("RUST_BACKTRACE", "full");
let rt = tokio::runtime::Builder::new_multi_thread()
.enable_all()
.build()?;
lazy_static::initialize(&SHELLS);
lazy_static::initialize(&AGENT_CONFIG);
// support vsock log
let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC)?;
let agentConfig = AGENT_CONFIG.clone();
let init_mode = unistd::getpid() == Pid::from_raw(1);
if init_mode {
// dup a new file descriptor for this temporary logger writer,
// since this logger would be dropped and it's writer would
// be closed out of this code block.
let newwfd = dup(wfd)?;
let writer = unsafe { File::from_raw_fd(newwfd) };
// Init a temporary logger used by init agent as init process
// since before do the base mount, it wouldn't access "/proc/cmdline"
// to get the customized debug level.
let logger = logging::create_logger(NAME, "agent", slog::Level::Debug, writer);
// Must mount proc fs before parsing kernel command line
general_mount(&logger).map_err(|e| {
error!(logger, "fail general mount: {}", e);
e
})?;
let mut config = agentConfig.write().unwrap();
config.parse_cmdline(KERNEL_CMDLINE_FILE)?;
init_agent_as_init(&logger, config.unified_cgroup_hierarchy)?;
} else {
// once parsed cmdline and set the config, release the write lock
// as soon as possible in case other thread would get read lock on
// it.
let mut config = agentConfig.write().unwrap();
config.parse_cmdline(KERNEL_CMDLINE_FILE)?;
}
let config = agentConfig.read().unwrap();
let log_vport = config.log_vport as u32;
let log_handle = thread::spawn(move || -> Result<()> {
let mut reader = unsafe { File::from_raw_fd(rfd) };
if log_vport > 0 {
let listenfd = socket::socket(
AddressFamily::Vsock,
SockType::Stream,
SockFlag::SOCK_CLOEXEC,
None,
)?;
let addr = SockAddr::new_vsock(libc::VMADDR_CID_ANY, log_vport);
socket::bind(listenfd, &addr)?;
socket::listen(listenfd, 1)?;
let datafd = socket::accept4(listenfd, SockFlag::SOCK_CLOEXEC)?;
let mut log_writer = unsafe { File::from_raw_fd(datafd) };
let _ = io::copy(&mut reader, &mut log_writer)?;
let _ = unistd::close(listenfd);
let _ = unistd::close(datafd);
}
// copy log to stdout
let mut stdout_writer = io::stdout();
let _ = io::copy(&mut reader, &mut stdout_writer)?;
Ok(())
});
let writer = unsafe { File::from_raw_fd(wfd) };
// Recreate a logger with the log level get from "/proc/cmdline".
let logger = logging::create_logger(NAME, "agent", config.log_level, writer);
announce(&logger, &config);
// This "unused" variable is required as it enables the global (and crucially static) logger,
// which is required to satisfy the lifetime constraints of the auto-generated gRPC code.
let _guard = slog_scope::set_global_logger(logger.new(o!("subsystem" => "rpc")));
start_sandbox(&logger, &config, init_mode)?;
let _ = log_handle.join();
Ok(())
rt.block_on(real_main())
}
fn start_sandbox(logger: &Logger, config: &agentConfig, init_mode: bool) -> Result<()> {
async fn start_sandbox(
logger: &Logger,
config: &AgentConfig,
init_mode: bool,
tasks: &mut Vec<JoinHandle<Result<()>>>,
shutdown: Receiver<bool>,
) -> Result<()> {
let shells = SHELLS.clone();
let debug_console_vport = config.debug_console_vport as u32;
let mut shell_handle: Option<JoinHandle<()>> = None;
if config.debug_console {
let shell_handle = if config.debug_console {
let thread_logger = logger.clone();
let shells = shells.lock().unwrap().to_vec();
let builder = thread::Builder::new();
let handle = builder.spawn(move || {
let shells = shells.lock().unwrap();
let result = setup_debug_console(&thread_logger, shells.to_vec(), debug_console_vport);
let handle = tokio::task::spawn_blocking(move || {
let result = setup_debug_console(&thread_logger, shells, debug_console_vport);
if result.is_err() {
// Report error, but don't fail
warn!(thread_logger, "failed to setup debug console";
"error" => format!("{}", result.unwrap_err()));
}
})?;
});
shell_handle = Some(handle);
}
Some(handle)
} else {
None
};
// Initialize unique sandbox structure.
let mut s = Sandbox::new(&logger).context("Failed to create sandbox")?;
let s = Sandbox::new(&logger).context("Failed to create sandbox")?;
if init_mode {
let mut rtnl = RtnlHandle::new(NETLINK_ROUTE, 0).unwrap();
rtnl.handle_localhost()?;
s.rtnl = Some(rtnl);
s.rtnl.handle_localhost().await?;
}
let sandbox = Arc::new(Mutex::new(s));
setup_signal_handler(&logger, sandbox.clone()).unwrap();
watch_uevents(sandbox.clone());
let signal_handler_task = tokio::spawn(setup_signal_handler(
logger.clone(),
sandbox.clone(),
shutdown.clone(),
));
let (tx, rx) = mpsc::channel::<i32>();
sandbox.lock().unwrap().sender = Some(tx);
tasks.push(signal_handler_task);
let uevents_handler_task = tokio::spawn(watch_uevents(sandbox.clone(), shutdown.clone()));
tasks.push(uevents_handler_task);
let (tx, rx) = tokio::sync::oneshot::channel();
sandbox.lock().await.sender = Some(tx);
// vsock:///dev/vsock, port
let mut server = rpc::start(sandbox, config.server_addr.as_str());
let mut server = rpc::start(sandbox.clone(), config.server_addr.as_str());
server.start().await?;
let _ = server.start().unwrap();
let _ = rx.recv()?;
server.shutdown();
let _ = rx.await?;
server.shutdown().await?;
if let Some(handle) = shell_handle {
handle.join().map_err(|e| anyhow!("{:?}", e))?;
handle.await.map_err(|e| anyhow!("{:?}", e))?;
}
Ok(())
}
use nix::sys::wait::WaitPidFlag;
fn setup_signal_handler(logger: &Logger, sandbox: Arc<Mutex<Sandbox>>) -> Result<()> {
let logger = logger.new(o!("subsystem" => "signals"));
set_child_subreaper(true)
.map_err(|err| anyhow!(err).context("failed to setup agent as a child subreaper"))?;
let signals = Signals::new(&[SIGCHLD])?;
thread::spawn(move || {
'outer: for sig in signals.forever() {
info!(logger, "received signal"; "signal" => sig);
// several signals can be combined together
// as one. So loop around to reap all
// exited children
'inner: loop {
let wait_status = match wait::waitpid(
Some(Pid::from_raw(-1)),
Some(WaitPidFlag::WNOHANG | WaitPidFlag::__WALL),
) {
Ok(s) => {
if s == WaitStatus::StillAlive {
continue 'outer;
}
s
}
Err(e) => {
info!(
logger,
"waitpid reaper failed";
"error" => e.as_errno().unwrap().desc()
);
continue 'outer;
}
};
let pid = wait_status.pid();
if let Some(pid) = pid {
let raw_pid = pid.as_raw();
let child_pid = format!("{}", raw_pid);
let logger = logger.new(o!("child-pid" => child_pid));
let mut sandbox = sandbox.lock().unwrap();
let process = sandbox.find_process(raw_pid);
if process.is_none() {
info!(logger, "child exited unexpectedly");
continue 'inner;
}
let mut p = process.unwrap();
if p.exit_pipe_w.is_none() {
error!(logger, "the process's exit_pipe_w isn't set");
continue 'inner;
}
let pipe_write = p.exit_pipe_w.unwrap();
let ret: i32;
match wait_status {
WaitStatus::Exited(_, c) => ret = c,
WaitStatus::Signaled(_, sig, _) => ret = sig as i32,
_ => {
info!(logger, "got wrong status for process";
"child-status" => format!("{:?}", wait_status));
continue 'inner;
}
}
p.exit_code = ret;
let _ = unistd::close(pipe_write);
}
}
}
});
Ok(())
}
// init_agent_as_init will do the initializations such as setting up the rootfs
// when this agent has been run as the init process.
fn init_agent_as_init(logger: &Logger, unified_cgroup_hierarchy: bool) -> Result<()> {
@@ -359,7 +378,7 @@ fn init_agent_as_init(logger: &Logger, unified_cgroup_hierarchy: bool) -> Result
unistd::setsid()?;
unsafe {
libc::ioctl(io::stdin().as_raw_fd(), libc::TIOCSCTTY, 1);
libc::ioctl(std::io::stdin().as_raw_fd(), libc::TIOCSCTTY, 1);
}
env::set_var("PATH", "/bin:/sbin/:/usr/bin/:/usr/sbin/");
@@ -390,7 +409,7 @@ fn sethostname(hostname: &OsStr) -> Result<()> {
}
lazy_static! {
static ref SHELLS: Arc<Mutex<Vec<String>>> = {
static ref SHELLS: Arc<SyncMutex<Vec<String>>> = {
let mut v = Vec::new();
if !cfg!(test) {
@@ -398,32 +417,21 @@ lazy_static! {
v.push("/bin/sh".to_string());
}
Arc::new(Mutex::new(v))
Arc::new(SyncMutex::new(v))
};
}
// pub static mut LOG_LEVEL: ;
// pub static mut TRACE_MODE: ;
use crate::config::agentConfig;
use crate::config::AgentConfig;
use nix::sys::stat::Mode;
use std::os::unix::io::{FromRawFd, RawFd};
use std::path::PathBuf;
use std::process::exit;
fn setup_debug_console(logger: &Logger, shells: Vec<String>, port: u32) -> Result<()> {
let mut shell: &str = "";
for sh in shells.iter() {
let binary = PathBuf::from(sh);
if binary.exists() {
shell = sh;
break;
}
}
if shell == "" {
return Err(anyhow!("no shell found to launch debug console"));
}
let shell = shells
.iter()
.find(|sh| PathBuf::from(sh).exists())
.ok_or_else(|| anyhow!("no shell found to launch debug console"))?;
if port > 0 {
let listenfd = socket::socket(
@@ -464,7 +472,7 @@ fn setup_debug_console(logger: &Logger, shells: Vec<String>, port: u32) -> Resul
};
}
fn io_copy<R: ?Sized, W: ?Sized>(reader: &mut R, writer: &mut W) -> io::Result<u64>
fn io_copy<R: ?Sized, W: ?Sized>(reader: &mut R, writer: &mut W) -> std::io::Result<u64>
where
R: Read,
W: Write,
@@ -527,10 +535,10 @@ fn run_debug_console_shell(logger: &Logger, shell: &str, socket_fd: RawFd) -> Re
let debug_shell_logger = logger.clone();
// channel used to sync between the thread and the main process
let (tx, rx) = mpsc::channel::<i32>();
let (tx, rx) = std::sync::mpsc::channel::<i32>();
// start a thread to do IO copy between the socket and the pseudo terminal master
thread::spawn(move || {
std::thread::spawn(move || {
let mut master_reader = unsafe { File::from_raw_fd(master_fd) };
let mut master_writer = unsafe { File::from_raw_fd(master_fd) };
let mut socket_reader = unsafe { File::from_raw_fd(socket_fd) };


@@ -187,9 +187,9 @@ fn update_guest_metrics() {
info!(sl!(), "failed to get guest KernelStats: {:?}", err);
}
Ok(kernel_stats) => {
set_gauge_vec_CPU_time(&GUEST_CPU_TIME, "total", &kernel_stats.total);
set_gauge_vec_cpu_time(&GUEST_CPU_TIME, "total", &kernel_stats.total);
for (i, cpu_time) in kernel_stats.cpu_time.iter().enumerate() {
set_gauge_vec_CPU_time(&GUEST_CPU_TIME, format!("{}", i).as_str(), &cpu_time);
set_gauge_vec_cpu_time(&GUEST_CPU_TIME, format!("{}", i).as_str(), &cpu_time);
}
}
}
@@ -332,7 +332,7 @@ fn set_gauge_vec_meminfo(gv: &prometheus::GaugeVec, meminfo: &procfs::Meminfo) {
.set(meminfo.k_reclaimable.unwrap_or(0) as f64);
}
fn set_gauge_vec_CPU_time(gv: &prometheus::GaugeVec, cpu: &str, cpu_time: &procfs::CpuTime) {
fn set_gauge_vec_cpu_time(gv: &prometheus::GaugeVec, cpu: &str, cpu_time: &procfs::CpuTime) {
gv.with_label_values(&[cpu, "user"])
.set(cpu_time.user as f64);
gv.with_label_values(&[cpu, "nice"])


@@ -11,7 +11,9 @@ use std::os::unix::fs::PermissionsExt;
use std::path::Path;
use std::ptr::null;
use std::sync::{Arc, Mutex};
use std::str::FromStr;
use std::sync::Arc;
use tokio::sync::Mutex;
use libc::{c_void, mount};
use nix::mount::{self, MsFlags};
@@ -20,23 +22,28 @@ use regex::Regex;
use std::fs::File;
use std::io::{BufRead, BufReader};
use crate::device::{get_pci_device_name, get_scsi_device_name, online_device};
use crate::device::{
get_pci_device_name, get_pmem_device_name, get_scsi_device_name, online_device,
};
use crate::linux_abi::*;
use crate::pci;
use crate::protocols::agent::Storage;
use crate::Sandbox;
use anyhow::{anyhow, Context, Result};
use slog::Logger;
pub const DRIVER9PTYPE: &str = "9p";
pub const DRIVERVIRTIOFSTYPE: &str = "virtio-fs";
pub const DRIVERBLKTYPE: &str = "blk";
pub const DRIVERMMIOBLKTYPE: &str = "mmioblk";
pub const DRIVERSCSITYPE: &str = "scsi";
pub const DRIVERNVDIMMTYPE: &str = "nvdimm";
pub const DRIVEREPHEMERALTYPE: &str = "ephemeral";
pub const DRIVERLOCALTYPE: &str = "local";
pub const DRIVER_9P_TYPE: &str = "9p";
pub const DRIVER_VIRTIOFS_TYPE: &str = "virtio-fs";
pub const DRIVER_BLK_TYPE: &str = "blk";
pub const DRIVER_MMIO_BLK_TYPE: &str = "mmioblk";
pub const DRIVER_SCSI_TYPE: &str = "scsi";
pub const DRIVER_NVDIMM_TYPE: &str = "nvdimm";
pub const DRIVER_EPHEMERAL_TYPE: &str = "ephemeral";
pub const DRIVER_LOCAL_TYPE: &str = "local";
pub const TYPEROOTFS: &str = "rootfs";
pub const TYPE_ROOTFS: &str = "rootfs";
pub const MOUNT_GUEST_TAG: &str = "kataShared";
#[rustfmt::skip]
lazy_static! {
@@ -80,7 +87,7 @@ lazy_static! {
}
#[derive(Debug, PartialEq)]
pub struct INIT_MOUNT {
pub struct InitMount {
fstype: &'static str,
src: &'static str,
dest: &'static str,
@@ -110,42 +117,26 @@ lazy_static!{
#[rustfmt::skip]
lazy_static! {
pub static ref INIT_ROOTFS_MOUNTS: Vec<INIT_MOUNT> = vec![
INIT_MOUNT{fstype: "proc", src: "proc", dest: "/proc", options: vec!["nosuid", "nodev", "noexec"]},
INIT_MOUNT{fstype: "sysfs", src: "sysfs", dest: "/sys", options: vec!["nosuid", "nodev", "noexec"]},
INIT_MOUNT{fstype: "devtmpfs", src: "dev", dest: "/dev", options: vec!["nosuid"]},
INIT_MOUNT{fstype: "tmpfs", src: "tmpfs", dest: "/dev/shm", options: vec!["nosuid", "nodev"]},
INIT_MOUNT{fstype: "devpts", src: "devpts", dest: "/dev/pts", options: vec!["nosuid", "noexec"]},
INIT_MOUNT{fstype: "tmpfs", src: "tmpfs", dest: "/run", options: vec!["nosuid", "nodev"]},
pub static ref INIT_ROOTFS_MOUNTS: Vec<InitMount> = vec![
InitMount{fstype: "proc", src: "proc", dest: "/proc", options: vec!["nosuid", "nodev", "noexec"]},
InitMount{fstype: "sysfs", src: "sysfs", dest: "/sys", options: vec!["nosuid", "nodev", "noexec"]},
InitMount{fstype: "devtmpfs", src: "dev", dest: "/dev", options: vec!["nosuid"]},
InitMount{fstype: "tmpfs", src: "tmpfs", dest: "/dev/shm", options: vec!["nosuid", "nodev"]},
InitMount{fstype: "devpts", src: "devpts", dest: "/dev/pts", options: vec!["nosuid", "noexec"]},
InitMount{fstype: "tmpfs", src: "tmpfs", dest: "/run", options: vec!["nosuid", "nodev"]},
];
}
// StorageHandler is the type of callback to be defined to handle every
// type of storage driver.
type StorageHandler = fn(&Logger, &Storage, Arc<Mutex<Sandbox>>) -> Result<String>;
// STORAGEHANDLERLIST lists the supported drivers.
#[rustfmt::skip]
lazy_static! {
pub static ref STORAGEHANDLERLIST: HashMap<&'static str, StorageHandler> = {
let mut m = HashMap::new();
let blk: StorageHandler = virtio_blk_storage_handler;
m.insert(DRIVERBLKTYPE, blk);
let p9: StorageHandler= virtio9p_storage_handler;
m.insert(DRIVER9PTYPE, p9);
let virtiofs: StorageHandler = virtiofs_storage_handler;
m.insert(DRIVERVIRTIOFSTYPE, virtiofs);
let ephemeral: StorageHandler = ephemeral_storage_handler;
m.insert(DRIVEREPHEMERALTYPE, ephemeral);
let virtiommio: StorageHandler = virtiommio_blk_storage_handler;
m.insert(DRIVERMMIOBLKTYPE, virtiommio);
let local: StorageHandler = local_storage_handler;
m.insert(DRIVERLOCALTYPE, local);
let scsi: StorageHandler = virtio_scsi_storage_handler;
m.insert(DRIVERSCSITYPE, scsi);
m
};
}
pub const STORAGE_HANDLER_LIST: [&str; 8] = [
DRIVER_BLK_TYPE,
DRIVER_9P_TYPE,
DRIVER_VIRTIOFS_TYPE,
DRIVER_EPHEMERAL_TYPE,
DRIVER_MMIO_BLK_TYPE,
DRIVER_LOCAL_TYPE,
DRIVER_SCSI_TYPE,
DRIVER_NVDIMM_TYPE,
];
#[derive(Debug, Clone)]
pub struct BareMount<'a> {
@@ -237,12 +228,12 @@ impl<'a> BareMount<'a> {
}
}
fn ephemeral_storage_handler(
async fn ephemeral_storage_handler(
logger: &Logger,
storage: &Storage,
sandbox: Arc<Mutex<Sandbox>>,
) -> Result<String> {
let mut sb = sandbox.lock().unwrap();
let mut sb = sandbox.lock().await;
let new_storage = sb.set_sandbox_storage(&storage.mount_point);
if !new_storage {
@@ -255,12 +246,12 @@ fn ephemeral_storage_handler(
Ok("".to_string())
}
fn local_storage_handler(
async fn local_storage_handler(
_logger: &Logger,
storage: &Storage,
sandbox: Arc<Mutex<Sandbox>>,
) -> Result<String> {
let mut sb = sandbox.lock().unwrap();
let mut sb = sandbox.lock().await;
let new_storage = sb.set_sandbox_storage(&storage.mount_point);
if !new_storage {
@@ -288,7 +279,7 @@ fn local_storage_handler(
Ok("".to_string())
}
fn virtio9p_storage_handler(
async fn virtio9p_storage_handler(
logger: &Logger,
storage: &Storage,
_sandbox: Arc<Mutex<Sandbox>>,
@@ -297,7 +288,7 @@ fn virtio9p_storage_handler(
}
// virtiommio_blk_storage_handler handles the storage for mmio blk driver.
fn virtiommio_blk_storage_handler(
async fn virtiommio_blk_storage_handler(
logger: &Logger,
storage: &Storage,
_sandbox: Arc<Mutex<Sandbox>>,
@@ -307,7 +298,7 @@ fn virtiommio_blk_storage_handler(
}
// virtiofs_storage_handler handles the storage for virtio-fs.
fn virtiofs_storage_handler(
async fn virtiofs_storage_handler(
logger: &Logger,
storage: &Storage,
_sandbox: Arc<Mutex<Sandbox>>,
@@ -316,14 +307,14 @@ fn virtiofs_storage_handler(
}
// virtio_blk_storage_handler handles the storage for blk driver.
fn virtio_blk_storage_handler(
async fn virtio_blk_storage_handler(
logger: &Logger,
storage: &Storage,
sandbox: Arc<Mutex<Sandbox>>,
) -> Result<String> {
let mut storage = storage.clone();
// If hot-plugged, get the device node path based on the PCI address else
// use the virt path provided in Storage Source
// If hot-plugged, get the device node path based on the PCI path;
// otherwise use the virt path provided in the Storage Source.
if storage.source.starts_with("/dev") {
let metadata = fs::metadata(&storage.source)
.context(format!("get metadata on file {:?}", &storage.source))?;
@@ -333,7 +324,8 @@ fn virtio_blk_storage_handler(
return Err(anyhow!("Invalid device {}", &storage.source));
}
} else {
let dev_path = get_pci_device_name(&sandbox, &storage.source)?;
let pcipath = pci::Path::from_str(&storage.source)?;
let dev_path = get_pci_device_name(&sandbox, &pcipath).await?;
storage.source = dev_path;
}
@@ -341,7 +333,7 @@ fn virtio_blk_storage_handler(
}
// virtio_scsi_storage_handler handles the storage for scsi driver.
fn virtio_scsi_storage_handler(
async fn virtio_scsi_storage_handler(
logger: &Logger,
storage: &Storage,
sandbox: Arc<Mutex<Sandbox>>,
@@ -349,7 +341,7 @@ fn virtio_scsi_storage_handler(
let mut storage = storage.clone();
// Retrieve the device path from SCSI address.
let dev_path = get_scsi_device_name(&sandbox, &storage.source)?;
let dev_path = get_scsi_device_name(&sandbox, &storage.source).await?;
storage.source = dev_path;
common_storage_handler(logger, &storage)
@@ -362,12 +354,46 @@ fn common_storage_handler(logger: &Logger, storage: &Storage) -> Result<String>
mount_storage(logger, storage).and(Ok(mount_point))
}
// nvdimm_storage_handler handles the storage for NVDIMM driver.
async fn nvdimm_storage_handler(
logger: &Logger,
storage: &Storage,
sandbox: Arc<Mutex<Sandbox>>,
) -> Result<String> {
let mut storage = storage.clone();
// Resolve the device node path from the pmem device name given in the
// Storage Source, which must be a path under /dev/.
let pmem_devname = match storage.source.strip_prefix("/dev/") {
Some(dev) => dev,
None => {
return Err(anyhow!(
"Storage source '{}' must start with /dev/",
storage.source
))
}
};
// Retrieve the device path from NVDIMM address.
let dev_path = get_pmem_device_name(&sandbox, pmem_devname).await?;
storage.source = dev_path;
common_storage_handler(logger, &storage)
}
// mount_storage performs the mount described by the storage structure.
fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
let logger = logger.new(o!("subsystem" => "mount"));
// Check share before attempting to mount to see if the destination is already a mount point.
// If so, skip doing the mount. This facilitates mounting the sharedfs automatically
// in the guest before the agent service starts.
if storage.source == MOUNT_GUEST_TAG && is_mounted(&storage.mount_point)? {
warn!(logger, "kataShared already mounted, ignoring...");
return Ok(());
}
match storage.fstype.as_str() {
DRIVER9PTYPE | DRIVERVIRTIOFSTYPE => {
DRIVER_9P_TYPE | DRIVER_VIRTIOFS_TYPE => {
let dest_path = Path::new(storage.mount_point.as_str());
if !dest_path.exists() {
fs::create_dir_all(dest_path).context("Create mount destination failed")?;
@@ -401,6 +427,24 @@ fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
bare_mount.mount()
}
/// Looks for a `mount_point` entry in /proc/mounts.
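/// Illustrative /proc/mounts entry: "proc /proc proc rw,nosuid,nodev,noexec 0 0";
/// the second whitespace-separated field is the mount point compared here.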
fn is_mounted(mount_point: &str) -> Result<bool> {
let mount_point = mount_point.trim_end_matches('/');
let found = fs::metadata(mount_point).is_ok()
// Look through /proc/mounts and check whether the mount exists
&& fs::read_to_string("/proc/mounts")?
.lines()
.any(|line| {
// The 2nd column reveals the mount point.
line.split_whitespace()
.nth(1)
.map(|target| mount_point.eq(target))
.unwrap_or(false)
});
Ok(found)
}
fn parse_mount_flags_and_options(options_vec: Vec<&str>) -> (MsFlags, String) {
let mut flags = MsFlags::empty();
let mut options: String = "".to_string();
@@ -429,7 +473,7 @@ fn parse_mount_flags_and_options(options_vec: Vec<&str>) -> (MsFlags, String) {
// associated operations such as waiting for the device to show up, and mount
// it to a specific location, according to the type of handler chosen, and for
// each storage.
pub fn add_storages(
pub async fn add_storages(
logger: Logger,
storages: Vec<Storage>,
sandbox: Arc<Mutex<Sandbox>>,
@@ -442,17 +486,33 @@ pub fn add_storages(
"subsystem" => "storage",
"storage-type" => handler_name.to_owned()));
let handler = STORAGEHANDLERLIST
.get(&handler_name.as_str())
.ok_or_else(|| {
anyhow!(
let res = match handler_name.as_str() {
DRIVER_BLK_TYPE => virtio_blk_storage_handler(&logger, &storage, sandbox.clone()).await,
DRIVER_9P_TYPE => virtio9p_storage_handler(&logger, &storage, sandbox.clone()).await,
DRIVER_VIRTIOFS_TYPE => {
virtiofs_storage_handler(&logger, &storage, sandbox.clone()).await
}
DRIVER_EPHEMERAL_TYPE => {
ephemeral_storage_handler(&logger, &storage, sandbox.clone()).await
}
DRIVER_MMIO_BLK_TYPE => {
virtiommio_blk_storage_handler(&logger, &storage, sandbox.clone()).await
}
DRIVER_LOCAL_TYPE => local_storage_handler(&logger, &storage, sandbox.clone()).await,
DRIVER_SCSI_TYPE => {
virtio_scsi_storage_handler(&logger, &storage, sandbox.clone()).await
}
DRIVER_NVDIMM_TYPE => nvdimm_storage_handler(&logger, &storage, sandbox.clone()).await,
_ => {
return Err(anyhow!(
"Failed to find the storage handler {}",
storage.driver.to_owned()
)
})?;
));
}
};
// TODO: roll back the already-mounted storage if an error is met.
let mount_point = handler(&logger, &storage, sandbox.clone())?;
let mount_point = res?;
if !mount_point.is_empty() {
mount_list.push(mount_point);
@@ -462,7 +522,7 @@ pub fn add_storages(
Ok(mount_list)
}
fn mount_to_rootfs(logger: &Logger, m: &INIT_MOUNT) -> Result<()> {
fn mount_to_rootfs(logger: &Logger, m: &InitMount) -> Result<()> {
let options_vec: Vec<&str> = m.options.clone();
let (flags, options) = parse_mount_flags_and_options(options_vec);
@@ -538,11 +598,11 @@ pub fn get_cgroup_mounts(
logger: &Logger,
cg_path: &str,
unified_cgroup_hierarchy: bool,
) -> Result<Vec<INIT_MOUNT>> {
) -> Result<Vec<InitMount>> {
// cgroup v2
// https://github.com/kata-containers/agent/blob/8c9bbadcd448c9a67690fbe11a860aaacc69813c/agent.go#L1249
if unified_cgroup_hierarchy {
return Ok(vec![INIT_MOUNT {
return Ok(vec![InitMount {
fstype: "cgroup2",
src: "cgroup2",
dest: "/sys/fs/cgroup",
@@ -554,7 +614,7 @@ pub fn get_cgroup_mounts(
let reader = BufReader::new(file);
let mut has_device_cgroup = false;
let mut cg_mounts: Vec<INIT_MOUNT> = vec![INIT_MOUNT {
let mut cg_mounts: Vec<InitMount> = vec![InitMount {
fstype: "tmpfs",
src: "tmpfs",
dest: SYSFS_CGROUPPATH,
@@ -600,7 +660,7 @@ pub fn get_cgroup_mounts(
if let Some(value) = CGROUPS.get(&fields[0]) {
let key = CGROUPS.keys().find(|&&f| f == fields[0]).unwrap();
cg_mounts.push(INIT_MOUNT {
cg_mounts.push(InitMount {
fstype: "cgroup",
src: "cgroup",
dest: *value,
@@ -614,7 +674,7 @@ pub fn get_cgroup_mounts(
return Ok(Vec::new());
}
cg_mounts.push(INIT_MOUNT {
cg_mounts.push(InitMount {
fstype: "tmpfs",
src: "tmpfs",
dest: SYSFS_CGROUPPATH,
@@ -864,6 +924,14 @@ mod tests {
}
}
#[test]
fn test_is_mounted() {
assert!(is_mounted("/proc").unwrap());
assert!(!is_mounted("").unwrap());
assert!(!is_mounted("!").unwrap());
assert!(!is_mounted("/not_existing_path").unwrap());
}
#[test]
fn test_remove_mounts() {
skip_if_not_root!();
@@ -1112,21 +1180,21 @@ mod tests {
let drain = slog::Discard;
let logger = slog::Logger::root(drain, o!());
let first_mount = INIT_MOUNT {
let first_mount = InitMount {
fstype: "tmpfs",
src: "tmpfs",
dest: SYSFS_CGROUPPATH,
options: vec!["nosuid", "nodev", "noexec", "mode=755"],
};
let last_mount = INIT_MOUNT {
let last_mount = InitMount {
fstype: "tmpfs",
src: "tmpfs",
dest: SYSFS_CGROUPPATH,
options: vec!["remount", "ro", "nosuid", "nodev", "noexec", "mode=755"],
};
let cg_devices_mount = INIT_MOUNT {
let cg_devices_mount = InitMount {
fstype: "cgroup",
src: "cgroup",
dest: "/sys/fs/cgroup/devices",


@@ -11,7 +11,6 @@ use std::fmt;
use std::fs;
use std::fs::File;
use std::path::{Path, PathBuf};
use std::thread::{self};
use crate::mount::{BareMount, FLAGS};
use slog::Logger;
@@ -69,6 +68,7 @@ impl Namespace {
self
}
#[allow(dead_code)]
pub fn set_root_dir(mut self, dir: &str) -> Self {
self.persistent_ns_dir = dir.to_string();
self
@@ -76,7 +76,7 @@ impl Namespace {
// setup creates persistent namespace without switching to it.
// Note, pid namespaces cannot be persisted.
pub fn setup(mut self) -> Result<Self> {
pub async fn setup(mut self) -> Result<Self> {
fs::create_dir_all(&self.persistent_ns_dir)?;
let ns_path = PathBuf::from(&self.persistent_ns_dir);
@@ -93,45 +93,51 @@ impl Namespace {
self.path = new_ns_path.clone().into_os_string().into_string().unwrap();
let hostname = self.hostname.clone();
let new_thread = thread::spawn(move || -> Result<()> {
let origin_ns_path = get_current_thread_ns_path(&ns_type.get());
let new_thread = tokio::spawn(async move {
if let Err(err) = || -> Result<()> {
let origin_ns_path = get_current_thread_ns_path(&ns_type.get());
File::open(Path::new(&origin_ns_path))?;
File::open(Path::new(&origin_ns_path))?;
// Create a new netns on the current thread.
let cf = ns_type.get_flags();
// Create a new netns on the current thread.
let cf = ns_type.get_flags();
unshare(cf)?;
unshare(cf)?;
if ns_type == NamespaceType::UTS && hostname.is_some() {
nix::unistd::sethostname(hostname.unwrap())?;
if ns_type == NamespaceType::UTS && hostname.is_some() {
nix::unistd::sethostname(hostname.unwrap())?;
}
// Bind mount the new namespace from the current thread onto the mount point to persist it.
let source: &str = origin_ns_path.as_str();
let destination: &str = new_ns_path.as_path().to_str().unwrap_or("none");
let mut flags = MsFlags::empty();
if let Some(x) = FLAGS.get("rbind") {
let (_, f) = *x;
flags |= f;
};
let bare_mount = BareMount::new(source, destination, "none", flags, "", &logger);
bare_mount.mount().map_err(|e| {
anyhow!(
"Failed to mount {} to {} with err:{:?}",
source,
destination,
e
)
})?;
Ok(())
}() {
return Err(err);
}
// Bind mount the new namespace from the current thread onto the mount point to persist it.
let source: &str = origin_ns_path.as_str();
let destination: &str = new_ns_path.as_path().to_str().unwrap_or("none");
let mut flags = MsFlags::empty();
if let Some(x) = FLAGS.get("rbind") {
let (_, f) = *x;
flags |= f;
};
let bare_mount = BareMount::new(source, destination, "none", flags, "", &logger);
bare_mount.mount().map_err(|e| {
anyhow!(
"Failed to mount {} to {} with err:{:?}",
source,
destination,
e
)
})?;
Ok(())
});
new_thread
.join()
.await
.map_err(|e| anyhow!("Failed to join thread {:?}!", e))??;
Ok(self)
@@ -185,8 +191,8 @@ mod tests {
use nix::sched::CloneFlags;
use tempfile::Builder;
#[test]
fn test_setup_persistent_ns() {
#[tokio::test]
async fn test_setup_persistent_ns() {
skip_if_not_root!();
// Create dummy logger and temp folder.
let logger = slog::Logger::root(slog::Discard, o!());
@@ -195,7 +201,8 @@ mod tests {
let ns_ipc = Namespace::new(&logger)
.get_ipc()
.set_root_dir(tmpdir.path().to_str().unwrap())
.setup();
.setup()
.await;
assert!(ns_ipc.is_ok());
assert!(remove_mounts(&[ns_ipc.unwrap().path]).is_ok());
@@ -206,7 +213,8 @@ mod tests {
let ns_uts = Namespace::new(&logger)
.get_uts("test_hostname")
.set_root_dir(tmpdir.path().to_str().unwrap())
.setup();
.setup()
.await;
assert!(ns_uts.is_ok());
assert!(remove_mounts(&[ns_uts.unwrap().path]).is_ok());
@@ -218,7 +226,8 @@ mod tests {
let ns_pid = Namespace::new(&logger)
.get_pid()
.set_root_dir(tmpdir.path().to_str().unwrap())
.setup();
.setup()
.await;
assert!(ns_pid.is_err());
}


@@ -322,9 +322,18 @@ impl Handle {
use packet::nlas::route::Nla;
// Build a common indeterminate ip request
let request = self
.handle
.route()
.add()
.table(MAIN_TABLE)
.kind(UNICAST)
.protocol(BOOT_PROT)
.scope(scope);
// `rtnetlink` offers separate request builders for the different IP versions (IPv4 and IPv6).
// This if/else branch is a bit clumsy because both arms do almost the same thing.
// TODO: Simplify this once https://github.com/little-dude/netlink/pull/140 is merged and released
if is_v6 {
let dest_addr = if !route.dest.is_empty() {
Ipv6Network::from_str(&route.dest)?
@@ -333,14 +342,8 @@ impl Handle {
};
// Build IP v6 request
let mut request = self
.handle
.route()
.add_v6()
.table(MAIN_TABLE)
.kind(UNICAST)
.protocol(BOOT_PROT)
.scope(scope)
let mut request = request
.v6()
.destination_prefix(dest_addr.ip(), dest_addr.prefix())
.output_interface(link.index());
@@ -377,14 +380,8 @@ impl Handle {
};
// Build IP v4 request
let mut request = self
.handle
.route()
.add_v4()
.table(MAIN_TABLE)
.kind(UNICAST)
.protocol(BOOT_PROT)
.scope(scope)
let mut request = request
.v4()
.destination_prefix(dest_addr.ip(), dest_addr.prefix())
.output_interface(link.index());


@@ -139,10 +139,10 @@ mod tests {
assert_eq!(true, content.is_ok());
let content = content.unwrap();
let expected_DNS: Vec<&str> = content.split('\n').collect();
let expected_dns: Vec<&str> = content.split('\n').collect();
// assert the data are the same as /run/kata-containers/sandbox/resolv.conf
assert_eq!(dns, expected_DNS);
assert_eq!(dns, expected_dns);
// umount /etc/resolv.conf
let _ = mount::umount(dst_filename);

src/agent/src/pci.rs (new file, 168 lines)

@@ -0,0 +1,168 @@
// Copyright Red Hat.
//
// SPDX-License-Identifier: Apache-2.0
//
use std::convert::TryInto;
use std::fmt;
use std::ops::Deref;
use std::str::FromStr;
use anyhow::anyhow;
// The PCI spec reserves 5 bits for slot number (a.k.a. device
// number), giving slots 0..=31
const SLOT_BITS: u8 = 5;
const SLOT_MAX: u8 = (1 << SLOT_BITS) - 1;
// Represents a PCI function's slot number (a.k.a. device number),
// giving its location on a single bus
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Slot(u8);
impl Slot {
pub fn new<T: TryInto<u8> + fmt::Display + Copy>(v: T) -> anyhow::Result<Self> {
if let Ok(v8) = v.try_into() {
if v8 <= SLOT_MAX {
return Ok(Slot(v8));
}
}
Err(anyhow!(
"PCI slot {} should be in range [0..{:#x}]",
v,
SLOT_MAX
))
}
}
impl FromStr for Slot {
type Err = anyhow::Error;
fn from_str(s: &str) -> anyhow::Result<Self> {
let v = isize::from_str_radix(s, 16)?;
Slot::new(v)
}
}
impl fmt::Display for Slot {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
write!(f, "{:02x}", self.0)
}
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Path(Vec<Slot>);
impl Path {
pub fn new(slots: Vec<Slot>) -> anyhow::Result<Self> {
if slots.is_empty() {
return Err(anyhow!("PCI path must have at least one element"));
}
Ok(Path(slots))
}
}
// Let Path be treated as a slice of Slots
impl Deref for Path {
type Target = [Slot];
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl fmt::Display for Path {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
let sslots: Vec<String> = self
.0
.iter()
.map(std::string::ToString::to_string)
.collect();
write!(f, "{}", sslots.join("/"))
}
}
impl FromStr for Path {
type Err = anyhow::Error;
fn from_str(s: &str) -> anyhow::Result<Self> {
let rslots: anyhow::Result<Vec<Slot>> = s.split('/').map(Slot::from_str).collect();
Path::new(rslots?)
}
}
#[cfg(test)]
mod tests {
use crate::pci::{Path, Slot};
use std::str::FromStr;
#[test]
fn test_slot() {
// Valid slots
let slot = Slot::new(0x00).unwrap();
assert_eq!(format!("{}", slot), "00");
let slot = Slot::from_str("00").unwrap();
assert_eq!(format!("{}", slot), "00");
let slot = Slot::new(31).unwrap();
let slot2 = Slot::from_str("1f").unwrap();
assert_eq!(slot, slot2);
// Bad slots
let slot = Slot::new(-1);
assert!(slot.is_err());
let slot = Slot::new(32);
assert!(slot.is_err());
let slot = Slot::from_str("20");
assert!(slot.is_err());
let slot = Slot::from_str("xy");
assert!(slot.is_err());
let slot = Slot::from_str("00/");
assert!(slot.is_err());
let slot = Slot::from_str("");
assert!(slot.is_err());
}
#[test]
fn test_path() {
let slot3 = Slot::new(0x03).unwrap();
let slot4 = Slot::new(0x04).unwrap();
let slot5 = Slot::new(0x05).unwrap();
// Valid paths
let pcipath = Path::new(vec![slot3]).unwrap();
assert_eq!(format!("{}", pcipath), "03");
let pcipath2 = Path::from_str("03").unwrap();
assert_eq!(pcipath, pcipath2);
assert_eq!(pcipath.len(), 1);
assert_eq!(pcipath[0], slot3);
let pcipath = Path::new(vec![slot3, slot4]).unwrap();
assert_eq!(format!("{}", pcipath), "03/04");
let pcipath2 = Path::from_str("03/04").unwrap();
assert_eq!(pcipath, pcipath2);
assert_eq!(pcipath.len(), 2);
assert_eq!(pcipath[0], slot3);
assert_eq!(pcipath[1], slot4);
let pcipath = Path::new(vec![slot3, slot4, slot5]).unwrap();
assert_eq!(format!("{}", pcipath), "03/04/05");
let pcipath2 = Path::from_str("03/04/05").unwrap();
assert_eq!(pcipath, pcipath2);
assert_eq!(pcipath.len(), 3);
assert_eq!(pcipath[0], slot3);
assert_eq!(pcipath[1], slot4);
assert_eq!(pcipath[2], slot5);
// Bad paths
assert!(Path::new(vec!()).is_err());
assert!(Path::from_str("20").is_err());
assert!(Path::from_str("//").is_err());
assert!(Path::from_str("xyz").is_err());
}
}
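A short usage sketch of the new `pci` module (within the agent crate, mirroring the unit tests above):

```rust
use std::str::FromStr;

use crate::pci::{Path, Slot};

// Parse a guest PCI path such as "02/03" -- slot 0x02 on the root bus,
// then slot 0x03 behind a bridge -- and recover the leaf slot.
fn describe_pci_path(s: &str) -> anyhow::Result<String> {
    let path = Path::from_str(s)?;
    // Path derefs to a slice of Slots, so slice methods work directly.
    let leaf: Slot = *path.last().expect("a Path always has at least one slot");
    Ok(format!("path={}, leaf slot={}", path, leaf))
}
```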


@@ -8,6 +8,7 @@ use nix::errno::Errno;
use nix::fcntl::{self, OFlag};
use nix::sys::stat::Mode;
use std::fs;
use std::os::unix::io::{AsRawFd, FromRawFd};
pub const RNGDEV: &str = "/dev/random";
pub const RNDADDTOENTCNT: libc::c_int = 0x40045201;
@@ -23,18 +24,22 @@ pub fn reseed_rng(data: &[u8]) -> Result<()> {
let len = data.len() as libc::c_long;
fs::write(RNGDEV, data)?;
let fd = fcntl::open(RNGDEV, OFlag::O_RDWR, Mode::from_bits_truncate(0o022))?;
let f = {
let fd = fcntl::open(RNGDEV, OFlag::O_RDWR, Mode::from_bits_truncate(0o022))?;
// Wrap fd with `File` to properly close descriptor on exit
unsafe { fs::File::from_raw_fd(fd) }
};
let ret = unsafe {
libc::ioctl(
fd,
f.as_raw_fd(),
RNDADDTOENTCNT as IoctlRequestType,
&len as *const libc::c_long,
)
};
let _ = Errno::result(ret).map(drop)?;
let ret = unsafe { libc::ioctl(fd, RNDRESEEDRNG as IoctlRequestType, 0) };
let ret = unsafe { libc::ioctl(f.as_raw_fd(), RNDRESEEDRNG as IoctlRequestType, 0) };
let _ = Errno::result(ret).map(drop)?;
Ok(())

File diff suppressed because it is too large.


@@ -4,12 +4,12 @@
//
use crate::linux_abi::*;
use crate::mount::{get_mount_fs_type, remove_mounts, TYPEROOTFS};
use crate::mount::{get_mount_fs_type, remove_mounts, TYPE_ROOTFS};
use crate::namespace::Namespace;
use crate::netlink::Handle;
use crate::network::Network;
use anyhow::{anyhow, Context, Result};
use libc::pid_t;
use netlink::{RtnlHandle, NETLINK_ROUTE};
use oci::{Hook, Hooks};
use protocols::agent::OnlineCPUMemRequest;
use regex::Regex;
@@ -22,9 +22,10 @@ use std::collections::HashMap;
use std::fs;
use std::os::unix::fs::PermissionsExt;
use std::path::Path;
use std::sync::mpsc::{self, Receiver, Sender};
use std::sync::{Arc, Mutex};
use std::sync::Arc;
use std::{thread, time};
use tokio::sync::mpsc::{channel, Receiver, Sender};
use tokio::sync::Mutex;
#[derive(Debug)]
pub struct Sandbox {
@@ -42,18 +43,18 @@ pub struct Sandbox {
pub storages: HashMap<String, u32>,
pub running: bool,
pub no_pivot_root: bool,
pub sender: Option<Sender<i32>>,
pub rtnl: Option<RtnlHandle>,
pub sender: Option<tokio::sync::oneshot::Sender<i32>>,
pub rtnl: Handle,
pub hooks: Option<Hooks>,
pub event_rx: Arc<Mutex<Receiver<String>>>,
pub event_tx: Sender<String>,
pub event_tx: Option<Sender<String>>,
}
impl Sandbox {
pub fn new(logger: &Logger) -> Result<Self> {
let fs_type = get_mount_fs_type("/")?;
let logger = logger.new(o!("subsystem" => "sandbox"));
let (tx, rx) = mpsc::channel::<String>();
let (tx, rx) = channel::<String>(100);
let event_rx = Arc::new(Mutex::new(rx));
Ok(Sandbox {
@@ -70,12 +71,12 @@ impl Sandbox {
sandbox_pidns: None,
storages: HashMap::new(),
running: false,
no_pivot_root: fs_type.eq(TYPEROOTFS),
no_pivot_root: fs_type.eq(TYPE_ROOTFS),
sender: None,
rtnl: Some(RtnlHandle::new(NETLINK_ROUTE, 0).unwrap()),
rtnl: Handle::new()?,
hooks: None,
event_rx,
event_tx: tx,
event_tx: Some(tx),
})
}
@@ -149,25 +150,19 @@ impl Sandbox {
Ok(())
}
pub fn is_running(&self) -> bool {
self.running
}
pub fn set_hostname(&mut self, hostname: String) {
self.hostname = hostname;
}
pub fn setup_shared_namespaces(&mut self) -> Result<bool> {
pub async fn setup_shared_namespaces(&mut self) -> Result<bool> {
// Set up shared IPC namespace
self.shared_ipcns = Namespace::new(&self.logger)
.get_ipc()
.setup()
.await
.context("Failed to setup persistent IPC namespace")?;
// Set up shared UTS namespace
self.shared_utsns = Namespace::new(&self.logger)
.get_uts(self.hostname.as_str())
.setup()
.await
.context("Failed to setup persistent UTS namespace")?;
Ok(true)
@@ -214,9 +209,9 @@ impl Sandbox {
None
}
pub fn destroy(&mut self) -> Result<()> {
pub async fn destroy(&mut self) -> Result<()> {
for ctr in self.containers.values_mut() {
ctr.destroy()?;
ctr.destroy().await?;
}
Ok(())
}
@@ -315,15 +310,32 @@ impl Sandbox {
Ok(hooks)
}
pub fn run_oom_event_monitor(&self, rx: Receiver<String>, container_id: String) {
let tx = self.event_tx.clone();
pub async fn run_oom_event_monitor(&self, mut rx: Receiver<String>, container_id: String) {
let logger = self.logger.clone();
thread::spawn(move || {
for event in rx {
if self.event_tx.is_none() {
error!(
logger,
"sandbox.event_tx not found in run_oom_event_monitor"
);
return;
}
let tx = self.event_tx.as_ref().unwrap().clone();
tokio::spawn(async move {
loop {
let event = rx.recv().await;
// None means the container has exited,
// and the sender in the OOM notifier has been dropped.
if event.is_none() {
return;
}
info!(logger, "got an OOM event {:?}", event);
let _ = tx
.send(container_id.clone())
.await
.map_err(|e| error!(logger, "failed to send message: {:?}", e));
}
});
@@ -383,7 +395,7 @@ fn online_cpus(logger: &Logger, num: i32) -> Result<i32> {
logger,
SYSFS_CPU_ONLINE_PATH,
r"cpu[0-9]+",
(num - onlined_count),
num - onlined_count,
);
if r.is_err() {
return r;
@@ -428,8 +440,8 @@ mod tests {
baremount.mount()
}
#[test]
fn set_sandbox_storage() {
#[tokio::test]
async fn set_sandbox_storage() {
let logger = slog::Logger::root(slog::Discard, o!());
let mut s = Sandbox::new(&logger).unwrap();
@@ -462,8 +474,8 @@ mod tests {
);
}
#[test]
fn remove_sandbox_storage() {
#[tokio::test]
async fn remove_sandbox_storage() {
skip_if_not_root!();
let logger = slog::Logger::root(slog::Discard, o!());
@@ -518,9 +530,9 @@ mod tests {
assert!(s.remove_sandbox_storage(destdir_path).is_ok());
}
#[test]
#[tokio::test]
#[allow(unused_assignments)]
fn unset_and_remove_sandbox_storage() {
async fn unset_and_remove_sandbox_storage() {
skip_if_not_root!();
let logger = slog::Logger::root(slog::Discard, o!());
@@ -570,8 +582,8 @@ mod tests {
assert!(s.unset_and_remove_sandbox_storage(&other_dir_str).is_err());
}
#[test]
fn unset_sandbox_storage() {
#[tokio::test]
async fn unset_sandbox_storage() {
let logger = slog::Logger::root(slog::Discard, o!());
let mut s = Sandbox::new(&logger).unwrap();
@@ -656,8 +668,8 @@ mod tests {
.unwrap()
}
#[test]
fn get_container_entry_exist() {
#[tokio::test]
async fn get_container_entry_exist() {
skip_if_not_root!();
let logger = slog::Logger::root(slog::Discard, o!());
let mut s = Sandbox::new(&logger).unwrap();
@@ -669,8 +681,8 @@ mod tests {
assert!(cnt.is_some());
}
#[test]
fn get_container_no_entry() {
#[tokio::test]
async fn get_container_no_entry() {
let logger = slog::Logger::root(slog::Discard, o!());
let mut s = Sandbox::new(&logger).unwrap();
@@ -678,8 +690,8 @@ mod tests {
assert!(cnt.is_none());
}
#[test]
fn add_and_get_container() {
#[tokio::test]
async fn add_and_get_container() {
skip_if_not_root!();
let logger = slog::Logger::root(slog::Discard, o!());
let mut s = Sandbox::new(&logger).unwrap();
@@ -688,8 +700,9 @@ mod tests {
s.add_container(linux_container);
assert!(s.get_container("some_id").is_some());
}
#[test]
fn update_shared_pidns() {
#[tokio::test]
async fn update_shared_pidns() {
skip_if_not_root!();
let logger = slog::Logger::root(slog::Discard, o!());
let mut s = Sandbox::new(&logger).unwrap();
@@ -705,8 +718,9 @@ mod tests {
let ns_path = format!("/proc/{}/ns/pid", test_pid);
assert_eq!(s.sandbox_pidns.unwrap().path, ns_path);
}
#[test]
fn add_guest_hooks() {
#[tokio::test]
async fn add_guest_hooks() {
let logger = slog::Logger::root(slog::Discard, o!());
let mut s = Sandbox::new(&logger).unwrap();
let tmpdir = Builder::new().tempdir().unwrap();
@@ -727,4 +741,12 @@ mod tests {
assert!(s.hooks.as_ref().unwrap().poststart.is_empty());
assert!(s.hooks.as_ref().unwrap().poststop.is_empty());
}
#[tokio::test]
async fn test_sandbox_set_destroy() {
let logger = slog::Logger::root(slog::Discard, o!());
let mut s = Sandbox::new(&logger).unwrap();
let ret = s.destroy().await;
assert!(ret.is_ok());
}
}

src/agent/src/signal.rs (new file, 159 lines)

@@ -0,0 +1,159 @@
// Copyright (c) 2019-2020 Ant Financial
// Copyright (c) 2020 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
use crate::sandbox::Sandbox;
use anyhow::{anyhow, Result};
use nix::sys::wait::WaitPidFlag;
use nix::sys::wait::{self, WaitStatus};
use nix::unistd;
use prctl::set_child_subreaper;
use slog::{error, info, o, Logger};
use std::sync::Arc;
use tokio::select;
use tokio::signal::unix::{signal, SignalKind};
use tokio::sync::watch::Receiver;
use tokio::sync::Mutex;
use unistd::Pid;
async fn handle_sigchild(logger: Logger, sandbox: Arc<Mutex<Sandbox>>) -> Result<()> {
info!(logger, "handling signal"; "signal" => "SIGCHLD");
loop {
let result = wait::waitpid(
Some(Pid::from_raw(-1)),
Some(WaitPidFlag::WNOHANG | WaitPidFlag::__WALL),
);
let wait_status = match result {
Ok(s) => {
if s == WaitStatus::StillAlive {
return Ok(());
}
s
}
Err(e) => return Err(anyhow!(e).context("waitpid reaper failed")),
};
info!(logger, "wait_status"; "wait_status result" => format!("{:?}", wait_status));
if let Some(pid) = wait_status.pid() {
let raw_pid = pid.as_raw();
let child_pid = format!("{}", raw_pid);
let logger = logger.new(o!("child-pid" => child_pid));
let sandbox_ref = sandbox.clone();
let mut sandbox = sandbox_ref.lock().await;
let process = sandbox.find_process(raw_pid);
if process.is_none() {
info!(logger, "child exited unexpectedly");
continue;
}
let mut p = process.unwrap();
if p.exit_pipe_w.is_none() {
info!(logger, "process exit pipe not set");
continue;
}
let pipe_write = p.exit_pipe_w.unwrap();
let ret: i32;
match wait_status {
WaitStatus::Exited(_, c) => ret = c,
WaitStatus::Signaled(_, sig, _) => ret = sig as i32,
_ => {
info!(logger, "got wrong status for process";
"child-status" => format!("{:?}", wait_status));
continue;
}
}
p.exit_code = ret;
let _ = unistd::close(pipe_write);
info!(logger, "notify term to close");
// close the socket file to notify readStdio to close terminal specifically
// in case this process's terminal has been inherited by its children.
p.notify_term_close();
}
}
}
pub async fn setup_signal_handler(
logger: Logger,
sandbox: Arc<Mutex<Sandbox>>,
mut shutdown: Receiver<bool>,
) -> Result<()> {
let logger = logger.new(o!("subsystem" => "signals"));
set_child_subreaper(true)
.map_err(|err| anyhow!(err).context("failed to setup agent as a child subreaper"))?;
let mut sigchild_stream = signal(SignalKind::child())?;
loop {
select! {
_ = shutdown.changed() => {
info!(logger, "got shutdown request");
break;
}
_ = sigchild_stream.recv() => {
let result = handle_sigchild(logger.clone(), sandbox.clone()).await;
match result {
Ok(()) => (),
Err(e) => {
// Log errors, but don't abort - just wait for more signals!
error!(logger, "failed to handle signal"; "error" => format!("{:?}", e));
}
}
}
}
}
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use tokio::pin;
use tokio::sync::watch::channel;
use tokio::time::Duration;
#[tokio::test]
async fn test_setup_signal_handler() {
let logger = slog::Logger::root(slog::Discard, o!());
let s = Sandbox::new(&logger).unwrap();
let sandbox = Arc::new(Mutex::new(s));
let (tx, rx) = channel(true);
let handle = tokio::spawn(setup_signal_handler(logger, sandbox, rx));
let timeout = tokio::time::sleep(Duration::from_secs(1));
pin!(timeout);
tx.send(true).expect("failed to request shutdown");
loop {
select! {
_ = handle => {
println!("INFO: task completed");
break;
},
_ = &mut timeout => {
panic!("signal thread failed to stop");
}
}
}
}
}
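A minimal wiring sketch (hypothetical helper; the real agent wires this up in its main task loop): the handler is spawned as a long-lived task sharing a shutdown `watch` channel, as the unit test above also demonstrates.

```rust
use std::sync::Arc;

use anyhow::Result;
use slog::Logger;
use tokio::sync::{watch, Mutex};

async fn run_with_signal_handler(logger: Logger, sandbox: Arc<Mutex<Sandbox>>) -> Result<()> {
    let (shutdown_tx, shutdown_rx) = watch::channel(false);

    // Reaps children and routes exit codes until shutdown is requested.
    let task = tokio::spawn(setup_signal_handler(logger, sandbox, shutdown_rx));

    // ... other agent work would happen here ...

    let _ = shutdown_tx.send(true);
    task.await??;
    Ok(())
}
```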


@@ -7,10 +7,16 @@ use crate::device::online_device;
use crate::linux_abi::*;
use crate::sandbox::Sandbox;
use crate::GLOBAL_DEVICE_WATCHER;
use netlink::{RtnlHandle, NETLINK_UEVENT};
use slog::Logger;
use std::sync::{Arc, Mutex};
use std::thread;
use anyhow::Result;
use netlink_sys::{protocols, SocketAddr, TokioSocket};
use nix::errno::Errno;
use std::os::unix::io::FromRawFd;
use std::sync::Arc;
use tokio::select;
use tokio::sync::watch::Receiver;
use tokio::sync::Mutex;
#[derive(Debug, Default)]
struct Uevent {
@@ -48,16 +54,23 @@ impl Uevent {
// Check whether this is a block device hot-add event.
fn is_block_add_event(&self) -> bool {
let pci_root_bus_path = create_pci_root_bus_path();
self.action == U_EVENT_ACTION_ADD
&& self.subsystem == "block"
&& self.devpath.starts_with(PCI_ROOT_BUS_PATH)
&& {
self.devpath.starts_with(pci_root_bus_path.as_str())
|| self.devpath.starts_with(ACPI_DEV_PATH) // NVDIMM/PMEM devices
}
&& !self.devname.is_empty()
}
fn handle_block_add_event(&self, sandbox: &Arc<Mutex<Sandbox>>) {
async fn handle_block_add_event(&self, sandbox: &Arc<Mutex<Sandbox>>) {
let pci_root_bus_path = create_pci_root_bus_path();
// Keep the same lock order as device::get_device_name(), otherwise it may cause deadlock.
let mut w = GLOBAL_DEVICE_WATCHER.lock().unwrap();
let mut sb = sandbox.lock().unwrap();
let watcher = GLOBAL_DEVICE_WATCHER.clone();
let mut w = watcher.lock().await;
let mut sb = sandbox.lock().await;
// Add the device node name to the pci device map.
sb.pci_device_map
@@ -67,20 +80,28 @@ impl Uevent {
// Close the channel after watcher has been notified.
let devpath = self.devpath.clone();
let empties: Vec<_> = w
.iter()
.iter_mut()
.filter(|(dev_addr, _)| {
let pci_p = format!("{}/{}", PCI_ROOT_BUS_PATH, *dev_addr);
let pci_p = format!("{}/{}", pci_root_bus_path, *dev_addr);
// blk block device
devpath.starts_with(pci_p.as_str()) ||
// scsi block device
{
(*dev_addr).ends_with(SCSI_BLOCK_SUFFIX) &&
devpath.contains(*dev_addr)
}
// scsi block device
{
(*dev_addr).ends_with(SCSI_BLOCK_SUFFIX) &&
devpath.contains(*dev_addr)
} ||
// nvdimm/pmem device
{
let pmem_suffix = format!("/{}/{}", SCSI_BLOCK_SUFFIX, self.devname);
devpath.starts_with(ACPI_DEV_PATH) &&
devpath.ends_with(pmem_suffix.as_str()) &&
dev_addr.ends_with(pmem_suffix.as_str())
}
})
.map(|(k, sender)| {
let devname = self.devname.clone();
let sender = sender.take().unwrap();
let _ = sender.send(devname);
k.clone()
})
@@ -92,9 +113,9 @@ impl Uevent {
}
}
fn process(&self, logger: &Logger, sandbox: &Arc<Mutex<Sandbox>>) {
async fn process(&self, logger: &Logger, sandbox: &Arc<Mutex<Sandbox>>) {
if self.is_block_add_event() {
return self.handle_block_add_event(sandbox);
return self.handle_block_add_event(sandbox).await;
} else if self.action == U_EVENT_ACTION_ADD {
let online_path = format!("{}/{}/online", SYSFS_DIR, &self.devpath);
// It's a memory hot-add event.
@@ -114,34 +135,67 @@ impl Uevent {
}
}
pub fn watch_uevents(sandbox: Arc<Mutex<Sandbox>>) {
thread::spawn(move || {
let rtnl = RtnlHandle::new(NETLINK_UEVENT, 1).unwrap();
let logger = sandbox
.lock()
.unwrap()
.logger
.new(o!("subsystem" => "uevent"));
pub async fn watch_uevents(
sandbox: Arc<Mutex<Sandbox>>,
mut shutdown: Receiver<bool>,
) -> Result<()> {
let sref = sandbox.clone();
let s = sref.lock().await;
let logger = s.logger.new(o!("subsystem" => "uevent"));
loop {
match rtnl.recv_message() {
Err(e) => {
error!(logger, "receive uevent message failed"; "error" => format!("{}", e))
}
Ok(data) => {
let text = String::from_utf8(data);
match text {
Err(e) => {
error!(logger, "failed to convert bytes to text"; "error" => format!("{}", e))
// Unlock the sandbox to allow a successful shutdown
drop(s);
info!(logger, "starting uevents handler");
let mut socket;
unsafe {
let fd = libc::socket(
libc::AF_NETLINK,
libc::SOCK_DGRAM | libc::SOCK_CLOEXEC,
protocols::NETLINK_KOBJECT_UEVENT as libc::c_int,
);
socket = TokioSocket::from_raw_fd(fd);
}
socket.bind(&SocketAddr::new(0, 1))?;
loop {
select! {
_ = shutdown.changed() => {
info!(logger, "got shutdown request");
break;
}
result = socket.recv_from_full() => {
match result {
Err(e) => {
error!(logger, "failed to receive uevent"; "error" => format!("{}", e))
}
Ok((buf, addr)) => {
if addr.port_number() != 0 {
// not our netlink message
let err_msg = format!("{:?}", nix::Error::Sys(Errno::EBADMSG));
error!(logger, "receive uevent message failed"; "error" => err_msg);
continue;
}
Ok(text) => {
let event = Uevent::new(&text);
info!(logger, "got uevent message"; "event" => format!("{:?}", event));
event.process(&logger, &sandbox);
let text = String::from_utf8(buf);
match text {
Err(e) => {
error!(logger, "failed to convert bytes to text"; "error" => format!("{}", e))
}
Ok(text) => {
let event = Uevent::new(&text);
info!(logger, "got uevent message"; "event" => format!("{:?}", event));
event.process(&logger, &sandbox).await;
}
}
}
}
}
}
});
}
Ok(())
}
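For reference, a sketch of the payload this handler receives (illustrative values only; the exact field extraction lives in `Uevent::new()`): a kernel uevent is the action and devpath, followed by NUL-separated `KEY=VALUE` pairs, of which the code above uses `ACTION`, `SUBSYSTEM`, `DEVPATH` and `DEVNAME`.

```rust
// Illustrative uevent payload for a virtio-blk hot-add (values are examples only).
const SAMPLE_UEVENT: &str = "add@/devices/pci0000:00/0000:00:02.0/virtio1/block/vda\0\
ACTION=add\0\
DEVPATH=/devices/pci0000:00/0000:00:02.0/virtio1/block/vda\0\
SUBSYSTEM=block\0\
DEVNAME=vda\0\
SEQNUM=1234";
```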

src/agent/src/util.rs (new file, 342 lines)

@@ -0,0 +1,342 @@
// Copyright (c) 2021 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
use std::io;
use std::io::ErrorKind;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::sync::watch::Receiver;
// Size of I/O read buffer
const BUF_SIZE: usize = 8192;
// Interruptable I/O copy using readers and writers
// (an interruptable version of "io::copy()").
pub async fn interruptable_io_copier<R: Sized, W: Sized>(
mut reader: R,
mut writer: W,
mut shutdown: Receiver<bool>,
) -> io::Result<u64>
where
R: tokio::io::AsyncRead + Unpin,
W: tokio::io::AsyncWrite + Unpin,
{
let mut total_bytes: u64 = 0;
let mut buf: [u8; BUF_SIZE] = [0; BUF_SIZE];
loop {
tokio::select! {
_ = shutdown.changed() => {
eprintln!("INFO: interruptable_io_copier: got shutdown request");
break;
},
result = reader.read(&mut buf) => {
let bytes = match result {
Ok(0) => return Ok(total_bytes),
Ok(len) => len,
Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
Err(e) => return Err(e),
};
total_bytes += bytes as u64;
// Actually copy the data ;)
writer.write_all(&buf[..bytes]).await?;
},
};
}
Ok(total_bytes)
}
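A usage sketch of `interruptable_io_copier` (hypothetical stand-in reader and writer; real callers pass process stdio streams): the copy runs until the reader hits EOF or a shutdown is broadcast on the `watch` channel.

```rust
use tokio::sync::watch;

// Hypothetical caller: copy from a never-ending reader into a sink until
// shutdown is requested on the shared watch channel.
async fn copy_until_shutdown() -> std::io::Result<u64> {
    let (shutdown_tx, shutdown_rx) = watch::channel(false);

    let reader = tokio::io::repeat(b'x'); // stand-in for a real AsyncRead
    let writer = tokio::io::sink();       // stand-in for a real AsyncWrite

    let copier = tokio::spawn(interruptable_io_copier(reader, writer, shutdown_rx));

    // Request shutdown; the copier breaks out of its select! loop and returns
    // the number of bytes copied so far.
    let _ = shutdown_tx.send(true);
    copier.await.expect("copier task panicked")
}
```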
#[cfg(test)]
mod tests {
use super::*;
use std::io;
use std::io::Cursor;
use std::io::Write;
use std::pin::Pin;
use std::sync::{Arc, Mutex};
use std::task::{Context, Poll, Poll::Ready};
use tokio::pin;
use tokio::select;
use tokio::sync::watch::channel;
use tokio::task::JoinError;
use tokio::time::Duration;
#[derive(Debug, Default, Clone)]
struct BufWriter {
data: Arc<Mutex<Vec<u8>>>,
slow_write: bool,
write_delay: Duration,
}
impl BufWriter {
fn new() -> Self {
BufWriter {
data: Arc::new(Mutex::new(Vec::<u8>::new())),
slow_write: false,
write_delay: Duration::new(0, 0),
}
}
fn write_vec(&mut self, buf: &[u8]) -> io::Result<usize> {
let vec_ref = self.data.clone();
let mut vec_locked = vec_ref.lock();
let mut v = vec_locked.as_deref_mut().unwrap();
if self.write_delay.as_nanos() > 0 {
std::thread::sleep(self.write_delay);
}
std::io::Write::write(&mut v, buf)
}
}
impl Write for BufWriter {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
self.write_vec(buf)
}
fn flush(&mut self) -> io::Result<()> {
let vec_ref = self.data.clone();
let mut vec_locked = vec_ref.lock();
let v = vec_locked.as_deref_mut().unwrap();
std::io::Write::flush(v)
}
}
impl tokio::io::AsyncWrite for BufWriter {
fn poll_write(
mut self: Pin<&mut Self>,
_cx: &mut Context<'_>,
buf: &[u8],
) -> Poll<Result<usize, io::Error>> {
let result = self.write_vec(buf);
Ready(result)
}
fn poll_flush(
self: Pin<&mut Self>,
_cx: &mut Context<'_>,
) -> Poll<Result<(), std::io::Error>> {
// NOP
Ready(Ok(()))
}
fn poll_shutdown(
self: Pin<&mut Self>,
_cx: &mut Context<'_>,
) -> Poll<Result<(), std::io::Error>> {
// NOP
Ready(Ok(()))
}
}
impl ToString for BufWriter {
fn to_string(&self) -> String {
let data_ref = self.data.clone();
let output = data_ref.lock().unwrap();
let s = (*output).clone();
String::from_utf8(s).unwrap()
}
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_interruptable_io_copier_reader() {
#[derive(Debug)]
struct TestData {
reader_value: String,
result: io::Result<u64>,
}
let tests = &[
TestData {
reader_value: "".into(),
result: Ok(0),
},
TestData {
reader_value: "a".into(),
result: Ok(1),
},
TestData {
reader_value: "foo".into(),
result: Ok(3),
},
TestData {
reader_value: "b".repeat(BUF_SIZE - 1),
result: Ok((BUF_SIZE - 1) as u64),
},
TestData {
reader_value: "c".repeat(BUF_SIZE),
result: Ok((BUF_SIZE) as u64),
},
TestData {
reader_value: "d".repeat(BUF_SIZE + 1),
result: Ok((BUF_SIZE + 1) as u64),
},
TestData {
reader_value: "e".repeat((2 * BUF_SIZE) - 1),
result: Ok(((2 * BUF_SIZE) - 1) as u64),
},
TestData {
reader_value: "f".repeat(2 * BUF_SIZE),
result: Ok((2 * BUF_SIZE) as u64),
},
TestData {
reader_value: "g".repeat((2 * BUF_SIZE) + 1),
result: Ok(((2 * BUF_SIZE) + 1) as u64),
},
];
for (i, d) in tests.iter().enumerate() {
// Create a string containing details of the test
let msg = format!("test[{}]: {:?}", i, d);
let (tx, rx) = channel(true);
let reader = Cursor::new(d.reader_value.clone());
let writer = BufWriter::new();
// XXX: Pass a copy of the writer to the copier to allow the
// result of the write operation to be checked below.
let handle = tokio::spawn(interruptable_io_copier(reader, writer.clone(), rx));
// Allow time for the thread to be spawned.
tokio::time::sleep(Duration::from_secs(1)).await;
let timeout = tokio::time::sleep(Duration::from_secs(1));
pin!(timeout);
// Since the readers only specify a small number of bytes, the
// copier will quickly read zero and kill the task, closing the
// Receiver.
assert!(tx.is_closed(), "{}", msg);
let spawn_result: std::result::Result<
std::result::Result<u64, std::io::Error>,
JoinError,
>;
let result: std::result::Result<u64, std::io::Error>;
select! {
res = handle => spawn_result = res,
_ = &mut timeout => panic!("timed out"),
}
assert!(spawn_result.is_ok());
result = spawn_result.unwrap();
assert!(result.is_ok());
let byte_count = result.unwrap() as usize;
assert_eq!(byte_count, d.reader_value.len(), "{}", msg);
let value = writer.to_string();
assert_eq!(value, d.reader_value, "{}", msg);
}
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_interruptable_io_copier_eof() {
// Create an async reader that always returns EOF
let reader = tokio::io::empty();
let (tx, rx) = channel(true);
let writer = BufWriter::new();
let handle = tokio::spawn(interruptable_io_copier(reader, writer.clone(), rx));
// Allow time for the thread to be spawned.
tokio::time::sleep(Duration::from_secs(1)).await;
let timeout = tokio::time::sleep(Duration::from_secs(1));
pin!(timeout);
assert!(tx.is_closed());
let spawn_result: std::result::Result<std::result::Result<u64, std::io::Error>, JoinError>;
let result: std::result::Result<u64, std::io::Error>;
select! {
res = handle => spawn_result = res,
_ = &mut timeout => panic!("timed out"),
}
assert!(spawn_result.is_ok());
result = spawn_result.unwrap();
assert!(result.is_ok());
let byte_count = result.unwrap();
assert_eq!(byte_count, 0);
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_interruptable_io_copier_shutdown() {
// Create an async reader that creates an infinite stream of bytes
// (which allows us to interrupt it, since we know it is always busy ;)
const REPEAT_CHAR: u8 = b'r';
let reader = tokio::io::repeat(REPEAT_CHAR);
let (tx, rx) = channel(true);
let writer = BufWriter::new();
let handle = tokio::spawn(interruptable_io_copier(reader, writer.clone(), rx));
// Allow time for the thread to be spawned.
tokio::time::sleep(Duration::from_secs(1)).await;
let timeout = tokio::time::sleep(Duration::from_secs(1));
pin!(timeout);
assert!(!tx.is_closed());
tx.send(true).expect("failed to request shutdown");
let spawn_result: std::result::Result<std::result::Result<u64, std::io::Error>, JoinError>;
let result: std::result::Result<u64, std::io::Error>;
select! {
res = handle => spawn_result = res,
_ = &mut timeout => panic!("timed out"),
}
assert!(spawn_result.is_ok());
result = spawn_result.unwrap();
assert!(result.is_ok());
let byte_count = result.unwrap();
let value = writer.to_string();
let writer_byte_count = value.len() as u64;
assert_eq!(byte_count, writer_byte_count);
// Remove the char used as the payload. If anything else remains,
// something went wrong.
let mut remainder = value;
remainder.retain(|c| c != REPEAT_CHAR as char);
assert_eq!(remainder.len(), 0);
}
}


@@ -7,6 +7,8 @@
// WARNING: This file is auto-generated - DO NOT EDIT!
//
#![allow(dead_code)]
pub const AGENT_VERSION: &str = "@AGENT_VERSION@";
pub const API_VERSION: &str = "@API_VERSION@";
pub const VERSION_COMMIT: &str = "@VERSION_COMMIT@";


@@ -4,14 +4,6 @@
# SPDX-License-Identifier: Apache-2.0
#
distro := $(shell \
for file in /etc/os-release /usr/lib/os-release; do \
if [ -e $$file ]; then \
grep ^ID= $$file|cut -d= -f2-|tr -d '"'; \
break; \
fi \
done)
SKIP_GO_VERSION_CHECK=
include golang.mk
@@ -57,7 +49,6 @@ BINLIBEXECLIST :=
BIN_PREFIX = $(PROJECT_TYPE)
PROJECT_DIR = $(PROJECT_TAG)
IMAGENAME = $(PROJECT_TAG).img
INITRDNAME = $(PROJECT_TAG)-initrd.img
TARGET = $(BIN_PREFIX)-runtime
TARGET_OUTPUT = $(CURDIR)/$(TARGET)
@@ -91,7 +82,6 @@ SHAREDIR := $(PREFIX)/share
DEFAULTSDIR := $(SHAREDIR)/defaults
COLLECT_SCRIPT = data/kata-collect-data.sh
COLLECT_SCRIPT_SRC = $(COLLECT_SCRIPT).in
# @RUNTIME_NAME@ should be replaced with the target in generated files
RUNTIME_NAME = $(TARGET)
@@ -111,13 +101,11 @@ BASH_COMPLETIONS := data/completions/bash/kata-runtime
BASH_COMPLETIONSDIR := $(SHAREDIR)/bash-completion/completions
PKGDATADIR := $(PREFIXDEPS)/share/$(PROJECT_DIR)
PKGLIBDIR := $(LOCALSTATEDIR)/lib/$(PROJECT_DIR)
PKGRUNDIR := $(LOCALSTATEDIR)/run/$(PROJECT_DIR)
PKGLIBEXECDIR := $(LIBEXECDIR)/$(PROJECT_DIR)
KERNELDIR := $(PKGDATADIR)
INITRDPATH := $(PKGDATADIR)/$(INITRDNAME)
IMAGEPATH := $(PKGDATADIR)/$(IMAGENAME)
FIRMWAREPATH :=
@@ -126,7 +114,6 @@ CONFIG_FILE = configuration.toml
HYPERVISOR_ACRN = acrn
HYPERVISOR_FC = firecracker
JAILER_FC = jailer
HYPERVISOR_QEMU = qemu
HYPERVISOR_CLH = cloud-hypervisor
@@ -139,13 +126,13 @@ HYPERVISORS := $(HYPERVISOR_ACRN) $(HYPERVISOR_FC) $(HYPERVISOR_QEMU) $(HYPERVIS
QEMUPATH := $(QEMUBINDIR)/$(QEMUCMD)
QEMUVALIDHYPERVISORPATHS := [\"$(QEMUPATH)\"]
QEMUVALIDVIRTIOFSPATHS := $(QEMUBINDIR)/$(QEMUVIRTIOFSCMD)
QEMUVIRTIOFSPATH := $(QEMUBINDIR)/$(QEMUVIRTIOFSCMD)
CLHPATH := $(CLHBINDIR)/$(CLHCMD)
CLHVALIDHYPERVISORPATHS := [\"$(CLHBINDIR)/$(CLHCMD)\"]
CLHVALIDHYPERVISORPATHS := [\"$(CLHPATH)\"]
FCPATH = $(FCBINDIR)/$(FCCMD)
FCVALIDPATHS = [\"$(FCPATH)\"]
FCVALIDHYPERVISORPATHS := [\"$(FCPATH)\"]
FCJAILERPATH = $(FCBINDIR)/$(FCJAILERCMD)
FCVALIDJAILERPATHS = [\"$(FCJAILERPATH)\"]
@@ -154,9 +141,6 @@ ACRNVALIDHYPERVISORPATHS := [\"$(ACRNPATH)\"]
ACRNCTLPATH := $(ACRNBINDIR)/$(ACRNCTLCMD)
ACRNVALIDCTLPATHS := [\"$(ACRNCTLPATH)\"]
SHIMCMD := $(BIN_PREFIX)-shim
SHIMPATH := $(PKGLIBEXECDIR)/$(SHIMCMD)
NETMONCMD := $(BIN_PREFIX)-netmon
NETMONPATH := $(PKGLIBEXECDIR)/$(NETMONCMD)
@@ -184,13 +168,12 @@ DEFAULTEXPFEATURES := []
DEFENTROPYSOURCE := /dev/urandom
DEFDISABLEBLOCK := false
DEFSHAREDFS := virtio-9p
DEFSHAREDFS_QEMU_VIRTIOFS := virtio-fs
DEFVIRTIOFSDAEMON := $(LIBEXECDIR)/kata-qemu/virtiofsd
DEFVALIDVIRTIOFSDAEMONPATHS := [\"$(DEFVIRTIOFSDAEMON)\"]
# Default DAX mapping cache size in MiB
#if value is 0, DAX is not enabled
DEFVIRTIOFSCACHESIZE := 0
DEFVIRTIOFSCACHESIZE ?= 0
DEFVIRTIOFSCACHE ?= auto
# Format example:
# [\"-o\", \"arg1=xxx,arg2\", \"-o\", \"hello world\", \"--arg3=yyy\"]
@@ -199,23 +182,18 @@ DEFVIRTIOFSCACHE ?= auto
# Make sure you quote args.
DEFVIRTIOFSEXTRAARGS ?= [\"--thread-pool-size=1\"]
DEFENABLEIOTHREADS := false
DEFENABLEMEMPREALLOC := false
DEFENABLEHUGEPAGES := false
DEFENABLEVHOSTUSERSTORE := false
DEFVHOSTUSERSTOREPATH := $(PKGRUNDIR)/vhost-user
DEFVALIDVHOSTUSERSTOREPATHS := [\"$(DEFVHOSTUSERSTOREPATH)\"]
DEFFILEMEMBACKEND := ""
DEFVALIDFILEMEMBACKENDS := [\"$(DEFFILEMEMBACKEND)\"]
DEFENABLESWAP := false
DEFENABLEDEBUG := false
DEFDISABLENESTINGCHECKS := false
DEFMSIZE9P := 8192
DEFHOTPLUGVFIOONROOTBUS := false
DEFPCIEROOTPORT := 0
# Default cgroup model
DEFSANDBOXCGROUPONLY ?= false
DEFBINDMOUNTS := []
# Features
FEATURE_SELINUX ?= check
@@ -282,7 +260,6 @@ ifneq (,$(CLHCMD))
# CLH-specific options (all should be suffixed by "_CLH")
# currently, huge pages are required for virtiofsd support
DEFENABLEHUGEPAGES_CLH := true
DEFNETWORKMODEL_CLH := tcfilter
KERNELTYPE_CLH = uncompressed
KERNEL_NAME_CLH = $(call MAKE_KERNEL_NAME,$(KERNELTYPE_CLH))
@@ -407,8 +384,6 @@ USER_VARS += FCVALIDJAILERPATHS
USER_VARS += SYSCONFIG
USER_VARS += IMAGENAME
USER_VARS += IMAGEPATH
USER_VARS += INITRDNAME
USER_VARS += INITRDPATH
USER_VARS += MACHINETYPE
USER_VARS += KERNELDIR
USER_VARS += KERNELTYPE
@@ -428,7 +403,6 @@ USER_VARS += KERNELPARAMS
USER_VARS += LIBEXECDIR
USER_VARS += LOCALSTATEDIR
USER_VARS += PKGDATADIR
USER_VARS += PKGLIBDIR
USER_VARS += PKGLIBEXECDIR
USER_VARS += PKGRUNDIR
USER_VARS += PREFIX
@@ -446,10 +420,8 @@ USER_VARS += QEMUPATH
USER_VARS += QEMUVALIDHYPERVISORPATHS
USER_VARS += QEMUVIRTIOFSCMD
USER_VARS += QEMUVIRTIOFSPATH
USER_VARS += QEMUVALIDVIRTIOFSPATHS
USER_VARS += RUNTIME_NAME
USER_VARS += SHAREDIR
USER_VARS += SHIMPATH
USER_VARS += SYSCONFDIR
USER_VARS += DEFVCPUS
USER_VARS += DEFMAXVCPUS
@@ -467,7 +439,6 @@ USER_VARS += DEFDISABLEBLOCK
USER_VARS += DEFBLOCKSTORAGEDRIVER_ACRN
USER_VARS += DEFBLOCKSTORAGEDRIVER_FC
USER_VARS += DEFBLOCKSTORAGEDRIVER_QEMU
USER_VARS += DEFSHAREDFS
USER_VARS += DEFSHAREDFS_QEMU_VIRTIOFS
USER_VARS += DEFVIRTIOFSDAEMON
USER_VARS += DEFVALIDVIRTIOFSDAEMONPATHS
@@ -476,21 +447,15 @@ USER_VARS += DEFVIRTIOFSCACHE
USER_VARS += DEFVIRTIOFSEXTRAARGS
USER_VARS += DEFENABLEANNOTATIONS
USER_VARS += DEFENABLEIOTHREADS
USER_VARS += DEFENABLEMEMPREALLOC
USER_VARS += DEFENABLEHUGEPAGES
USER_VARS += DEFENABLEVHOSTUSERSTORE
USER_VARS += DEFVHOSTUSERSTOREPATH
USER_VARS += DEFVALIDVHOSTUSERSTOREPATHS
USER_VARS += DEFFILEMEMBACKEND
USER_VARS += DEFVALIDFILEMEMBACKENDS
USER_VARS += DEFENABLESWAP
USER_VARS += DEFENABLEDEBUG
USER_VARS += DEFDISABLENESTINGCHECKS
USER_VARS += DEFMSIZE9P
USER_VARS += DEFHOTPLUGVFIOONROOTBUS
USER_VARS += DEFPCIEROOTPORT
USER_VARS += DEFENTROPYSOURCE
USER_VARS += DEFSANDBOXCGROUPONLY
USER_VARS += DEFBINDMOUNTS
USER_VARS += FEATURE_SELINUX
USER_VARS += BUILDFLAGS
@@ -632,7 +597,14 @@ generate-config: $(CONFIGS)
check: check-go-static
test: go-test
test: install-hook go-test
install-hook:
make -C virtcontainers hook
ifeq ($(shell id -u), 0)
echo "installing mock hook"
make -C virtcontainers install
endif
go-test: $(GENERATED_FILES)
go test -v -mod=vendor ./...
@@ -694,7 +666,8 @@ show-usage: show-header
@printf "• Additional targets:\n"
@printf "\n"
@printf "\tbuild : standard build (build everything).\n"
@printf "\tcheck : run tests.\n"
@printf "\ttest : run tests.\n"
@printf "\tcheck : run code checks.\n"
@printf "\tclean : remove built files.\n"
@printf "\tcontainerd-shim-v2 : only build containerd shim v2.\n"
@printf "\tcoverage : run coverage tests.\n"


@@ -19,6 +19,8 @@ For details of the other Kata Containers repositories, see the
* [Quick start for developers](#quick-start-for-developers)
* [Architecture overview](#architecture-overview)
* [Configuration](#configuration)
* [Hypervisor specific configuration](#hypervisor-specific-configuration)
* [Stateless systems](#stateless-systems)
* [Logging](#logging)
* [Kata OCI](#kata-oci)
* [Kata containerd shimv2](#kata-containerd-shimv2)
@@ -65,7 +67,7 @@ The runtime has a built-in command to determine if your host system is capable
of running and creating a Kata Container:
```bash
$ kata-runtime kata-check
$ kata-runtime check
```
> **Note:**
@@ -106,6 +108,15 @@ The file contains comments explaining all options.
> You may need to modify this file to optimise or tailor your system, or if you have
> specific requirements.
### Hypervisor specific configuration
Kata Containers supports multiple hypervisors so your `configuration.toml`
configuration file may be a symbolic link to a hypervisor-specific
configuration file. See
[the hypervisors document](../../docs/hypervisors.md) for further details.
### Stateless systems
Since the runtime supports a
[stateless system](https://clearlinux.org/about),
it checks for this configuration file in multiple locations, two of which are
@@ -118,14 +129,14 @@ The below command lists the full paths to the configuration files that the
runtime attempts to load. The first path that exists will be used:
```bash
$ kata-runtime --kata-show-default-config-paths
$ kata-runtime --show-default-config-paths
```
Aside from the built-in locations, it is possible to specify the path to a
custom configuration file using the `--kata-config` option:
custom configuration file using the `--config` option:
```bash
$ kata-runtime --kata-config=/some/where/configuration.toml ...
$ kata-runtime --config=/some/where/configuration.toml ...
```
The runtime will log the full path to the configuration file it is using. See
@@ -135,7 +146,7 @@ To see details of your systems runtime environment (including the location of
the configuration file being used), run:
```bash
$ kata-runtime kata-env
$ kata-runtime env
```
## Logging


@@ -1 +0,0 @@
../../VERSION

src/runtime/VERSION (new file, 1 line)

@@ -0,0 +1 @@
2.0.0


@@ -9,10 +9,6 @@
// by the tests.
package main
import (
"fmt"
)
// name is the name of the runtime
const name = "@RUNTIME_NAME@"
@@ -36,14 +32,6 @@ var commit = "@COMMIT@"
// version is the runtime version.
var version = "@VERSION@"
// project-specific command names
var envCmd = fmt.Sprintf("%s-env", projectPrefix)
var checkCmd = fmt.Sprintf("%s-check", projectPrefix)
// project-specific option names
var configFilePathOption = fmt.Sprintf("%s-config", projectPrefix)
var showConfigPathsOption = fmt.Sprintf("%s-show-default-config-paths", projectPrefix)
// Default config file used by stateless systems.
var defaultRuntimeConfiguration = "@CONFIG_PATH@"


@@ -208,6 +208,16 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
# (default: disabled)
#enable_tracing = true
# Set the full URL to the Jaeger HTTP Thrift collector.
# If not set, the default is "http://localhost:14268/api/traces"
#jaeger_endpoint = ""
# Sets the username to be used if basic auth is required for Jaeger.
#jaeger_user = ""
# Sets the password to be used if basic auth is required for Jaeger.
#jaeger_password = ""
# If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
# This option may have some potential impacts to your host. It should only be used when you know what you're doing.
# `disable_new_netns` conflicts with `enable_netmon`
@@ -235,4 +245,4 @@ experimental=@DEFAULTEXPFEATURES@
# If enabled, user can run pprof tools with shim v2 process through kata-monitor.
# (default: false)
# EnablePprof = true
# enable_pprof = true


@@ -224,6 +224,16 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
# (default: disabled)
#enable_tracing = true
# Set the full URL to the Jaeger HTTP Thrift collector.
# If not set, the default is "http://localhost:14268/api/traces"
#jaeger_endpoint = ""
# Sets the username to be used if basic auth is required for Jaeger.
#jaeger_user = ""
# Sets the password to be used if basic auth is required for Jaeger.
#jaeger_password = ""
# If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
# This option may have some potential impacts to your host. It should only be used when you know what you're doing.
# `disable_new_netns` conflicts with `enable_netmon`
@@ -242,6 +252,12 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
# See: https://godoc.org/github.com/kata-containers/runtime/virtcontainers#ContainerType
sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
# If specified, sandbox_bind_mounts identifies host paths to be mounted (ro) into the sandbox's shared path.
# This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
# These will not be exposed to the container workloads, and are only provided for potential guest services.
sandbox_bind_mounts=@DEFBINDMOUNTS@
# Enabled experimental feature list, format: ["a", "b"].
# Experimental features are features not stable enough for production,
# they may break compatibility, and are prepared for a big version bump.
@@ -251,4 +267,4 @@ experimental=@DEFAULTEXPFEATURES@
# If enabled, user can run pprof tools with shim v2 process through kata-monitor.
# (default: false)
# EnablePprof = true
# enable_pprof = true


@@ -333,6 +333,16 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
# (default: disabled)
#enable_tracing = true
# Set the full URL to the Jaeger HTTP Thrift collector.
# If not set, the default is "http://localhost:14268/api/traces"
#jaeger_endpoint = ""
# Sets the username to be used if basic auth is required for Jaeger.
#jaeger_user = ""
# Sets the password to be used if basic auth is required for Jaeger.
#jaeger_password = ""
# If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
# This option may have some potential impacts to your host. It should only be used when you know what you're doing.
# `disable_new_netns` conflicts with `enable_netmon`
@@ -360,4 +370,4 @@ experimental=@DEFAULTEXPFEATURES@
# If enabled, user can run pprof tools with shim v2 process through kata-monitor.
# (default: false)
# EnablePprof = true
# enable_pprof = true


@@ -120,8 +120,8 @@ default_memory = @DEFMEMSZ@
disable_block_device_use = @DEFDISABLEBLOCK@
# Shared file system type:
# - virtio-9p (default)
# - virtio-fs
# - virtio-fs (default)
# - virtio-9p
shared_fs = "@DEFSHAREDFS_QEMU_VIRTIOFS@"
# Path to vhost-user-fs daemon.
@@ -241,6 +241,10 @@ valid_file_mem_backends = @DEFVALIDFILEMEMBACKENDS@
# The behaviour is undefined if mem_prealloc is also set to true
#enable_swap = true
# -pflash can add image files to the VM. The arguments should be in the format
# ["/path/to/flash0.img", "/path/to/flash1.img"]
pflashes = []
# This option changes the default hypervisor and kernel parameters
# to enable debug output where available.
#
@@ -319,6 +323,26 @@ valid_file_mem_backends = @DEFVALIDFILEMEMBACKENDS@
# Default 0-sized value means unlimited rate.
#tx_rate_limiter_max_rate = 0
# Set where to save the guest memory dump file.
# If set, when the GUEST_PANICKED event occurs,
# guest memory will be dumped to the host filesystem under guest_memory_dump_path.
# This directory will be created automatically if it does not exist.
#
# The dumped file (also called vmcore) can be processed with crash or gdb.
#
# WARNING:
#   Dumping guest memory can take a very long time depending on the amount of guest memory,
#   and it can use a lot of disk space.
#guest_memory_dump_path="/var/crash/kata"
# Whether to enable paging.
# Basically, if you want to use "gdb" rather than "crash",
# or need the guest-virtual addresses in the ELF vmcore,
# then you should enable paging.
#
# See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details
#guest_memory_dump_paging=false
[factory]
# VM templating support. Once enabled, new VMs are created from template
# using vm cloning. They will share the same initial kernel, initramfs and
@@ -459,6 +483,16 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
# (default: disabled)
#enable_tracing = true
# Set the full URL to the Jaeger HTTP Thrift collector.
# If not set, the default is "http://localhost:14268/api/traces"
#jaeger_endpoint = ""
# Sets the username to be used if basic auth is required for Jaeger.
#jaeger_user = ""
# Sets the password to be used if basic auth is required for Jaeger.
#jaeger_password = ""
# If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
# This option may have some potential impacts to your host. It should only be used when you know what you're doing.
# `disable_new_netns` conflicts with `enable_netmon`
@@ -477,6 +511,12 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
# See: https://godoc.org/github.com/kata-containers/runtime/virtcontainers#ContainerType
sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
# If specified, sandbox_bind_mounts identifies host paths to be mounted (ro) into the sandbox's shared path.
# This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
# These will not be exposed to the container workloads, and are only provided for potential guest services.
sandbox_bind_mounts=@DEFBINDMOUNTS@
# Enabled experimental feature list, format: ["a", "b"].
# Experimental features are features not stable enough for production,
# they may break compatibility, and are prepared for a big version bump.
@@ -486,4 +526,4 @@ experimental=@DEFAULTEXPFEATURES@
# If enabled, user can run pprof tools with shim v2 process through kata-monitor.
# (default: false)
# EnablePprof = true
# enable_pprof = true


@@ -73,7 +73,7 @@ func (s *cacheServer) GetBaseVM(ctx context.Context, empty *types.Empty) (*pb.Gr
return nil, errors.Wrapf(err, "failed to GetBaseVM")
}
return vm.ToGrpc(config)
return vm.ToGrpc(ctx, config)
}
func (s *cacheServer) quit() {


@@ -133,17 +133,25 @@ func getCPUFlags(cpuinfo string) string {
// haveKernelModule returns true if the specified module exists
// (either loaded or available to be loaded)
func haveKernelModule(module string) bool {
kmodLog := kataLog.WithField("module", module)
// First, check to see if the module is already loaded
path := filepath.Join(sysModuleDir, module)
if katautils.FileExists(path) {
return true
}
// Only root can load modules
if os.Getuid() != 0 {
kmodLog.Error("Module is not loaded and it can not be inserted. Please consider running with sudo or as root")
return false
}
// Now, check if the module is unloaded, but available.
// And modprobe it if so.
cmd := exec.Command(modProbeCmd, module)
if output, err := cmd.CombinedOutput(); err != nil {
kataLog.WithField("module", module).WithError(err).Warnf("modprobe insert module failed: %s", string(output))
kmodLog.WithError(err).WithField("output", string(output)).Warnf("modprobe insert module failed")
return false
}
return true
@@ -305,8 +313,9 @@ func genericHostIsVMContainerCapable(details vmContainerCapableDetails) error {
}
var kataCheckCLICommand = cli.Command{
Name: checkCmd,
Usage: "tests if system can run " + project,
Name: "check",
Aliases: []string{"kata-check"},
Usage: "tests if system can run " + project,
Flags: []cli.Flag{
cli.BoolFlag{
Name: "check-version-only",
@@ -343,36 +352,36 @@ EXAMPLES:
- Perform basic checks:
$ %s %s
$ %s check
- Local basic checks only:
$ %s %s --no-network-checks
$ %s check --no-network-checks
- Perform further checks:
$ sudo %s %s
$ sudo %s check
- Just check if a newer version is available:
$ %s %s --check-version-only
$ %s check --check-version-only
- List available releases (shows output in format "version;release-date;url"):
$ %s %s --only-list-releases
$ %s check --only-list-releases
- List all available releases (includes pre-release versions):
$ %s %s --only-list-releases --include-all-releases
$ %s check --only-list-releases --include-all-releases
`,
project,
noNetworkEnvVar,
name, checkCmd,
name, checkCmd,
name, checkCmd,
name, checkCmd,
name, checkCmd,
name, checkCmd,
name,
name,
name,
name,
name,
name,
),
Action: func(context *cli.Context) error {
@@ -385,8 +394,8 @@ EXAMPLES:
return err
}
span, _ := katautils.Trace(ctx, "kata-check")
defer span.Finish()
span, _ := katautils.Trace(ctx, "check")
defer span.End()
if !context.Bool("no-network-checks") && os.Getenv(noNetworkEnvVar) == "" {
cmd := RelCmdCheck
@@ -412,7 +421,7 @@ EXAMPLES:
runtimeConfig, ok := context.App.Metadata["runtimeConfig"].(oci.RuntimeConfig)
if !ok {
return errors.New("kata-check: cannot determine runtime config")
return errors.New("check: cannot determine runtime config")
}
err = setCPUtype(runtimeConfig.HypervisorType)

Some files were not shown because too many files have changed in this diff.