Compare commits

..

366 Commits

Author SHA1 Message Date
Fabiano Fidêncio
e754ff37e4 Merge pull request #2015 from fidencio/2.2.0-alpha0-branch-bump
# Kata Containers 2.2.0-alpha0
2021-06-11 18:51:08 +02:00
Julio Montes
6e7b55baa9 Merge pull request #1995 from GabyCT/topic/removetravisreference
docs: Remove old travis reference
2021-06-11 09:23:47 -05:00
Fabiano Fidêncio
54832cd052 release: Kata Containers 2.2.0-alpha0
- Update CC=gcc setting for Fedora s390x
- osbuilder: Streamline s390x CMake & musl handling
- runtime: remove the call to storeSandbox at the end of createSandboxFromConfig
- virtcontainers: Add support for Secure Execution
- agent: Conform to the latest nix version (0.21.0)
- docs: Update the stable branch strategy to what was proposed in our ML
- runtime: add more traces for network
- tools/packaging: clone meson and dependencies before building QEMU
- runtime: remove covertool from cli test
- factory: Use lazy unmount
- docs: Fix Release Process document
- Add sandbox and container ID to trace spans
- agent: Fix fd leak caused by netlink
- metrics: Add virtiofsd exporter
- versions: Update kubernetes to 1.21.1
- tracing: Add basic VSOCK tracing
- agent: Upgrade tokio-vsock to fix fd leak of vsock socket
- runtime: fix some comments and logs
- runtime: Add support for PEF
- cleanup TODOs in runtime
- tracing: Make runtime span attributes more consistent
- virtiofsd: refactor qemu.go to use code in virtiofsd.go
- runtime: remove unused doc.go
- cgroup: fix the issue of set mem.limit and mem.swap
- agent: re-enable the standard SIGPIPE behavior
- virtiofsd: Fix file descriptors leak and return correct PID
- runtime: and cgroup and SandboxCgroupOnly check for check sub-command
- kernel: add ppc64le fragments
- docs: Use --ignore-preflight-errors=all flag
- agent: fix start container failed when dropping all capabilities
- agent: Remove unnecessary underscore(_) variables
- docs: Add instructions for getting QEMU source
- qemu: align before memory hotplug on arm64
- workflows: release kata 2.x snap through the stable channel
- Sandbox bindmount cleanup
- docs: Update add customer agent command
- agent: Stop relying in the unmaintained prctl crate
- how-to-use-virtio-mem-with-kata.md: Update doc to make it clear
- docs: Add document for memory hotplug on arm64
- github: Run require porting labels only at main
- kernel: add confidential guest build option
- rustjail: separated the propagation flags from mount flags
- runtime: improve sandbox cleanup logic
- docs: add note for connecting debug console for old versions
- image_build: align image size to 128M for arm64
- agent: avoid reaping the exit signal of execute_hook in the reaper
- agent: move the dependency tempfile to the dev-dependencies section
- docs: Document test repository changes when creating a stable branch
- docs: Remove horizontal ruler markers that disable spell checks
- docs/Developer-Guide: Add instructions to apply QEMU patches
- runtime: make dialing timeout configurable
- Get sandbox metrics cli
- Support TDx
- packaging/kata-cleanup: add k3s containerd volume
- osbuilder: Upgrade alpine version to 3.13.5
- Monitor cleanup
- Open CONFIG_VIRTIO_MEM in x86_64 Linux kernel
- agent: delete code which is no longer used
- cli: delete tracing code for kata-runtime binary
- docs: add per-Pod Kata configurations for `enable_pprof`
- Fix issue of virtio-mem
- Set fixed NOFILE limit value for kata-agent
- ci/install_yq.sh: install_yq: Check version before return
- runtime: use s.ctx instead ctx for checking cancellation
- runtime: fix some comments

a1247bc0 agent: Conform to the latest nix version (0.21.0)
3130e66d runtime: remove storeSandbox at the end of createSandboxFromConfig
7593ebf9 runtime: Use CC=gcc on Fedora s390x
a484d6db osbuilder: Streamline s390x CMake & musl handling
da2d9ab8 osbuilder: Remove CC=gcc for Fedora s390x
c0c05c73 virtcontainers: Add support for Secure Execution
78f21710 virtcontainers/s390x: Put consts into one block
784025bb runtime: add more traces for network
9ec9bbba tools/packaging: clone meson and dependencies before building QEMU
9158ec68 docs: Fix Release Process document
9e3349c1 agent: Fix fd leak caused by netlink
3d0e0b27 tracing: Add network model to span
8ca02072 tracing: Add sandbox and container ID to trace spans
a9a0eccf tracing: Add basic VSOCK tracing
2234b730 metrics: Add virtiofsd exporter
9bf781d7 agent: Upgrade tokio-vsock to fix fd leak of vsock socket
b68334a1 runtime: fix some comments and logs
1f5b229b runtime: remove FIXME in SandboxState about CgroupPath
fee0004a runtime: remove TODO about hot add memory in qemu.go
2e29ef9c runtime: remove TODO comment from StatusContainer
72cd8f5e virtiofsd: refactor qemu.go to use code in virtiofsd.go
0b22c48d runtime: remove unused doc.go
30f4834c cgroup: fix the issue of set mem.limit and mem.swap
0ae364c8 agent: re-enable the standard SIGPIPE behavior
05a46fed tracing: Make runtime span attributes more consistent
727bfc45 runtime: and cgroup and SandboxCgroupOnly check for check sub-command
b25ad1ab tracing: Make trace-forwarder async
45f02227 tracing: Add trace points
773deca2 virtiofsd: Fix file descriptors leak and return correct PID
37a426b4 runtime: Add support for PEF
fe670c5d docs: Use --ignore-preflight-errors=all flag
5b5047bd docs: Add instructions for getting QEMU source
3e4ebe10 agent: fix start container failed when dropping all capabilities
9a43d76d workflows: release kata 2.x snap through the stable channel
7f1030d3 sandbox-bindmount: persist mount information
089a7484 sandbox: Cleanup if failure to setup sandbox-bindmount occurs
f65acc20 docs: Update add customer agent command
20a382c1 agent: Remove unnecessary underscore(_) variables
4b88532c docs: Don't use Docker as an example of a container manager
4142e424 docs: Don't mention 1.x components as part of the stable branch strategy
a0af2bd7 docs: Use stable-2.x / 2.x.y as example in the branch strategy document
a5e1f66a docs: Maintain only one stable branch per major release
419773b8 docs: Emphasize behaviour changes may be a reason for a major bump
54a75008 docs: Refer to `main` branch in the stable branch strategy document
7dde0b5d kernel: add ppc64le fragments
84906181 kernel: skip fragments for ppc64le
9676b86b kernel: move CONFIG_RANDOMIZE_BASE
bd0cde40 factory: Use lazy unmount
f52468be agent/agent-ctl: Replace prctl crate by the capctl one
d289b1d6 agent-ctl: Perform a `cargo update`
bc36b7b4 qemu: align before memory hotplug on arm64
8aefc793 agent: Perform a `cargo update`
785be0bb how-to-use-virtio-mem-with-kata.md: Update doc to make it clear
f8a16c17 kernel: add confidential guest build option
a65f11ea docs: Add document for memory hotplug on arm64
1b607056 runtime: remove covertool from cli test
fc42dc07 github: Run require porting labels only at main
dbef2b29 versions: Update kubernetes to 1.21.1
35151f17 runtime: sandbox delete should succeed after verifying sandbox state
e5fe572f rustjail: separated the propagation flags from mount flags
ffbb4d9b docs: add note for connecting debug console for old versions
a5bb383c agent: avoid reaping the exit signal of execute_hook in the reaper
ce7a5ba2 agent: move the dependency tempfile to the dev-dependencies section
e24e9462 docs/Developer-Guide: Add instructions to apply QEMU patches
850cf8cd docs: Document test repository changes when creating a stable branch
8068a469 kata-runtime: add `metrics` command
37873061 kata-monitor: export get stats for sandbox
01b56d6c runtime: make dialing timeout configurable
e8038718 osbuilder: Upgrade alpine version to 3.13.5
3caed6f8 runtime: shim: dedup client, socket addr code
4bc006c8 runtime: Short the shim-monitor path
5fdf617e docs: Fix spell-check errors found after new text is discovered
42425456 docs: Remove horizontal ruler markers that disable spell checks
3883e4e2 kernel: configs: Open CONFIG_VIRTIO_MEM in x86_64 Linux kernel
4f61f4b4 virtcontainers: Support TDX
0affe886 virtcontainers: define confidential guest framework
539afba0 runtime: define config options to enable confidential computing
79831faf runtime: use s.ctx instead ctx for checking cancellation
f6d5fbf9 runtime: fix some comments
9381e5f3 packaging/kata-cleanup: add k3s containerd volume
7f7c3fc8 qemu.go: qemu: resizeMemory: Fix virtio-mem resize overflow issue
c9053ea3 qemu.go: qemu: setupVirtioMem: let sizeMB be multiple of 2Mib
a188577e agent: Set fixed NOFILE limit value for kata-agent
88cf3db6 runtime: implement CPUFlags function
2b0d5b25 image_build: align image size to 128M for arm64
d601ae34 agent: delete not used comments
6038da19 agent: delete rustjail/src/configs directory
84ee8aa8 agent: delete not used functions
d8896157 ci/install_yq.sh: install_yq: Check version before return
95e54e3f docs: add per-Pod Kata configurations for enable_pprof
13c23fec cli: delete tracing code for kata-runtime binary

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-06-11 16:10:01 +02:00
Fabiano Fidêncio
dc4307d3cc Merge pull request #1974 from Jakob-Naucke/fix-cc-fedora-s390x
Update CC=gcc setting for Fedora s390x
2021-06-11 00:31:51 +02:00
Fabiano Fidêncio
bd195d67d4 Merge pull request #1976 from Jakob-Naucke/streamline-s390x-osbuilder
osbuilder: Streamline s390x CMake & musl handling
2021-06-11 00:31:34 +02:00
Fabiano Fidêncio
24bbcf58d3 Merge pull request #1981 from LiangZhou-CTY/patch-1
runtime: remove the call to storeSandbox at the end of createSandboxFromConfig
2021-06-11 00:30:39 +02:00
Gabriela Cervantes
a668f310c3 docs: Remove old travis reference
This PR removes the travis reference as we currently for kata 2.0,
travis is not being supported.

Fixes #1994

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2021-06-10 17:17:17 -05:00
Fabiano Fidêncio
8239f6fc17 Merge pull request #1772 from Jakob-Naucke/sec-exec
virtcontainers: Add support for Secure Execution
2021-06-11 00:02:01 +02:00
Fupan Li
9d84272dd1 Merge pull request #1988 from ManaSugi/conform-to-latest-nix
agent: Conform to the latest nix version (0.21.0)
2021-06-10 17:17:03 +08:00
Samuel Ortiz
15e3d1656b Merge pull request #1877 from fidencio/wip/update-stable-branch-strategy
docs: Update the stable branch strategy to what was proposed in our ML
2021-06-10 10:26:31 +02:00
Manabu Sugimoto
a1247bc0bb agent: Conform to the latest nix version (0.21.0)
We need to fix some agent's code to conform to the latest nix crate
to be able to use new features of the nix.

Fixes: #1987

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2021-06-10 16:58:51 +09:00
Liang Zhou
3130e66d33 runtime: remove storeSandbox at the end of createSandboxFromConfig
Remove storeSandbox() at the end of createSandboxFromConfig(),
because this callchain createSandboxFromConfig -> createContainers
has already calls storeSandbox().
This can improve the startup speed of the container,
even just for a little.

Fixes: #1980

Signed-off-by: Liang Zhou <zhoul110@chinatelecom.cn>
2021-06-10 11:56:40 +08:00
Tim Zhang
f26837a0f1 Merge pull request #1967 from liubin/fix/1956-add-more-traces-for-network
runtime: add more traces for network
2021-06-10 10:56:42 +08:00
Jakob Naucke
7593ebf947 runtime: Use CC=gcc on Fedora s390x
This was fixed for the Go agent back in
https://github.com/kata-containers/osbuilder/issues/217, but is also
required for the runtime.

Fixes: #1973

Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-06-08 16:36:24 +02:00
Fabiano Fidêncio
16a835e4a0 Merge pull request #1966 from devimc/2021-06-04/fixSnapCerts
tools/packaging: clone meson and dependencies before building QEMU
2021-06-08 10:36:53 +02:00
Jakob Naucke
a484d6db87 osbuilder: Streamline s390x CMake & musl handling
- Merge codepath in lib.sh with ppc64le -- do not install CMake
- Like ppc64le, do not install musl rather than just not using it

Fixes: #1975

Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-06-07 17:39:31 +02:00
Jakob Naucke
da2d9ab813 osbuilder: Remove CC=gcc for Fedora s390x
since that was required specifically for the Go agent

Fixes: #1973

Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-06-07 17:32:05 +02:00
Fabiano Fidêncio
208ab60e1e Merge pull request #1863 from zhsj/drop-covertool
runtime: remove covertool from cli test
2021-06-07 16:21:51 +02:00
Fabiano Fidêncio
51ac042cad Merge pull request #939 from keloyang/detach
factory: Use lazy unmount
2021-06-07 13:26:16 +02:00
Jakob Naucke
c0c05c73e1 virtcontainers: Add support for Secure Execution
Secure Execution is a confidential computing technology on s390x (IBM Z
& LinuxONE). Enable the correspondent virtualization technology in QEMU
(where it is referred to as "Protected Virtualization").

- Introduce enableProtection and appendProtectionDevice functions for
  QEMU s390x.
- Introduce CheckCmdline to check for "prot_virt=1" being present on the
  kernel command line.
- Introduce CPUFacilities and avilableGuestProtection for hypervisor
  s390x to check for CPU support.

Fixes: #1771

Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-06-07 10:50:33 +02:00
Jakob Naucke
78f21710e3 virtcontainers/s390x: Put consts into one block
Previously, all consts were in single lines in
virtcontainers/qemu_s390x.go. Put them into a const block.

Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-06-07 10:50:30 +02:00
bin
784025bb08 runtime: add more traces for network
Add traces for all the endpoinnt types
and the main interface functions.
Record errors for some traces.

Fixes: #1956

Signed-off-by: bin <bin@hyper.sh>
2021-06-07 11:38:40 +08:00
Julio Montes
a57118d03a Merge pull request #1961 from GabyCT/topic/fixreleasedoc
docs: Fix Release Process document
2021-06-04 14:59:11 -05:00
Julio Montes
9ec9bbbabc tools/packaging: clone meson and dependencies before building QEMU
In some distros (Ubuntu 18 and 20) it's not possible to clone meson
and QEMU dependencies from https://git.qemu.org due to problems with
its certificates, let's pull these dependencies from github before
building QEMU.

fixes #1965

Signed-off-by: Julio Montes <julio.montes@intel.com>
2021-06-04 14:52:39 -05:00
Chelsea Mafrica
60806ce3c8 Merge pull request #1957 from cmaf/tracing-attributes-sandboxID-1
Add sandbox and container ID to trace spans
2021-06-04 09:10:05 -07:00
Gabriela Cervantes
9158ec68cc docs: Fix Release Process document
This PR updates the correct url for github actions as well as it
corrects a misspelling.

Fixes #1960

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2021-06-03 12:51:25 -05:00
Tim Zhang
1255b83427 Merge pull request #1955 from Tim-Zhang/fix-fd-leak-of-netlink
agent: Fix fd leak caused by netlink
2021-06-03 20:15:15 +08:00
Tim Zhang
9e3349c18e agent: Fix fd leak caused by netlink
See also: little-dude/netlink#165

Fixes: #1952

Because the author of netlink has no time to maintain the crate
(https://github.com/little-dude/netlink/issues/161), so we
need to switch the dependency to github temporarily.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-06-03 17:23:37 +08:00
Chelsea Mafrica
3d0e0b2786 tracing: Add network model to span
Trace spans erroneously set the network model to default in all cases.
Add function to return network model string and use it to set attribute
in spans.

Fixes #1878

Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2021-06-02 21:53:54 -07:00
Chelsea Mafrica
8ca0207281 tracing: Add sandbox and container ID to trace spans
Add sandbox, container, and hypervisor IDs to trace spans. Note that
some spans in sandbox.go are created with a trace() call from api.go.
These spans have additional attributes set after span creation to
overwrite the api attributes.

Fixes #1878

Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2021-06-02 21:53:54 -07:00
Bin Liu
1673110ee9 Merge pull request #1930 from jcvenegas/kata-moinitor-export-virtiofsd
metrics: Add virtiofsd exporter
2021-06-03 10:38:55 +08:00
Fabiano Fidêncio
fd59030031 Merge pull request #1851 from fidencio/wip/bump-kubernetes-version-to-1.21.1
versions: Update kubernetes to 1.21.1
2021-06-02 23:41:57 +02:00
Chelsea Mafrica
33c12b6d08 Merge pull request #1929 from jodh-intel/add-agent-tracing
tracing: Add basic VSOCK tracing
2021-06-02 11:45:41 -07:00
James O. D. Hunt
a9a0eccf33 tracing: Add basic VSOCK tracing
Implement an openTelemetry custom exporter that sends trace spans to a
VSOCK socket. A VSOCK-to-span converter (such as the Kata trace
forwarder) needs to be running on the host to allow systems like Jaeger
to capture the trace spans.

By default, tracing is not enabled (meaning a NOP tracer is used). To
activate tracing, set the `agent.kata.enable_tracing=true` in the
configuration file.

The type of tracing this change introduces is "static isolated"
tracing. See [1] for further details.

> **Note:**
>
> This change only provides the foundational changes for agent
> tracing work. The feature is _not_ yet complete since it does
> not yet show the correct trace hierarchy.

Fixes: #60.

[1] - https://github.com/kata-containers/agent/blob/master/TRACING.md

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-06-02 18:00:05 +01:00
Carlos Venegas
2234b73090 metrics: Add virtiofsd exporter
Export proc stats for virtiofsd.

This commit only adds for hypervisors that have support for it.

- qemu
- cloud-hypervisor

Fixes: #1926

Signed-off-by: Carlos Venegas <jos.c.venegas.munoz@intel.com>
2021-06-02 16:06:00 +00:00
Julio Montes
1f0964bad8 Merge pull request #1951 from Tim-Zhang/fix-fd-leak-of-vsock
agent: Upgrade tokio-vsock to fix fd leak of vsock socket
2021-06-02 09:41:10 -05:00
Tim Zhang
9bf781d704 agent: Upgrade tokio-vsock to fix fd leak of vsock socket
Fixes: #1950

The further information: rust-vsock/vsock-rs#15

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-06-02 16:03:09 +08:00
Tim Zhang
476ec9bd86 Merge pull request #1948 from liubin/fix/1947-fix-comments
runtime: fix some comments and logs
2021-06-02 10:52:01 +08:00
Pradipta Banerjee
604e3a6fa1 Merge pull request #1882 from Amulyam24/pef
runtime: Add support for PEF
2021-06-01 12:56:53 +05:30
Peng Tao
41e04495f4 Merge pull request #1943 from bergwolf/cleanup2
cleanup TODOs in runtime
2021-06-01 14:16:46 +08:00
Chelsea Mafrica
bcde703b36 Merge pull request #1859 from cmaf/tracing-attributes-1
tracing: Make runtime span attributes more consistent
2021-05-31 21:57:58 -07:00
bin
b68334a1a8 runtime: fix some comments and logs
This commit fix some conments/logs.
And add some logs for debug.

Fixes: #1947

Signed-off-by: bin <bin@hyper.sh>
2021-06-01 09:04:18 +08:00
Bin Liu
d1ac0a1a2c Merge pull request #1938 from liubin/fix/1933-virtiofsd-refactor
virtiofsd: refactor qemu.go to use code in virtiofsd.go
2021-06-01 08:32:56 +08:00
Fabiano Fidêncio
d7b6e3e178 Merge pull request #1942 from bergwolf/cleanup
runtime: remove unused doc.go
2021-05-31 22:41:24 +02:00
Peng Tao
1f5b229bef runtime: remove FIXME in SandboxState about CgroupPath
It is in real life usage as we put non constrained sandbox processes
(like shim) in a separate cgroup path.

Fixes: #1944
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-05-29 13:17:14 +08:00
Peng Tao
fee0004ad4 runtime: remove TODO about hot add memory in qemu.go
Already addressed by https://github.com/kata-containers/runtime/pull/786

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-05-29 11:15:50 +08:00
Peng Tao
2e29ef9cab runtime: remove TODO comment from StatusContainer
It is no longer valid as containerd already doesn't treat container pid
as host process pid.

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-05-29 11:10:32 +08:00
bin
72cd8f5ef6 virtiofsd: refactor qemu.go to use code in virtiofsd.go
CloudHypervisor is using virtiofsd.go to manage virtiofsd process,
but qemu has its code in qemu.go. This commit let qemu to re-use
code in virtiofsd.go to reduce code and improve maintenanceability.

Fixes: #1933

Signed-off-by: bin <bin@hyper.sh>
2021-05-29 11:00:05 +08:00
Peng Tao
0b22c48d2a runtime: remove unused doc.go
It doesn't even contain any actual code there.

Fixes: #1941
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-05-29 10:25:29 +08:00
Peng Tao
c455d84571 Merge pull request #1918 from lifupan/main
cgroup: fix the issue of set mem.limit and mem.swap
2021-05-29 10:05:44 +08:00
Peng Tao
fd6d32ee42 Merge pull request #1939 from lifupan/fix_epipe
agent: re-enable the standard SIGPIPE behavior
2021-05-29 10:05:09 +08:00
Fabiano Fidêncio
bcf78a18ae Merge pull request #1932 from liubin/fix/1931-virtiofsd-fd-leak-and-return-right-pid
virtiofsd: Fix file descriptors leak and return correct PID
2021-05-28 12:29:56 +02:00
Fupan Li
a761e980e4 Merge pull request #1934 from liubin/fix/1927-check-SandboxCgroupOnly-and-cgroup-v2
runtime: and cgroup and SandboxCgroupOnly check for check sub-command
2021-05-28 16:43:44 +08:00
fupan.lfp
30f4834c5b cgroup: fix the issue of set mem.limit and mem.swap
When update memory limit, we should adapt the write sequence
for memory and swap memory, so it won't fail because
the new value and the old value don't fit kernel's
validation.

Fixes: #1917

Signed-off-by: fupan.lfp <fupan.lfp@antgroup.com>
2021-05-28 15:44:14 +08:00
fupan.lfp
0ae364c8eb agent: re-enable the standard SIGPIPE behavior
The Rust standard library had suppressed the default SIGPIPE
behavior, see https://github.com/rust-lang/rust/pull/13158.
Since the parent's signal handler would be inherited by it's child
process, thus we should re-enable the standard SIGPIPE behavior as a
workaround.

Fixes: #1887

Signed-off-by: fupan.lfp <fupan.lfp@antgroup.com>
2021-05-28 15:25:05 +08:00
Chelsea Mafrica
05a46fede0 tracing: Make runtime span attributes more consistent
Span attributes (tags) are not consistent in runtime tracing, so
designate and use core attributes such source, package, subsystem, and
type as span metadata for more understandable output.

Use WithAttributes() during span creation to reduce calls to
SetAttributes().

Modify Trace() in katautils to accept slice of attributes so multiple
functions using different attributes can use it.

Fixes #1852

Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2021-05-27 10:07:11 -07:00
bin
727bfc4556 runtime: and cgroup and SandboxCgroupOnly check for check sub-command
In kata-runtime check sub-command, checks cgroups and SandboxCgroupOnly
to show message if the SandboxCgroupOnly is not set to true
and cgroup v2 is used.

Fixes: #1927

Signed-off-by: bin <bin@hyper.sh>
2021-05-27 21:19:12 +08:00
James O. D. Hunt
b25ad1ab2c tracing: Make trace-forwarder async
The tracing crates are now async, so update the trace forwarder to use
the new API.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-05-27 10:48:05 +01:00
James O. D. Hunt
45f02227b2 tracing: Add trace points
Use the tracing crate to create automatic trace spans for the _majority_
of top-level modules.

Note that not all functions in the top-level modules can be traced:

- Some functions cannot be traced due to the requirement that all
  function parameters implement the `Debug` trait. In some cases (such
  as `netlink.rs`), objects are being passed that are defined in
  different crates and which do not implement `Debug`.
- Some functions may never return (`signal.rs`).
- Some functions are inlined.
- Some functions are very simple getter/setter functions.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-05-27 10:42:58 +01:00
bin
773deca2f6 virtiofsd: Fix file descriptors leak and return correct PID
This commit will fix two problems:
- Virtiofsd process ID returned to the caller will always be 0,
   the pid var is never being assigned a value.
- Socket listen fd may leak in case of failure of starting virtiofsd process.
  This is a port of be9ca0d58b

Fixes: #1931

Signed-off-by: bin <bin@hyper.sh>
2021-05-27 16:51:41 +08:00
Amulyam24
37a426b4c6 runtime: Add support for PEF
Protected Execution Facility(PEF) is the confidential computing
technology on ppc64le. This PR adds the support for it in Kata.
Also re-vendor govmm for the latest changes.

Fixes: #1881

Signed-off-by: Amulyam24 <amulmek1@in.ibm.com>
2021-05-25 14:29:42 +00:00
Peng Tao
35f297ad50 Merge pull request #1899 from Amulyam24/fragments
kernel: add ppc64le fragments
2021-05-25 10:18:55 +08:00
Fabiano Fidêncio
9fb301f4df Merge pull request #1920 from ManaSugi/update-kubeadm-flag
docs: Use --ignore-preflight-errors=all flag
2021-05-24 21:12:12 +02:00
Fabiano Fidêncio
c3f6c88668 Merge pull request #1915 from quanweiZhou/fix_start_container_failed_when_drop_all_caps
agent: fix start container failed when dropping all capabilities
2021-05-24 14:13:52 +02:00
Tim Zhang
005e5ddedc Merge pull request #1905 from ManaSugi/del_underscore_var
agent: Remove unnecessary underscore(_) variables
2021-05-24 17:39:48 +08:00
Manabu Sugimoto
fe670c5de5 docs: Use --ignore-preflight-errors=all flag
The --skip-preflight-checks flag has been deprecated in the Kubernetes v1.9
and removed from Kubernetes v1.12.
We should use --ignore-preflight-errors=all flag instead of --skip-preflight-checks.

Fixes: #1919

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2021-05-24 18:06:41 +09:00
Fabiano Fidêncio
852aa9454f Merge pull request #1908 from stevenhorsman/add-doc-for-getting-qemu
docs: Add instructions for getting QEMU source
2021-05-24 10:31:03 +02:00
Fupan Li
c09d8fcfda Merge pull request #1888 from yuanzhe-liu0/qemu_align
qemu: align before memory hotplug on arm64
2021-05-24 16:13:01 +08:00
stevenhorsman
5b5047bd4a docs: Add instructions for getting QEMU source
Update the developer guide to add instructions of how to get the
correct version of the QEMU source and sets your_qemu_directory
variable, so that follow on steps are easier for a new joiner to the
community to understand

Fixes #1907

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2021-05-23 10:23:18 +01:00
quanweiZhou
3e4ebe10ac agent: fix start container failed when dropping all capabilities
When starting a container and dropping all capabilities,
the init child process has no permission to read the exec.fifo
file because the parent set the file mode 0o622. So change the exec.fifo file mode to 0o644.

fixes #1913

Signed-off-by: quanweiZhou <quanweiZhou@linux.alibaba.com>
2021-05-22 17:33:49 +08:00
Fabiano Fidêncio
c078628544 Merge pull request #1910 from devimc/2021-05-21/snap/updateChannels
workflows: release kata 2.x snap through the stable channel
2021-05-22 09:21:58 +02:00
Julio Montes
9a43d76d5e workflows: release kata 2.x snap through the stable channel
kata 1.x has been deprecated, now kata 2.x can be released through
the stable channel

fixes #1909

Signed-off-by: Julio Montes <julio.montes@intel.com>
2021-05-21 15:48:48 -05:00
Fabiano Fidêncio
7dc1d32017 Merge pull request #1897 from egernst/sandbox-bindmount-cleanup
Sandbox bindmount cleanup
2021-05-21 22:36:12 +02:00
GabyCT
6f3b1bb796 Merge pull request #1906 from stevenhorsman/fix-add-customer-agent-command
docs: Update add customer agent command
2021-05-21 15:04:04 -05:00
Eric Ernst
7f1030d303 sandbox-bindmount: persist mount information
Without this, if the shim dies, we will not have a reliable way to
identify what mounts should be cleaned up if `containerd-shim-kata-v2
cleanup` is called for the sandbox.

Before this, if you `ctr run` with a sandbox bindmount defined and SIGKILL the
containerd-shim-kata-v2, you'll notice the sandbox bindmount left on
host.

With this change, the shim is able to get the sandbox bindmount
information from disk and do the appropriate cleanup.

Fixes #1896

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-05-21 12:54:35 -07:00
Eric Ernst
089a7484e1 sandbox: Cleanup if failure to setup sandbox-bindmount occurs
If for any reason there's an error when trying to setup the sandbox
bindmounts, make sure we roll back any mounts already created when
setting up the sandbox.

Without this, we'd leave shared directory mount and potentially
sandbox-bindmounts on the host.

Fixes: #1895

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-05-21 12:54:35 -07:00
stevenhorsman
f65acc20dc docs: Update add customer agent command
Update the developer guide to correct the
command that adds a customer kata-agent to the rootfs image
putting it in /usr/bin/kata-agent rather than /bin/kata-agent

Fixes #1904

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2021-05-21 15:42:04 +01:00
Manabu Sugimoto
20a382c158 agent: Remove unnecessary underscore(_) variables
We should remove underscore(_) prefixed variables when ? operator is
used.

Fixes: #1903

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2021-05-21 17:45:34 +09:00
Fabiano Fidêncio
4b88532c2f docs: Don't use Docker as an example of a container manager
Let's update the doc to use Containerd and CRI-O as examples, which fits
better the 2.x world.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-05-21 09:52:39 +02:00
Fabiano Fidêncio
4142e42465 docs: Don't mention 1.x components as part of the stable branch strategy
Let's slightly rewrite the text to ensure users of 2.x that never had
contact with kata-containers 1.x would be able to understand the
sentences.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-05-21 09:52:39 +02:00
Fabiano Fidêncio
a0af2bd7dc docs: Use stable-2.x / 2.x.y as example in the branch strategy document
This may help to reduce some confusion as 1.x was a totally different
thing for the project.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-05-21 09:52:39 +02:00
Fabiano Fidêncio
a5e1f66a15 docs: Maintain only one stable branch per major release
This is a proposal that was sent to the ML and can be accessed via
http://lists.katacontainers.io/pipermail/kata-dev/2021-May/001894.html

Shortly, the proposal is to maintain only one stable branch per major
active release.

This will help the developers and the CI maintainers, to spend more time
on what's coming, rather on backporting and debugging issues with old
releases; while still providing one stable branch that downstream
companies can rely on.

Hopefully, with this we'll be able to lower the maintainance burden and
spend more time on getting things rock solid / move forward in a faster
pace with the project.

Fixes: #1876

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-05-21 09:52:39 +02:00
Fabiano Fidêncio
419773b8df docs: Emphasize behaviour changes may be a reason for a major bump
The current wording is good, but we could emphasize better that changes
on behaviour from a previous release by simply making the text bold
rather than italic.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-05-21 09:52:39 +02:00
Fabiano Fidêncio
54a750086d docs: Refer to main branch in the stable branch strategy document
As there's no active `master` branch as part of kata-containers 2.x,
let's avoid referring to it, and let's referr to the `main` branch
instead.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-05-21 09:52:39 +02:00
Amulyam24
7dde0b5d84 kernel: add ppc64le fragments
Adding support for ppc64le kernel fragments.

Fixes: #1898

Signed-off-by: Amulyam24 <amulmek1@in.ibm.com>
2021-05-21 07:06:45 +00:00
Amulyam24
8490618125 kernel: skip fragments for ppc64le
Adding !powerpc to the group of fragments not
supported on ppc64le.

Signed-off-by: Amulyam24 <amulmek1@in.ibm.com>
2021-05-21 06:09:05 +00:00
Amulyam24
9676b86b44 kernel: move CONFIG_RANDOMIZE_BASE
This config is not selected for ppc64le. It is
only supported on PPC32 for now. Moved it to
respective arch base.conf.

Signed-off-by: Amulyam24 <amulmek1@in.ibm.com>
2021-05-21 06:05:21 +00:00
Shukui Yang
bd0cde40e7 factory: Use lazy unmount
we can have the following case,
1. start kata container with factory feature, this need kata-runtime
   config to enable factory and use initrd as base image.
2. start a kata container.
3. cd /root; cd /run/vc/vm/template dir, this will make
   /run/vc/vm/template to be in used.
4. destroy vm template with kata-runtime factory destroy , and check
                the template mountpoint.
we can see  the template mountpoints will add everytime we repeat the above steps .

[root@centos1 template]# mount |grep template
[root@centos1 template]# docker run -ti --rm  --runtime untrusted-runtime --net none busybox echo

[root@centos1 template]# cd /root; cd /run/vc/vm/template/
[root@centos1 template]# /kata/bin/kata-runtime factory destroy
vm factory destroyed
[root@centos1 template]# mount |grep template
tmpfs on /run/vc/vm/template type tmpfs (rw,nosuid,nodev,relatime,seclabel,size=2105344k)
[root@centos1 template]# docker run -ti --rm  --runtime untrusted-runtime --net none busybox echo

[root@centos1 template]# cd /root; cd /run/vc/vm/template/
[root@centos1 template]# /kata/bin/kata-runtime factory destroy
vm factory destroyed
[root@centos1 template]# mount |grep template
tmpfs on /run/vc/vm/template type tmpfs (rw,nosuid,nodev,relatime,seclabel,size=2105344k)
tmpfs on /run/vc/vm/template type tmpfs (rw,nosuid,nodev,relatime,seclabel,size=2105344k)

Fixes: #938

Signed-off-by: Shukui Yang <keloyangsk@gmail.com>
2021-05-20 16:18:28 +08:00
Fupan Li
0c463babf3 Merge pull request #1885 from fidencio/wip/stop-using-unmaintained-prctl-crate
agent: Stop relying in the unmaintained prctl crate
2021-05-20 10:50:04 +08:00
Fabiano Fidêncio
f52468bea7 agent/agent-ctl: Replace prctl crate by the capctl one
While evaluating the possibility of having kata-agent statically linked
to the GNU libc, we've ended up facing some issues with prctl.

When debugging the issues, we figured out that the crate hasn't been
maintained since 2015 and that the capctl one is a good 1:1 replacement
for what we need.

Fixes: #1844

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-05-19 20:16:26 +02:00
Fabiano Fidêncio
d289b1d621 agent-ctl: Perform a cargo update
While in the beginning of the development cycle, let's perform a `cargo
update`.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-05-19 19:44:12 +02:00
Yuanzhe Liu
bc36b7b49f qemu: align before memory hotplug on arm64
When hotplug memory on arm64 in kata, kernel will shout:

[ 0.396551] Block size [0x40000000] unaligned hotplug range: start 0xc8000000, size 0x40000000
[ 0.396556] acpi PNP0C80:01: add_memory failed
[ 0.396834] acpi PNP0C80:01: acpi_memory_enable_device() error
[ 0.396948] acpi PNP0C80:01: Enumeration failure

It means that kernel will check if the memory range to be hotplugged
align with 1G before plug the memory. So we should twist the qemu to
make sure the memory range align with 1G to pass the kernel check.

Fixes: #1841

Signed-off-by: Yuanzhe Liu <yuanzheliu09@gmail.com>
2021-05-19 14:23:55 +00:00
Fabiano Fidêncio
8aefc79314 agent: Perform a cargo update
While in the beginning of the development cycle, let's perform a `cargo
update`.

Fixes: #1883

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-05-19 09:43:17 +02:00
Fupan Li
be936442eb Merge pull request #1846 from teawater/vm_doc
how-to-use-virtio-mem-with-kata.md: Update doc to make it clear
2021-05-18 14:07:49 +08:00
Hui Zhu
b97791add1 Merge pull request #1855 from jongwu/mem_doc
docs: Add document for memory hotplug on arm64
2021-05-18 09:32:55 +08:00
Hui Zhu
785be0bbde how-to-use-virtio-mem-with-kata.md: Update doc to make it clear
Update this howto because the virtio-mem support of kata, qemu and Linux
was updated.

Fixes: #1845

Signed-off-by: Hui Zhu <teawater@antfin.com>
2021-05-18 09:27:57 +08:00
Chelsea Mafrica
6b9e46ef54 Merge pull request #1858 from GabyCT/topic/fixprporting
github: Run require porting labels only at main
2021-05-17 11:43:04 -07:00
Chelsea Mafrica
2e52529895 Merge pull request #1822 from jimcadden/kernel-conf-guest
kernel: add confidential guest build option
2021-05-17 11:42:07 -07:00
Jim Cadden
f8a16c170a kernel: add confidential guest build option
Includes support for SEV guest kernels

Fixes #1870

Signed-off-by: Jim Cadden <jcadden@ibm.com>
2021-05-17 09:52:32 -04:00
Jianyong Wu
a65f11ea56 docs: Add document for memory hotplug on arm64
After some enablement work, memory hotplug can be used on arm64.
Here we offer a document to instruct user to enable it.

Fixes: #1854
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2021-05-17 16:21:37 +08:00
Shengjing Zhu
1b60705646 runtime: remove covertool from cli test
covertool has no active since 2018 and is not compatible with go1.16

  ../vendor/github.com/dlespiau/covertool/pkg/cover/cover.go:76:29: cannot use f (type dummyTestDeps) as type testing.testDeps in argument to testing.MainStart:
  dummyTestDeps does not implement testing.testDeps (missing SetPanicOnExit0 method)

Fixes: #1862

Signed-off-by: Shengjing Zhu <zhsj@debian.org>
2021-05-16 03:06:06 +08:00
Gabriela Cervantes
fc42dc07cf github: Run require porting labels only at main
This PR modifies that require porting labels only run at main.

Fixes #1857

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2021-05-14 16:21:44 -05:00
Peng Tao
f6c5f7c0ef Merge pull request #1844 from lifupan/main
rustjail: separated the propagation flags from mount flags
2021-05-14 10:25:35 +08:00
Eric Ernst
f8d1f9b86e Merge pull request #985 from bergwolf/sandbox-cgroups-only
runtime: improve sandbox cleanup logic
2021-05-13 16:31:26 -07:00
Fabiano Fidêncio
dbef2b2931 versions: Update kubernetes to 1.21.1
The reason for doing such is to (try to) avoid random crashes we've been
facing as part of our CI, such as the one reported as part of
https://github.com/kata-containers/tests/issues/3473

Fixes: #1850

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-05-14 00:08:55 +02:00
Peng Tao
35151f1786 runtime: sandbox delete should succeed after verifying sandbox state
Otherwise we might block delete and create orphan containers.

Fixes: #1039

Signed-off-by: Peng Tao <bergwolf@hyper.sh>
Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-05-13 14:05:49 -07:00
fupan.lfp
e5fe572f51 rustjail: separated the propagation flags from mount flags
Since the propagation flags couldn't be combinted with the
standard mount flags, and they should be used with the remount,
thus it's better to split them from the standard mount flags.

Fixes: #1699

Signed-off-by: fupan.lfp <fupan.lfp@antgroup.com>
2021-05-13 23:53:52 +08:00
snir911
c995a982bc Merge pull request #1843 from liubin/fix/1842-update-docs-for-connecting-debug-console
docs: add note for connecting debug console for old versions
2021-05-13 14:06:51 +03:00
bin
ffbb4d9b11 docs: add note for connecting debug console for old versions
Before 2.1-alpha1, user still need starting kata-monitor
to connect to debug console.

Fixes: #1842

Signed-off-by: bin <bin@hyper.sh>
2021-05-13 15:39:04 +08:00
GabyCT
bdc9a66bd9 Merge pull request #1770 from jongwu/image_align
image_build: align image size to 128M for arm64
2021-05-12 09:38:06 -05:00
Bin Liu
cc4748fa64 Merge pull request #1829 from Tim-Zhang/fix-reap
agent: avoid reaping the exit signal of execute_hook in the reaper
2021-05-12 17:24:25 +08:00
Bin Liu
15778a17e5 Merge pull request #1828 from Tim-Zhang/move-dep
agent: move the dependency tempfile to the dev-dependencies section
2021-05-12 17:21:50 +08:00
Tim Zhang
2909a0364d Merge pull request #1824 from c3d/issue/1823-release-process-version-bump
docs: Document test repository changes when creating a stable branch
2021-05-12 14:46:32 +08:00
Tim Zhang
a5bb383cf3 agent: avoid reaping the exit signal of execute_hook in the reaper
Fixes: #1826

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-05-12 14:40:20 +08:00
Tim Zhang
ce7a5ba22e agent: move the dependency tempfile to the dev-dependencies section
The tempfile is only used by tests.

Fixes: #1827

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-05-12 14:39:58 +08:00
Fabiano Fidêncio
979b73c35a Merge pull request #1794 from c3d/issue/1793-workaround-spell-checker-bug
docs: Remove horizontal ruler markers that disable spell checks
2021-05-11 23:27:37 +02:00
GabyCT
5d05f36117 Merge pull request #1825 from wainersm/docs_qemu_patches
docs/Developer-Guide: Add instructions to apply QEMU patches
2021-05-11 13:36:00 -05:00
Fabiano Fidêncio
ac61e60492 Merge pull request #1790 from snir911/configure_timeout
runtime: make dialing timeout configurable
2021-05-11 16:52:05 +02:00
Wainer dos Santos Moschetta
e24e94622c docs/Developer-Guide: Add instructions to apply QEMU patches
Occasionally patches are necessary to build QEMU with the kata containers
configuration. This changed the developer guide to make it clear it is
recommended to apply the patches; and tell how.

Fixes #1807
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-05-11 10:39:53 -04:00
Christophe de Dinechin
850cf8cdb3 docs: Document test repository changes when creating a stable branch
When we create a new stable branch, it is good practice to ensure that the test
repository points to that stable branch, to make sure that it is not impacted by
later changes to the CI made on the stable branch.

Fixes: #1823

Signed-off-by: Christophe de Dinechin <christophe@dinechin.org>
2021-05-11 11:44:03 +02:00
Bin Liu
bffb099d99 Merge pull request #1816 from egernst/get-sandbox-metrics-cli
Get sandbox metrics cli
2021-05-11 13:10:30 +08:00
Samuel Ortiz
2c4e4ca1ac Merge pull request #1590 from devimc/2021-02-02/ConfidentialComputing
Support TDx
2021-05-10 22:19:40 +02:00
Fabiano Fidêncio
becd270ccf Merge pull request #1802 from nubificus/fix-k3s-cleanup
packaging/kata-cleanup: add k3s containerd volume
2021-05-10 21:15:43 +02:00
Eric Ernst
8068a4692f kata-runtime: add metrics command
For easier debug, let's add subcommand to kata-runtime for gathering
metrics associated with a given sandbox.

kata-runtime metrics --sandbox-id foobar

Fixes: #1815

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-05-10 10:45:10 -07:00
Fabiano Fidêncio
c4bd246efb Merge pull request #1818 from cmaf/update-alpine-version
osbuilder: Upgrade alpine version to 3.13.5
2021-05-10 17:55:35 +02:00
Eric Ernst
3787306107 kata-monitor: export get stats for sandbox
Gathering stats for a given sandbox is pretty useful; let's export a
function from katamonitor pkg to do this.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-05-10 08:53:56 -07:00
Snir Sheriber
01b56d6cbf runtime: make dialing timeout configurable
allow to set dialing timeout in configuration.toml
default is 30s

Fixes: #1789
Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2021-05-10 16:39:37 +03:00
Eric Ernst
12a04cb0ba Merge pull request #1811 from egernst/monitor-cleanup
Monitor cleanup
2021-05-07 21:03:34 -07:00
Chelsea Mafrica
e8038718aa osbuilder: Upgrade alpine version to 3.13.5
We are using an older version of alpine, so upgrade to latest 3.13.5.

Fixes #1817

Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2021-05-07 16:34:45 -07:00
Eric Ernst
3caed6f88d runtime: shim: dedup client, socket addr code
(1) Add an accessor function, SocketAddress, to the shim-v2 code for
determining the shim's abstract domain socket address, given the sandbox
ID.

(2) In kata monitor, create a function, BuildShimClient, for obtaining the appropriate
http.Client for communicating with the shim's monitoring endpoint.

(3) Update the kata CLI and kata-monitor code to make use of these.

(4) Migrate some kata monitor methods to be functions, in order to ease
future reuse.

(5) drop unused namespace from functions where it is no longer needed.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-05-07 15:20:37 -07:00
Fabiano Fidêncio
4bc006c8a4 runtime: Short the shim-monitor path
Instead of having something like
"/containerd-shim/$namespace/$sandboxID/shim-monitor.sock", let's change
the approach to:
* create the file in a more neutral location "/run/vc", instead of
  "/containerd-shim";
* drop the namespace, as the sandboxID should be unique;
* remove ".sock" from the socket name.

This will result on a name that looks like:
"/run/vc/$sandboxID/shim-monitor"

Fixes: #497

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-05-07 14:20:35 -07:00
Christophe de Dinechin
5fdf617e7f docs: Fix spell-check errors found after new text is discovered
The spell-checker scripts has some bugs that caused large chunks of texts to not
be spell checked at all (see #1793). The previous commit worked around this bug,
which exposed another bug:

The following source text:

    are discussions about using VM save and restore to
    give [`criu`](https://github.com/checkpoint-restore/criu)-like
    functionality, which might provide a solution

yields the surprising error below:

    WARNING: Word 'givelike': did you mean one of the following?: give like, give-like, wavelike

Apparently, an extra space is removed, which is another issue with the
spell-checking script. This case is somewhat contrived because of the URL link,
so for now, I decided for a creative rewriting, inserting the word "a" knowing
that "alike" is a valid word ;-)

Fixes: #1793

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2021-05-07 11:43:44 +02:00
Christophe de Dinechin
42425456e7 docs: Remove horizontal ruler markers that disable spell checks
There is a bug in the CI script checking spelling that causes it
to skip any text that follows a horizontal ruler.
(https://github.com/kata-containers/tests/issues/3448)

Solution: replace one horizontal ruler marker with another that
does not trip the spell-checking script.

Fixes: #1793

Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2021-05-07 11:43:44 +02:00
Fabiano Fidêncio
0a3b7938c9 Merge pull request #1799 from teawater/open_vm
Open CONFIG_VIRTIO_MEM in x86_64 Linux kernel
2021-05-07 11:43:17 +02:00
Hui Zhu
3883e4e290 kernel: configs: Open CONFIG_VIRTIO_MEM in x86_64 Linux kernel
Open CONFIG_VIRTIO_MEM in x86_64 Linux kernel.

Fixes: #1798

Signed-off-by: Hui Zhu <teawater@antfin.com>
2021-05-07 15:19:06 +08:00
Tim Zhang
1bfc426a2b Merge pull request #1784 from liubin/fix/1783-delete-un-used-fn
agent: delete code which is no longer used
2021-05-07 14:25:26 +08:00
Fabiano Fidêncio
2436839fa7 Merge pull request #1749 from liubin/fix/1748-delete-tracing-in-cli
cli: delete tracing code for kata-runtime binary
2021-05-07 08:17:16 +02:00
Tim Zhang
75648b0770 Merge pull request #1745 from liubin/fix/1744-add-doc-for-enable_pprof
docs: add per-Pod Kata configurations for `enable_pprof`
2021-05-07 13:45:34 +08:00
Fupan Li
70e1d44262 Merge pull request #1800 from teawater/fix_vm
Fix issue of virtio-mem
2021-05-07 13:08:12 +08:00
Fupan Li
487e165093 Merge pull request #1778 from snir911/patch_nofile
Set fixed NOFILE limit value for kata-agent
2021-05-07 13:06:10 +08:00
Tim Zhang
29716c35e6 Merge pull request #1777 from teawater/check_yq
ci/install_yq.sh: install_yq: Check version before return
2021-05-07 10:02:51 +08:00
Chelsea Mafrica
3e8137399c Merge pull request #1805 from liubin/fix/1804-select-sandbox-ctx
runtime: use s.ctx instead ctx for checking cancellation
2021-05-06 09:51:47 -07:00
Chelsea Mafrica
917665ab6d Merge pull request #1751 from liubin/fix/1750-fix-comments
runtime: fix some comments
2021-05-06 08:42:15 -07:00
Julio Montes
4f61f4b490 virtcontainers: Support TDX
Add support for Intel TDX confidential guests

fixes #1332

Signed-off-by: Julio Montes <julio.montes@intel.com>
2021-05-06 10:09:05 -05:00
Julio Montes
0affe8860d virtcontainers: define confidential guest framework
Define the structure and functions needed to support confidential
guests, this commit doesn't add support for any specific technology,
support for TDX, SEV, PEF and others will be added in following
commits.

Signed-off-by: Julio Montes <julio.montes@intel.com>
2021-05-06 10:09:05 -05:00
Julio Montes
539afba03d runtime: define config options to enable confidential computing
Define config options to enable or disable confidential computing and
its features, for example:
* Image service offloading
* Image decryption keys

Signed-off-by: Julio Montes <julio.montes@intel.com>
2021-05-06 10:09:05 -05:00
bin
79831fafaf runtime: use s.ctx instead ctx for checking cancellation
s.ctx should be used for checking cancellation, and the
local ctx is used for tracing.

Fixes: #1804

Signed-off-by: bin <bin@hyper.sh>
2021-05-06 17:22:53 +08:00
bin
f6d5fbf9ba runtime: fix some comments
This commint include two types of fixes for comments
in src/runtime/containerd-shim-v2/start.go.

- Update comment for calling of watchOOMEvents.
- Comments without heading spaces.

Fixes: #1750

Signed-off-by: bin <bin@hyper.sh>
2021-05-06 17:12:52 +08:00
Orestis Lagkas Nikolos
9381e5f31a packaging/kata-cleanup: add k3s containerd volume
kata-deploy cleanup expects to find containerd configuration
in /etc/containerd/config.toml. In case of k3s mount the k3s
containerd config as a volume.

Fixes #1801

Signed-off-by: Orestis Lagkas Nikolos <olagkasn@nubificus.co.uk>
2021-05-06 03:18:16 -05:00
Hui Zhu
7f7c3fc8ec qemu.go: qemu: resizeMemory: Fix virtio-mem resize overflow issue
This commit change sizeByte from uint32 to uint64 to fix overflow issue.

Fixes: #1796

Signed-off-by: Hui Zhu <teawater@antfin.com>
2021-05-06 14:13:50 +08:00
Hui Zhu
c9053ea3fb qemu.go: qemu: setupVirtioMem: let sizeMB be multiple of 2Mib
Got:
FATA[0000] run pod sandbox: rpc error: code = Unknown desc = failed to
create containerd task: Add 189759MB virtio-mem-pci fail QMP command
failed: backend memory size must be multiple of 0x200000: unknown

This commit let sizeMB be multiple of 2Mib to fix the issue.

Fixes: #1796

Signed-off-by: Hui Zhu <teawater@antfin.com>
2021-05-06 14:13:48 +08:00
Snir Sheriber
a188577ebf agent: Set fixed NOFILE limit value for kata-agent
Some applications may fail if NOFILE limit is set to unlimited.
Although in some environments this value is explicitly overridden,
lets set it to a more sane value in case it doesn't.

Fixes #1715
Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2021-05-04 15:06:11 +03:00
Julio Montes
88cf3db601 runtime: implement CPUFlags function
`CPUFlags` returns a map with all the CPU flags, these CPU flags
may help us to identiry whether a system support confidential computing
or not.

Signed-off-by: Julio Montes <julio.montes@intel.com>
2021-05-03 09:33:13 -05:00
Jianyong Wu
2b0d5b252e image_build: align image size to 128M for arm64
There is an inconformity between qemu and kernel of memory alignment
check in memory hotplug. Both of qemu and kernel will do the start
address alignment check in memory hotplug. But it's 2M in qemu
while 128M in kernel. It leads to an issue when memory hotplug.

Currently, the kata image is a nvdimm device, which will plug into the VM as
a dimm. If another dimm is pluged, it will reside on top of that nvdimm.
So, the start address of the second dimm may not pass the alginment
check in kernel if the nvdimm size doesn't align with 128M.

There are 3 ways to address this issue I think:
1. fix the alignment size in kernel according to qemu. I think people
in linux kernel community will not accept it.
2. do alignment check in qemu and force the start address of hotplug
in alignment with 128M, which means there maybe holes between memory blocks.
3. obey the rule in user end, which means fix it in kata.

I think the second one is the best, but I can't do that for some reason.
Thus, the last one is the choice here.

Fixes: #1769
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2021-05-03 10:44:30 +08:00
Fabiano Fidêncio
d0eda5ecfd Merge pull request #1786 from fidencio/2.1.0-rc0-branch-bump
# Kata Containers 2.1.0-rc0
2021-05-01 01:13:22 +02:00
Fabiano Fidêncio
799433d863 release: Kata Containers 2.1.0-rc0
- Update kata-deploy to use CRI-O drop-in files
- Update dependencies versions
- fix build kernel shell error when setup with `-f`
- virtcontainers: Fix virtio-fs on s390x
- Runtimeclass updates
- versions: Upgrade to cloud-hypervisor v15.0
- clh: return error if apiSocketPath failed
- runtime: fix dropped error
- agent: Update seccomp configuration for errnoRet and flags
- Fix the issue that sandbox size is not right after update
- docs: Document limitation regarding subpaths
- qemu: kill virtiofsd if failure to start VMM
- runtime/virtcontainers: Fix typo on qmp error msg
- cli: delete not used files
- runtime: delete not used function parameter builtIn
- add io.katacontainers.config.hypervisor.virtio_fs_extra_args handling
- Entropy source annotation
- runtime: Fix stdout/stderr output from container being truncated
- fix the issue of missing set fsGroup for EphemeralStorage
- qemu: Fix assertion failure on shutdown
- Assorted clippy fixes for Rust agent
- agent: use channel instead of pipe(2) to send exit signal of process
- Improve agent shutdown handling
- Enable virtio-fs on s390x
- block: Generate PCI path for virtio-blk devices on clh
- runtime: Disable trace for healthcheck
- agent/rustjail: Fix accidental damage from tokio conversion
- cli: Use genericGetExpectedHostDetails on s390x
- runtime/tests: Change "moo FAILURE" message
- Update the information about the release process
- remove ProcessListContainer API

2047f26f kata-deploy: Adapt CRI-O config to use drop-in files
8de2f914 kata-deploy: Rely on CRIO default's values for manage_ns_lifecycle
ea9936e0 versions: Bump runc to v1.0.0-rc93
9c333b2c versions: Bump CRI-O version to 1.21.x
e33f207b versions: Bump critools version to 1.21.0
8e5df723 versions: Bump kubernetes version to 1.21.0
d15f84c9 versions: Remove Docker entry
516f4ec0 versions: Remove OpenShift entry
be101ac1 versions: Remove CRI-O meta dependencies
1ca6bedf versions: Upgrade to cloud-hypervisor v15.0
906c0df4 kata-deploy: don't update worker pool nodes
3ee61776 virtcontainers: Enable virtio-fs on s390x
8385ff95 runtime: Re-vendor GoVMM
adba4532 virtcontainers: Revert "virtcontainers: Allow s390x appendVhostUserDevice"
ede078bc kata-deploy: aks-test: bump kubernetes/containerd
484af12b kata-deploy: update to handle new runtimeclass path
05c224c3 runtimeclass: add nodeSelector
ee7de8ab tools: fix build kernel shell error
7d5a4252 docs: Document limitation regarding subpaths
36776408 runtime/virtcontainers: Fix typo on qmp error msg
12a65d23 runtimeclass: drop stale runtimeclass definitions
0787ea80 cgroupsCreate: not set resources to c.config.Resources
831224aa Sandbox: Fix ContainerConfig ptr in CreateContainer and createContainers
a57c8ab1 qemu: kill virtiofsd if failure to start VMM
ff2b9e54 cli: delete not used files
0d0a520d clh: return error if apiSocketPath failed
fc6bb01a runtime: fix dropped error
30ff6ee8 runtime: handle io.katacontainers.config.hypervisor.virtio_fs_extra_args
677f0d99 runtime: delete not used function parameter builtIn
dcb9f403 config: Protect annotation for entropy_source
f4c26aad agent: fix the issue of missing set fsGroup for EphemeralStorage
628d55bf kata-agent: fix the issue of fsGroup missing
0405beb2 agent: Remove unused Default implementation for NamespaceType
7b83b7ec agent/uevent: Better initialize Uevent in test
b0190a40 agent: Use vec![] macro rather than init-then-push
1c43245e agent/device: Remove unneeded Result<> wrappers from uev matchers
e41cdb8b agent: Use str::is_empty() method in config::get_string_value()
2377c097 agent: Use CamelCase for NamespaceType values
75eca6d5 agent/rustjail: Clean up error path in execute_hook()s async task
6ce1e56d agent/rustjail: Remove an unnecessary PathBuf
3c4485ec agent/rustjail: Clean up some static definitions with vec! macro
eaec5a6c agent/oci: Change name case to make clippy happy
3f5fdae0 agent/rustjail: (trivial) Clean up comment on process_grpc_to_oci()
210f39a4 agent/rustjail: Simplify renaming imports
d4a54137 runtime: Fix stdout/stderr output from container being truncated
8ecf8e5c agent: use channel instead of pipe to send exit signal of process
81c5ff12 agent: Update seccomp configuration for errnoRet and flags
8a33bd4c qemu: Fix assertion failure on shutdown
7f609113 virtcontainers: Allow s390x appendVhostUserDevice
67ac4f45 runtime: update GoVMM for memory backend support
6577b01a agent/rustjail: Fix accidental damage from tokio conversion
de2631e7 utils: Make WaitLocalProcess safer
9256e590 shutdown: Don't sever console watcher too early
51ab8700 utils: Improve WaitLocalProcess
507ef636 utils: Add waitLocalProcess function
1d5098de agent/block: Generate PCI path for virtio-blk devices on clh
e7c97f0f runtime/tests: Change "moo FAILURE" message
8bc53498 docs: Simplify the repo bumping section
8a47b05a docs: Mention that an app token should be used with hub
d434c2e9 docs: OBS account is not require anymore
543f9da3 runtime: Disable trace for healthcheck
421439c6 API: remove ProcessListContainer/ListProcesses
1366f0fb cli: Use genericGetExpectedHostDetails on s390x

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-05-01 00:14:17 +02:00
Fabiano Fidêncio
ebca056ef8 Merge pull request #1782 from fidencio/wip/kata-deploy-update-crio-config
Update kata-deploy to use CRI-O drop-in files
2021-05-01 00:09:51 +02:00
Fabiano Fidêncio
239cc51199 Merge pull request #1689 from fidencio/wip/update-dependencies-versions
Update dependencies versions
2021-05-01 00:01:45 +02:00
Fabiano Fidêncio
2047f26fa3 kata-deploy: Adapt CRI-O config to use drop-in files
By using drop-in file it simplifies the deployment and maintenance of
the CRI-O configurations by a lot, and all versions of CRI-O that should
be used together with the currently supported versions of kubenertes
support the drop-in configuration file.

Depends-on: github.com/kata-containers/kata-containers#1689
Fixes #1781

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-04-30 23:14:19 +02:00
Fabiano Fidêncio
8de2f914ab kata-deploy: Rely on CRIO default's values for manage_ns_lifecycle
manage_ns_lifecycle (previously known as manage_network_ns_lifecycle)
has its default value as `true` for all CRI-O versions that should be
used with the kubernetes versions that are still supported / didn't
reach their EOL.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-04-30 23:14:19 +02:00
Fabiano Fidêncio
d11d0796e1 Merge pull request #1766 from zyt312074545/fix_build_kernel_shell
fix build kernel shell error when setup with `-f`
2021-04-30 19:37:45 +02:00
Eric Ernst
1c0d3afd55 Merge pull request #1754 from Jakob-Naucke/fix-virtiofs-s390x
virtcontainers: Fix virtio-fs on s390x
2021-04-30 09:28:12 -07:00
Fabiano Fidêncio
04660b1af2 Merge pull request #1763 from egernst/runtimeclass-updates
Runtimeclass updates
2021-04-30 18:21:33 +02:00
Fabiano Fidêncio
2e0221125a Merge pull request #1780 from likebreath/0429/clh_v15.0
versions: Upgrade to cloud-hypervisor v15.0
2021-04-30 18:20:36 +02:00
Fabiano Fidêncio
29fdfcfebc Merge pull request #1725 from liubin/liubin/1724-not-return-if-get-api-socket-failed
clh: return error if apiSocketPath failed
2021-04-30 18:16:45 +02:00
Fabiano Fidêncio
dc23adcd50 Merge pull request #1743 from alrs/fix-runtime-err
runtime: fix dropped error
2021-04-30 18:15:22 +02:00
bin
d601ae3446 agent: delete not used comments
Delete comments meanless or make people confusion.

Fixes: #1783

Signed-off-by: bin <bin@hyper.sh>
2021-04-30 19:37:55 +08:00
bin
6038da1903 agent: delete rustjail/src/configs directory
This directory is not used anymore.

Fixes: #1783

Signed-off-by: bin <bin@hyper.sh>
2021-04-30 19:18:03 +08:00
bin
84ee8aa8b2 agent: delete not used functions
In file src/agent/rustjail/src/validator.rs,
these two functions are not used:
- get_namespace_path
- check_host_ns

Fixes: #1783

Signed-off-by: bin <bin@hyper.sh>
2021-04-30 19:17:41 +08:00
Fabiano Fidêncio
ea9936e004 versions: Bump runc to v1.0.0-rc93
Let's bump runc to its latest release.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-04-30 11:43:18 +02:00
Fabiano Fidêncio
9c333b2c79 versions: Bump CRI-O version to 1.21.x
For CRI-O, let's rely on the "release-1.21" branch, as this is the
branch getting backports for the 1.21.x cycle.

Relying on the branch avoids our needs to keep bumping it every now and
then.

Fixes: #1688

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-04-30 11:42:42 +02:00
Fabiano Fidêncio
e33f207b7d versions: Bump critools version to 1.21.0
Let's bump critools version to the same version of the kubernetes.

Fixes: #1686

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-04-30 11:42:42 +02:00
Fabiano Fidêncio
8e5df72302 versions: Bump kubernetes version to 1.21.0
1.21.0 is the latest k8s release.

Fixes: #1685

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-04-30 11:42:42 +02:00
Fabiano Fidêncio
d15f84c956 versions: Remove Docker entry
It's been some time already, since
https://github.com/kata-containers/tests/pull/3272, that we don't depend
on a specific version of docker.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-04-30 11:42:42 +02:00
Fabiano Fidêncio
516f4ec06e versions: Remove OpenShift entry
Tested between Kata Containers and OpenShift are already being done via
the OpenShift CI.  This entry is only related to the OpenShift 3.x,
which is not tested anymore via our CI in any possible way.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-04-30 11:42:42 +02:00
Fabiano Fidêncio
be101ac1ef versions: Remove CRI-O meta dependencies
CRI-O meta dependencies (crictl and openshift) are a left over from the
OCP 3.x era.  Currently we don't need those as we have Kata Containers
onboard with the OpenShift CI, and we don't test OCP 3.x in any way
nowadays.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-04-30 11:42:42 +02:00
Fabiano Fidêncio
bd486f7bf3 Merge pull request #1720 from ManaSugi/update-seccomp-spec
agent: Update seccomp configuration for errnoRet and flags
2021-04-30 10:52:42 +02:00
Bo Chen
1ca6bedf3e versions: Upgrade to cloud-hypervisor v15.0
Quotes from the cloud-hypervisor release v15.0:

This release is the first in a new version numbering scheme to represent that
we believe Cloud Hypervisor is maturing and entering a period of stability.
With this new release we are beginning our new stability guarantees.

Other highlights from the latest release include: 1) Network device rate
limiting; 2) Support for runtime control of `virtio-net` guest offload;
3) `--api-socket` supports file descriptor parameter; 4) Bug fixes on
`virtio-pmem`, PCI BARs alignment, `virtio-net`, etc.; 5) Deprecation of
the "LinuxBoot" protocol for ELF and bzImage in the coming release.

Details can be found: https://github.com/cloud-hypervisor/cloud-hypervisor/releases/tag/v15.0

Note: The client code of cloud-hypervisor's OpenAPI is automatically
generated by `openapi-generator` [1-2]. As the API changes do not
impact usages in Kata, no additional changes in kata's runtime are
needed to work with the current version of cloud-hypervisor.

[1] https://github.com/OpenAPITools/openapi-generator
[2] https://github.com/kata-containers/kata-containers/blob/main/src/runtime/virtcontainers/pkg/cloud-hypervisor/README.md

Fixes: #1779

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-04-29 10:56:22 -07:00
Eric Ernst
906c0df405 kata-deploy: don't update worker pool nodes
Our cluster's life is shorter than time it takes to update nodes; for
better stability of the kata-deploy test, let's not update the nodes.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-04-29 09:24:51 -07:00
Hui Zhu
d8896157df ci/install_yq.sh: install_yq: Check version before return
Check the yq version before return.

Fixes: #1776

Signed-off-by: Hui Zhu <teawater@antfin.com>
2021-04-29 18:19:18 +08:00
Jakob Naucke
3ee61776d6 virtcontainers: Enable virtio-fs on s390x
Allow and configure vhost-user-fs devices (virtio-fs) on s390x. As a
consequence, appendVhostUserDevice now takes a context, which affects
its signature for other architectures.

Fixes: #1753

Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-04-29 09:54:08 +02:00
Jakob Naucke
8385ff9554 runtime: Re-vendor GoVMM
for vhost-user-fs-ccw devno support

shortlog:
f0e9a35 Merge pull request #171 from Jakob-Naucke/fix-virtiofs-s390x
abd3c7e qemu: VhostUserDevice CCW device numbers
3eaeda7 qemu: Refactor vhostuserDev.QemuParams
7183b12 Merge pull request #166 from kata-containers/egernst-patch-1
092293f Merge pull request #169 from QiuMike/master
511cf58 Fix qemu commandline issue with empty romfile
8ba62b0 Merge pull request #164 from devimc/2021-03-30/tdxSupport
b3eac95 qmp: remove frequent, chatty log
3141894 qemu: add support for tdx-guest object

Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-04-29 09:53:54 +02:00
Jakob Naucke
adba4532a4 virtcontainers: Revert "virtcontainers: Allow s390x appendVhostUserDevice"
This reverts commit 7f60911333.
Patch allowed other vhost user devices besides FS not supported on s390x
and failed to attach a CCW device number, which results in the
inavailability to use more devices after vhost-user-fs-ccw.

Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-04-29 09:43:33 +02:00
Eric Ernst
b20dff8027 Merge pull request #1759 from kata-containers/fix_update
Fix the issue that sandbox size is not right after update
2021-04-28 14:48:24 -07:00
Eric Ernst
ede078bc85 kata-deploy: aks-test: bump kubernetes/containerd
Bump to 1.20, latest aks-engine

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-04-28 10:41:28 -07:00
Eric Ernst
484af12b54 kata-deploy: update to handle new runtimeclass path
Runtimeclass paths changed. Update the kata-deploy action's test
accordingly.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-04-28 10:41:28 -07:00
Eric Ernst
05c224c3d4 runtimeclass: add nodeSelector
To ensure we run on nodes which have Kata installed, let's add the
nodeSelector to the runtimeclass definition, and have it match the label
that we applied during installation of the kata artifacts.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-04-28 10:41:28 -07:00
zyt312074545
ee7de8abcc tools: fix build kernel shell error
Build kernel shell setup with -f, don't find patches directory path,
because patches_path is none, so fix this error.

Fixes: #1768

Signed-off-by: zyt312074545 <zyt312074545@hotmail.com>
2021-04-28 12:54:18 +00:00
Fabiano Fidêncio
783f5aba68 Merge pull request #1733 from c3d/issue/1728-subpath-limitation
docs: Document limitation regarding subpaths
2021-04-28 08:27:58 +02:00
Eric Ernst
23a8179184 Merge pull request #1756 from egernst/leave-no-virtiofs-behind
qemu: kill virtiofsd if failure to start VMM
2021-04-27 17:16:33 -07:00
Fabiano Fidêncio
cd1c1ae239 Merge pull request #1765 from wainersm/qemu_1
runtime/virtcontainers: Fix typo on qmp error msg
2021-04-27 21:23:32 +02:00
Christophe de Dinechin
7d5a4252b6 docs: Document limitation regarding subpaths
Subpaths are not supported at the moment. Document that fact.

Fixes: #1728

Signed-off-by: Christophe de Dinechin <christophe@dinechin.org>
2021-04-27 18:53:45 +02:00
Wainer dos Santos Moschetta
3677640811 runtime/virtcontainers: Fix typo on qmp error msg
"negotiate" was misspelled on qemu's qmp error message.

Fixes #1764
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-04-27 11:52:42 -04:00
Eric Ernst
12a65d2359 runtimeclass: drop stale runtimeclass definitions
- 1.13/1.14 are very old now; let's drop
- move from k8s-1.18 to just runtimeclasses directoy
- update docs to reflect the new reality

Fixes: #1425

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-04-27 08:06:09 -07:00
Hui Zhu
0787ea8073 cgroupsCreate: not set resources to c.config.Resources
cgroupsCreate will just keep the CPU resources infomation but not the
others.
Set it to c.config.Resources will clean most of resources of the
container.

This commit remove it to handle the issue.

Fixes: #1758

Signed-off-by: Hui Zhu <teawater@antfin.com>
2021-04-27 16:44:30 +08:00
Hui Zhu
831224aa22 Sandbox: Fix ContainerConfig ptr in CreateContainer and createContainers
The pointer that send to newContainer in CreateContainer and
createContainers is not the pointer that point to the address in
s.config.Containers.

This commit fix this issue.

Fixes: #1758

Signed-off-by: Hui Zhu <teawater@antfin.com>
2021-04-27 16:44:22 +08:00
Eric Ernst
a57c8ab1be qemu: kill virtiofsd if failure to start VMM
If the QEMU VMM fails to launch, we currently fail to kill virtiofsd,
resulting in leftover processes running on the host. Let's make sure we
kill these, and explicitly cleanup the virtiofs socket on the
filesystem.

Ideally we'll migrate QEMU to utilize the same virtiofsd interface that
CLH uses, but let's fix this bug as a first step.

Fixes: #1755

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-04-26 21:07:20 -07:00
bin
95e54e3f48 docs: add per-Pod Kata configurations for enable_pprof
Now enabling enable_pprof for individual pods is supported,
but not documented.

This commit will add per-Pod Kata configurations for `enable_pprof`
in file `docs/how-to/how-to-set-sandbox-config-kata.md`

Fixes: #1744

Signed-off-by: bin <bin@hyper.sh>
2021-04-26 22:20:49 +08:00
Fabiano Fidêncio
fb30c58847 Merge pull request #1747 from liubin/fix/1746-deleted-not-used-files
cli: delete not used files
2021-04-26 09:57:19 +02:00
bin
13c23fec11 cli: delete tracing code for kata-runtime binary
There are no pod/container operations in kata-runtime binary,
tracing in this package is meaningless.

Fixes: #1748

Signed-off-by: bin <bin@hyper.sh>
2021-04-26 11:11:22 +08:00
bin
ff2b9e5478 cli: delete not used files
Delete two files that not used anymore:
- src/runtime/cli/console.go
- src/runtime/cli/console_test.go

Fixes: #1746

Signed-off-by: bin <bin@hyper.sh>
2021-04-25 17:46:56 +08:00
bin
0d0a520d42 clh: return error if apiSocketPath failed
If apiSocketPath failed, should return the error, but not nil

Fixes: #1724

Signed-off-by: bin <bin@hyper.sh>
2021-04-25 10:25:42 +08:00
Lars Lehtonen
fc6bb01a7f runtime: fix dropped error
Fixes: #212

Signed-off-by: Lars Lehtonen <lars.lehtonen@gmail.com>
2021-04-24 14:18:50 -07:00
Chelsea Mafrica
8587e3a00b Merge pull request #1732 from liubin/fix/1731-delete-builtin-parameter
runtime: delete not used function parameter builtIn
2021-04-23 18:30:55 -07:00
Fabiano Fidêncio
fe2311cd4c Merge pull request #1739 from pmores/virtiofsd-extra-args-annotation-handling
add io.katacontainers.config.hypervisor.virtio_fs_extra_args handling
2021-04-23 23:22:01 +02:00
Pavel Mores
30ff6ee88b runtime: handle io.katacontainers.config.hypervisor.virtio_fs_extra_args
Users can specify extra arguments for virtiofsd in a pod spec using the
io.katacontainers.config.hypervisor.virtio_fs_extra_args annontation.
However, this annotation was ignored so far by the runtime.  This commit
fixes the issue by processing the annotation value (if present) and
translating it to the corresponding hypervisor configuration item.

Fixes #1523

Signed-off-by: Pavel Mores <pmores@redhat.com>
2021-04-23 21:09:28 +02:00
Fabiano Fidêncio
5eaf7a9982 Merge pull request #1049 from c3d/feature/1043-entropy-source-annotation
Entropy source annotation
2021-04-23 20:16:11 +02:00
bin
677f0d9904 runtime: delete not used function parameter builtIn
Parametr builtIn is not used in function updateRuntimeConfigAgent,
delete it from updateRuntimeConfigAgent and LoadConfiguration
function signature.

Fixes: #1731

Signed-off-by: bin <bin@hyper.sh>
2021-04-23 17:42:42 +08:00
Fabiano Fidêncio
a4fffa1f22 Merge pull request #1714 from littlejawa/issue_1713
runtime: Fix stdout/stderr output from container being truncated
2021-04-22 23:00:47 +02:00
Fabiano Fidêncio
b41d9a99b4 Merge pull request #1703 from lifupan/main_fix
fix the issue of missing set fsGroup for EphemeralStorage
2021-04-22 20:29:36 +02:00
Christophe de Dinechin
dcb9f40394 config: Protect annotation for entropy_source
It would be undesirable to be given an annotation like "/dev/null".
Filter out bad annotation values.

Fixes: #1043

Suggested-by: James O. D. Hunt <james.o.hunt@intel.com>
Signed-off-by: Christophe de Dinechin <dinechin@redhat.com>
2021-04-22 15:26:40 +02:00
fupan.lfp
f4c26aad00 agent: fix the issue of missing set fsGroup for EphemeralStorage
For k8s emptyDir volume, a specific fsGroup would
be set for it, thus guest should get this fsGroup
from runtime and set it properly on the EphemeralStorage
volume in guest.

Fixes: #1580

Signed-off-by: fupan.lfp <fupan.lfp@antfin.com>
2021-04-22 21:09:02 +08:00
fupan.lfp
628d55bf4c kata-agent: fix the issue of fsGroup missing
For k8s emptyDir volume, a specific fsGroup would
be set for it, thus runtime should pass this fsGroup
for EphemeralStorage to guest and set it properly on
the emptyDir volume in guest.

Fixes: #1580

Signed-off-by: fupan.lfp <fupan.lfp@antfin.com>
2021-04-22 21:08:52 +08:00
Fabiano Fidêncio
14dca3fe1f Merge pull request #1718 from egernst/qemu-assert-fix
qemu: Fix assertion failure on shutdown
2021-04-22 12:57:25 +02:00
David Gibson
e91591fff2 Merge pull request #1701 from dgibson/clippy
Assorted clippy fixes for Rust agent
2021-04-22 20:36:49 +10:00
Bin Liu
db4fbac1d3 Merge pull request #1722 from Tim-Zhang/use-channle-for-process-exit
agent: use channel instead of pipe(2) to send exit signal of process
2021-04-22 15:27:36 +08:00
David Gibson
0405beb2d8 agent: Remove unused Default implementation for NamespaceType
Currently we implement the Default trait for NamespaceType.  It doesn't
really make sense to have a default for this type though - you really need
to know what type of namespace you're setting.  In fact the Default
implementation is never used, so we can just drop it.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-22 11:54:02 +10:00
David Gibson
7b83b7ec1f agent/uevent: Better initialize Uevent in test
We had some code that initialized a Uevent to the default value, then set
specific fields to various values.  This can be accomplished inside the one
initialized using the ..Default::default() syntax.  Making this change
stops clippy from complaining.

fixes #1611

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-22 11:53:57 +10:00
David Gibson
b0190a407f agent: Use vec![] macro rather than init-then-push
We have one place where we create an empty vector then immediately push
something into it.  We can do this in one step using the vec![] macro,
which stops clippy complaining.

fixes #1611

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-22 11:53:56 +10:00
David Gibson
1c43245e3e agent/device: Remove unneeded Result<> wrappers from uev matchers
The various type implementing the UeventMatcher trait have new() methods
which return a Result<>, however none of them can actually fail.  This is
a leftover from their development where some versions could fail to
initialize.  Remove the unneccessary wrappers to silence clippy.

fixes #1611

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-22 11:53:34 +10:00
David Gibson
e41cdb8b9f agent: Use str::is_empty() method in config::get_string_value()
An explicit check against "" is a bit less clear and makes clippy complain.

fixes #1611

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-22 11:53:29 +10:00
David Gibson
2377c0975c agent: Use CamelCase for NamespaceType values
Currently these are in all-caps, to match typical capitalization of IPC,
UTS and PID in the world at large.  However, this violates Rust's
capitalization conventions and makes clippy complain.

fixes #1611

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-22 11:53:24 +10:00
David Gibson
75eca6d56f agent/rustjail: Clean up error path in execute_hook()s async task
Clippy (in Rust 1.51 at least) has some complaints about this closure
inside execute_hook() because it uses explicit returns in some places
where it doesn't need them, because they're the last expression in the
function.

That isn't necessarily obvious from a glance, but we can make clippy happy
and also make things a little clearer: first we replace a somewhat verbose
'match' using Option::ok_or_else(), then rearrange the remaining code to
put all the error path first with an explicit return then the "happy" path
as the stright line exit with an implicit return.

fixes #1611

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-22 11:53:23 +10:00
David Gibson
6ce1e56d20 agent/rustjail: Remove an unnecessary PathBuf
PathBuf is an owned, mutable Path.  We don't need those properties in
get_value_from_cgroup() so we can use a Path instead.  This may be slightly
safer, and definitely stops clippy (version 1.51 at least) from
complaining.

fixes #1611

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-22 11:53:04 +10:00
David Gibson
3c4485ece3 agent/rustjail: Clean up some static definitions with vec! macro
DEFAULT_ALLOWED_DEVICES and DEFAULT_DEVICES are essentially global
constant lists.  They're implemented as a lazy_static! initialized Vec
values.

The code to initialize them creates an empty Vec then pushes values
onto it.  We can simplify this a bit by using the vec! macro.  This
might be slightly more efficient, and it definitely stops recent
clippy versions (e.g. 1.51) from complaining about it.

fixes #1611

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-22 11:52:59 +10:00
David Gibson
eaec5a6c06 agent/oci: Change name case to make clippy happy
Recent versions of clippy (e.g. in Rust 1.51) complain about a number
of names in the oci crate, which don't obey Rust's normal CamelCasing
conventions.

It's pretty clear that these don't obey the usual rules because they
are attempting to preserve conventional casing of existing acronyms
they incorporate ("VM", "POSIX", etc.).  However, it's been my
experience that matching the case and name conventions of your
environs is more important than matching case with external norms.

Therefore, this patch changes all the identifiers in the oci crate to
match Rust conventions.  Their users in the rustjail crate are updated
to match.

fixes #1611

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-22 11:52:54 +10:00
David Gibson
3f5fdae0d8 agent/rustjail: (trivial) Clean up comment on process_grpc_to_oci()
This comment appears to be connected specifically with this function, but
has some other items separating it for no particular reason.  It also has
a typo.  Correct both.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-22 11:52:45 +10:00
David Gibson
210f39a46f agent/rustjail: Simplify renaming imports
Functions in rustjail deal with both the local oci module's data structure
and the protocol::oci module's data structure.  Since these both cover the
OCI container config they are quite similar and have many identically named
types.

To avoid conflicts, we import many things from those modules with altered
names.  However the names we use oci* and grpc* don't fit the normal Rust
capitalization convention for types.

However by renaming the import of the 'protocols::oci' module itself to
'grpc', we can actually get rid of the many renames by just qualifying at
each use site with only a very small increase in verbosity.  As a bonus
this gets rid of multiple 'use' items scattered through the file.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-22 11:52:42 +10:00
Julien Ropé
d4a5413774 runtime: Fix stdout/stderr output from container being truncated
Do not close the tty as part of the stdout redirection routine.
The close is already happening a couple lines below, after all routines
have finished.

Fixes #1713

Signed-off-by: Julien Ropé <jrope@redhat.com>
2021-04-21 17:09:09 +02:00
Tim Zhang
8ecf8e5c1f agent: use channel instead of pipe to send exit signal of process
The situation is not a IPC scene, pipe(2) is too heavy.

We have tokio::sync::watch::channel after tokio has been introduced.
The channel has better performance and easy to use.

Fixes: #1721

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-04-21 16:47:41 +08:00
Chelsea Mafrica
1c222c75ac Merge pull request #1697 from jodh-intel/improve-agent-shutdown-handling
Improve agent shutdown handling
2021-04-20 21:25:36 -07:00
Manabu Sugimoto
81c5ff1231 agent: Update seccomp configuration for errnoRet and flags
Update:
- Make the type of errnoRet in oci.proto oneof
- Update seccomp_grpc_to_oci that can set errnoRet as EPREM if the
value is empty.
- Update the oci.pb.go based on the above fixes
- Add seccomp errnoRet and flags option to configs in rustjail

Fixes: #1719

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2021-04-21 12:16:58 +09:00
Eric Ernst
8a33bd4c19 qemu: Fix assertion failure on shutdown
Occassional coredumps and OOMs observed on shutdown path due to improper
BH handling during aio cleanup in QEMU. Thankfully this has been fixed
in upstream already -- let's carry this patch.

Fixes: #1717

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2021-04-20 15:28:30 -07:00
Fabiano Fidêncio
4c177b5c40 Merge pull request #1599 from Jakob-Naucke/virtiofs-s390x
Enable virtio-fs on s390x
2021-04-20 21:07:15 +02:00
Carlos Venegas
cd27308755 Merge pull request #1432 from dgibson/bug1431
block: Generate PCI path for virtio-blk devices on clh
2021-04-20 12:00:09 -05:00
Fabiano Fidêncio
9df86d28a5 Merge pull request #1678 from cmaf/remove-spans-healthcheck
runtime: Disable trace for healthcheck
2021-04-20 18:38:47 +02:00
Jakob Naucke
7f60911333 virtcontainers: Allow s390x appendVhostUserDevice
Remove the prohibition of vhost-user devices on s390x, which are by now
supported (e.g. vhost-user-fs-ccw). As a consequence,
appendVhostUserDevice no longer needs an error in its signature.
This enables virtio-fs support on s390x.

Fixes: #1469

Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-04-20 12:20:32 +02:00
Jakob Naucke
67ac4f4585 runtime: update GoVMM for memory backend support
Update GoVMM to get memory backend support for non-DIMM setups. This is
necessary for virtio-fs on s390x.

Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-04-20 12:19:52 +02:00
Tim Zhang
34becbf289 Merge pull request #1705 from dgibson/bug1702
agent/rustjail: Fix accidental damage from tokio conversion
2021-04-19 17:45:41 +08:00
David Gibson
6577b01a5c agent/rustjail: Fix accidental damage from tokio conversion
register_memory_event_v2() includes a closure spawned as an async task
with tokio.  At the end of that closure, there's a test for a closed fd
exiting if so.  But this is right at the end of the closure when it was
about to exit anyway, so this does nothing.

This code was originally an explicit thread, converted to a tokio task
by 332fa4c "agent: switch to async runtime".  It looks like there was an
error during conversion, where this logic was accidentally moved out of the
while loop above, where it makes a lot more sense.

Put it back into the loop.

fixes #1702

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-19 16:54:43 +10:00
James O. D. Hunt
de2631e711 utils: Make WaitLocalProcess safer
Rather than relying on the system clock, use a channel timeout to avoid
problems if the system time changed.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-04-15 15:46:42 +01:00
James O. D. Hunt
9256e590dc shutdown: Don't sever console watcher too early
Fixed logic used to handle static agent tracing.

For a standard (untraced) hypervisor shutdown, the runtime kills the VM
process once the workload has finished. But if static agent tracing is
enabled, the agent running inside the VM is responsible for the
shutdown. The existing code handled this scenario but did not wait for
the hypervisor process to end. The outcome of this being that the
console watcher thread was killed too early.

Although not a problem for an untraced system, if static agent tracing
was enabled, the logs from the hypervisor would be truncated, missing the
crucial final stages of the agents shutdown sequence.

The fix necessitated adding a new parameter to the `stopSandbox()` API,
which if true requests the runtime hypervisor logic simply to wait for
the hypervisor process to exit rather than killing it.

Fixes: #1696.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-04-15 15:22:00 +01:00
James O. D. Hunt
51ab870091 utils: Improve WaitLocalProcess
Previously, the hypervisors were sending a signal and then checking to
see if the process had died by sending the magic null signal (`0`). However,
that doesn't work as it was written: the logic was assuming sending the
null signal to a process that was dead would return `ESRCH`, but it
doesn't: you first need to you `wait(2)` for the process before sending
that signal. This means that previously, all affected hypervisors would
appear to take `timeout` seconds to end, even though they had _already_
finished.

Now, the hypervisors true end time will be seen as we wait for the
processes before sending the null signal to ensure the process has
finished.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-04-15 14:51:06 +01:00
James O. D. Hunt
507ef6369e utils: Add waitLocalProcess function
Refactored some of the hypervisors to remove the duplicated code used to
trigger a shutdown.

Also added some unit tests.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-04-15 14:51:03 +01:00
Chelsea Mafrica
0f2fe4a418 Merge pull request #1565 from Jakob-Naucke/s390x-fix-cli-test
cli: Use genericGetExpectedHostDetails on s390x
2021-04-14 10:25:23 -07:00
Chelsea Mafrica
038cecaa3f Merge pull request #1684 from dgibson/moo
runtime/tests: Change "moo FAILURE" message
2021-04-13 09:08:46 -07:00
David Gibson
1d5098de70 agent/block: Generate PCI path for virtio-blk devices on clh
Currently runtime and agent special case virtio-blk devices under clh,
ostensibly because the PCI address information is not available in that
case.

In fact, cloud-hypervisor's VmAddDiskPut API does return a PciDeviceInfo,
which includes a PCI address.  That API is broken, because PCI addressing
depends on guest (firmware or OS) actions that the hypervisor won't know
about.  clh only gets away with this because it only uses a single PCI root
and never uses PCI bridges, in which case the guest addresses are
accurately predictable: they always have domain and bus zero.

Until https://github.com/kata-containers/kata-containers/pull/1190, Kata
couldn't handle PCI addressing unless there was exactly one bridge, which
might be why this was actually special-cased for clh.

With #1190 merged, we can handle more general PCI paths, and we can derive
a trivial (one element) PCI path from the information that the clh API
gives us.  We can use that to remove this special case.

fixes #1431

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-13 13:29:24 +10:00
David Gibson
e7c97f0f5d runtime/tests: Change "moo FAILURE" message
Change the "moo FAILURE" message shown in a couple of the unit tests to
"moo message".  This means that searching for unrelated failures in the
test output by looking for "FAIL" won't show these messages as false
positives any more.

fixes #1683

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-13 13:25:03 +10:00
GabyCT
2e60a9d3d9 Merge pull request #1681 from fidencio/wip/improve-release-process-documentation
Update the information about the release process
2021-04-12 15:05:36 -05:00
Fabiano Fidêncio
8bc53498b4 docs: Simplify the repo bumping section
Instead of giving too many options, let's just mention the script and
rely on it entirely for the release.

This helps to simplify the document and have one well stablished
process.

Fixes: #1680

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-04-12 11:47:18 +02:00
Fabiano Fidêncio
8a47b05a7c docs: Mention that an app token should be used with hub
During the 2.1.0-alpha2 / 2.0.3 release, I had a hard time trying to
perform anything related to hub as the app token should be used instead
of the user password.  Thankfully Carlos pointed me out to that
direction, but it'd be good to have it explicitly documented.

Fixes: #1680

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-04-12 11:47:15 +02:00
Fabiano Fidêncio
d434c2e9c6 docs: OBS account is not require anymore
Since we stopped building kata-containers packages as part of our
release process, there's no need to have an OBS account to be able to do
the release.

Fixes: #1680

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-04-12 11:47:09 +02:00
Fupan Li
17d33868c2 Merge pull request #1670 from liubin/1668-remove-ProcessListContainer-API
remove ProcessListContainer API
2021-04-12 10:22:37 +08:00
Chelsea Mafrica
543f9da3ba runtime: Disable trace for healthcheck
With tracing enabled, grpc health check generates a large number of
spans which creates too much data for tasks running longer than a few
minutes. To solve this, remove span creation from kata agent check() and
sendReq() where the majority of the spans come from. Leave contexts in
functions for subsequent calls that create spans.

Fixes #1395

Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2021-04-09 15:47:00 -07:00
Fabiano Fidêncio
4e62d596db Merge pull request #1675 from fidencio/2.1.0-alpha2-branch-bump
# Kata Containers 2.1.0-alpha2
2021-04-09 20:14:19 +02:00
Fabiano Fidêncio
4f164b5246 release: Kata Containers 2.1.0-alpha2
- release: Do not git add kata-{deploy,cleanup}.yaml for the tests repo
- kata-deploy: add runtimeclass that includes pod overhead
- release: automatically bump the version of the kata-deploy images
- Refine uevent matching conditions
- docs: update dev-guide to include fixes from 1.x
- virtcontainers: replace newStore by store in Sandbox struct
- agent: log the mount point if it is already mounted
- tools/agent-ctl: Update Cargo.lock
- agent: Rework the debug console
- oci: Update seccomp configuration
- kernel: update experimental kernel to 5.10.x
- kata-deploy: Fix `test-kata.sh` and do some small cleanups / improvements in the kata-deploy script
- github: Fix slash-command-action usage
- rustjail: fix the issue of missing default home env
- Make uevent watching mechanism more flexible
- ci/openshift-ci: Prepare to build on CentOS 8
- docs: update configuration for passing annotations in conatinerd
- Revert "github: Remove kata-deploy-test action"
- runtime: increase dial timeout
- qemu experimental: Move to latest tree on virtio-fs-dev (qemu 6.0 + DAX patches).
- github: Remove kata-deploy-test action
- agent: s390x statfs constants
- kernel: upgrade kernel to 5.10.x for arm64.
- Don't do anything in Pipestream::shutdown
- Fix fsgroup
- agent: Remove many "panic message is not string literal" warnings
- osbuilder: Update QAT Dockerfile with new QAT driver version
- osbuilder: update dockerfiles to utilize IMAGE_REGISTRY
- Only keep one VERSION file
- Dechat deruntime
- runtime: Format auto-generated client code for cloud-hypervisor API
- runtime: use concrete KataAgentConfig instead of interface type
- versions: Update cloud-hypervisor to release v0.14.1
- runtime: import runtime/v2/runc/options to decode request from Docker
- virtcontainers/fc: Upgrade Firecracker to v0.23.1
- docs: Remove ubuntu installation guide
- docs: Update snap install guide
- docs: update how-to-use-k8s-with-cri-containerd-and-kata.md
- Update install docs for Fedora and CentOS
- action: fix missing qemu tag
- Remove installation guides for SLE and openSUSE
- kernel: Enable OVERLAY_FS_{METACOPY,XINO_AUTO}
- versions: kernel 5.10.x
- virtcontainers: Fix missing contexts in s390x
- runtime: makefile allow override DAX value

11897248 release: Do not git add kata-{deploy,cleanup}.yaml for the tests repo
2b5f79d6 release: automatically bump the version of the kata-deploy images
8682d6b7 docs: update dev-guide to include fixes from 1.x
f444adb5 kata-cleanup: Explicitly add tag to the container image
12582c2f kata-deploy: add runtimeclass that includes pod overhead
d75fe956 virtcontainers: replace newStore by store in Sandbox struct
342eb765 tools/agent-ctl: Update Cargo.lock
24b0703f agent: fix test for the debug console
79033257 agent: async the debug console
8ea2ce9a agent/device: Remove legacy uevent matching
5d007743 agent/device: Refine uevent matching for pmem devices
9017e110 agent: start to rework the debug console
a59e07c1 agent/define: Refine uevent matching for virtio-scsi devices
484a3647 agent/device: Rework uevent handling for virtio-blk devices
7873b7a1 github: Fix slash-command-action usage
eda8da1e github: Revert "github: Remove kata-deploy-test action"
a938d903 rustjail: fix the issue of missing default home env
b0e4618e docs: update configuration for passing annotations in conatinerd
d43098ec kata-deploy: Adapt regex for testing kata-deploy
107ceca6 kernel: update experimental kernel to 5.10.x
ca4dccf9 release: Get rid of "master"
c2197cbf release: Use sudo to install hub
49eec920 agent: log the tag and mount point if it is already mounted
16f732fc ci/lib: Use git to clone the tests repository
9281e567 ci/openshift-ci: Add build root dockerfile
1cce9300 github: Remove kata-deploy-test action
0828f9ba agent/uevent: Introduce wait_for_uevent() helper
16ed55e4 agent/device: Use consistent matching for past and future uevents
4b16681d agent/uevent: Put matcher object rather than "device address" in watch list
b8b32248 agent/uevent: Consolidate event matching logic
d2caff6c agent: Re-organize uevent processing
55ed2ddd agent: Store uevent watchers in Vec rather than HashMap
91e0ef5c agent/uevent: Report whole Uevents to device watchers
36420054 agent: Store whole Uevent in map, rather than just /dev name
06162025 agent/device: Move GLOBAL_DEVICE_WATCHER into Sandbox
11ae32e3 agent/device: Fix path matching for PCI devices
4f608804 agent/device: Update test_get_device_name()
ee6a590d agent: add test test_pipestream_shutdown
4a2d4370 agent: don't do anything in Pipestream::shutdown
e3e670c5 agent/device: Forward port test for get_device_name() from Kata 1.x
ed08980f agent: Remove many "panic message is not string literal" warnings
f365bdb7 versions: qemu-experimental: 6.0~rc 470dd6
6491b9d7 qemu: Add support to build static qemu for dev tree
13653e7b runtime: increase dial timeout
935460e5 osbuilder: update dockerfiles to utilize IMAGE_REGISTRY
010d57f4 osbuilder: Update QAT Dockerfile with new QAT driver version
adb866ad kata-deploy: Adapt to the correct tag name
60adc7f0 VERSION: Use the correct form
a4c125a8 trace: move gRPC requests from debug to trace
50fff977 trace: move trace span chatter to trace rather than info
28bd8c11 kernel: upgrade kernel to 5.10.x for arm64.
6fe48329 runtime: use concrete KataAgentConfig instead of interface type
64939425 mount: fix the issue of missing set fsGroup
88e58a4f agent: fix the issue of missing pass fsGroup
572aff53 build: Only keep one VERSION file
0c38d9ec runtime: Fix the format of the client code of cloud-hypervisor APIs
52cacf88 runtime: Format auto-generated client code for cloud-hypervisor API
84b62dc3 versions: Update cloud-hypervisor to release v0.14.1
4a38ff41 docs: Update snap install guide
ede1ab86 docs: Remove ubuntu installation guide
6255cc19 virtcontainers/fc: Upgrade Firecracker to v0.23.1
2c47277c docs: update how-to-use-k8s-with-cri-containerd-and-kata.md
317f55f8 docs: Update minimum version for Fedora
1ce29fc9 docs: Update CentOS install docs
3f90561b docs: Update Fedora install docs
8a1c6c3f action: fix missing qemu tag
a9ff9c87 docs: Remove openSUSE installation guide
2888ceb0 docs: Remove SLE installation guide
09d454ac runtime: import runtime/v2/runc/options to decode request from Docker
0b502d15 runtime: makefile allow override DAX value
a65519b9 versions: keep using kernel 5.4.x for ARM
31ced01e virtcontainers: Fix missing contexts in s390x
52a276fb agent: Fix type for PROC_SUPER_MAGIC on s390x
5b7c8b7d agent: Update cgroups-rs to 0.2.5
c035cdb3 versions: kernel 5.10.x
660b0473 oci: Update seccomp configuration
8c1e0d30 kernel: Enable OVERLAY_FS_{METACOPY,XINO_AUTO}

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-04-09 17:56:29 +02:00
Fabiano Fidêncio
73aa74b4bb Merge pull request #1673 from fidencio/wip/release-dont-git-add-kata-deploy-cleanup-on-tests-repo
release: Do not git add kata-{deploy,cleanup}.yaml for the tests repo
2021-04-09 16:21:14 +02:00
Fabiano Fidêncio
1189724822 release: Do not git add kata-{deploy,cleanup}.yaml for the tests repo
I was, mistakenly, `git add`ing those files unconditionally.

Fixes: #1672

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-04-09 14:38:13 +02:00
Fabiano Fidêncio
43a9d4e90a Merge pull request #1666 from egernst/rc-overhead
kata-deploy: add runtimeclass that includes pod overhead
2021-04-09 12:44:41 +02:00
bin
421439c633 API: remove ProcessListContainer/ListProcesses
This commit will remove ProcessListContainer API from VCSandbox
and ListProcesses from agent.proto.

Fixes: #1668

Signed-off-by: bin <bin@hyper.sh>
2021-04-09 17:34:25 +08:00
Peng Tao
efd5d6f1fe Merge pull request #1667 from fidencio/wip/automatically-bump-kata-deploy-image-version
release: automatically bump the version of the kata-deploy images
2021-04-09 16:06:07 +08:00
David Gibson
0e04d6299b Merge pull request #1642 from dgibson/ueventplus
Refine uevent matching conditions
2021-04-09 13:10:52 +10:00
Fabiano Fidêncio
2b5f79d685 release: automatically bump the version of the kata-deploy images
Let's teach `update-repository-version.sh` to automatically bump the
version of the kata-deploy images to be used within that release, when
running against the `kata-containers` repo.

Fixes: #1665

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-04-09 00:31:27 +02:00
GabyCT
368ab486f6 Merge pull request #1662 from egernst/docs-cleanup
docs: update dev-guide to include fixes from 1.x
2021-04-08 15:43:43 -05:00
Eric Ernst
2334b858a0 Merge pull request #1661 from liubin/1660-replace-newStore-by-store
virtcontainers: replace newStore by store in Sandbox struct
2021-04-08 13:17:44 -07:00
Eric Ernst
8682d6b7ea docs: update dev-guide to include fixes from 1.x
This addresses a few gaps with respect to fixes in 1.x docs:
  - Cleanup QEMU information in order to drop references to qemu-lite
  - Make sure we include directions for debug console in case of QEMU

Fixes: #574

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2021-04-08 12:55:59 -07:00
Fabiano Fidêncio
f444adb51b kata-cleanup: Explicitly add tag to the container image
We have the tags explicitly set on kata-deploy, let's do the same for
kata-cleanup.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-04-08 21:43:59 +02:00
Eric Ernst
12582c2f6d kata-deploy: add runtimeclass that includes pod overhead
The overhead values may not be perfect, but this is a start, and a good
reference.

Fixes: #580

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2021-04-08 12:42:15 -07:00
bin
d75fe95685 virtcontainers: replace newStore by store in Sandbox struct
The property name make newcomers confused when reading code.
Since in Kata Containers 2.0 there will only be one type of store,
so it's safe to replace it by `store` simply.

Fixes: #1660

Signed-off-by: bin <bin@hyper.sh>
2021-04-08 23:59:16 +08:00
Eric Ernst
324b026a77 Merge pull request #1604 from wainersm/agent_mount-1
agent: log the mount point if it is already mounted
2021-04-08 08:26:12 -07:00
Eric Ernst
6d3053be4c Merge pull request #1656 from fidencio/wip/update-agent-ctl-cargo-lock
tools/agent-ctl: Update Cargo.lock
2021-04-08 08:25:17 -07:00
Fupan Li
521887db16 Merge pull request #1648 from Tim-Zhang/rework-debug-console
agent: Rework the debug console
2021-04-08 23:14:52 +08:00
Fabiano Fidêncio
342eb765c2 tools/agent-ctl: Update Cargo.lock
While performing a `make` in the top directory of the project, I've
noticed that agent-ctl's Cargo.lock file was not up-to-date.

Fixes: #1655

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-04-08 10:24:08 +02:00
Tim Zhang
24b0703fda agent: fix test for the debug console
Fix test for the debug console.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-04-08 14:57:40 +08:00
Tim Zhang
790332575b agent: async the debug console
Make the debug console in this commit.
Finish the rework of debug console.

Fixes: #1647

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-04-08 14:57:36 +08:00
David Gibson
8ea2ce9a31 agent/device: Remove legacy uevent matching
DevAddrMatcher existed purely as a transitional step as we refined the
uevent matching logic for each of the different device types we care about.
We've now done that, so it can be removed along with several related
pieces.

fixes #1628

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-08 12:30:18 +10:00
David Gibson
5d007743c1 agent/device: Refine uevent matching for pmem devices
Use the new uevent matching infrastructure to refine the matching for pmem
devices to something more pinned down to that device type.  While we're
there, fix a few anciliary problems with get_pmem_device_name():

- The name is poor - the *input* to this function is the expected device
  name, so the result isn't helpful, except that it needs to wait for the
  device to be ready in the guest.  Change it to wait_for_pmem_device() and
  explicitly check that the returned device name matches the one expected.
- Remove an incorrect comment in nvdimm_storage_handler() (the only caller)
  which appears to have been copied from the virtio-blk path, but then
  become stale.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-08 12:02:39 +10:00
James O. D. Hunt
9017e1100b agent: start to rework the debug console
It's the first commit of the rework.

Fixes: #1647

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-04-08 09:57:48 +08:00
David Gibson
a59e07c1f9 agent/define: Refine uevent matching for virtio-scsi devices
Current get_scsi_device_name() uses the legacy uevent matching which
isn't very precise.  This refines it to use a specific matcher
implementation.  While we're at it:

- No longer insist on the SCSI controller being under the PCI root.
  It generally will be, but there's no particular reason to require
  it.

The matcher still has a problem in that it won't work sensibly if
there are multiple SCSI busses in the guest.  Fixing that requires
changes on the runtime side as well, though, so it's beyond scope for
this change.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-08 11:13:00 +10:00
David Gibson
484a364729 agent/device: Rework uevent handling for virtio-blk devices
There are some problems with get_pci_device_name():

1) It's misnamed: in fact it is only used for handling virtio-blk PCI
   devices.  It's also only correct for virtio-blk devices, the event
   matching doesn't locate the "raw" PCI device, but rather the block
   device created by virtio-blk as a child of the PCI device itself.

2) The uevent matching is imprecise.  As all things using the legacy
   DevAddrMatcher, it matches on a bunch of conditions used across several
   different device types, not all of which make sense for virtio-blk pci
   devices specifically.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-08 11:13:00 +10:00
Eric Ernst
15c2d7ed30 Merge pull request #1400 from ManaSugi/update-oci-seccomp
oci: Update seccomp configuration
2021-04-07 15:18:19 -07:00
Carlos Venegas
ec36883fe3 Merge pull request #1638 from jcvenegas/2021-04-06/linux-5.10.x-qemu-experimental
kernel: update experimental kernel to 5.10.x
2021-04-07 16:02:44 -05:00
Wainer Moschetta
69dbcaa32b Merge pull request #1633 from fidencio/wip/fix-test-kata-deploy-script
kata-deploy: Fix `test-kata.sh` and do some small cleanups / improvements in the kata-deploy script
2021-04-07 16:13:17 -03:00
GabyCT
a374b007bd Merge pull request #1646 from fidencio/wip/fix-slash-command-action-usage
github: Fix slash-command-action usage
2021-04-07 10:17:04 -05:00
GabyCT
d922070c50 Merge pull request #1644 from lifupan/fix_env
rustjail: fix the issue of missing default home env
2021-04-07 10:16:07 -05:00
GabyCT
81bcded9a3 Merge pull request #1492 from dgibson/uevent
Make uevent watching mechanism more flexible
2021-04-07 10:15:33 -05:00
Fabiano Fidêncio
d2fda148fa Merge pull request #1637 from wainersm/openshift_ci_centos8
ci/openshift-ci: Prepare to build on CentOS 8
2021-04-07 15:37:41 +02:00
Fabiano Fidêncio
514ba369fd Merge pull request #1630 from liubin/1629-update-containerd-config
docs: update configuration for passing annotations in conatinerd
2021-04-07 15:36:21 +02:00
Fabiano Fidêncio
7873b7a1f9 github: Fix slash-command-action usage
`/test-kata-deploy` command does **not** work, and the output returned
is:
```
Error: Comment didn't contain a valid slash command
```

So, why does this happen?

This is the regex used: `^\/([\w]+)\b *(.*)?$`, being the important part
of the command "\/([\w]+)\b", with the rest being arguments to it.
Okay, `\w` is the key here, as `\w` means: a-z, A-Z, 0-9, including the
_.

Our command is `/test-kata-deploy`, and `-` is not present as part of
`\w`.  Knowing this we need to update the command to something like:
`/test_kata_deploy`

Fixes: #1645

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-04-07 14:46:21 +02:00
Fabiano Fidêncio
b09fda36bd Merge pull request #1641 from fidencio/wip/re-add-kata-deploy-test-action
Revert "github: Remove kata-deploy-test action"
2021-04-07 13:18:25 +02:00
Fabiano Fidêncio
eda8da1ec5 github: Revert "github: Remove kata-deploy-test action"
This partially reverts commit 1cce930071.

As mentioned in #1635, the malformed yaml wouldn't allow us to actually
test changes that were supposed to be test by this action.

So, this is now reverted and adapted accordingly.

Main differences from what we had before:
* As it tests kata-deploy itself, not the statically built binaries,
  let's just use the binaries from 2.0.0 release;
* Adapt download and deploy location to the
  `kata-containers/kata-containers` repo, as the original action was
  based on 1.x repos;

Fixes: #1640

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-04-07 12:07:30 +02:00
fupan.lfp
a938d90310 rustjail: fix the issue of missing default home env
first get the "HOME" env from "/etc/passwd", if
there's no corresponding uid entry in /etc/passwd,
then set "/" as the home env.

Fixes: #1643

Signed-off-by: fupan.lfp <fupan.lfp@antfin.com>
2021-04-07 15:11:28 +08:00
bin
b0e4618e84 docs: update configuration for passing annotations in conatinerd
Using "io.containerd.kata.v2" instead of deprecated "io.containerd.runc.v1".

Fixes: #1629

Signed-off-by: bin <bin@hyper.sh>
2021-04-07 10:16:02 +08:00
Fabiano Fidêncio
d43098ec21 kata-deploy: Adapt regex for testing kata-deploy
On commit 60f6315 we've started adding the specific version of the image
to be used, in order to ensure people using our content from a tarball
would be relying on the correct image.

However, later on, @bergwolf figured out it had some undesired side
effects, such as
https://github.com/kata-containers/kata-containers/runs/2235812941?check_suite_focus=true

What happens there is that the regular expression used to point the
image to a testing one doesn't take into consideration the $VERSION, and
that breaks the deployment.

Depends-on: github.com/kata-containers/kata-containers#1641
Fixes: #1632

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-04-06 23:41:24 +02:00
GabyCT
0b87fd436f Merge pull request #1544 from snir911/timeout
runtime: increase dial timeout
2021-04-06 16:10:51 -05:00
Carlos Venegas
84e453a643 Merge pull request #1625 from jcvenegas/2021-04-05/qemu-experimental-6.0
qemu experimental: Move to latest tree on virtio-fs-dev (qemu 6.0 + DAX patches).
2021-04-06 15:29:05 -05:00
Carlos Venegas
107ceca680 kernel: update experimental kernel to 5.10.x
Relevant changes for experimental :

42d3e2d04 virtiofs: calculate number of scatter-gather elements accurately
413daa1a3 fuse: connection remove fix
bf109c640 fuse: implement crossmounts
1866d779d fuse: Allow fuse_fill_super_common() for submounts
fcee216be fuse: split fuse_mount off of fuse_conn
8f622e949 fuse: drop fuse_conn parameter where possible
24754db27 fuse: store fuse_conn in fuse_req
c6ff213fe fuse: add submount support to <uapi/linux/fuse.h>
d78092e49 fuse: fix page dereference after free
9a752d18c virtiofs: add logic to free up a memory range
d0cfb9dcb virtiofs: maintain a list of busy elements
6ae330cad virtiofs: serialize truncate/punch_hole and dax fault path
9483e7d58 virtiofs: define dax address space operations
2a9a609a0 virtiofs: add DAX mmap support
c2d0ad00d virtiofs: implement dax read/write operations
ceec02d43 virtiofs: introduce setupmapping/removemapping commands
fd1a1dc6f virtiofs: implement FUSE_INIT map_alignment field
45f2348ec virtiofs: keep a list of free dax memory ranges
1dd539577 virtiofs: add a mount option to enable dax
22f3787e9 virtiofs: set up virtio_fs dax_device
f4fd4ae35 virtiofs: get rid of no_mount_options
b43b7e81e virtiofs: provide a helper function for virtqueue initialization

Fixes: #1639

Signed-off-by: Carlos Venegas <jos.c.venegas.munoz@intel.com>
2021-04-06 19:06:09 +00:00
Fabiano Fidêncio
ca4dccf980 release: Get rid of "master"
We don't use the "master" branch for anything in
`kata-containers/kata-containers`.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-04-06 20:28:42 +02:00
Fabiano Fidêncio
c2197cbf2b release: Use sudo to install hub
This doesn't make much difference for the automated process we have in
place, but makes a whole lot of difference for those trying to have the
binaries deployed locally.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-04-06 20:28:42 +02:00
Fabiano Fidêncio
dc373d0161 Merge pull request #1635 from fidencio/wip/remove-kata-deploy-test-action
github: Remove kata-deploy-test action
2021-04-06 20:27:54 +02:00
Wainer dos Santos Moschetta
49eec92038 agent: log the tag and mount point if it is already mounted
On commit 17e9a2cff5 it was introduced a guard for the case the mount point is already
mounted. Instead of log only the mount tag ("kataShared") with this change it will print
both tag and mount point path.

Fixes: #1398
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-04-06 14:14:59 -04:00
Wainer dos Santos Moschetta
16f732fc18 ci/lib: Use git to clone the tests repository
On clone_tests_repo() use git instead of `go get` to clone and/or
update the tests repository.

Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-04-06 13:44:02 -04:00
Wainer dos Santos Moschetta
9281e56705 ci/openshift-ci: Add build root dockerfile
This adds the dockerfile which is used by the OpenShift CI operator to build
the build root image. It is installed git as it is required by the operator
to clone repositories. The sudo package is also installed because many scripts
relies on the command but it is not installed by tests/.ci/setup_env_centos.sh.

Fixes #1636

Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2021-04-06 13:44:02 -04:00
Fabiano Fidêncio
1cce930071 github: Remove kata-deploy-test action
Currently the action is not running because it's broken, and it was
broken by 50fea9f.

Sadly, I cannot just test a fix on a PR as every single time we end up
triggering what's currently on main, rather than triggering the content
of the PR itself.

With this in mind, let me just remove the file and re-add it as part of
a new PR and, hopefully, have it tested in this way.

Sorry for the breakage, by the way.

Fixes: #1634

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-04-06 19:01:53 +02:00
GabyCT
aac852a0bc Merge pull request #1561 from Jakob-Naucke/s390x-statfs-constants
agent: s390x statfs constants
2021-04-06 11:11:40 -05:00
GabyCT
201ad249c2 Merge pull request #1597 from jongwu/PA
kernel: upgrade kernel to 5.10.x for arm64.
2021-04-06 09:44:16 -05:00
David Gibson
0828f9ba70 agent/uevent: Introduce wait_for_uevent() helper
get_device_name() contains logic to wait for a specific uevent, then
extract the /dev node name from it.  In future we're going to want similar
logic to wait on uevents, but using different match criteria, or getting
different information out.

To simplify this, add a wait_for_uevent() helper in the uevent module,
which takes an explicit UeventMatcher object and returns the whole uevent
found.

To make testing easier, we also extract the cut down uevent watcher from
test_get_device_name() into a new spawn_test_watcher() helper.  Its used
for both test_get_device_name() and a new test_wait_for_uevent() amd will
be useful for more tests in future.

fixes #1484

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-06 21:14:52 +10:00
David Gibson
16ed55e440 agent/device: Use consistent matching for past and future uevents
get_device_name() looks at kernel uevents to work out the device name for
a given PCI (usually) address.  However, when we call it we can't know if
the uevent we're interested in has already happened (in which case it will
have been recorded in Sandbox::uevent_map) or yet to come, in which case
we need to register to watch it.

However, we currently match differently against past and future events.
For past events we simply look for a sysfs path including the address, but
for future events we use a complex bit of logic in the is_match() closure.
Change it to use the exact same matching logic in both cases.

fixes #1397

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-06 21:14:33 +10:00
David Gibson
4b16681d87 agent/uevent: Put matcher object rather than "device address" in watch list
Currently, Sandbox::uevent_watchers lists uevents to watch for by a
"device address" string.  This is not very clearly defined, and is
matched against events with a rather complex closure created in
Uevent::process_add().

That closure makes a bunch of fragile assumptions about what sort of
events we could ever be interested in.  In some ways it is too
restrictive (requires everything to be a block device), but in others
is not restrictive enough (allows things matching NVDIMM paths, even
if we're looking for a PCI block device).

To allow the clients more precise control over uevent matching, we
define a new UeventMatcher trait with a method to match uevents.  We
then have the atchers list include UeventMatcher trait objects which
are used directly by Uevent::process_add(), instead of constructing
our match directly from dev_addr.

For now we don't actually change the matching function, or even use
multiple different trait implementations, but we'll refine that in
future.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-06 21:14:18 +10:00
David Gibson
b8b322482c agent/uevent: Consolidate event matching logic
The event matching logic in Uevent::process_add() is split into two parts.
The first checks if we care about the event at all, the second checks
whether the event is relevant to a particular watcher.

However, we're going to be adding more types of watchers in future, which
will make the global filter too restrictive.  Fold the two bits of logic
together into a per-watcher filter function.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-06 20:59:43 +10:00
David Gibson
d2caff6c55 agent: Re-organize uevent processing
Uevent::process() is a bit oddly organized.  It treats the onlining of
hotplugged memory as the "default" case, although that's quite specific,
while treating the handling of hotplugged block devices more like a special
case, although that's pretty close to being very general.

Furthermore splitting Uevent::is_block_add_event() from
Uevent::handle_block_add_event() doesn't make a lot of sense, since their
logic is intimately related to each other.

Alter the code to be a bit more sensible: first split on the "action" type
since that's the most fundamental difference, then handle the memory
onlining special case, then the block device add (which will become a lot
more general in future changes).

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-06 20:59:20 +10:00
David Gibson
55ed2ddd07 agent: Store uevent watchers in Vec rather than HashMap
Sandbox:dev_watcher is a HashMap from a "device address" to a channel used
to notify get_device_name() that a suitable uevent has been found.
However, "device address" isn't well defined, having somewhat different
meanings for different device/event types.  We never actually look up this
HashMap by key, except to remove entries.

Not looking up by key suggests that a map is not the appropriate data
structure here.  Furthermore, HashMap imposes limitations on the types
which will prevent some future extensions we want.

So, replace the HashMap with a Vec<Option<>>.  We need the Option<> so that
we can remove entries by index (removing them from the Vec completely would
hange the indices of other entries, possibly breaking concurrent work.

This does mean that the vector will keep growing as we watch for different
events during startup.  However, we don't expect the number of device
events we watch for during a run to be very large, so that shouldn't be
a problem.  We can optimize this later if it becomes a problem.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-06 20:59:19 +10:00
David Gibson
91e0ef5c90 agent/uevent: Report whole Uevents to device watchers
Currently, when Uevent::handle_block_add_event() receives an event matching
a registered watcher, it reports the /dev node name from the event back
to the watcher.

This changes it to report the entire uevent, not just the /dev node name.
This will allow various future extensions.  It also makes the client side
of the uevent watching - get_device_name() - more consistent between its
two paths: finding a past uevent in Sandbox::uevent_map() or waiting for
a new uevent via a watcher.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-06 20:58:47 +10:00
David Gibson
3642005479 agent: Store whole Uevent in map, rather than just /dev name
Sandbox::pci_device_map contains a mapping from sysfs paths to /dev entries
which is used by get_device_name() to look up the right /dev node.  But,
the map only supplies the answer if the uevent for the device has already
been received, otherwise get_device_name() has to wait for it.

However the matching for already-received and yet-to-come uevents isn't
quite the same which makes the whole system fragile.

In order to make sure the matching for both cases is identical, we need the
already-received side to store the whole uevent to match against, not just
the sysfs path and device name.

So, rename pci_device_map to uevent_map and store the whole uevent there
verbatim.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-06 20:58:47 +10:00
David Gibson
0616202580 agent/device: Move GLOBAL_DEVICE_WATCHER into Sandbox
In Kata 1.x, both the sysToDevMap and the deviceWatchers are in the sandbox
structure.  For some reason in Kata 2.x, the device watchers have moved to
a separate global variable, GLOBAL_DEVICE_WATCHER.

This is a bad idea: apart from introducing an extra global variable
unnecessarily, it means that Sandbox::pci_device_map and
GLOBAL_DEVICE_WATCHER are protected by separate mutexes.  Since the
information in these two structures has to be kept in sync with each other,
it makes much more sense to keep them both under the same single Sandbox
mutex.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-06 20:58:45 +10:00
David Gibson
11ae32e3c0 agent/device: Fix path matching for PCI devices
For the case of virtio-blk PCI devices, when matching uevents we create
a pci_p temporary.  However, we build it incorrectly: the dev_addr values
we use for PCI devices are a relative sysfs paths from the PCI root to the
device in question *including an initial /*.  But when we construct pci_p
we add an extra /, meaning the resulting path will *not* match properly.

AFAICT the only reason we got away with this is because in practice the
virtio-blk devices where discovered by the kernel before we looked for them
meaning the loosed matching in get_device_name() was used, rather than the
pci_p logic in handle_block_add_event().

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-06 20:58:06 +10:00
David Gibson
4f60880414 agent/device: Update test_get_device_name()
The current test_get_device_name(), ported from Kata 1.x doesn't really
reflect how the function is used in practice.  The example path appears
to be for a virtio-blk device, but it's an s390 specific variant, not a
PCI device.  The s390 form isn't actually supported by any of the existing
users of get_device_name().

Change it to a plausible virtio-blk-pci style path to better test how
get_device_name() will actually be used in practice.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-06 20:49:48 +10:00
Bin Liu
117c59150d Merge pull request #1613 from Tim-Zhang/pipestream-shutdown-do-nothing
Don't do anything in Pipestream::shutdown
2021-04-06 14:03:00 +08:00
Tim Zhang
ee6a590db1 agent: add test test_pipestream_shutdown
Make sure PipeStream::shutdown() do not close the inner fd.

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-04-06 11:44:56 +08:00
Tim Zhang
4a2d437043 agent: don't do anything in Pipestream::shutdown
The only right way to shutdown pipe is drop it
Otherwise PipeStream will conflict with its twins
Because they both have the same fd, and both registered.

Fixes: #1614

Signed-off-by: Tim Zhang <tim@hyper.sh>
2021-04-06 11:44:38 +08:00
Peng Tao
d5600641dd Merge pull request #1603 from lifupan/fix_fsgroup
Fix fsgroup
2021-04-06 11:35:03 +08:00
David Gibson
e3e670c56f agent/device: Forward port test for get_device_name() from Kata 1.x
Kata 1.x had a testcase for the equivalent getDeviceName function in Go,
this adapts it to Rust and adds it to Kata 2.x.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-06 13:29:37 +10:00
David Gibson
b980965c6b Merge pull request #1627 from dgibson/bug1626
agent: Remove many "panic message is not string literal" warnings
2021-04-06 13:29:11 +10:00
David Gibson
ed08980fc1 agent: Remove many "panic message is not string literal" warnings
Rust 1.51 appears to have added a new warning in anticipation of Rust 2021,
which requires the format string for panic!()s (including via the various
assert!() macros) to be a string literal.  This triggers quite a few times
in the agent code.  This patch fixes them.

fixes #1626

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-04-06 11:51:34 +10:00
Carlos Venegas
f365bdb7cf versions: qemu-experimental: 6.0~rc 470dd6
Move to next 6.0 dev tree for qemu experimental,
the qemu version is the same base as:

https://gitlab.com/virtio-fs/qemu/-/commits/virtio-fs-dev/

Using qemu 6.0-rc1 some patches does not apply.

Fixes: #1624

Signed-off-by: Carlos Venegas <jos.c.venegas.munoz@intel.com>
2021-04-05 21:01:58 +00:00
Carlos Venegas
6491b9d7aa qemu: Add support to build static qemu for dev tree
Update static build scripts to allow build qemu dev tree.
When qemu starts the process for a new version the patch number
from the qemu version is more than 50. Add logic to detect it
and not apply patches fro the base branch.

For example:

Qemu 5.2.50 means the beginning for 6.0 development. After detect a
development version, patches for 5.2.x will not be applied.

Signed-off-by: Carlos Venegas <jos.c.venegas.munoz@intel.com>
2021-04-05 20:55:55 +00:00
GabyCT
503039482b Merge pull request #1620 from eadamsintel/update-qat-dockerfile
osbuilder: Update QAT Dockerfile with new QAT driver version
2021-04-05 09:51:14 -05:00
Snir Sheriber
13653e7b55 runtime: increase dial timeout
On some setups, starting multiple kata pods (qemu) simultaneously on the same node
might cause kata VMs booting time to increase and the pods to fail with:
Failed to check if grpc server is working: rpc error: code = DeadlineExceeded desc = timed
out connecting to vsock 1358662990:1024: unknown

Increasing default dialing timeout to 30s should cover most cases.

Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
Fixes: #1543
2021-04-04 09:37:38 +03:00
Eric Ernst
d44412fe24 Merge pull request #1623 from egernst/custom-image-registry
osbuilder: update dockerfiles to utilize IMAGE_REGISTRY
2021-04-02 14:18:36 -07:00
Chelsea Mafrica
17b1452c2a Merge pull request #1607 from fidencio/wip/only-keep-one-VERSION-file
Only keep one VERSION file
2021-04-02 11:14:12 -07:00
Eric Ernst
935460e549 osbuilder: update dockerfiles to utilize IMAGE_REGISTRY
While we introduced IMAGE_REGISTRY, we didn't actually update the
corresponding Dockerfiles to utilize it. Let's add

Fixes: #1622

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2021-04-02 09:46:16 -07:00
Eric Adams
010d57f4e2 osbuilder: Update QAT Dockerfile with new QAT driver version
This fixes the QAT driver version and provides a check early in the
building process to make sure the driver exists. It also provides
hints to users on how to fix themselves if the driver changes again.

Fixes: #1618

Signed-off-by: Eric Adams <eric.adams@intel.com>
2021-04-01 19:20:44 +00:00
Fabiano Fidêncio
adb866ad64 kata-deploy: Adapt to the correct tag name
Use 2.1.0-alpha1 instead of 2.1-alpha1

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-04-01 20:45:30 +02:00
Fabiano Fidêncio
60adc7f02b VERSION: Use the correct form
Following what's been done in the past for 1.x repos, the version should
be 2.1.0-alpha1 (instead of 2.1-alpha1).

Fixes: #1617

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-04-01 20:43:34 +02:00
Bo Chen
1511d966aa Merge pull request #1616 from egernst/dechat-deruntime
Dechat deruntime
2021-04-01 11:02:27 -07:00
Chelsea Mafrica
4a3282cf1a Merge pull request #1608 from likebreath/0331/go_fmt_clh_clinet_code
runtime: Format auto-generated client code for cloud-hypervisor API
2021-04-01 10:39:02 -07:00
Eric Ernst
a4c125a8b9 trace: move gRPC requests from debug to trace
There are many requests to the agent that happen with relatively
high frequency when a workload is running (checkRequest, as an example).

Let's move from Debug to Trace to avoid bombarding journal.

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2021-04-01 09:03:26 -07:00
Eric Ernst
50fff97753 trace: move trace span chatter to trace rather than info
No human should ever read that ouptut. Let's at least move it to trace for now.

Fixes: #1615

Signed-off-by: Eric Ernst <eric.g.ernst@gmail.com>
2021-04-01 09:02:56 -07:00
Fupan Li
5524bc806b Merge pull request #1612 from liubin/1610/use-concrete-kata-agent-config-type
runtime: use concrete KataAgentConfig instead of interface type
2021-04-01 21:26:38 +08:00
Jianyong Wu
28bd8c1110 kernel: upgrade kernel to 5.10.x for arm64.
In kernel 5.10.x on arm64 side, When CONFIG_RANDOM_BASE is enabled,
physical base address can be a negative number. It may lead to bug
when a PA is taken as a unsigned number in comparison, as PA is
calculated based on the physical base address. The bug has been fixed
latest code by commit ee7febce051945be2 in memory hotplug zone. We can
eliminate the bug in an easy way by casting the PA as a signed value in
the current code base to avoid lots of backport.

Depends-on: github.com/kata-containers/tests#3388
Fixes: #1596
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2021-04-01 17:01:44 +08:00
bin
6fe48329b5 runtime: use concrete KataAgentConfig instead of interface type
Kata Containers 2.0 only have one type of agent, so there is no
need to use interface as config's type

Fixes: #1610

Signed-off-by: bin <bin@hyper.sh>
2021-04-01 13:44:45 +08:00
fupan.lfp
6493942568 mount: fix the issue of missing set fsGroup
For k8s emptyDir volume, a specific fsGroup would
be set for it, thus guest should get this fsGroup
from runtime and set it properly on the emptyDir volume
in guest.

Fixes: #1580

Signed-off-by: fupan.lfp <fupan.lfp@antfin.com>
2021-04-01 11:33:26 +08:00
fupan.lfp
88e58a4f4b agent: fix the issue of missing pass fsGroup
For k8s emptyDir volume, a specific fsGroup would
be set for it, thus runtime should pass this fsGroup
to guest and set it properly on the emptyDir volume
in guest.

Fixes: #1580

Signed-off-by: fupan.lfp <fupan.lfp@antfin.com>
2021-04-01 11:33:18 +08:00
Fabiano Fidêncio
572aff53e8 build: Only keep one VERSION file
Instead of having different VERSION files spread accross the project,
let's always use the one in the topsrcdir and remove all the others,
keeping only a synlink to the topsrcdir one.

Fixes: #1579

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-03-31 23:51:20 +02:00
Bo Chen
0c38d9ecc4 runtime: Fix the format of the client code of cloud-hypervisor APIs
Regenerate the client code with the added `go-fmt` step. No functional
changes.

Fixes: #1606

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-03-31 14:41:44 -07:00
Bo Chen
52cacf8838 runtime: Format auto-generated client code for cloud-hypervisor API
This patch extends the current process of generating client code for
cloud-hypervisor API with an additional step, `go-fmt`, which will remove
the generated `client/go.mod` file and format all auto-generated code.

Fixes: #1606

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-03-31 14:36:24 -07:00
Eric Ernst
c0c7bef2b8 Merge pull request #1592 from likebreath/0330/versions_clh_v0.14.0
versions: Update cloud-hypervisor to release v0.14.1
2021-03-31 12:39:35 -07:00
Fabiano Fidêncio
a3d8554ab9 Merge pull request #1577 from liubin/feature/1576-import-runc-v2-options-types
runtime: import runtime/v2/runc/options to decode request from Docker
2021-03-31 20:35:24 +02:00
Fabiano Fidêncio
a6a53698c1 Merge pull request #1519 from nubificus/fc-v0.23.1
virtcontainers/fc: Upgrade Firecracker to v0.23.1
2021-03-31 20:34:25 +02:00
Bo Chen
84b62dc3b1 versions: Update cloud-hypervisor to release v0.14.1
Highlights for cloud-hypervisor version 0.14.0 include: 1) Structured
event monitoring; 2) MSHV improvements; 3) Improved aarch64 platform; 4)
Updated hotplug documentation; 6) PTY control for serial and
virtio-console; 7) Block device rate limiting; 8) Plan to deprecate the
support of "LinuxBoot" protocol and support PVH protocol only.

Highlights for cloud-hypervisor version 0.13.0 include: 1) Wider VFIO
device support; 2) Improve huge page support; 3) MACvTAP support; 4) VHD
disk image support; 5) Improved Virtio device threading; 6) Clean
shutdown support via synthetic power button.

Details can be found:
https://github.com/cloud-hypervisor/cloud-hypervisor/releases

Note: The client code of cloud-hypervisor's OpenAPI is automatically
generated by `openapi-generator` [1-2]. As the API changes do not
impact usages in Kata, no additional changes in kata's runtime are
needed to work with the latest version of cloud-hypervisor.

[1] https://github.com/OpenAPITools/openapi-generator
[2] https://github.com/kata-containers/kata-containers/blob/main/src/runtime/virtcontainers/pkg/cloud-hypervisor/README.md

Fixes: #1591

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-03-31 11:09:47 -07:00
Carlos Venegas
ed2e736df7 Merge pull request #1589 from fidencio/wip/update-install-docs-for-ubuntu
docs: Remove ubuntu installation guide
2021-03-31 11:54:34 -06:00
Carlos Venegas
0e7af7f27f Merge pull request #1602 from fidencio/wip/update-install-for-snap
docs: Update snap install guide
2021-03-31 10:52:48 -06:00
Fabiano Fidêncio
4a38ff41f0 docs: Update snap install guide
As this repo is specific to the kata-containers 2.x, let's stop
mentioning / referring to the 1.x here, including how to setup and use
the snap package for 1.x.

Fixes: #1601

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-03-31 15:26:47 +02:00
Fabiano Fidêncio
ede1ab8670 docs: Remove ubuntu installation guide
The installation guide points to 1.x packages from OBS.  For 2.x we
decided to stop building packages on OBS in favour of advertising
kata-deploy.

Apart from this, Ubuntu itself doesn't provide packages for
kata-containers.

Fixes: #1588

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-03-31 14:49:41 +02:00
Tim Zhang
3cc27610ab Merge pull request #1354 from liubin/fix/1325-update-doc-for-using-k8s
docs: update how-to-use-k8s-with-cri-containerd-and-kata.md
2021-03-31 19:19:00 +08:00
Orestis Lagkas Nikolos
6255cc1959 virtcontainers/fc: Upgrade Firecracker to v0.23.1
This patch upgrades Firecracker version from v0.21.1 to v0.23.1

* Generate swagger models for v0.23.1 (from firecracker.yaml)
* Change uint64 types in TokenBucket object according to rate-limiter
implementation (introduced in commit #cfeb966)
* Update Firecracker Logger/Metrics to support the new API
* Update payload in fc.vmRunning to support the new API
* Add Metrics type to fcConfig

Fixes: #1518

Signed-off-by: Orestis Lagkas Nikolos <olagkasn@nubificus.co.uk>
2021-03-31 04:55:40 -05:00
Fabiano Fidêncio
9c8e95c820 Merge pull request #1584 from fidencio/wip/update-install-docs-for-fedora-and-centos
Update install docs for Fedora and CentOS
2021-03-31 11:31:11 +02:00
bin
2c47277ca1 docs: update how-to-use-k8s-with-cri-containerd-and-kata.md
Update how-to-use-k8s-with-cri-containerd-and-kata.md to fit the latest
Kubernetes way.
And also changed CNI plugin from flannel to bridge, that will be easy to run.

Fixes: #1325

Signed-off-by: bin <bin@hyper.sh>
2021-03-31 17:10:39 +08:00
Fabiano Fidêncio
317f55f89e docs: Update minimum version for Fedora
The minimum version where everything was running out-of-the-box, for 2.x
package, is Fedora 34.

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-03-31 10:34:27 +02:00
Fabiano Fidêncio
1ce29fc959 docs: Update CentOS install docs
There are two changes here.  There first one being relying on the
`centos-release-advanced-virtualization` package instead providing the
content of the repo ourselves; and the second one being installing
`kata-containers` (2.x) instead of the `kata-runtime` one (1.x).

Fixes: #1583

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-03-31 10:01:03 +02:00
Fabiano Fidêncio
3f90561bf1 docs: Update Fedora install docs
The package to be installed on Fedora is `kata-containers` instead of
`kata-runtime`.  The difference being `kata-runtime` is the 1.x package,
while `kata-containers` is the 2.x one.

Fixes: #1582

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-03-31 10:01:03 +02:00
bin
09d454ac74 runtime: import runtime/v2/runc/options to decode request from Docker
Shimv2 protocol CreateTaskRequest.Options has a type of *google_protobuf.Any.
If the call is from Docker, to decode the request,
the proto types(github.com/containerd/containerd/runtime/v2/runc/options)
should be imported.

Fixes: #1576

Signed-off-by: bin <bin@hyper.sh>
2021-03-30 19:44:00 +08:00
Jakob Naucke
1366f0fb9c cli: Use genericGetExpectedHostDetails on s390x
getExpectedHostDetails did not offload any work to
genericGetExpectedHostDetails on s390x. By using that function, much
redundant code can be saved. This also resolves 2 issues with the
previous version:

- The number of CPUs was not calculated.
- vcUtils.SupportsVsocks() still used the Kata v1 signature.

Fixes: #1564

Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-03-29 17:58:16 +02:00
Jakob Naucke
52a276fbdb agent: Fix type for PROC_SUPER_MAGIC on s390x
statfs f_types are long on most architectures, but not on s390x, where
they are uint. Following the fix in rust-lang/libc at
https://github.com/rust-lang/libc/pull/1999, the custom defined
PROC_SUPER_MAGIC must be updated in a similar way.

Fixes: #1204

Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-03-29 17:25:19 +02:00
Jakob Naucke
5b7c8b7d26 agent: Update cgroups-rs to 0.2.5
to pull in the chain of https://github.com/rust-lang/libc/pull/1999,
https://github.com/nix-rust/nix/pull/1372, and
https://github.com/kata-containers/cgroups-rs/pull/38. This adds statfs
constants on s390x. cgroups-rs 0.2.4 also contains this fix, but let's
move to the latest 0.2.5 right away.

Fixes: #1204

Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-03-29 17:25:14 +02:00
Manabu Sugimoto
660b047306 oci: Update seccomp configuration
Seccomp configuration should be updated to prepare for the future seccomp support based on the latest OCI specification.

Add:
- flags which is used with seccomp(2) in struct LinuxSeccomp
- errnoRet which is errno return code in struct LinuxSyscall
- some new seccomp actions and an architecture

Fixes: #1391

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2021-02-11 22:37:02 +09:00
316 changed files with 14373 additions and 6500 deletions

View File

@@ -1,9 +1,12 @@
on: issue_comment
on:
issue_comment:
types: [created, edited]
name: test-kata-deploy
jobs:
check_comments:
if: ${{ github.event.issue.pull_request }}
types: [created, edited]
runs-on: ubuntu-latest
steps:
- name: Check for Command
@@ -11,7 +14,7 @@ jobs:
uses: kata-containers/slash-command-action@v1
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
command: "test-kata-deploy"
command: "test_kata_deploy"
reaction: "true"
reaction-type: "eyes"
allow-edits: "false"
@@ -19,6 +22,7 @@ jobs:
- name: verify command arg is kata-deploy
run: |
echo "The command was '${{ steps.command.outputs.command-name }}' with arguments '${{ steps.command.outputs.command-arguments }}'"
create-and-test-container:
needs: check_comments
runs-on: ubuntu-latest
@@ -29,22 +33,26 @@ jobs:
ref=$(cat $GITHUB_EVENT_PATH | jq -r '.issue.pull_request.url' | sed 's#^.*\/pulls#refs\/pull#' | sed 's#$#\/merge#')
echo "reference for PR: " ${ref}
echo "##[set-output name=pr-ref;]${ref}"
- uses: actions/checkout@v2-beta
- name: check out
uses: actions/checkout@v2
with:
ref: ${{ steps.get-PR-ref.outputs.pr-ref }}
ref: ${{ steps.get-PR-ref.outputs.pr-ref }}
- name: build-container-image
id: build-container-image
run: |
PR_SHA=$(git log --format=format:%H -n1)
VERSION=$(curl https://raw.githubusercontent.com/kata-containers/kata-containers/main/VERSION)
VERSION="2.0.0"
ARTIFACT_URL="https://github.com/kata-containers/kata-containers/releases/download/${VERSION}/kata-static-${VERSION}-x86_64.tar.xz"
wget "${ARTIFACT_URL}" -O ./kata-deploy/kata-static.tar.xz
docker build --build-arg KATA_ARTIFACTS=kata-static.tar.xz -t katadocker/kata-deploy-ci:${PR_SHA} ./kata-deploy
wget "${ARTIFACT_URL}" -O tools/packaging/kata-deploy/kata-static.tar.xz
docker build --build-arg KATA_ARTIFACTS=kata-static.tar.xz -t katadocker/kata-deploy-ci:${PR_SHA} ./tools/packaging/kata-deploy
docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }}
docker push katadocker/kata-deploy-ci:$PR_SHA
echo "##[set-output name=pr-sha;]${PR_SHA}"
- name: test-kata-deploy-ci-in-aks
uses: ./kata-deploy/action
uses: ./tools/packaging/kata-deploy/action
with:
packaging-sha: ${{ steps.build-container-image.outputs.pr-sha }}
env:

View File

@@ -12,6 +12,9 @@ on:
- reopened
- labeled
- unlabeled
pull_request:
branches:
- main
jobs:
check-pr-porting-labels:

View File

@@ -33,5 +33,5 @@ jobs:
snap_file="kata-containers_${snap_version}_amd64.snap"
# Upload the snap if it exists
if [ -f ${snap_file} ]; then
snapcraft upload --release=candidate ${snap_file}
snapcraft upload --release=stable ${snap_file}
fi

View File

@@ -1 +1 @@
2.1-alpha1
2.2.0-alpha0

View File

@@ -18,7 +18,9 @@ function install_yq() {
GOPATH=${GOPATH:-${HOME}/go}
local yq_path="${GOPATH}/bin/yq"
local yq_pkg="github.com/mikefarah/yq"
[ -x "${GOPATH}/bin/yq" ] && return
local yq_version=3.4.1
[ -x "${GOPATH}/bin/yq" ] && [ "`${GOPATH}/bin/yq --version`"X == "yq version ${yq_version}"X ] && return
read -r -a sysInfo <<< "$(uname -sm)"
@@ -56,8 +58,6 @@ function install_yq() {
die "Please install curl"
fi
local yq_version=3.4.1
## NOTE: ${var,,} => gives lowercase value of var
local yq_url="https://${yq_pkg}/releases/download/${yq_version}/yq_${goos,,}_${goarch}"
curl -o "${yq_path}" -LSsf "${yq_url}"

View File

@@ -7,16 +7,25 @@ export tests_repo="${tests_repo:-github.com/kata-containers/tests}"
export tests_repo_dir="$GOPATH/src/$tests_repo"
export branch="${branch:-main}"
# Clones the tests repository and checkout to the branch pointed out by
# the global $branch variable.
# If the clone exists and `CI` is exported then it does nothing. Otherwise
# it will clone the repository or `git pull` the latest code.
#
clone_tests_repo()
{
if [ -d "$tests_repo_dir" -a -n "$CI" ]
then
return
if [ -d "$tests_repo_dir" ]; then
[ -n "$CI" ] && return
pushd "${tests_repo_dir}"
git checkout "${branch}"
git pull
popd
else
git clone -q "https://${tests_repo}" "$tests_repo_dir"
pushd "${tests_repo_dir}"
git checkout "${branch}"
popd
fi
go get -d -u "$tests_repo" || true
pushd "${tests_repo_dir}" && git checkout "${branch}" && popd
}
run_static_checks()

View File

@@ -0,0 +1,9 @@
# Copyright (c) 2021 Red Hat, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# This is the build root image for Kata Containers on OpenShift CI.
#
FROM centos:8
RUN yum -y update && yum -y install git sudo wget

View File

@@ -1,54 +1,55 @@
* [Warning](#warning)
* [Assumptions](#assumptions)
* [Initial setup](#initial-setup)
* [Requirements to build individual components](#requirements-to-build-individual-components)
* [Build and install the Kata Containers runtime](#build-and-install-the-kata-containers-runtime)
* [Check hardware requirements](#check-hardware-requirements)
* [Configure to use initrd or rootfs image](#configure-to-use-initrd-or-rootfs-image)
* [Enable full debug](#enable-full-debug)
* [debug logs and shimv2](#debug-logs-and-shimv2)
* [Enabling full `containerd` debug](#enabling-full-containerd-debug)
* [Enabling just `containerd shim` debug](#enabling-just-containerd-shim-debug)
* [Enabling `CRI-O` and `shimv2` debug](#enabling-cri-o-and-shimv2-debug)
* [journald rate limiting](#journald-rate-limiting)
* [`systemd-journald` suppressing messages](#systemd-journald-suppressing-messages)
* [Disabling `systemd-journald` rate limiting](#disabling-systemd-journald-rate-limiting)
* [Create and install rootfs and initrd image](#create-and-install-rootfs-and-initrd-image)
* [Build a custom Kata agent - OPTIONAL](#build-a-custom-kata-agent---optional)
* [Get the osbuilder](#get-the-osbuilder)
* [Create a rootfs image](#create-a-rootfs-image)
* [Create a local rootfs](#create-a-local-rootfs)
* [Add a custom agent to the image - OPTIONAL](#add-a-custom-agent-to-the-image---optional)
* [Build a rootfs image](#build-a-rootfs-image)
* [Install the rootfs image](#install-the-rootfs-image)
* [Create an initrd image - OPTIONAL](#create-an-initrd-image---optional)
* [Create a local rootfs for initrd image](#create-a-local-rootfs-for-initrd-image)
* [Build an initrd image](#build-an-initrd-image)
* [Install the initrd image](#install-the-initrd-image)
* [Install guest kernel images](#install-guest-kernel-images)
* [Install a hypervisor](#install-a-hypervisor)
* [Build a custom QEMU](#build-a-custom-qemu)
* [Build a custom QEMU for aarch64/arm64 - REQUIRED](#build-a-custom-qemu-for-aarch64arm64---required)
* [Run Kata Containers with Containerd](#run-kata-containers-with-containerd)
* [Run Kata Containers with Kubernetes](#run-kata-containers-with-kubernetes)
* [Troubleshoot Kata Containers](#troubleshoot-kata-containers)
* [Appendices](#appendices)
* [Checking Docker default runtime](#checking-docker-default-runtime)
* [Set up a debug console](#set-up-a-debug-console)
* [Simple debug console setup](#simple-debug-console-setup)
* [Enable agent debug console](#enable-agent-debug-console)
* [Connect to debug console](#connect-to-debug-console)
* [Traditional debug console setup](#traditional-debug-console-setup)
* [Create a custom image containing a shell](#create-a-custom-image-containing-a-shell)
* [Build the debug image](#build-the-debug-image)
* [Configure runtime for custom debug image](#configure-runtime-for-custom-debug-image)
* [Connect to the virtual machine using the debug console](#connect-to-the-virtual-machine-using-the-debug-console)
* [Enabling debug console for QEMU](#enabling-debug-console-for-qemu)
* [Enabling debug console for cloud-hypervisor / firecracker](#enabling-debug-console-for-cloud-hypervisor--firecracker)
* [Create a container](#create-a-container)
* [Connect to the virtual machine using the debug console](#connect-to-the-virtual-machine-using-the-debug-console)
* [Obtain details of the image](#obtain-details-of-the-image)
* [Capturing kernel boot logs](#capturing-kernel-boot-logs)
- [Warning](#warning)
- [Assumptions](#assumptions)
- [Initial setup](#initial-setup)
- [Requirements to build individual components](#requirements-to-build-individual-components)
- [Build and install the Kata Containers runtime](#build-and-install-the-kata-containers-runtime)
- [Check hardware requirements](#check-hardware-requirements)
- [Configure to use initrd or rootfs image](#configure-to-use-initrd-or-rootfs-image)
- [Enable full debug](#enable-full-debug)
- [debug logs and shimv2](#debug-logs-and-shimv2)
- [Enabling full `containerd` debug](#enabling-full-containerd-debug)
- [Enabling just `containerd shim` debug](#enabling-just-containerd-shim-debug)
- [Enabling `CRI-O` and `shimv2` debug](#enabling-cri-o-and-shimv2-debug)
- [journald rate limiting](#journald-rate-limiting)
- [`systemd-journald` suppressing messages](#systemd-journald-suppressing-messages)
- [Disabling `systemd-journald` rate limiting](#disabling-systemd-journald-rate-limiting)
- [Create and install rootfs and initrd image](#create-and-install-rootfs-and-initrd-image)
- [Build a custom Kata agent - OPTIONAL](#build-a-custom-kata-agent---optional)
- [Get the osbuilder](#get-the-osbuilder)
- [Create a rootfs image](#create-a-rootfs-image)
- [Create a local rootfs](#create-a-local-rootfs)
- [Add a custom agent to the image - OPTIONAL](#add-a-custom-agent-to-the-image---optional)
- [Build a rootfs image](#build-a-rootfs-image)
- [Install the rootfs image](#install-the-rootfs-image)
- [Create an initrd image - OPTIONAL](#create-an-initrd-image---optional)
- [Create a local rootfs for initrd image](#create-a-local-rootfs-for-initrd-image)
- [Build an initrd image](#build-an-initrd-image)
- [Install the initrd image](#install-the-initrd-image)
- [Install guest kernel images](#install-guest-kernel-images)
- [Install a hypervisor](#install-a-hypervisor)
- [Build a custom QEMU](#build-a-custom-qemu)
- [Build a custom QEMU for aarch64/arm64 - REQUIRED](#build-a-custom-qemu-for-aarch64arm64---required)
- [Run Kata Containers with Containerd](#run-kata-containers-with-containerd)
- [Run Kata Containers with Kubernetes](#run-kata-containers-with-kubernetes)
- [Troubleshoot Kata Containers](#troubleshoot-kata-containers)
- [Appendices](#appendices)
- [Checking Docker default runtime](#checking-docker-default-runtime)
- [Set up a debug console](#set-up-a-debug-console)
- [Simple debug console setup](#simple-debug-console-setup)
- [Enable agent debug console](#enable-agent-debug-console)
- [Start `kata-monitor` - ONLY NEEDED FOR 2.0.x](#start-kata-monitor---only-needed-for-20x)
- [Connect to debug console](#connect-to-debug-console)
- [Traditional debug console setup](#traditional-debug-console-setup)
- [Create a custom image containing a shell](#create-a-custom-image-containing-a-shell)
- [Build the debug image](#build-the-debug-image)
- [Configure runtime for custom debug image](#configure-runtime-for-custom-debug-image)
- [Create a container](#create-a-container)
- [Connect to the virtual machine using the debug console](#connect-to-the-virtual-machine-using-the-debug-console)
- [Enabling debug console for QEMU](#enabling-debug-console-for-qemu)
- [Enabling debug console for cloud-hypervisor / firecracker](#enabling-debug-console-for-cloud-hypervisor--firecracker)
- [Connecting to the debug console](#connecting-to-the-debug-console)
- [Obtain details of the image](#obtain-details-of-the-image)
- [Capturing kernel boot logs](#capturing-kernel-boot-logs)
# Warning
@@ -304,7 +305,7 @@ You MUST choose one of `alpine`, `centos`, `clearlinux`, `debian`, `euleros`, `f
> - You should only do this step if you are testing with the latest version of the agent.
```
$ sudo install -o root -g root -m 0550 -t ${ROOTFS_DIR}/bin ../../../src/agent/target/x86_64-unknown-linux-musl/release/kata-agent
$ sudo install -o root -g root -m 0550 -t ${ROOTFS_DIR}/usr/bin ../../../src/agent/target/x86_64-unknown-linux-musl/release/kata-agent
$ sudo install -o root -g root -m 0440 ../../../src/agent/kata-agent.service ${ROOTFS_DIR}/usr/lib/systemd/system/
$ sudo install -o root -g root -m 0440 ../../../src/agent/kata-containers.target ${ROOTFS_DIR}/usr/lib/systemd/system/
```
@@ -384,31 +385,56 @@ You can build and install the guest kernel image as shown [here](../tools/packag
# Install a hypervisor
When setting up Kata using a [packaged installation method](install/README.md#installing-on-a-linux-system), the `qemu-lite` hypervisor is installed automatically. For other installation methods, you will need to manually install a suitable hypervisor.
When setting up Kata using a [packaged installation method](install/README.md#installing-on-a-linux-system), the
`QEMU` VMM is installed automatically. Cloud-Hypervisor and Firecracker VMMs are available from the [release tarballs](https://github.com/kata-containers/kata-containers/releases), as well as through [`kata-deploy`](../tools/packaging/kata-deploy/README.md).
You may choose to manually build your VMM/hypervisor.
## Build a custom QEMU
Your QEMU directory need to be prepared with source code. Alternatively, you can use the [Kata containers QEMU](https://github.com/kata-containers/qemu/tree/master) and checkout the recommended branch:
Kata Containers makes use of upstream QEMU branch. The exact version
and repository utilized can be found by looking at the [versions file](../versions.yaml).
Find the correct version of QEMU from the versions file:
```
$ go get -d github.com/kata-containers/qemu
$ qemu_branch=$(grep qemu-lite- ${GOPATH}/src/github.com/kata-containers/kata-containers/versions.yaml | cut -d '"' -f2)
$ cd ${GOPATH}/src/github.com/kata-containers/qemu
$ git checkout -b $qemu_branch remotes/origin/$qemu_branch
$ your_qemu_directory=${GOPATH}/src/github.com/kata-containers/qemu
$ source ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/scripts/lib.sh
$ qemu_version=$(get_from_kata_deps "assets.hypervisor.qemu.version")
$ echo ${qemu_version}
```
Get source from the matching branch of QEMU:
```
$ go get -d github.com/qemu/qemu
$ cd ${GOPATH}/src/github.com/qemu/qemu
$ git checkout ${qemu_version}
$ your_qemu_directory=${GOPATH}/src/github.com/qemu/qemu
```
To build a version of QEMU using the same options as the default `qemu-lite` version , you could use the `configure-hypervisor.sh` script:
There are scripts to manage the build and packaging of QEMU. For the examples below, set your
environment as:
```
$ go get -d github.com/kata-containers/kata-containers
$ packaging_dir="${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging"
```
Kata often utilizes patches for not-yet-upstream and/or backported fixes for components,
including QEMU. These can be found in the [packaging/QEMU directory](../tools/packaging/qemu/patches),
and it's *recommended* that you apply them. For example, suppose that you are going to build QEMU
version 5.2.0, do:
```
$ go get -d github.com/kata-containers/kata-containers/tools/packaging
$ cd $your_qemu_directory
$ ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/scripts/configure-hypervisor.sh kata-qemu > kata.cfg
$ $packaging_dir/scripts/apply_patches.sh $packaging_dir/qemu/patches/5.2.x/
```
To build utilizing the same options as Kata, you should make use of the `configure-hypervisor.sh` script. For example:
```
$ cd $your_qemu_directory
$ $packaging_dir/scripts/configure-hypervisor.sh kata-qemu > kata.cfg
$ eval ./configure "$(cat kata.cfg)"
$ make -j $(nproc)
$ sudo -E make install
```
See the [static-build script for QEMU](../tools/packaging/static-build/qemu/build-static-qemu.sh) for a reference on how to get, setup, configure and build QEMU for Kata.
### Build a custom QEMU for aarch64/arm64 - REQUIRED
> **Note:**
>
@@ -476,6 +502,16 @@ debug_console_enabled = true
This will pass `agent.debug_console agent.debug_console_vport=1026` to agent as kernel parameters, and sandboxes created using this parameters will start a shell in guest if new connection is accept from VSOCK.
#### Start `kata-monitor` - ONLY NEEDED FOR 2.0.x
For Kata Containers `2.0.x` releases, the `kata-runtime exec` command depends on the`kata-monitor` running, in order to get the sandbox's `vsock` address to connect to. Thus, first start the `kata-monitor` process.
```
$ sudo kata-monitor
```
`kata-monitor` will serve at `localhost:8090` by default.
#### Connect to debug console
Command `kata-runtime exec` is used to connect to the debug console.
@@ -613,8 +649,11 @@ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.debug_cons
> **Note** Ports 1024 and 1025 are reserved for communication with the agent
> and gathering of agent logs respectively.
Next, connect to the debug console. The VSOCKS paths vary slightly between
cloud-hypervisor and firecracker.
##### Connecting to the debug console
Next, connect to the debug console. The VSOCKS paths vary slightly between each
VMM solution.
In case of cloud-hypervisor, connect to the `vsock` as shown:
```
$ sudo su -c 'cd /var/run/vc/vm/{sandbox_id}/root/ && socat stdin unix-connect:clh.sock'
@@ -631,6 +670,12 @@ CONNECT 1026
**Note**: You need to press the `RETURN` key to see the shell prompt.
For QEMU, connect to the `vsock` as shown:
```
$ sudo su -c 'cd /var/run/vc/vm/{sandbox_id} && socat "stdin,raw,echo=0,escape=0x11" "unix-connect:console.sock"
```
To disconnect from the virtual machine, type `CONTROL+q` (hold down the
`CONTROL` key and press `q`).

View File

@@ -19,6 +19,8 @@
* [Support for joining an existing VM network](#support-for-joining-an-existing-vm-network)
* [docker --net=host](#docker---nethost)
* [docker run --link](#docker-run---link)
* [Storage limitations](#storage-limitations)
* [Kubernetes `volumeMounts.subPaths`](#kubernetes-volumemountssubpaths)
* [Host resource sharing](#host-resource-sharing)
* [docker run --privileged](#docker-run---privileged)
* [Miscellaneous](#miscellaneous)
@@ -26,7 +28,7 @@
* [Appendices](#appendices)
* [The constraints challenge](#the-constraints-challenge)
---
***
# Overview
@@ -92,7 +94,9 @@ This section lists items that might be possible to fix.
### checkpoint and restore
The runtime does not provide `checkpoint` and `restore` commands. There
are discussions about using VM save and restore to give [`criu`](https://github.com/checkpoint-restore/criu)-like functionality, which might provide a solution.
are discussions about using VM save and restore to give us a
`[criu](https://github.com/checkpoint-restore/criu)`-like functionality,
which might provide a solution.
Note that the OCI standard does not specify `checkpoint` and `restore`
commands.
@@ -216,6 +220,17 @@ Equivalent functionality can be achieved with the newer docker networking comman
See more documentation at
[docs.docker.com](https://docs.docker.com/engine/userguide/networking/default_network/dockerlinks/).
## Storage limitations
### Kubernetes `volumeMounts.subPaths`
Kubernetes `volumeMount.subPath` is not supported by Kata Containers at the
moment.
See [this issue](https://github.com/kata-containers/runtime/issues/2812) for more details.
[Another issue](https://github.com/kata-containers/kata-containers/issues/1728) focuses on the case of `emptyDir`.
## Host resource sharing
### docker run --privileged
@@ -224,7 +239,7 @@ Privileged support in Kata is essentially different from `runc` containers.
Kata does support `docker run --privileged` command, but in this case full access
to the guest VM is provided in addition to some host access.
The container runs with elevated capabilities within the guest and is granted
The container runs with elevated capabilities within the guest and is granted
access to guest devices instead of the host devices.
This is also true with using `securityContext privileged=true` with Kubernetes.

View File

@@ -18,8 +18,7 @@
## Requirements
- [hub](https://github.com/github/hub)
- OBS account with permissions on [`/home:katacontainers`](https://build.opensuse.org/project/subprojects/home:katacontainers)
* Using an [application token](https://github.com/settings/tokens) is required for hub.
- GitHub permissions to push tags and create releases in Kata repositories.
@@ -30,16 +29,12 @@
## Release Process
### Bump all Kata repositories
- We have set up a Jenkins job to bump the version in the `VERSION` file in all Kata repositories. Go to the [Jenkins bump-job page](http://jenkins.katacontainers.io/job/release/build) to trigger a new job.
- Start a new job with variables for the job passed as:
- `BRANCH=<the-branch-you-want-to-bump>`
- `NEW_VERSION=<the-new-kata-version>`
For example, in the case where you want to make a patch release `1.10.2`, the variable `NEW_VERSION` should be `1.10.2` and `BRANCH` should point to `stable-1.10`. In case of an alpha or release candidate release, `BRANCH` should point to `master` branch.
Alternatively, you can also bump the repositories using a script in the Kata packaging repo
Bump the repositories using a script in the Kata packaging repo, where:
- `BRANCH=<the-branch-you-want-to-bump>`
- `NEW_VERSION=<the-new-kata-version>`
```
$ cd ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/release
$ export NEW_VERSION=<the-new-kata-version>
@@ -47,6 +42,23 @@
$ ./update-repository-version.sh -p "$NEW_VERSION" "$BRANCH"
```
### Point tests repository to stable branch
If you create a new stable branch, i.e. if your release changes a major or minor version number (not a patch release), then
you should modify the `tests` repository to point to that newly created stable branch and not the `main` branch.
The objective is that changes in the CI on the main branch will not impact the stable branch.
In the test directory, change references the main branch in:
* `README.md`
* `versions.yaml`
* `cmd/github-labels/labels.yaml.in`
* `cmd/pmemctl/pmemctl.sh`
* `.ci/lib.sh`
* `.ci/static-checks.sh`
See the commits in [the corresponding PR for stable-2.1](https://github.com/kata-containers/tests/pull/3504) for an example of the changes.
### Merge all bump version Pull requests
- The above step will create a GitHub pull request in the Kata projects. Trigger the CI using `/test` command on each bump Pull request.
@@ -56,7 +68,7 @@
### Tag all Kata repositories
Once all the pull requests to bump versions in all Kata repositories are merged,
tag all the repositories as shown below.
tag all the repositories as shown below.
```
$ cd ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/release
$ git checkout <kata-branch-to-release>
@@ -66,7 +78,7 @@
### Check Git-hub Actions
We make use of [GitHub actions](https://github.com/features/actions) in this [file](https://github.com/kata-containers/kata-containers/blob/master/.github/workflows/main.yaml) in the `kata-containers/kata-containers` repository to build and upload release artifacts. This action is auto triggered with the above step when a new tag is pushed to the `kata-containers/kata-conatiners` repository.
We make use of [GitHub actions](https://github.com/features/actions) in this [file](https://github.com/kata-containers/kata-containers/blob/main/.github/workflows/main.yaml) in the `kata-containers/kata-containers` repository to build and upload release artifacts. This action is auto triggered with the above step when a new tag is pushed to the `kata-containers/kata-containers` repository.
Check the [actions status page](https://github.com/kata-containers/kata-containers/actions) to verify all steps in the actions workflow have completed successfully. On success, a static tarball containing Kata release artifacts will be uploaded to the [Release page](https://github.com/kata-containers/kata-containers/releases).

View File

@@ -32,16 +32,16 @@ provides additional information regarding release `99.123.77` in the previous ex
changing the existing behavior*.
- When `MAJOR` increases, the new release adds **new features, bug fixes, or
both** and which *changes the behavior from the previous release* (incompatible with previous releases).
both** and which **changes the behavior from the previous release** (incompatible with previous releases).
A major release will also likely require a change of the container manager version used,
for example Docker\*. Please refer to the release notes for further details.
for example Containerd or CRI-O. Please refer to the release notes for further details.
## Release Strategy
Any new features added since the last release will be available in the next minor
release. These will include bug fixes as well. To facilitate a stable user environment,
Kata provides stable branch-based releases and a master branch release.
Kata provides stable branch-based releases and a main branch release.
## Stable branch patch criteria
@@ -49,9 +49,10 @@ No new features should be introduced to stable branches. This is intended to li
providing only bug and security fixes.
## Branch Management
Kata Containers will maintain two stable release branches in addition to the master branch.
Once a new MAJOR or MINOR release is created from master, a new stable branch is created for
the prior MAJOR or MINOR release and the older stable branch is no longer maintained. End of
Kata Containers will maintain **one** stable release branch, in addition to the main branch, for
each active major release.
Once a new MAJOR or MINOR release is created from main, a new stable branch is created for
the prior MAJOR or MINOR release and the previous stable branch is no longer maintained. End of
maintenance for a branch is announced on the Kata Containers mailing list. Users can determine
the version currently installed by running `kata-runtime kata-env`. It is recommended to use the
latest stable branch available.
@@ -61,59 +62,59 @@ A couple of examples follow to help clarify this process.
### New bug fix introduced
A bug fix is submitted against the runtime which does not introduce new inter-component dependencies.
This fix is applied to both the master and stable branches, and there is no need to create a new
This fix is applied to both the main and stable branches, and there is no need to create a new
stable branch.
| Branch | Original version | New version |
|--|--|--|
| `master` | `1.3.0-rc0` | `1.3.0-rc1` |
| `stable-1.2` | `1.2.0` | `1.2.1` |
| `stable-1.1` | `1.1.2` | `1.1.3` |
| `main` | `2.3.0-rc0` | `2.3.0-rc1` |
| `stable-2.2` | `2.2.0` | `2.2.1` |
| `stable-2.1` | (unmaintained) | (unmaintained) |
### New release made feature or change adding new inter-component dependency
A new feature is introduced, which adds a new inter-component dependency. In this case a new stable
branch is created (stable-1.3) starting from master and the older stable branch (stable-1.1)
branch is created (stable-2.3) starting from main and the previous stable branch (stable-2.2)
is dropped from maintenance.
| Branch | Original version | New version |
|--|--|--|
| `master` | `1.3.0-rc1` | `1.3.0` |
| `stable-1.3` | N/A| `1.3.0` |
| `stable-1.2` | `1.2.1` | `1.2.2` |
| `stable-1.1` | `1.1.3` | (unmaintained) |
| `main` | `2.3.0-rc1` | `2.3.0` |
| `stable-2.3` | N/A| `2.3.0` |
| `stable-2.2` | `2.2.1` | (unmaintained) |
| `stable-2.1` | (unmaintained) | (unmaintained) |
Note, the stable-1.1 branch will still exist with tag 1.1.3, but under current plans it is
not maintained further. The next tag applied to master will be 1.4.0-alpha0. We would then
Note, the stable-2.2 branch will still exist with tag 2.2.1, but under current plans it is
not maintained further. The next tag applied to main will be 2.4.0-alpha0. We would then
create a couple of alpha releases gathering features targeted for that particular release (in
this case 1.4.0), followed by a release candidate. The release candidate marks a feature freeze.
this case 2.4.0), followed by a release candidate. The release candidate marks a feature freeze.
A new stable branch is created for the release candidate. Only bug fixes and any security issues
are added to the branch going forward until release 1.4.0 is made.
are added to the branch going forward until release 2.4.0 is made.
## Backporting Process
Development that occurs against the master branch and applicable code commits should also be submitted
Development that occurs against the main branch and applicable code commits should also be submitted
against the stable branches. Some guidelines for this process follow::
1. Only bug and security fixes which do not introduce inter-component dependencies are
candidates for stable branches. These PRs should be marked with "bug" in GitHub.
2. Once a PR is created against master which meets requirement of (1), a comparable one
2. Once a PR is created against main which meets requirement of (1), a comparable one
should also be submitted against the stable branches. It is the responsibility of the submitter
to apply their pull request against stable, and it is the responsibility of the
reviewers to help identify stable-candidate pull requests.
## Continuous Integration Testing
The test repository is forked to create stable branches from master. Full CI
runs on each stable and master PR using its respective tests repository branch.
The test repository is forked to create stable branches from main. Full CI
runs on each stable and main PR using its respective tests repository branch.
### An alternative method for CI testing:
Ideally, the continuous integration infrastructure will run the same test suite on both master
Ideally, the continuous integration infrastructure will run the same test suite on both main
and the stable branches. When tests are modified or new feature tests are introduced, explicit
logic should exist within the testing CI to make sure only applicable tests are executed against
stable and master. While this is not in place currently, it should be considered in the long term.
stable and main. While this is not in place currently, it should be considered in the long term.
## Release Management
@@ -121,7 +122,7 @@ stable and master. While this is not in place currently, it should be considered
Releases are made every three weeks, which include a GitHub release as
well as binary packages. These patch releases are made for both stable branches, and a "release candidate"
for the next `MAJOR` or `MINOR` is created from master. If there are no changes across all the repositories, no
for the next `MAJOR` or `MINOR` is created from main. If there are no changes across all the repositories, no
release is created and an announcement is made on the developer mailing list to highlight this.
If a release is being made, each repository is tagged for this release, regardless
of whether changes are introduced. The release schedule can be seen on the
@@ -142,10 +143,10 @@ maturity, we have increased the cadence from six weeks to twelve weeks. The rele
### Compatibility
Kata guarantees compatibility between components that are within one minor release of each other.
This is critical for dependencies which cross between host (runtime, shim, proxy) and
This is critical for dependencies which cross between host (shimv2 runtime) and
the guest (hypervisor, rootfs and agent). For example, consider a cluster with a long-running
deployment, workload-never-dies, all on Kata version 1.1.3 components. If the operator updates
the Kata components to the next new minor release (i.e. 1.2.0), we need to guarantee that the 1.2.0
runtime still communicates with 1.1.3 agent within workload-never-dies.
deployment, workload-never-dies, all on Kata version 2.1.3 components. If the operator updates
the Kata components to the next new minor release (i.e. 2.2.0), we need to guarantee that the 2.2.0
shimv2 runtime still communicates with 2.1.3 agent within workload-never-dies.
Handling live-update is out of the scope of this document. See this [`kata-runtime` issue](https://github.com/kata-containers/runtime/issues/492) for details.

View File

@@ -37,3 +37,4 @@
- [How to use Kata Containers with `virtio-mem`](how-to-use-virtio-mem-with-kata.md)
- [How to set sandbox Kata Containers configurations with pod annotations](how-to-set-sandbox-config-kata.md)
- [How to monitor Kata Containers in K8s](how-to-set-prometheus-in-k8s.md)
- [How to use hotplug memory on arm64 in Kata Containers](how-to-hotplug-memory-arm64.md)

View File

@@ -0,0 +1,32 @@
# How to use memory hotplug feature in Kata Containers on arm64
- [Introduction](#introduction)
- [Install UEFI ROM](#install-uefi-rom)
- [Run for test](#run-for-test)
## Introduction
Memory hotplug is a key feature for containers to allocate memory dynamically in deployment.
As Kata Container bases on VM, this feature needs support both from VMM and guest kernel. Luckily, it has been fully supported for the current default version of QEMU and guest kernel used by Kata on arm64. For other VMMs, e.g, Cloud Hypervisor, the enablement work is on the road. Apart from VMM and guest kernel, memory hotplug also depends on ACPI which depends on firmware either. On x86, you can boot a VM using QEMU with ACPI enabled directly, because it boots up with firmware implicitly. For arm64, however, you need specify firmware explicitly. That is to say, if you are ready to run a normal Kata Container on arm64, what you need extra to do is to install the UEFI ROM before use the memory hotplug feature.
## Install UEFI ROM
We have offered a helper script for you to install the UEFI ROM. If you have installed Kata normally on your host, you just need to run the script as fellows:
```bash
$ pushd $GOPATH/src/github.com/kata-containers/tests
$ sudo .ci/aarch64/install_rom_aarch64.sh
$ popd
```
## Run for test
Let's test if the memory hotplug is ready for Kata after install the UEFI ROM. Make sure containerd is ready to run Kata before test.
```bash
$ sudo ctr image pull docker.io/library/ubuntu:latest
$ sudo ctr run --runtime io.containerd.run.kata.v2 -t --rm docker.io/library/ubuntu:latest hello sh -c "free -h"
$ sudo ctr run --runtime io.containerd.run.kata.v2 -t --memory-limit 536870912 --rm docker.io/library/ubuntu:latest hello sh -c "free -h"
```
Compare the results between the two tests. If the latter is 0.5G larger than the former, you have done what you want, and congratulation!

View File

@@ -26,6 +26,7 @@ There are several kinds of Kata configurations and they are listed below.
| `io.katacontainers.config.runtime.disable_new_netns` | `boolean` | determines if a new netns is created for the hypervisor process |
| `io.katacontainers.config.runtime.internetworking_model` | string| determines how the VM should be connected to the container network interface. Valid values are `macvtap`, `tcfilter` and `none` |
| `io.katacontainers.config.runtime.sandbox_cgroup_only`| `boolean` | determines if Kata processes are managed only in sandbox cgroup |
| `io.katacontainers.config.runtime.enable_pprof` | `boolean` | enables Golang `pprof` for `containerd-shim-kata-v2` process |
## Agent Options
| Key | Value Type | Comments |
@@ -60,7 +61,7 @@ There are several kinds of Kata configurations and they are listed below.
| `io.katacontainers.config.hypervisor.enable_swap` | `boolean` | enable swap of VM memory |
| `io.katacontainers.config.hypervisor.enable_vhost_user_store` | `boolean` | enable vhost-user storage device (QEMU) |
| `io.katacontainers.config.hypervisor.enable_virtio_mem` | `boolean` | enable virtio-mem (QEMU) |
| `io.katacontainers.config.hypervisor.entropy_source` | string| the path to a host source of entropy (`/dev/random`, `/dev/urandom` or real hardware RNG device) |
| `io.katacontainers.config.hypervisor.entropy_source` (R) | string| the path to a host source of entropy (`/dev/random`, `/dev/urandom` or real hardware RNG device) |
| `io.katacontainers.config.hypervisor.file_mem_backend` (R) | string | file based memory backend root directory |
| `io.katacontainers.config.hypervisor.firmware_hash` | string | container firmware SHA-512 hash value |
| `io.katacontainers.config.hypervisor.firmware` | string | the guest firmware that will run the container VM |
@@ -95,6 +96,8 @@ There are several kinds of Kata configurations and they are listed below.
In case of CRI-O, all annotations specified in the pod spec are passed down to Kata.
# containerd Configuration
For containerd, annotations specified in the pod spec are passed down to Kata
starting with version `1.3.0` of containerd. Additionally, extra configuration is
needed for containerd, by providing a `pod_annotations` field in the containerd config
@@ -107,11 +110,9 @@ for passing annotations to Kata from containerd:
$ cat /etc/containerd/config
....
[plugins.cri.containerd.runtimes.kata]
runtime_type = "io.containerd.runc.v1"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.kata]
runtime_type = "io.containerd.kata.v2"
pod_annotations = ["io.katacontainers.*"]
[plugins.cri.containerd.runtimes.kata.options]
BinaryName = "/usr/bin/kata-runtime"
....
```
@@ -197,6 +198,7 @@ the configuration entry:
| Key | Config file entry | Comments |
|-------| ----- | ----- |
| `ctlpath` | `valid_ctlpaths` | Valid paths for `acrnctl` binary |
| `entropy_source` | `valid_entropy_sources` | Valid entropy sources, e.g. `/dev/random` |
| `file_mem_backend` | `valid_file_mem_backends` | Valid locations for the file-based memory backend root directory |
| `jailer_path` | `valid_jailer_paths`| Valid paths for the jailer constraining the container VM (Firecracker) |
| `path` | `valid_hypervisor_paths` | Valid hypervisors to run the container VM |

View File

@@ -7,9 +7,10 @@
* [Configure Kubelet to use containerd](#configure-kubelet-to-use-containerd)
* [Configure HTTP proxy - OPTIONAL](#configure-http-proxy---optional)
* [Start Kubernetes](#start-kubernetes)
* [Install a Pod Network](#install-a-pod-network)
* [Configure Pod Network](#configure-pod-network)
* [Allow pods to run in the master node](#allow-pods-to-run-in-the-master-node)
* [Create an untrusted pod using Kata Containers](#create-an-untrusted-pod-using-kata-containers)
* [Create runtime class for Kata Containers](#create-runtime-class-for-kata-containers)
* [Run pod in Kata Containers](#run-pod-in-kata-containers)
* [Delete created pod](#delete-created-pod)
This document describes how to set up a single-machine Kubernetes (k8s) cluster.
@@ -18,9 +19,6 @@ The Kubernetes cluster will use the
[CRI containerd plugin](https://github.com/containerd/cri) and
[Kata Containers](https://katacontainers.io) to launch untrusted workloads.
For Kata Containers 1.5.0-rc2 and above, we will use `containerd-shim-kata-v2` (short as `shimv2` in this documentation)
to launch Kata Containers. For the previous version of Kata Containers, the Pods are launched with `kata-runtime`.
## Requirements
- Kubernetes, Kubelet, `kubeadm`
@@ -125,43 +123,33 @@ $ sudo systemctl daemon-reload
$ sudo -E kubectl get pods
```
## Install a Pod Network
## Configure Pod Network
A pod network plugin is needed to allow pods to communicate with each other.
You can find more about CNI plugins from the [Creating a cluster with `kubeadm`](https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/#instructions) guide.
- Install the `flannel` plugin by following the
[Using `kubeadm` to Create a Cluster](https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/#instructions)
guide, starting from the **Installing a pod network** section.
- Create a pod network using flannel
> **Note:** There is no known way to determine programmatically the best version (commit) to use.
> See https://github.com/coreos/flannel/issues/995.
By default the CNI plugin binaries is installed under `/opt/cni/bin` (in package `kubernetes-cni`), you only need to create a configuration file for CNI plugin.
```bash
$ sudo -E kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
```
$ sudo -E mkdir -p /etc/cni/net.d
- Wait for the pod network to become available
```bash
# number of seconds to wait for pod network to become available
$ timeout_dns=420
$ while [ "$timeout_dns" -gt 0 ]; do
if sudo -E kubectl get pods --all-namespaces | grep dns | grep Running; then
break
fi
sleep 1s
((timeout_dns--))
done
```
- Check the pod network is running
```bash
$ sudo -E kubectl get pods --all-namespaces | grep dns | grep Running && echo "OK" || ( echo "FAIL" && false )
$ sudo -E cat > /etc/cni/net.d/10-mynet.conf <<EOF
{
"cniVersion": "0.2.0",
"name": "mynet",
"type": "bridge",
"bridge": "cni0",
"isGateway": true,
"ipMasq": true,
"ipam": {
"type": "host-local",
"subnet": "172.19.0.0/24",
"routes": [
{ "dst": "0.0.0.0/0" }
]
}
}
EOF
```
## Allow pods to run in the master node
@@ -172,24 +160,38 @@ By default, the cluster will not schedule pods in the master node. To enable mas
$ sudo -E kubectl taint nodes --all node-role.kubernetes.io/master-
```
## Create an untrusted pod using Kata Containers
## Create runtime class for Kata Containers
By default, all pods are created with the default runtime configured in CRI containerd plugin.
From Kubernetes v1.12, users can use [`RuntimeClass`](https://kubernetes.io/docs/concepts/containers/runtime-class/#runtime-class) to specify a different runtime for Pods.
If a pod has the `io.kubernetes.cri.untrusted-workload` annotation set to `"true"`, the CRI plugin runs the pod with the
```bash
$ cat > runtime.yaml <<EOF
apiVersion: node.k8s.io/v1beta1
kind: RuntimeClass
metadata:
name: kata
handler: kata
EOF
$ sudo -E kubectl apply -f runtime.yaml
```
## Run pod in Kata Containers
If a pod has the `runtimeClassName` set to `kata`, the CRI plugin runs the pod with the
[Kata Containers runtime](../../src/runtime/README.md).
- Create an untrusted pod configuration
- Create an pod configuration that using Kata Containers runtime
```bash
$ cat << EOT | tee nginx-untrusted.yaml
$ cat << EOT | tee nginx-kata.yaml
apiVersion: v1
kind: Pod
metadata:
name: nginx-untrusted
annotations:
io.kubernetes.cri.untrusted-workload: "true"
name: nginx-kata
spec:
runtimeClassName: kata
containers:
- name: nginx
image: nginx
@@ -197,9 +199,9 @@ If a pod has the `io.kubernetes.cri.untrusted-workload` annotation set to `"true
EOT
```
- Create an untrusted pod
- Create the pod
```bash
$ sudo -E kubectl apply -f nginx-untrusted.yaml
$ sudo -E kubectl apply -f nginx-kata.yaml
```
- Check pod is running
@@ -216,5 +218,5 @@ If a pod has the `io.kubernetes.cri.untrusted-workload` annotation set to `"true
## Delete created pod
```bash
$ sudo -E kubectl delete -f nginx-untrusted.yaml
$ sudo -E kubectl delete -f nginx-kata.yaml
```

View File

@@ -13,26 +13,23 @@ Kata Containers with `virtio-mem` supports memory resize.
## Requisites
Kata Containers with `virtio-mem` requires Linux and the QEMU that support `virtio-mem`.
The Linux kernel and QEMU upstream version still not support `virtio-mem`. @davidhildenbrand is working on them.
Please use following unofficial version of the Linux kernel and QEMU that support `virtio-mem` with Kata Containers.
Kata Containers just supports `virtio-mem` with QEMU.
Install and setup Kata Containers as shown [here](../install/README.md).
The Linux kernel is at https://github.com/davidhildenbrand/linux/tree/virtio-mem-rfc-v4.
The Linux kernel config that can work with Kata Containers is at https://gist.github.com/teawater/016194ee84748c768745a163d08b0fb9.
The QEMU is at https://github.com/teawater/qemu/tree/kata-virtio-mem. (The original source is at https://github.com/davidhildenbrand/qemu/tree/virtio-mem. Its base version of QEMU cannot work with Kata Containers. So merge the commit of `virtio-mem` to upstream QEMU.)
Set Linux and the QEMU that support `virtio-mem` with following line in the Kata Containers QEMU configuration `configuration-qemu.toml`:
```toml
[hypervisor.qemu]
path = "qemu-dir"
kernel = "vmlinux-dir"
### With x86_64
The `virtio-mem` config of the x86_64 Kata Linux kernel is open.
Enable `virtio-mem` as follows:
```
$ sudo sed -i -e 's/^#enable_virtio_mem.*$/enable_virtio_mem = true/g' /etc/kata-containers/configuration.toml
```
Enable `virtio-mem` with following line in the Kata Containers configuration:
```toml
enable_virtio_mem = true
### With other architectures
The `virtio-mem` config of the others Kata Linux kernel is not open.
You can open `virtio-mem` config as follows:
```
CONFIG_VIRTIO_MEM=y
```
Then you can build and install the guest kernel image as shown [here](../../tools/packaging/kernel/README.md#build-kata-containers-kernel).
## Run a Kata Container utilizing `virtio-mem`
@@ -41,13 +38,35 @@ Use following command to enable memory overcommitment of a Linux kernel. Becaus
$ echo 1 | sudo tee /proc/sys/vm/overcommit_memory
```
Use following command start a Kata Container.
Use following command to start a Kata Container.
```
$ docker run --rm -it --runtime=kata --name test busybox
$ pod_yaml=pod.yaml
$ container_yaml=${REPORT_DIR}/container.yaml
$ image="quay.io/prometheus/busybox:latest"
$ cat << EOF > "${pod_yaml}"
metadata:
name: busybox-sandbox1
EOF
$ cat << EOF > "${container_yaml}"
metadata:
name: busybox-killed-vmm
image:
image: "$image"
command:
- top
EOF
$ sudo crictl pull $image
$ podid=$(sudo crictl runp $pod_yaml)
$ cid=$(sudo crictl create $podid $container_yaml $pod_yaml)
$ sudo crictl start $cid
```
Use following command set the memory size of test to default_memory + 512m.
Use the following command to set the container memory limit to 2g and the memory size of the VM to its default_memory + 2g.
```
$ docker update -m 512m --memory-swap -1 test
$ sudo crictl update --memory $((2*1024*1024*1024)) $cid
```
Use the following command to set the container memory limit to 1g and the memory size of the VM to its default_memory + 1g.
```
$ sudo crictl update --memory $((1*1024*1024*1024)) $cid
```

View File

@@ -171,10 +171,10 @@ $ sudo systemctl daemon-reload
$ sudo systemctl restart kubelet
# If using CRI-O
$ sudo kubeadm init --skip-preflight-checks --cri-socket /var/run/crio/crio.sock --pod-network-cidr=10.244.0.0/16
$ sudo kubeadm init --ignore-preflight-errors=all --cri-socket /var/run/crio/crio.sock --pod-network-cidr=10.244.0.0/16
# If using CRI-containerd
$ sudo kubeadm init --skip-preflight-checks --cri-socket /run/containerd/containerd.sock --pod-network-cidr=10.244.0.0/16
$ sudo kubeadm init --ignore-preflight-errors=all --cri-socket /run/containerd/containerd.sock --pod-network-cidr=10.244.0.0/16
$ export KUBECONFIG=/etc/kubernetes/admin.conf
```

View File

@@ -50,7 +50,7 @@ Kata packages are provided by official distribution repositories for:
| Distribution (link to installation guide) | Minimum versions |
|----------------------------------------------------------|--------------------------------------------------------------------------------|
| [CentOS](centos-installation-guide.md) | 8 |
| [Fedora](fedora-installation-guide.md) | 32, Rawhide |
| [Fedora](fedora-installation-guide.md) | 34 |
> **Note::**
>

View File

@@ -3,15 +3,9 @@
1. Install the Kata Containers components with the following commands:
```bash
$ sudo -E dnf install -y centos-release-advanced-virtualization
$ sudo -E dnf module disable -y virt:rhel
$ source /etc/os-release
$ cat <<EOF | sudo -E tee /etc/yum.repos.d/advanced-virt.repo
[advanced-virt]
name=Advanced Virtualization
baseurl=http://mirror.centos.org/\$contentdir/\$releasever/virt/\$basearch/advanced-virtualization
enabled=1
gpgcheck=1
skip_if_unavailable=1
EOF
$ cat <<EOF | sudo -E tee /etc/yum.repos.d/kata-containers.repo
[kata-containers]
name=Kata Containers
@@ -20,8 +14,7 @@
gpgcheck=1
skip_if_unavailable=1
EOF
$ sudo -E dnf module disable -y virt:rhel
$ sudo -E dnf install -y kata-runtime
$ sudo -E dnf install -y kata-containers
```
2. Decide which container manager to use and select the corresponding link that follows:

View File

@@ -3,7 +3,7 @@
1. Install the Kata Containers components with the following commands:
```bash
$ sudo -E dnf -y install kata-runtime
$ sudo -E dnf -y install kata-containers
```
2. Decide which container manager to use and select the corresponding link that follows:

View File

@@ -2,9 +2,6 @@
* [Install Kata Containers](#install-kata-containers)
* [Configure Kata Containers](#configure-kata-containers)
* [Integration with non-compatible shim v2 Container Engines](#integration-with-non-compatible-shim-v2-container-engines)
* [Integration with Docker](#integration-with-docker)
* [Integration with Podman](#integration-with-podman)
* [Integration with shim v2 Container Engines](#integration-with-shim-v2-container-engines)
* [Remove Kata Containers snap package](#remove-kata-containers-snap-package)
@@ -14,23 +11,10 @@
Kata Containers can be installed in any Linux distribution that supports
[snapd](https://docs.snapcraft.io/installing-snapd).
> NOTE: From Kata Containers 2.x, only the [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2)
> is supported, note that some container engines (`docker`, `podman`, etc) may not
> be able to run Kata Containers 2.x.
Kata Containers 1.x is released through the *stable* channel while Kata Containers
2.x is available in the *candidate* channel.
Run the following command to install **Kata Containers 1.x**:
Run the following command to install **Kata Containers**:
```sh
$ sudo snap install kata-containers --classic
```
Run the following command to install **Kata Containers 2.x**:
```sh
$ sudo snap install kata-containers --candidate --classic
$ sudo snap install kata-containers --stable --classic
```
## Configure Kata Containers
@@ -46,55 +30,6 @@ $ sudo cp /snap/kata-containers/current/usr/share/defaults/kata-containers/confi
$ $EDITOR /etc/kata-containers/configuration.toml
```
## Integration with non-compatible shim v2 Container Engines
At the time of writing this document, `docker` and `podman` **do not support Kata
Containers 2.x, therefore Kata Containers 1.x must be used instead.**
The path to the runtime provided by the Kata Containers 1.x snap package is
`/snap/bin/kata-containers.runtime`, it should be used to run Kata Containers 1.x.
### Integration with Docker
`/etc/docker/daemon.json` is the configuration file for `docker`, use the
following configuration to add a new runtime (`kata`) to `docker`.
```json
{
"runtimes": {
"kata": {
"path": "/snap/bin/kata-containers.runtime"
}
}
}
```
Once the above configuration has been applied, use the
following commands to restart `docker` and run Kata Containers 1.x.
```sh
$ sudo systemctl restart docker
$ docker run -ti --runtime kata busybox sh
```
### Integration with Podman
`/usr/share/containers/containers.conf` is the configuration file for `podman`,
add the following configuration in the `[engine.runtimes]` section.
```toml
kata = [
"/snap/bin/kata-containers.runtime"
]
```
Once the above configuration has been applied, use the following command to run
Kata Containers 1.x with `podman`
```sh
$ sudo podman run -ti --runtime kata docker.io/library/busybox sh
```
## Integration with shim v2 Container Engines
The Container engine daemon (`cri-o`, `containerd`, etc) needs to be able to find the

View File

@@ -1,15 +0,0 @@
# Install Kata Containers on Ubuntu
1. Install the Kata Containers components with the following commands:
```bash
$ ARCH=$(arch)
$ BRANCH="${BRANCH:-master}"
$ sudo sh -c "echo 'deb http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/xUbuntu_$(lsb_release -rs)/ /' > /etc/apt/sources.list.d/kata-containers.list"
$ curl -sL http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/xUbuntu_$(lsb_release -rs)/Release.key | sudo apt-key add -
$ sudo -E apt-get update
$ sudo -E apt-get -y install kata-runtime kata-proxy kata-shim
```
2. Decide which container manager to use and select the corresponding link that follows:
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)

View File

@@ -10,9 +10,6 @@ Currently, the instructions are based on the following links:
- https://docs.openstack.org/zun/latest/admin/clear-containers.html
- ../install/ubuntu-installation-guide.md
## Install Git to use with DevStack
```sh
@@ -54,7 +51,7 @@ $ zun delete test
## Install Kata Containers
Follow [these instructions](../install/ubuntu-installation-guide.md)
Follow [these instructions](../install/README.md)
to install the Kata Containers components.
## Update Docker with new Kata Containers runtime

901
src/agent/Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -13,8 +13,8 @@ lazy_static = "1.3.0"
ttrpc = { version = "0.5.0", features = ["async", "protobuf-codec"], default-features = false }
protobuf = "=2.14.0"
libc = "0.2.58"
nix = "0.17.0"
prctl = "1.0.0"
nix = "0.21.0"
capctl = "0.2.0"
serde_json = "1.0.39"
scan_fmt = "0.2.3"
scopeguard = "1.0.0"
@@ -24,8 +24,11 @@ async-trait = "0.1.42"
tokio = { version = "1.2.0", features = ["rt", "rt-multi-thread", "sync", "macros", "io-util", "time", "signal", "io-std", "process", "fs"] }
futures = "0.3.12"
netlink-sys = { version = "0.6.0", features = ["tokio_socket",]}
tokio-vsock = "0.3.0"
rtnetlink = "0.7.0"
tokio-vsock = "0.3.1"
# Because the author has no time to maintain the crate, we switch the dependency to github,
# Once the new version released on crates.io, we switch it back.
# https://github.com/little-dude/netlink/issues/161
rtnetlink = { git = "https://github.com/little-dude/netlink", rev = "a9367bc4700496ddebc088110c28f40962923326" }
netlink-packet-utils = "0.4.0"
ipnetwork = "0.17.0"
@@ -40,12 +43,20 @@ slog-scope = "4.1.2"
slog-stdlog = "4.0.0"
log = "0.4.11"
# for testing
tempfile = "3.1.0"
prometheus = { version = "0.9.0", features = ["process"] }
procfs = "0.7.9"
anyhow = "1.0.32"
cgroups = { package = "cgroups-rs", version = "0.2.2" }
cgroups = { package = "cgroups-rs", version = "0.2.5" }
# Tracing
tracing = "0.1.26"
tracing-subscriber = "0.2.18"
tracing-opentelemetry = "0.13.0"
opentelemetry = "0.14.0"
vsock-exporter = { path = "vsock-exporter" }
[dev-dependencies]
tempfile = "3.1.0"
[workspace]
members = [

View File

@@ -1 +0,0 @@
2.0.0

1
src/agent/VERSION Symbolic link
View File

@@ -0,0 +1 @@
../../VERSION

View File

@@ -15,7 +15,7 @@ Wants=kata-containers.target
StandardOutput=tty
Type=simple
ExecStart=@BINDIR@/@AGENT_NAME@
LimitNOFILE=infinity
LimitNOFILE=1048576
# ExecStop is required for static agent tracing; in all other scenarios
# the runtime handles shutting down the VM.
ExecStop=/bin/sync ; /usr/bin/systemctl --force poweroff

View File

@@ -8,7 +8,7 @@ extern crate serde;
extern crate serde_derive;
extern crate serde_json;
use libc::mode_t;
use libc::{self, mode_t};
use std::collections::HashMap;
mod serialize;
@@ -27,6 +27,10 @@ where
*d == T::default()
}
fn default_seccomp_errno() -> u32 {
libc::EPERM as u32
}
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
pub struct Spec {
#[serde(
@@ -54,7 +58,7 @@ pub struct Spec {
#[serde(skip_serializing_if = "Option::is_none")]
pub windows: Option<Windows<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub vm: Option<VM>,
pub vm: Option<Vm>,
}
impl Spec {
@@ -67,7 +71,7 @@ impl Spec {
}
}
pub type LinuxRlimit = POSIXRlimit;
pub type LinuxRlimit = PosixRlimit;
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
pub struct Process {
@@ -89,7 +93,7 @@ pub struct Process {
#[serde(default, skip_serializing_if = "Option::is_none")]
pub capabilities: Option<LinuxCapabilities>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub rlimits: Vec<POSIXRlimit>,
pub rlimits: Vec<PosixRlimit>,
#[serde(default, rename = "noNewPrivileges")]
pub no_new_privileges: bool,
#[serde(
@@ -195,9 +199,9 @@ pub struct Hooks {
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
pub struct Linux {
#[serde(default, rename = "uidMappings", skip_serializing_if = "Vec::is_empty")]
pub uid_mappings: Vec<LinuxIDMapping>,
pub uid_mappings: Vec<LinuxIdMapping>,
#[serde(default, rename = "gidMappings", skip_serializing_if = "Vec::is_empty")]
pub gid_mappings: Vec<LinuxIDMapping>,
pub gid_mappings: Vec<LinuxIdMapping>,
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
pub sysctl: HashMap<String, String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
@@ -257,7 +261,7 @@ pub const UTSNAMESPACE: &str = "uts";
pub const CGROUPNAMESPACE: &str = "cgroup";
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
pub struct LinuxIDMapping {
pub struct LinuxIdMapping {
#[serde(default, rename = "containerID")]
pub container_id: u32,
#[serde(default, rename = "hostID")]
@@ -267,7 +271,7 @@ pub struct LinuxIDMapping {
}
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
pub struct POSIXRlimit {
pub struct PosixRlimit {
#[serde(default)]
pub r#type: String,
#[serde(default)]
@@ -293,7 +297,7 @@ pub struct LinuxInterfacePriority {
}
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
pub struct LinuxBlockIODevice {
pub struct LinuxBlockIoDevice {
#[serde(default)]
pub major: i64,
#[serde(default)]
@@ -303,7 +307,7 @@ pub struct LinuxBlockIODevice {
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
pub struct LinuxWeightDevice {
#[serde(flatten)]
pub blk: LinuxBlockIODevice,
pub blk: LinuxBlockIoDevice,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub weight: Option<u16>,
#[serde(
@@ -317,13 +321,13 @@ pub struct LinuxWeightDevice {
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
pub struct LinuxThrottleDevice {
#[serde(flatten)]
pub blk: LinuxBlockIODevice,
pub blk: LinuxBlockIoDevice,
#[serde(default)]
pub rate: u64,
}
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
pub struct LinuxBlockIO {
pub struct LinuxBlockIo {
#[serde(default, skip_serializing_if = "Option::is_none")]
pub weight: Option<u16>,
#[serde(
@@ -387,7 +391,7 @@ pub struct LinuxMemory {
}
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
pub struct LinuxCPU {
pub struct LinuxCpu {
#[serde(default, skip_serializing_if = "Option::is_none")]
pub shares: Option<u64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
@@ -449,11 +453,11 @@ pub struct LinuxResources {
#[serde(default, skip_serializing_if = "Option::is_none")]
pub memory: Option<LinuxMemory>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub cpu: Option<LinuxCPU>,
pub cpu: Option<LinuxCpu>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub pids: Option<LinuxPids>,
#[serde(skip_serializing_if = "Option::is_none", rename = "blockIO")]
pub block_io: Option<LinuxBlockIO>,
pub block_io: Option<LinuxBlockIo>,
#[serde(
default,
skip_serializing_if = "Vec::is_empty",
@@ -513,7 +517,7 @@ pub struct Solaris {
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub anet: Vec<SolarisAnet>,
#[serde(default, skip_serializing_if = "Option::is_none", rename = "cappedCPU")]
pub capped_cpu: Option<SolarisCappedCPU>,
pub capped_cpu: Option<SolarisCappedCpu>,
#[serde(
default,
skip_serializing_if = "Option::is_none",
@@ -523,7 +527,7 @@ pub struct Solaris {
}
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
pub struct SolarisCappedCPU {
pub struct SolarisCappedCpu {
#[serde(default, skip_serializing_if = "String::is_empty")]
pub ncpus: String,
}
@@ -601,7 +605,7 @@ pub struct WindowsResources {
#[serde(default, skip_serializing_if = "Option::is_none")]
pub memory: Option<WindowsMemoryResources>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub cpu: Option<WindowsCPUResources>,
pub cpu: Option<WindowsCpuResources>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub storage: Option<WindowsStorageResources>,
}
@@ -613,7 +617,7 @@ pub struct WindowsMemoryResources {
}
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
pub struct WindowsCPUResources {
pub struct WindowsCpuResources {
#[serde(default, skip_serializing_if = "Option::is_none")]
pub count: Option<u64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
@@ -671,14 +675,14 @@ pub struct WindowsHyperV {
}
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
pub struct VM {
pub hypervisor: VMHypervisor,
pub kernel: VMKernel,
pub image: VMImage,
pub struct Vm {
pub hypervisor: VmHypervisor,
pub kernel: VmKernel,
pub image: VmImage,
}
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
pub struct VMHypervisor {
pub struct VmHypervisor {
#[serde(default)]
pub path: String,
#[serde(default, skip_serializing_if = "String::is_empty")]
@@ -686,7 +690,7 @@ pub struct VMHypervisor {
}
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
pub struct VMKernel {
pub struct VmKernel {
#[serde(default)]
pub path: String,
#[serde(default, skip_serializing_if = "String::is_empty")]
@@ -696,7 +700,7 @@ pub struct VMKernel {
}
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
pub struct VMImage {
pub struct VmImage {
#[serde(default)]
pub path: String,
#[serde(default)]
@@ -710,6 +714,8 @@ pub struct LinuxSeccomp {
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub architectures: Vec<Arch>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub flags: Vec<LinuxSeccompFlag>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub syscalls: Vec<LinuxSyscall>,
}
@@ -733,14 +739,20 @@ pub const ARCHS390: &str = "SCMP_ARCH_S390";
pub const ARCHS390X: &str = "SCMP_ARCH_S390X";
pub const ARCHPARISC: &str = "SCMP_ARCH_PARISC";
pub const ARCHPARISC64: &str = "SCMP_ARCH_PARISC64";
pub const ARCHRISCV64: &str = "SCMP_ARCH_RISCV64";
pub type LinuxSeccompFlag = String;
pub type LinuxSeccompAction = String;
pub const ACTKILL: &str = "SCMP_ACT_KILL";
pub const ACTKILLPROCESS: &str = "SCMP_ACT_KILL_PROCESS";
pub const ACTKILLTHREAD: &str = "SCMP_ACT_KILL_THREAD";
pub const ACTTRAP: &str = "SCMP_ACT_TRAP";
pub const ACTERRNO: &str = "SCMP_ACT_ERRNO";
pub const ACTTRACE: &str = "SCMP_ACT_TRACE";
pub const ACTALLOW: &str = "SCMP_ACT_ALLOW";
pub const ACTLOG: &str = "SCMP_ACT_LOG";
pub type LinuxSeccompOperator = String;
@@ -770,6 +782,8 @@ pub struct LinuxSyscall {
pub names: Vec<String>,
#[serde(default, skip_serializing_if = "String::is_empty")]
pub action: LinuxSeccompAction,
#[serde(default = "default_seccomp_errno", rename = "errnoRet")]
pub errno_ret: u32,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub args: Vec<LinuxSeccompArg>,
}
@@ -787,11 +801,11 @@ pub struct LinuxIntelRdt {
#[derive(Debug, Serialize, Deserialize, Copy, Clone, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum ContainerState {
CREATING,
CREATED,
RUNNING,
STOPPED,
PAUSED,
Creating,
Created,
Running,
Stopped,
Paused,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
@@ -832,7 +846,7 @@ mod tests {
let expected = State {
version: "0.2.0".to_string(),
id: "oci-container1".to_string(),
status: ContainerState::RUNNING,
status: ContainerState::Running,
pid: 4422,
bundle: "/containers/redis".to_string(),
annotations: [("myKey".to_string(), "myValue".to_string())]
@@ -1257,12 +1271,12 @@ mod tests {
ambient: vec!["CAP_NET_BIND_SERVICE".to_string()],
}),
rlimits: vec![
crate::POSIXRlimit {
crate::PosixRlimit {
r#type: "RLIMIT_CORE".to_string(),
hard: 1024,
soft: 1024,
},
crate::POSIXRlimit {
crate::PosixRlimit {
r#type: "RLIMIT_NOFILE".to_string(),
hard: 1024,
soft: 1024,
@@ -1394,12 +1408,12 @@ mod tests {
.cloned()
.collect(),
linux: Some(crate::Linux {
uid_mappings: vec![crate::LinuxIDMapping {
uid_mappings: vec![crate::LinuxIdMapping {
container_id: 0,
host_id: 1000,
size: 32000,
}],
gid_mappings: vec![crate::LinuxIDMapping {
gid_mappings: vec![crate::LinuxIdMapping {
container_id: 0,
host_id: 1000,
size: 32000,
@@ -1444,7 +1458,7 @@ mod tests {
swappiness: Some(0),
disable_oom_killer: Some(false),
}),
cpu: Some(crate::LinuxCPU {
cpu: Some(crate::LinuxCpu {
shares: Some(1024),
quota: Some(1000000),
period: Some(500000),
@@ -1454,17 +1468,17 @@ mod tests {
mems: "0-7".to_string(),
}),
pids: Some(crate::LinuxPids { limit: 32771 }),
block_io: Some(crate::LinuxBlockIO {
block_io: Some(crate::LinuxBlockIo {
weight: Some(10),
leaf_weight: Some(10),
weight_device: vec![
crate::LinuxWeightDevice {
blk: crate::LinuxBlockIODevice { major: 8, minor: 0 },
blk: crate::LinuxBlockIoDevice { major: 8, minor: 0 },
weight: Some(500),
leaf_weight: Some(300),
},
crate::LinuxWeightDevice {
blk: crate::LinuxBlockIODevice {
blk: crate::LinuxBlockIoDevice {
major: 8,
minor: 16,
},
@@ -1473,13 +1487,13 @@ mod tests {
},
],
throttle_read_bps_device: vec![crate::LinuxThrottleDevice {
blk: crate::LinuxBlockIODevice { major: 8, minor: 0 },
blk: crate::LinuxBlockIoDevice { major: 8, minor: 0 },
rate: 600,
}],
throttle_write_bps_device: vec![],
throttle_read_iops_device: vec![],
throttle_write_iops_device: vec![crate::LinuxThrottleDevice {
blk: crate::LinuxBlockIODevice {
blk: crate::LinuxBlockIoDevice {
major: 8,
minor: 16,
},
@@ -1565,9 +1579,11 @@ mod tests {
seccomp: Some(crate::LinuxSeccomp {
default_action: "SCMP_ACT_ALLOW".to_string(),
architectures: vec!["SCMP_ARCH_X86".to_string(), "SCMP_ARCH_X32".to_string()],
flags: vec![],
syscalls: vec![crate::LinuxSyscall {
names: vec!["getcwd".to_string(), "chmod".to_string()],
action: "SCMP_ACT_ERRNO".to_string(),
errno_ret: crate::default_seccomp_errno(),
args: vec![],
}],
}),

View File

@@ -65,7 +65,7 @@ $GOPATH/src/github.com/kata-containers/kata-containers/src/agent/protocols/proto
}
if [ "$(basename $(pwd))" != "agent" ]; then
die "Please go to directory of protocols before execute this shell"
die "Please go to root directory of agent before execute this shell"
fi
# Protocol buffer files required to generate golang/rust bindings.

View File

@@ -32,7 +32,6 @@ service AgentService {
rpc ExecProcess(ExecProcessRequest) returns (google.protobuf.Empty);
rpc SignalProcess(SignalProcessRequest) returns (google.protobuf.Empty);
rpc WaitProcess(WaitProcessRequest) returns (WaitProcessResponse); // wait & reap like waitpid(2)
rpc ListProcesses(ListProcessesRequest) returns (ListProcessesResponse);
rpc UpdateContainer(UpdateContainerRequest) returns (google.protobuf.Empty);
rpc StatsContainer(StatsContainerRequest) returns (StatsContainerResponse);
rpc PauseContainer(PauseContainerRequest) returns (google.protobuf.Empty);
@@ -126,18 +125,6 @@ message WaitProcessResponse {
int32 status = 1;
}
// ListProcessesRequest contains the options used to list running processes inside the container
message ListProcessesRequest {
string container_id = 1;
string format = 2;
repeated string args = 3;
}
// ListProcessesResponse represents the list of running processes inside the container
message ListProcessesResponse {
bytes process_list = 1;
}
message UpdateContainerRequest {
string container_id = 1;
LinuxResources resources = 2;

View File

@@ -441,7 +441,8 @@ message LinuxInterfacePriority {
message LinuxSeccomp {
string DefaultAction = 1;
repeated string Architectures = 2;
repeated LinuxSyscall Syscalls = 3 [(gogoproto.nullable) = false];
repeated string Flags = 3;
repeated LinuxSyscall Syscalls = 4 [(gogoproto.nullable) = false];
}
message LinuxSeccompArg {
@@ -454,7 +455,10 @@ message LinuxSeccompArg {
message LinuxSyscall {
repeated string Names = 1;
string Action = 2;
repeated LinuxSeccompArg Args = 3 [(gogoproto.nullable) = false];
oneof ErrnoRet {
uint32 errnoret = 3;
}
repeated LinuxSeccompArg Args = 4 [(gogoproto.nullable) = false];
}
message LinuxIntelRdt {

View File

@@ -11,9 +11,9 @@ serde_derive = "1.0.91"
oci = { path = "../oci" }
protocols = { path ="../protocols" }
caps = "0.5.0"
nix = "0.17.0"
nix = "0.21.0"
scopeguard = "1.0.0"
prctl = "1.0.0"
capctl = "0.2.0"
lazy_static = "1.3.0"
libc = "0.2.58"
protobuf = "=2.14.0"
@@ -23,8 +23,7 @@ scan_fmt = "0.2"
regex = "1.1"
path-absolutize = "1.2.0"
anyhow = "1.0.32"
cgroups = { package = "cgroups-rs", version = "0.2.1" }
tempfile = "3.1.0"
cgroups = { package = "cgroups-rs", version = "0.2.5" }
rlimit = "0.5.3"
tokio = { version = "1.2.0", features = ["sync", "io-util", "process", "time", "macros"] }
@@ -34,3 +33,4 @@ inotify = "0.9.2"
[dev-dependencies]
serial_test = "0.5.0"
tempfile = "3.1.0"

View File

@@ -24,7 +24,7 @@ use anyhow::{anyhow, Context, Result};
use libc::{self, pid_t};
use nix::errno::Errno;
use oci::{
LinuxBlockIO, LinuxCPU, LinuxDevice, LinuxDeviceCgroup, LinuxHugepageLimit, LinuxMemory,
LinuxBlockIo, LinuxCpu, LinuxDevice, LinuxDeviceCgroup, LinuxHugepageLimit, LinuxMemory,
LinuxNetwork, LinuxPids, LinuxResources,
};
@@ -272,7 +272,7 @@ fn set_hugepages_resources(
fn set_block_io_resources(
_cg: &cgroups::Cgroup,
blkio: &LinuxBlockIO,
blkio: &LinuxBlockIo,
res: &mut cgroups::Resources,
) {
info!(sl!(), "cgroup manager set block io");
@@ -302,7 +302,7 @@ fn set_block_io_resources(
build_blk_io_device_throttle_resource(&blkio.throttle_write_iops_device);
}
fn set_cpu_resources(cg: &cgroups::Cgroup, cpu: &LinuxCPU) -> Result<()> {
fn set_cpu_resources(cg: &cgroups::Cgroup, cpu: &LinuxCpu) -> Result<()> {
info!(sl!(), "cgroup manager set cpu");
let cpuset_controller: &CpuSetController = cg.controller_of().unwrap();
@@ -349,14 +349,34 @@ fn set_memory_resources(cg: &cgroups::Cgroup, memory: &LinuxMemory, update: bool
mem_controller.set_kmem_limit(-1)?;
}
set_resource!(mem_controller, set_limit, memory, limit);
set_resource!(mem_controller, set_soft_limit, memory, reservation);
set_resource!(mem_controller, set_kmem_limit, memory, kernel);
set_resource!(mem_controller, set_tcp_limit, memory, kernel_tcp);
// If the memory update is set to -1 we should also
// set swap to -1, it means unlimited memory.
let mut swap = memory.swap.unwrap_or(0);
if memory.limit == Some(-1) {
swap = -1;
}
if let Some(swap) = memory.swap {
// set memory swap
let swap = if cg.v2() {
if memory.limit.is_some() && swap != 0 {
let memstat = get_memory_stats(cg)
.into_option()
.ok_or_else(|| anyhow!("failed to get the cgroup memory stats"))?;
let memusage = memstat.get_usage();
// When update memory limit, the kernel would check the current memory limit
// set against the new swap setting, if the current memory limit is large than
// the new swap, then set limit first, otherwise the kernel would complain and
// refused to set; on the other hand, if the current memory limit is smaller than
// the new swap, then we should set the swap first and then set the memor limit.
if swap == -1 || memusage.get_limit() < swap as u64 {
mem_controller.set_memswap_limit(swap)?;
set_resource!(mem_controller, set_limit, memory, limit);
} else {
set_resource!(mem_controller, set_limit, memory, limit);
mem_controller.set_memswap_limit(swap)?;
}
} else {
set_resource!(mem_controller, set_limit, memory, limit);
swap = if cg.v2() {
convert_memory_swap_to_v2_value(swap, memory.limit.unwrap_or(0))?
} else {
swap
@@ -366,6 +386,10 @@ fn set_memory_resources(cg: &cgroups::Cgroup, memory: &LinuxMemory, update: bool
}
}
set_resource!(mem_controller, set_soft_limit, memory, reservation);
set_resource!(mem_controller, set_kmem_limit, memory, kernel);
set_resource!(mem_controller, set_tcp_limit, memory, kernel_tcp);
if let Some(swappiness) = memory.swappiness {
if (0..=100).contains(&swappiness) {
mem_controller.set_swappiness(swappiness as u64)?;
@@ -489,63 +513,61 @@ lazy_static! {
};
pub static ref DEFAULT_ALLOWED_DEVICES: Vec<LinuxDeviceCgroup> = {
let mut v = Vec::new();
vec![
// all mknod to all char devices
LinuxDeviceCgroup {
allow: true,
r#type: "c".to_string(),
major: Some(WILDCARD),
minor: Some(WILDCARD),
access: "m".to_string(),
},
// all mknod to all char devices
v.push(LinuxDeviceCgroup {
allow: true,
r#type: "c".to_string(),
major: Some(WILDCARD),
minor: Some(WILDCARD),
access: "m".to_string(),
});
// all mknod to all block devices
LinuxDeviceCgroup {
allow: true,
r#type: "b".to_string(),
major: Some(WILDCARD),
minor: Some(WILDCARD),
access: "m".to_string(),
},
// all mknod to all block devices
v.push(LinuxDeviceCgroup {
allow: true,
r#type: "b".to_string(),
major: Some(WILDCARD),
minor: Some(WILDCARD),
access: "m".to_string(),
});
// all read/write/mknod to char device /dev/console
LinuxDeviceCgroup {
allow: true,
r#type: "c".to_string(),
major: Some(5),
minor: Some(1),
access: "rwm".to_string(),
},
// all read/write/mknod to char device /dev/console
v.push(LinuxDeviceCgroup {
allow: true,
r#type: "c".to_string(),
major: Some(5),
minor: Some(1),
access: "rwm".to_string(),
});
// all read/write/mknod to char device /dev/pts/<N>
LinuxDeviceCgroup {
allow: true,
r#type: "c".to_string(),
major: Some(136),
minor: Some(WILDCARD),
access: "rwm".to_string(),
},
// all read/write/mknod to char device /dev/pts/<N>
v.push(LinuxDeviceCgroup {
allow: true,
r#type: "c".to_string(),
major: Some(136),
minor: Some(WILDCARD),
access: "rwm".to_string(),
});
// all read/write/mknod to char device /dev/ptmx
LinuxDeviceCgroup {
allow: true,
r#type: "c".to_string(),
major: Some(5),
minor: Some(2),
access: "rwm".to_string(),
},
// all read/write/mknod to char device /dev/ptmx
v.push(LinuxDeviceCgroup {
allow: true,
r#type: "c".to_string(),
major: Some(5),
minor: Some(2),
access: "rwm".to_string(),
});
// all read/write/mknod to char device /dev/net/tun
v.push(LinuxDeviceCgroup {
allow: true,
r#type: "c".to_string(),
major: Some(10),
minor: Some(200),
access: "rwm".to_string(),
});
v
// all read/write/mknod to char device /dev/net/tun
LinuxDeviceCgroup {
allow: true,
r#type: "c".to_string(),
major: Some(10),
minor: Some(200),
access: "rwm".to_string(),
},
]
};
}

View File

@@ -8,7 +8,7 @@ use eventfd::{eventfd, EfdFlags};
use nix::sys::eventfd;
use std::fs::{self, File};
use std::os::unix::io::{AsRawFd, FromRawFd};
use std::path::{Path, PathBuf};
use std::path::Path;
use crate::pipestream::PipeStream;
use futures::StreamExt as _;
@@ -35,7 +35,7 @@ pub async fn notify_oom(cid: &str, cg_dir: String) -> Result<Receiver<String>> {
// Flat keyed file format:
// KEY0 VAL0\n
// KEY1 VAL1\n
fn get_value_from_cgroup(path: &PathBuf, key: &str) -> Result<i64> {
fn get_value_from_cgroup(path: &Path, key: &str) -> Result<i64> {
let content = fs::read_to_string(path)?;
info!(
sl!(),
@@ -117,12 +117,12 @@ async fn register_memory_event_v2(
return;
}
}
}
// When a cgroup is destroyed, an event is sent to eventfd.
// So if the control path is gone, return instead of notifying.
if !Path::new(&event_control_path).exists() {
return;
// When a cgroup is destroyed, an event is sent to eventfd.
// So if the control path is gone, return instead of notifying.
if !Path::new(&event_control_path).exists() {
return;
}
}
});

View File

@@ -1,56 +0,0 @@
// Copyright (c) 2019 Ant Financial
//
// SPDX-License-Identifier: Apache-2.0
//
use libc::*;
use serde;
#[macro_use]
use serde_derive;
use serde_json;
#[derive(Serialize, Deserialize, Debug)]
pub struct Device {
#[serde(default)]
r#type: char,
#[serde(default)]
path: String,
#[serde(default)]
major: i64,
#[serde(default)]
minor: i64,
#[serde(default)]
permissions: String,
#[serde(default)]
file_mode: mode_t,
#[serde(default)]
uid: i32,
#[serde(default)]
gid: i32,
#[serde(default)]
allow: bool,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct BlockIODevice {
#[serde(default)]
major: i64,
#[serde(default)]
minor: i64,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct WeightDevice {
block: BlockIODevice,
#[serde(default)]
weight: u16,
#[serde(default, rename = "leafWeight")]
leaf_weight: u16,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct ThrottleDevice {
block: BlockIODevice,
#[serde(default)]
rate: u64,
}

View File

@@ -1,368 +0,0 @@
// Copyright (c) 2019 Ant Financial
//
// SPDX-License-Identifier: Apache-2.0
//
use serde;
#[macro_use]
use serde_derive;
use serde_json;
use protocols::oci::State as OCIState;
use std::collections::HashMap;
use std::fmt;
use std::path::PathBuf;
use std::time::Duration;
use nix::unistd;
use self::device::{Device, ThrottleDevice, WeightDevice};
use self::namespaces::Namespaces;
use crate::specconv::CreateOpts;
pub mod device;
pub mod namespaces;
pub mod validator;
#[derive(Serialize, Deserialize, Debug)]
pub struct Rlimit {
#[serde(default)]
r#type: i32,
#[serde(default)]
hard: i32,
#[serde(default)]
soft: i32,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct IDMap {
#[serde(default)]
container_id: i32,
#[serde(default)]
host_id: i32,
#[serde(default)]
size: i32,
}
type Action = i32;
#[derive(Serialize, Deserialize, Debug)]
pub struct Seccomp {
#[serde(default)]
default_action: Action,
#[serde(default)]
architectures: Vec<String>,
#[serde(default)]
syscalls: Vec<Syscall>,
}
type Operator = i32;
#[derive(Serialize, Deserialize, Debug)]
pub struct Arg {
#[serde(default)]
index: u32,
#[serde(default)]
value: u64,
#[serde(default)]
value_two: u64,
#[serde(default)]
op: Operator,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct Syscall {
#[serde(default, skip_serializing_if = "String::is_empty")]
name: String,
#[serde(default)]
action: Action,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
args: Vec<Arg>,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct Config<'a> {
#[serde(default)]
no_pivot_root: bool,
#[serde(default)]
parent_death_signal: i32,
#[serde(default)]
rootfs: String,
#[serde(default)]
readonlyfs: bool,
#[serde(default, rename = "rootPropagation")]
root_propagation: i32,
#[serde(default)]
mounts: Vec<Mount>,
#[serde(default)]
devices: Vec<Device>,
#[serde(default)]
mount_label: String,
#[serde(default)]
hostname: String,
#[serde(default)]
namespaces: Namespaces,
#[serde(default)]
capabilities: Option<Capabilities>,
#[serde(default)]
networks: Vec<Network>,
#[serde(default)]
routes: Vec<Route>,
#[serde(default)]
cgroups: Option<Cgroup<'a>>,
#[serde(default, skip_serializing_if = "String::is_empty")]
apparmor_profile: String,
#[serde(default, skip_serializing_if = "String::is_empty")]
process_label: String,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
rlimits: Vec<Rlimit>,
#[serde(default)]
oom_score_adj: Option<i32>,
#[serde(default)]
uid_mappings: Vec<IDMap>,
#[serde(default)]
gid_mappings: Vec<IDMap>,
#[serde(default)]
mask_paths: Vec<String>,
#[serde(default)]
readonly_paths: Vec<String>,
#[serde(default)]
sysctl: HashMap<String, String>,
#[serde(default)]
seccomp: Option<Seccomp>,
#[serde(default)]
no_new_privileges: bool,
hooks: Option<Hooks>,
#[serde(default)]
version: String,
#[serde(default)]
labels: Vec<String>,
#[serde(default)]
no_new_keyring: bool,
#[serde(default)]
intel_rdt: Option<IntelRdt>,
#[serde(default)]
rootless_euid: bool,
#[serde(default)]
rootless_cgroups: bool,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct Hooks {
prestart: Vec<Box<Hook>>,
poststart: Vec<Box<Hook>>,
poststop: Vec<Box<Hook>>,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct Capabilities {
bounding: Vec<String>,
effective: Vec<String>,
inheritable: Vec<String>,
permitted: Vec<String>,
ambient: Vec<String>,
}
pub trait Hook {
fn run(&self, state: &OCIState) -> Result<()>;
}
pub struct FuncHook {
// run: fn(&OCIState) -> Result<()>,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct Command {
#[serde(default)]
path: String,
#[serde(default)]
args: Vec<String>,
#[serde(default)]
env: Vec<String>,
#[serde(default)]
dir: String,
#[serde(default)]
timeout: Duration,
}
pub struct CommandHook {
command: Command,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct Mount {
#[serde(default)]
source: String,
#[serde(default)]
destination: String,
#[serde(default)]
device: String,
#[serde(default)]
flags: i32,
#[serde(default)]
propagation_flags: Vec<i32>,
#[serde(default)]
data: String,
#[serde(default)]
relabel: String,
#[serde(default)]
extensions: i32,
#[serde(default)]
premount_cmds: Vec<Command>,
#[serde(default)]
postmount_cmds: Vec<Command>,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct HugepageLimit {
#[serde(default)]
page_size: String,
#[serde(default)]
limit: u64,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct IntelRdt {
#[serde(default, skip_serializing_if = "String::is_empty")]
l3_cache_schema: String,
#[serde(
default,
rename = "memBwSchema",
skip_serializing_if = "String::is_empty"
)]
mem_bw_schema: String,
}
pub type FreezerState = String;
#[derive(Serialize, Deserialize, Debug)]
pub struct Cgroup<'a> {
#[serde(default, skip_serializing_if = "String::is_empty")]
name: String,
#[serde(default, skip_serializing_if = "String::is_empty")]
parent: String,
#[serde(default)]
path: String,
#[serde(default)]
scope_prefix: String,
paths: HashMap<String, String>,
resource: &'a Resources<'a>,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct Resources<'a> {
#[serde(default)]
allow_all_devices: bool,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
allowed_devices: Vec<&'a Device>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
denied_devices: Vec<&'a Device>,
#[serde(default)]
devices: Vec<&'a Device>,
#[serde(default)]
memory: i64,
#[serde(default)]
memory_reservation: i64,
#[serde(default)]
memory_swap: i64,
#[serde(default)]
kernel_memory: i64,
#[serde(default)]
kernel_memory_tcp: i64,
#[serde(default)]
cpu_shares: u64,
#[serde(default)]
cpu_quota: i64,
#[serde(default)]
cpu_period: u64,
#[serde(default)]
cpu_rt_quota: i64,
#[serde(default)]
cpu_rt_period: u64,
#[serde(default)]
cpuset_cpus: String,
#[serde(default)]
cpuset_mems: String,
#[serde(default)]
pids_limit: i64,
#[serde(default)]
blkio_weight: u64,
#[serde(default)]
blkio_leaf_weight: u64,
#[serde(default)]
blkio_weight_device: Vec<&'a WeightDevice>,
#[serde(default)]
blkio_throttle_read_bps_device: Vec<&'a ThrottleDevice>,
#[serde(default)]
blkio_throttle_write_bps_device: Vec<&'a ThrottleDevice>,
#[serde(default)]
blkio_throttle_read_iops_device: Vec<&'a ThrottleDevice>,
#[serde(default)]
blkio_throttle_write_iops_device: Vec<&'a ThrottleDevice>,
#[serde(default)]
freezer: FreezerState,
#[serde(default)]
hugetlb_limit: Vec<&'a HugepageLimit>,
#[serde(default)]
oom_kill_disable: bool,
#[serde(default)]
memory_swapiness: u64,
#[serde(default)]
net_prio_ifpriomap: Vec<&'a IfPrioMap>,
#[serde(default)]
net_cls_classid_u: u32,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct Network {
#[serde(default)]
r#type: String,
#[serde(default)]
name: String,
#[serde(default)]
bridge: String,
#[serde(default)]
mac_address: String,
#[serde(default)]
address: String,
#[serde(default)]
gateway: String,
#[serde(default)]
ipv6_address: String,
#[serde(default)]
ipv6_gateway: String,
#[serde(default)]
mtu: i32,
#[serde(default)]
txqueuelen: i32,
#[serde(default)]
host_interface_name: String,
#[serde(default)]
hairpin_mode: bool,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct Route {
#[serde(default)]
destination: String,
#[serde(default)]
source: String,
#[serde(default)]
gateway: String,
#[serde(default)]
interface_name: String,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct IfPrioMap {
#[serde(default)]
interface: String,
#[serde(default)]
priority: i32,
}
impl IfPrioMap {
fn cgroup_string(&self) -> String {
format!("{} {}", self.interface, self.priority)
}
}

View File

@@ -1,46 +0,0 @@
// Copyright (c) 2019 Ant Financial
//
// SPDX-License-Identifier: Apache-2.0
//
use serde;
#[macro_use]
use serde_derive;
use serde_json;
use std::collections::HashMap;
#[macro_use]
use lazy_static;
pub type NamespaceType = String;
pub type Namespaces = Vec<Namespace>;
#[derive(Serialize, Deserialize, Debug)]
pub struct Namespace {
#[serde(default)]
r#type: NamespaceType,
#[serde(default)]
path: String,
}
pub const NEWNET: &'static str = "NEWNET";
pub const NEWPID: &'static str = "NEWPID";
pub const NEWNS: &'static str = "NEWNS";
pub const NEWUTS: &'static str = "NEWUTS";
pub const NEWUSER: &'static str = "NEWUSER";
pub const NEWCGROUP: &'static str = "NEWCGROUP";
pub const NEWIPC: &'static str = "NEWIPC";
lazy_static! {
static ref TYPETONAME: HashMap<&'static str, &'static str> = {
let mut m = HashMap::new();
m.insert("pid", "pid");
m.insert("network", "net");
m.insert("mount", "mnt");
m.insert("user", "user");
m.insert("uts", "uts");
m.insert("ipc", "ipc");
m.insert("cgroup", "cgroup");
m
};
}

View File

@@ -1,23 +0,0 @@
// Copyright (c) 2019 Ant Financial
//
// SPDX-License-Identifier: Apache-2.0
//
use crate::configs::Config;
use std::io::Result;
pub trait Validator {
fn validate(&self, config: &Config) -> Result<()> {
Ok(())
}
}
pub struct ConfigValidator {}
impl Validator for ConfigValidator {}
impl ConfigValidator {
fn new() -> Self {
ConfigValidator {}
}
}

View File

@@ -5,10 +5,10 @@
use anyhow::{anyhow, Context, Result};
use libc::pid_t;
use oci::{ContainerState, LinuxDevice, LinuxIDMapping};
use oci::{ContainerState, LinuxDevice, LinuxIdMapping};
use oci::{Hook, Linux, LinuxNamespace, LinuxResources, Spec};
use std::clone::Clone;
use std::ffi::{CStr, CString};
use std::ffi::CString;
use std::fmt::Display;
use std::fs;
use std::os::unix::io::RawFd;
@@ -48,6 +48,7 @@ use oci::State as OCIState;
use std::collections::HashMap;
use std::os::unix::io::FromRawFd;
use std::str::FromStr;
use std::sync::Arc;
use slog::{info, o, Logger};
@@ -57,6 +58,7 @@ use crate::sync_with_async::{read_async, write_async};
use async_trait::async_trait;
use rlimit::{setrlimit, Resource, Rlim};
use tokio::io::AsyncBufReadExt;
use tokio::sync::Mutex;
use crate::utils;
@@ -83,8 +85,8 @@ pub struct ContainerStatus {
impl ContainerStatus {
fn new() -> Self {
ContainerStatus {
pre_status: ContainerState::CREATED,
cur_status: ContainerState::CREATED,
pre_status: ContainerState::Created,
cur_status: ContainerState::Created,
}
}
@@ -106,6 +108,9 @@ pub type Config = CreateOpts;
type NamespaceType = String;
lazy_static! {
// This locker ensures the child exit signal will be received by the right receiver.
pub static ref WAIT_PID_LOCKER: Arc<Mutex<bool>> = Arc::new(Mutex::new(false));
static ref NAMESPACES: HashMap<&'static str, CloneFlags> = {
let mut m = HashMap::new();
m.insert("user", CloneFlags::CLONE_NEWUSER);
@@ -132,62 +137,62 @@ lazy_static! {
};
pub static ref DEFAULT_DEVICES: Vec<LinuxDevice> = {
let mut v = Vec::new();
v.push(LinuxDevice {
path: "/dev/null".to_string(),
r#type: "c".to_string(),
major: 1,
minor: 3,
file_mode: Some(0o666),
uid: Some(0xffffffff),
gid: Some(0xffffffff),
});
v.push(LinuxDevice {
path: "/dev/zero".to_string(),
r#type: "c".to_string(),
major: 1,
minor: 5,
file_mode: Some(0o666),
uid: Some(0xffffffff),
gid: Some(0xffffffff),
});
v.push(LinuxDevice {
path: "/dev/full".to_string(),
r#type: String::from("c"),
major: 1,
minor: 7,
file_mode: Some(0o666),
uid: Some(0xffffffff),
gid: Some(0xffffffff),
});
v.push(LinuxDevice {
path: "/dev/tty".to_string(),
r#type: "c".to_string(),
major: 5,
minor: 0,
file_mode: Some(0o666),
uid: Some(0xffffffff),
gid: Some(0xffffffff),
});
v.push(LinuxDevice {
path: "/dev/urandom".to_string(),
r#type: "c".to_string(),
major: 1,
minor: 9,
file_mode: Some(0o666),
uid: Some(0xffffffff),
gid: Some(0xffffffff),
});
v.push(LinuxDevice {
path: "/dev/random".to_string(),
r#type: "c".to_string(),
major: 1,
minor: 8,
file_mode: Some(0o666),
uid: Some(0xffffffff),
gid: Some(0xffffffff),
});
v
vec![
LinuxDevice {
path: "/dev/null".to_string(),
r#type: "c".to_string(),
major: 1,
minor: 3,
file_mode: Some(0o666),
uid: Some(0xffffffff),
gid: Some(0xffffffff),
},
LinuxDevice {
path: "/dev/zero".to_string(),
r#type: "c".to_string(),
major: 1,
minor: 5,
file_mode: Some(0o666),
uid: Some(0xffffffff),
gid: Some(0xffffffff),
},
LinuxDevice {
path: "/dev/full".to_string(),
r#type: String::from("c"),
major: 1,
minor: 7,
file_mode: Some(0o666),
uid: Some(0xffffffff),
gid: Some(0xffffffff),
},
LinuxDevice {
path: "/dev/tty".to_string(),
r#type: "c".to_string(),
major: 5,
minor: 0,
file_mode: Some(0o666),
uid: Some(0xffffffff),
gid: Some(0xffffffff),
},
LinuxDevice {
path: "/dev/urandom".to_string(),
r#type: "c".to_string(),
major: 1,
minor: 9,
file_mode: Some(0o666),
uid: Some(0xffffffff),
gid: Some(0xffffffff),
},
LinuxDevice {
path: "/dev/random".to_string(),
r#type: "c".to_string(),
major: 1,
minor: 8,
file_mode: Some(0o666),
uid: Some(0xffffffff),
gid: Some(0xffffffff),
},
]
};
}
@@ -255,7 +260,7 @@ pub struct State {
}
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct SyncPC {
pub struct SyncPc {
#[serde(default)]
pid: pid_t,
}
@@ -268,7 +273,7 @@ pub trait Container: BaseContainer {
impl Container for LinuxContainer {
fn pause(&mut self) -> Result<()> {
let status = self.status();
if status != ContainerState::RUNNING && status != ContainerState::CREATED {
if status != ContainerState::Running && status != ContainerState::Created {
return Err(anyhow!(
"failed to pause container: current status is: {:?}",
status
@@ -281,7 +286,7 @@ impl Container for LinuxContainer {
.unwrap()
.freeze(FreezerState::Frozen)?;
self.status.transition(ContainerState::PAUSED);
self.status.transition(ContainerState::Paused);
return Ok(());
}
Err(anyhow!("failed to get container's cgroup manager"))
@@ -289,7 +294,7 @@ impl Container for LinuxContainer {
fn resume(&mut self) -> Result<()> {
let status = self.status();
if status != ContainerState::PAUSED {
if status != ContainerState::Paused {
return Err(anyhow!("container status is: {:?}, not paused", status));
}
@@ -299,7 +304,7 @@ impl Container for LinuxContainer {
.unwrap()
.freeze(FreezerState::Thawed)?;
self.status.transition(ContainerState::RUNNING);
self.status.transition(ContainerState::Running);
return Ok(());
}
Err(anyhow!("failed to get container's cgroup manager"))
@@ -341,7 +346,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
Err(_e) => sched::unshare(CloneFlags::CLONE_NEWPID)?,
}
match fork() {
match unsafe { fork() } {
Ok(ForkResult::Parent { child, .. }) => {
log_child!(
cfd_log,
@@ -464,7 +469,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
// Ref: https://github.com/opencontainers/runc/commit/50a19c6ff828c58e5dab13830bd3dacde268afe5
//
if !nses.is_empty() {
prctl::set_dumpable(false)
capctl::prctl::set_dumpable(false)
.map_err(|e| anyhow!(e).context("set process non-dumpable failed"))?;
}
@@ -540,7 +545,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
// notify parent to run prestart hooks
write_sync(cwfd, SYNC_SUCCESS, "")?;
// wait parent run prestart hooks
let _ = read_sync(crfd)?;
read_sync(crfd)?;
}
if mount_fd != -1 {
@@ -597,7 +602,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
// NoNewPeiviledges, Drop capabilities
if oci_process.no_new_privileges {
prctl::set_no_new_privileges(true).map_err(|_| anyhow!("cannot set no new privileges"))?;
capctl::prctl::set_no_new_privs().map_err(|_| anyhow!("cannot set no new privileges"))?;
}
if oci_process.capabilities.is_some() {
@@ -607,8 +612,6 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
if init {
// notify parent to run poststart hooks
// cfd is closed when return from join_namespaces
// should retunr cfile instead of cfd?
write_sync(cwfd, SYNC_SUCCESS, "")?;
}
@@ -634,12 +637,12 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
env::set_var(v[0], v[1]);
}
// set the "HOME" env getting from "/etc/passwd"
// set the "HOME" env getting from "/etc/passwd", if
// there's no uid entry in /etc/passwd, set "/" as the
// home env.
if env::var_os(HOME_ENV_KEY).is_none() {
match utils::home_dir(guser.uid) {
Ok(home_dir) => env::set_var(HOME_ENV_KEY, home_dir),
Err(e) => log_child!(cfd_log, "failed to get home dir: {:?}", e),
}
let home_dir = utils::home_dir(guser.uid).unwrap_or_else(|_| String::from("/"));
env::set_var(HOME_ENV_KEY, home_dir);
}
let exec_file = Path::new(&args[0]);
@@ -734,7 +737,7 @@ impl BaseContainer for LinuxContainer {
};
let status = self.status();
let pid = if status != ContainerState::STOPPED {
let pid = if status != ContainerState::Stopped {
self.init_process_pid
} else {
0
@@ -817,7 +820,7 @@ impl BaseContainer for LinuxContainer {
if stat::stat(fifo_file.as_str()).is_ok() {
return Err(anyhow!("exec fifo exists"));
}
unistd::mkfifo(fifo_file.as_str(), Mode::from_bits(0o622).unwrap())?;
unistd::mkfifo(fifo_file.as_str(), Mode::from_bits(0o644).unwrap())?;
fifofd = fcntl::open(
fifo_file.as_str(),
@@ -908,7 +911,7 @@ impl BaseContainer for LinuxContainer {
child = child.env(PIDNS_FD, format!("{}", pidns.unwrap()));
}
let child = child.spawn()?;
child.spawn()?;
unistd::close(crfd)?;
unistd::close(cwfd)?;
@@ -964,19 +967,6 @@ impl BaseContainer for LinuxContainer {
self.created = SystemTime::now();
// create the pipes for notify process exited
let (exit_pipe_r, exit_pipe_w) = unistd::pipe2(OFlag::O_CLOEXEC)
.context("failed to create pipe")
.map_err(|e| {
let _ = signal::kill(Pid::from_raw(child.id() as i32), Some(Signal::SIGKILL))
.map_err(|e| warn!(logger, "signal::kill creating pipe {:?}", e));
e
})?;
p.exit_pipe_w = Some(exit_pipe_w);
p.exit_pipe_r = Some(exit_pipe_r);
if p.init {
let spec = self.config.spec.as_mut().unwrap();
update_namespaces(&self.logger, spec, p.pid)?;
@@ -997,7 +987,7 @@ impl BaseContainer for LinuxContainer {
if init {
self.exec()?;
self.status.transition(ContainerState::RUNNING);
self.status.transition(ContainerState::Running);
}
Ok(())
@@ -1019,7 +1009,7 @@ impl BaseContainer for LinuxContainer {
}
}
self.status.transition(ContainerState::STOPPED);
self.status.transition(ContainerState::Stopped);
mount::umount2(
spec.root.as_ref().unwrap().path.as_str(),
MntFlags::MNT_DETACH,
@@ -1055,7 +1045,7 @@ impl BaseContainer for LinuxContainer {
.unwrap()
.as_secs();
self.status.transition(ContainerState::RUNNING);
self.status.transition(ContainerState::Running);
unistd::close(fd)?;
Ok(())
@@ -1089,9 +1079,8 @@ fn do_exec(args: &[String]) -> ! {
.iter()
.map(|s| CString::new(s.to_string()).unwrap_or_default())
.collect();
let a: Vec<&CStr> = sa.iter().map(|s| s.as_c_str()).collect();
let _ = unistd::execvp(p.as_c_str(), a.as_slice()).map_err(|e| match e {
let _ = unistd::execvp(p.as_c_str(), &sa).map_err(|e| match e {
nix::Error::Sys(errno) => {
std::process::exit(errno as i32);
}
@@ -1264,7 +1253,7 @@ async fn join_namespaces(
if p.init {
info!(logger, "notify child parent ready to run prestart hook!");
let _ = read_async(pipe_r).await?;
read_async(pipe_r).await?;
info!(logger, "get ready to run prestart hook!");
@@ -1302,7 +1291,7 @@ async fn join_namespaces(
Ok(())
}
fn write_mappings(logger: &Logger, path: &str, maps: &[LinuxIDMapping]) -> Result<()> {
fn write_mappings(logger: &Logger, path: &str, maps: &[LinuxIdMapping]) -> Result<()> {
let data = maps
.iter()
.filter(|m| m.size != 0)
@@ -1324,7 +1313,7 @@ fn write_mappings(logger: &Logger, path: &str, maps: &[LinuxIDMapping]) -> Resul
fn setid(uid: Uid, gid: Gid) -> Result<()> {
// set uid/gid
prctl::set_keep_capabilities(true)
capctl::prctl::set_keepcaps(true)
.map_err(|e| anyhow!(e).context("set keep capabilities returned"))?;
{
@@ -1338,7 +1327,7 @@ fn setid(uid: Uid, gid: Gid) -> Result<()> {
capabilities::reset_effective()?;
}
prctl::set_keep_capabilities(false)
capctl::prctl::set_keepcaps(false)
.map_err(|e| anyhow!(e).context("set keep capabilities returned"))?;
Ok(())
@@ -1480,6 +1469,8 @@ async fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> {
})
.collect();
// Avoid the exit signal to be reaped by the global reaper.
let _wait_locker = WAIT_PID_LOCKER.lock().await;
let mut child = tokio::process::Command::new(path)
.args(args.iter())
.envs(env.iter())
@@ -1528,28 +1519,23 @@ async fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> {
match child.wait().await {
Ok(exit) => {
let code = match exit.code() {
Some(c) => c,
None => {
return Err(anyhow!("hook exit status has no status code"));
}
};
let code = exit
.code()
.ok_or_else(|| anyhow!("hook exit status has no status code"))?;
if code == 0 {
debug!(logger, "hook {} exit status is 0", &path);
return Ok(());
} else {
if code != 0 {
error!(logger, "hook {} exit status is {}", &path, code);
return Err(anyhow!(nix::Error::from_errno(Errno::UnknownErrno)));
}
debug!(logger, "hook {} exit status is 0", &path);
Ok(())
}
Err(e) => {
return Err(anyhow!(
"wait child error: {} {}",
e,
e.raw_os_error().unwrap()
));
}
Err(e) => Err(anyhow!(
"wait child error: {} {}",
e,
e.raw_os_error().unwrap()
)),
}
});
@@ -1588,7 +1574,7 @@ mod tests {
&OCIState {
version: "1.2.3".to_string(),
id: "321".to_string(),
status: ContainerState::RUNNING,
status: ContainerState::Running,
pid: 2,
bundle: "".to_string(),
annotations: Default::default(),
@@ -1611,7 +1597,7 @@ mod tests {
&OCIState {
version: "1.2.3".to_string(),
id: "321".to_string(),
status: ContainerState::RUNNING,
status: ContainerState::Running,
pid: 2,
bundle: "".to_string(),
annotations: Default::default(),
@@ -1630,10 +1616,10 @@ mod tests {
fn test_status_transtition() {
let mut status = ContainerStatus::new();
let status_table: [ContainerState; 4] = [
ContainerState::CREATED,
ContainerState::RUNNING,
ContainerState::PAUSED,
ContainerState::STOPPED,
ContainerState::Created,
ContainerState::Running,
ContainerState::Paused,
ContainerState::Stopped,
];
for s in status_table.iter() {
@@ -1770,7 +1756,7 @@ mod tests {
fn test_linuxcontainer_pause_bad_status() {
let ret = new_linux_container_and_then(|mut c: LinuxContainer| {
// Change state to pause, c.pause() should fail
c.status.transition(ContainerState::PAUSED);
c.status.transition(ContainerState::Paused);
c.pause().map_err(|e| anyhow!(e))
});
@@ -1802,7 +1788,7 @@ mod tests {
fn test_linuxcontainer_resume_bad_status() {
let ret = new_linux_container_and_then(|mut c: LinuxContainer| {
// Change state to created, c.resume() should fail
c.status.transition(ContainerState::CREATED);
c.status.transition(ContainerState::Created);
c.resume().map_err(|e| anyhow!(e))
});
@@ -1813,7 +1799,7 @@ mod tests {
#[test]
fn test_linuxcontainer_resume_cgroupmgr_is_none() {
let ret = new_linux_container_and_then(|mut c: LinuxContainer| {
c.status.transition(ContainerState::PAUSED);
c.status.transition(ContainerState::Paused);
c.cgroup_manager = None;
c.resume().map_err(|e| anyhow!(e))
});
@@ -1826,7 +1812,7 @@ mod tests {
let ret = new_linux_container_and_then(|mut c: LinuxContainer| {
c.cgroup_manager = FsManager::new("").ok();
// Change status to paused, this way we can resume it
c.status.transition(ContainerState::PAUSED);
c.status.transition(ContainerState::Paused);
c.resume().map_err(|e| anyhow!(e))
});

View File

@@ -23,7 +23,7 @@ extern crate caps;
extern crate protocols;
#[macro_use]
extern crate scopeguard;
extern crate prctl;
extern crate capctl;
#[macro_use]
extern crate lazy_static;
extern crate libc;
@@ -47,35 +47,17 @@ pub mod sync;
pub mod sync_with_async;
pub mod utils;
pub mod validator;
// pub mod factory;
//pub mod configs;
// pub mod devices;
// pub mod init;
// pub mod rootfs;
// pub mod capabilities;
// pub mod console;
// pub mod stats;
// pub mod user;
//pub mod intelrdt;
// construtc ociSpec from grpcSpec, which is needed for hook
// execution. since hooks read config.json
use oci::{
Box as ociBox, Hooks as ociHooks, Linux as ociLinux, LinuxCapabilities as ociLinuxCapabilities,
Mount as ociMount, POSIXRlimit as ociPOSIXRlimit, Process as ociProcess, Root as ociRoot,
Spec as ociSpec, User as ociUser,
};
use protocols::oci::{
Hooks as grpcHooks, Linux as grpcLinux, Mount as grpcMount, Process as grpcProcess,
Root as grpcRoot, Spec as grpcSpec,
};
use std::collections::HashMap;
pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess {
use protocols::oci as grpc;
// construct ociSpec from grpc::Spec, which is needed for hook
// execution. since hooks read config.json
pub fn process_grpc_to_oci(p: &grpc::Process) -> oci::Process {
let console_size = if p.ConsoleSize.is_some() {
let c = p.ConsoleSize.as_ref().unwrap();
Some(ociBox {
Some(oci::Box {
height: c.Height,
width: c.Width,
})
@@ -85,14 +67,14 @@ pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess {
let user = if p.User.is_some() {
let u = p.User.as_ref().unwrap();
ociUser {
oci::User {
uid: u.UID,
gid: u.GID,
additional_gids: u.AdditionalGids.clone(),
username: u.Username.clone(),
}
} else {
ociUser {
oci::User {
uid: 0,
gid: 0,
additional_gids: vec![],
@@ -103,7 +85,7 @@ pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess {
let capabilities = if p.Capabilities.is_some() {
let cap = p.Capabilities.as_ref().unwrap();
Some(ociLinuxCapabilities {
Some(oci::LinuxCapabilities {
bounding: cap.Bounding.clone().into_vec(),
effective: cap.Effective.clone().into_vec(),
inheritable: cap.Inheritable.clone().into_vec(),
@@ -117,7 +99,7 @@ pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess {
let rlimits = {
let mut r = Vec::new();
for lm in p.Rlimits.iter() {
r.push(ociPOSIXRlimit {
r.push(oci::PosixRlimit {
r#type: lm.Type.clone(),
hard: lm.Hard,
soft: lm.Soft,
@@ -126,7 +108,7 @@ pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess {
r
};
ociProcess {
oci::Process {
terminal: p.Terminal,
console_size,
user,
@@ -142,15 +124,15 @@ pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess {
}
}
fn root_grpc_to_oci(root: &grpcRoot) -> ociRoot {
ociRoot {
fn root_grpc_to_oci(root: &grpc::Root) -> oci::Root {
oci::Root {
path: root.Path.clone(),
readonly: root.Readonly,
}
}
fn mount_grpc_to_oci(m: &grpcMount) -> ociMount {
ociMount {
fn mount_grpc_to_oci(m: &grpc::Mount) -> oci::Mount {
oci::Mount {
destination: m.destination.clone(),
r#type: m.field_type.clone(),
source: m.source.clone(),
@@ -158,13 +140,12 @@ fn mount_grpc_to_oci(m: &grpcMount) -> ociMount {
}
}
use oci::Hook as ociHook;
use protocols::oci::Hook as grpcHook;
fn hook_grpc_to_oci(h: &[grpcHook]) -> Vec<ociHook> {
fn hook_grpc_to_oci(h: &[grpcHook]) -> Vec<oci::Hook> {
let mut r = Vec::new();
for e in h.iter() {
r.push(ociHook {
r.push(oci::Hook {
path: e.Path.clone(),
args: e.Args.clone().into_vec(),
env: e.Env.clone().into_vec(),
@@ -174,39 +155,29 @@ fn hook_grpc_to_oci(h: &[grpcHook]) -> Vec<ociHook> {
r
}
fn hooks_grpc_to_oci(h: &grpcHooks) -> ociHooks {
fn hooks_grpc_to_oci(h: &grpc::Hooks) -> oci::Hooks {
let prestart = hook_grpc_to_oci(h.Prestart.as_ref());
let poststart = hook_grpc_to_oci(h.Poststart.as_ref());
let poststop = hook_grpc_to_oci(h.Poststop.as_ref());
ociHooks {
oci::Hooks {
prestart,
poststart,
poststop,
}
}
use oci::{
LinuxDevice as ociLinuxDevice, LinuxIDMapping as ociLinuxIDMapping,
LinuxIntelRdt as ociLinuxIntelRdt, LinuxNamespace as ociLinuxNamespace,
LinuxResources as ociLinuxResources, LinuxSeccomp as ociLinuxSeccomp,
};
use protocols::oci::{
LinuxIDMapping as grpcLinuxIDMapping, LinuxResources as grpcLinuxResources,
LinuxSeccomp as grpcLinuxSeccomp,
};
fn idmap_grpc_to_oci(im: &grpcLinuxIDMapping) -> ociLinuxIDMapping {
ociLinuxIDMapping {
fn idmap_grpc_to_oci(im: &grpc::LinuxIDMapping) -> oci::LinuxIdMapping {
oci::LinuxIdMapping {
container_id: im.ContainerID,
host_id: im.HostID,
size: im.Size,
}
}
fn idmaps_grpc_to_oci(ims: &[grpcLinuxIDMapping]) -> Vec<ociLinuxIDMapping> {
fn idmaps_grpc_to_oci(ims: &[grpc::LinuxIDMapping]) -> Vec<oci::LinuxIdMapping> {
let mut r = Vec::new();
for im in ims.iter() {
r.push(idmap_grpc_to_oci(im));
@@ -214,24 +185,13 @@ fn idmaps_grpc_to_oci(ims: &[grpcLinuxIDMapping]) -> Vec<ociLinuxIDMapping> {
r
}
use oci::{
LinuxBlockIO as ociLinuxBlockIO, LinuxBlockIODevice as ociLinuxBlockIODevice,
LinuxCPU as ociLinuxCPU, LinuxDeviceCgroup as ociLinuxDeviceCgroup,
LinuxHugepageLimit as ociLinuxHugepageLimit,
LinuxInterfacePriority as ociLinuxInterfacePriority, LinuxMemory as ociLinuxMemory,
LinuxNetwork as ociLinuxNetwork, LinuxPids as ociLinuxPids,
LinuxThrottleDevice as ociLinuxThrottleDevice, LinuxWeightDevice as ociLinuxWeightDevice,
};
use protocols::oci::{
LinuxBlockIO as grpcLinuxBlockIO, LinuxThrottleDevice as grpcLinuxThrottleDevice,
LinuxWeightDevice as grpcLinuxWeightDevice,
};
fn throttle_devices_grpc_to_oci(tds: &[grpcLinuxThrottleDevice]) -> Vec<ociLinuxThrottleDevice> {
fn throttle_devices_grpc_to_oci(
tds: &[grpc::LinuxThrottleDevice],
) -> Vec<oci::LinuxThrottleDevice> {
let mut r = Vec::new();
for td in tds.iter() {
r.push(ociLinuxThrottleDevice {
blk: ociLinuxBlockIODevice {
r.push(oci::LinuxThrottleDevice {
blk: oci::LinuxBlockIoDevice {
major: td.Major,
minor: td.Minor,
},
@@ -241,11 +201,11 @@ fn throttle_devices_grpc_to_oci(tds: &[grpcLinuxThrottleDevice]) -> Vec<ociLinux
r
}
fn weight_devices_grpc_to_oci(wds: &[grpcLinuxWeightDevice]) -> Vec<ociLinuxWeightDevice> {
fn weight_devices_grpc_to_oci(wds: &[grpc::LinuxWeightDevice]) -> Vec<oci::LinuxWeightDevice> {
let mut r = Vec::new();
for wd in wds.iter() {
r.push(ociLinuxWeightDevice {
blk: ociLinuxBlockIODevice {
r.push(oci::LinuxWeightDevice {
blk: oci::LinuxBlockIoDevice {
major: wd.Major,
minor: wd.Minor,
},
@@ -256,7 +216,7 @@ fn weight_devices_grpc_to_oci(wds: &[grpcLinuxWeightDevice]) -> Vec<ociLinuxWeig
r
}
fn blockio_grpc_to_oci(blk: &grpcLinuxBlockIO) -> ociLinuxBlockIO {
fn blockio_grpc_to_oci(blk: &grpc::LinuxBlockIO) -> oci::LinuxBlockIo {
let weight_device = weight_devices_grpc_to_oci(blk.WeightDevice.as_ref());
let throttle_read_bps_device = throttle_devices_grpc_to_oci(blk.ThrottleReadBpsDevice.as_ref());
let throttle_write_bps_device =
@@ -266,7 +226,7 @@ fn blockio_grpc_to_oci(blk: &grpcLinuxBlockIO) -> ociLinuxBlockIO {
let throttle_write_iops_device =
throttle_devices_grpc_to_oci(blk.ThrottleWriteIOPSDevice.as_ref());
ociLinuxBlockIO {
oci::LinuxBlockIo {
weight: Some(blk.Weight as u16),
leaf_weight: Some(blk.LeafWeight as u16),
weight_device,
@@ -277,7 +237,7 @@ fn blockio_grpc_to_oci(blk: &grpcLinuxBlockIO) -> ociLinuxBlockIO {
}
}
pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {
pub fn resources_grpc_to_oci(res: &grpc::LinuxResources) -> oci::LinuxResources {
let devices = {
let mut d = Vec::new();
for dev in res.Devices.iter() {
@@ -292,7 +252,7 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {
} else {
Some(dev.Minor)
};
d.push(ociLinuxDeviceCgroup {
d.push(oci::LinuxDeviceCgroup {
allow: dev.Allow,
r#type: dev.Type.clone(),
major,
@@ -305,7 +265,7 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {
let memory = if res.Memory.is_some() {
let mem = res.Memory.as_ref().unwrap();
Some(ociLinuxMemory {
Some(oci::LinuxMemory {
limit: Some(mem.Limit),
reservation: Some(mem.Reservation),
swap: Some(mem.Swap),
@@ -320,7 +280,7 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {
let cpu = if res.CPU.is_some() {
let c = res.CPU.as_ref().unwrap();
Some(ociLinuxCPU {
Some(oci::LinuxCpu {
shares: Some(c.Shares),
quota: Some(c.Quota),
period: Some(c.Period),
@@ -335,7 +295,7 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {
let pids = if res.Pids.is_some() {
let p = res.Pids.as_ref().unwrap();
Some(ociLinuxPids { limit: p.Limit })
Some(oci::LinuxPids { limit: p.Limit })
} else {
None
};
@@ -351,7 +311,7 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {
let hugepage_limits = {
let mut r = Vec::new();
for hl in res.HugepageLimits.iter() {
r.push(ociLinuxHugepageLimit {
r.push(oci::LinuxHugepageLimit {
page_size: hl.Pagesize.clone(),
limit: hl.Limit,
});
@@ -364,14 +324,14 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {
let priorities = {
let mut r = Vec::new();
for pr in net.Priorities.iter() {
r.push(ociLinuxInterfacePriority {
r.push(oci::LinuxInterfacePriority {
name: pr.Name.clone(),
priority: pr.Priority,
});
}
r
};
Some(ociLinuxNetwork {
Some(oci::LinuxNetwork {
class_id: Some(net.ClassID),
priorities,
})
@@ -379,7 +339,7 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {
None
};
ociLinuxResources {
oci::LinuxResources {
devices,
memory,
cpu,
@@ -391,17 +351,22 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {
}
}
use oci::{LinuxSeccompArg as ociLinuxSeccompArg, LinuxSyscall as ociLinuxSyscall};
fn seccomp_grpc_to_oci(sec: &grpcLinuxSeccomp) -> ociLinuxSeccomp {
fn seccomp_grpc_to_oci(sec: &grpc::LinuxSeccomp) -> oci::LinuxSeccomp {
let syscalls = {
let mut r = Vec::new();
for sys in sec.Syscalls.iter() {
let mut args = Vec::new();
let errno_ret: u32;
if sys.has_errnoret() {
errno_ret = sys.get_errnoret();
} else {
errno_ret = libc::EPERM as u32;
}
for arg in sys.Args.iter() {
args.push(ociLinuxSeccompArg {
args.push(oci::LinuxSeccompArg {
index: arg.Index as u32,
value: arg.Value,
value_two: arg.ValueTwo,
@@ -409,23 +374,25 @@ fn seccomp_grpc_to_oci(sec: &grpcLinuxSeccomp) -> ociLinuxSeccomp {
});
}
r.push(ociLinuxSyscall {
r.push(oci::LinuxSyscall {
names: sys.Names.clone().into_vec(),
action: sys.Action.clone(),
errno_ret,
args,
});
}
r
};
ociLinuxSeccomp {
oci::LinuxSeccomp {
default_action: sec.DefaultAction.clone(),
architectures: sec.Architectures.clone().into_vec(),
flags: sec.Flags.clone().into_vec(),
syscalls,
}
}
fn linux_grpc_to_oci(l: &grpcLinux) -> ociLinux {
fn linux_grpc_to_oci(l: &grpc::Linux) -> oci::Linux {
let uid_mappings = idmaps_grpc_to_oci(l.UIDMappings.as_ref());
let gid_mappings = idmaps_grpc_to_oci(l.GIDMappings.as_ref());
@@ -445,7 +412,7 @@ fn linux_grpc_to_oci(l: &grpcLinux) -> ociLinux {
let mut r = Vec::new();
for ns in l.Namespaces.iter() {
r.push(ociLinuxNamespace {
r.push(oci::LinuxNamespace {
r#type: ns.Type.clone(),
path: ns.Path.clone(),
});
@@ -457,7 +424,7 @@ fn linux_grpc_to_oci(l: &grpcLinux) -> ociLinux {
let mut r = Vec::new();
for d in l.Devices.iter() {
r.push(ociLinuxDevice {
r.push(oci::LinuxDevice {
path: d.Path.clone(),
r#type: d.Type.clone(),
major: d.Major,
@@ -473,14 +440,14 @@ fn linux_grpc_to_oci(l: &grpcLinux) -> ociLinux {
let intel_rdt = if l.IntelRdt.is_some() {
let rdt = l.IntelRdt.as_ref().unwrap();
Some(ociLinuxIntelRdt {
Some(oci::LinuxIntelRdt {
l3_cache_schema: rdt.L3CacheSchema.clone(),
})
} else {
None
};
ociLinux {
oci::Linux {
uid_mappings,
gid_mappings,
sysctl: l.Sysctl.clone(),
@@ -497,11 +464,11 @@ fn linux_grpc_to_oci(l: &grpcLinux) -> ociLinux {
}
}
fn linux_oci_to_grpc(_l: &ociLinux) -> grpcLinux {
grpcLinux::default()
fn linux_oci_to_grpc(_l: &oci::Linux) -> grpc::Linux {
grpc::Linux::default()
}
pub fn grpc_to_oci(grpc: &grpcSpec) -> ociSpec {
pub fn grpc_to_oci(grpc: &grpc::Spec) -> oci::Spec {
// process
let process = if grpc.Process.is_some() {
Some(process_grpc_to_oci(grpc.Process.as_ref().unwrap()))
@@ -539,7 +506,7 @@ pub fn grpc_to_oci(grpc: &grpcSpec) -> ociSpec {
None
};
ociSpec {
oci::Spec {
version: grpc.Version.clone(),
process,
root,

View File

@@ -52,10 +52,12 @@ const MOUNTINFOFORMAT: &str = "{d} {d} {d}:{d} {} {} {} {}";
const PROC_PATH: &str = "/proc";
// since libc didn't defined this const for musl, thus redefined it here.
#[cfg(all(target_os = "linux", target_env = "gnu"))]
#[cfg(all(target_os = "linux", target_env = "gnu", not(target_arch = "s390x")))]
const PROC_SUPER_MAGIC: libc::c_long = 0x00009fa0;
#[cfg(all(target_os = "linux", target_env = "musl"))]
const PROC_SUPER_MAGIC: libc::c_ulong = 0x00009fa0;
#[cfg(all(target_os = "linux", target_env = "gnu", target_arch = "s390x"))]
const PROC_SUPER_MAGIC: libc::c_uint = 0x00009fa0;
lazy_static! {
static ref PROPAGATION: HashMap<&'static str, MsFlags> = {
@@ -66,6 +68,8 @@ lazy_static! {
m.insert("rprivate", MsFlags::MS_PRIVATE | MsFlags::MS_REC);
m.insert("slave", MsFlags::MS_SLAVE);
m.insert("rslave", MsFlags::MS_SLAVE | MsFlags::MS_REC);
m.insert("unbindable", MsFlags::MS_UNBINDABLE);
m.insert("runbindable", MsFlags::MS_UNBINDABLE | MsFlags::MS_REC);
m
};
static ref OPTIONS: HashMap<&'static str, (bool, MsFlags)> = {
@@ -91,17 +95,6 @@ lazy_static! {
m.insert("nodiratime", (false, MsFlags::MS_NODIRATIME));
m.insert("bind", (false, MsFlags::MS_BIND));
m.insert("rbind", (false, MsFlags::MS_BIND | MsFlags::MS_REC));
m.insert("unbindable", (false, MsFlags::MS_UNBINDABLE));
m.insert(
"runbindable",
(false, MsFlags::MS_UNBINDABLE | MsFlags::MS_REC),
);
m.insert("private", (false, MsFlags::MS_PRIVATE));
m.insert("rprivate", (false, MsFlags::MS_PRIVATE | MsFlags::MS_REC));
m.insert("shared", (false, MsFlags::MS_SHARED));
m.insert("rshared", (false, MsFlags::MS_SHARED | MsFlags::MS_REC));
m.insert("slave", (false, MsFlags::MS_SLAVE));
m.insert("rslave", (false, MsFlags::MS_SLAVE | MsFlags::MS_REC));
m.insert("relatime", (false, MsFlags::MS_RELATIME));
m.insert("norelatime", (true, MsFlags::MS_RELATIME));
m.insert("strictatime", (false, MsFlags::MS_STRICTATIME));
@@ -190,7 +183,7 @@ pub fn init_rootfs(
let mut bind_mount_dev = false;
for m in &spec.mounts {
let (mut flags, data) = parse_mount(&m);
let (mut flags, pgflags, data) = parse_mount(&m);
if !m.destination.starts_with('/') || m.destination.contains("..") {
return Err(anyhow!(
"the mount destination {} is invalid",
@@ -232,13 +225,15 @@ pub fn init_rootfs(
// effective.
// first check that we have non-default options required before attempting a
// remount
if m.r#type == "bind" {
for o in &m.options {
if let Some(fl) = PROPAGATION.get(o.as_str()) {
let dest = secure_join(rootfs, &m.destination);
mount(None::<&str>, dest.as_str(), None::<&str>, *fl, None::<&str>)?;
}
}
if m.r#type == "bind" && !pgflags.is_empty() {
let dest = secure_join(rootfs, &m.destination);
mount(
None::<&str>,
dest.as_str(),
None::<&str>,
pgflags,
None::<&str>,
)?;
}
}
}
@@ -655,26 +650,27 @@ pub fn ms_move_root(rootfs: &str) -> Result<bool> {
Ok(true)
}
fn parse_mount(m: &Mount) -> (MsFlags, String) {
fn parse_mount(m: &Mount) -> (MsFlags, MsFlags, String) {
let mut flags = MsFlags::empty();
let mut pgflags = MsFlags::empty();
let mut data = Vec::new();
for o in &m.options {
match OPTIONS.get(o.as_str()) {
Some(v) => {
let (clear, fl) = *v;
if clear {
flags &= !fl;
} else {
flags |= fl;
}
if let Some(v) = OPTIONS.get(o.as_str()) {
let (clear, fl) = *v;
if clear {
flags &= !fl;
} else {
flags |= fl;
}
None => data.push(o.clone()),
} else if let Some(fl) = PROPAGATION.get(o.as_str()) {
pgflags |= *fl;
} else {
data.push(o.clone());
}
}
(flags, data.join(","))
(flags, pgflags, data.join(","))
}
// This function constructs a canonicalized path by combining the `rootfs` and `unsafe_path` elements.
@@ -920,7 +916,7 @@ pub fn finish_rootfs(cfd_log: RawFd, spec: &Spec) -> Result<()> {
for m in spec.mounts.iter() {
if m.destination == "/dev" {
let (flags, _) = parse_mount(m);
let (flags, _, _) = parse_mount(m);
if flags.contains(MsFlags::MS_RDONLY) {
mount(
Some("/dev"),
@@ -1365,7 +1361,7 @@ mod tests {
let msg = format!("{}, result: {:?}", msg, result);
// Perform the checks
assert!(result == t.result, msg);
assert!(result == t.result, "{}", msg);
}
}
}

View File

@@ -77,10 +77,6 @@ impl PipeStream {
Ok(Self(AsyncFd::new(StreamFd(fd))?))
}
pub fn shutdown(&mut self) -> io::Result<()> {
self.0.get_mut().close()
}
pub fn from_fd(fd: RawFd) -> Self {
unsafe { Self::from_raw_fd(fd) }
}
@@ -164,7 +160,44 @@ impl AsyncWrite for PipeStream {
}
fn poll_shutdown(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<io::Result<()>> {
self.get_mut().shutdown()?;
// Do nothing in shutdown is very important
// The only right way to shutdown pipe is drop it
// Otherwise PipeStream will conflict with its twins
// Because they both have same fd, and both registered.
Poll::Ready(Ok(()))
}
}
#[cfg(test)]
mod tests {
use super::*;
use nix::fcntl::OFlag;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
#[tokio::test]
// Shutdown should never close the inner fd.
async fn test_pipestream_shutdown() {
let (_, wfd1) = unistd::pipe2(OFlag::O_CLOEXEC).unwrap();
let mut writer1 = PipeStream::new(wfd1).unwrap();
// if close fd in shutdown, the fd will be reused
// and the test will failed
let _ = writer1.shutdown().await.unwrap();
// let _ = unistd::close(wfd1);
let (rfd2, wfd2) = unistd::pipe2(OFlag::O_CLOEXEC).unwrap(); // reuse fd number, rfd2 == wfd1
let mut reader2 = PipeStream::new(rfd2).unwrap();
let mut writer2 = PipeStream::new(wfd2).unwrap();
// deregister writer1, then reader2 which has the same fd will be deregistered from epoll
drop(writer1);
let _ = writer2.write(b"1").await;
let mut content = vec![0u8; 1];
// Will Block here if shutdown close the fd.
let _ = reader2.read(&mut content).await;
}
}

View File

@@ -29,7 +29,6 @@ pub enum StreamType {
Stdin,
Stdout,
Stderr,
ExitPipeR,
TermMaster,
ParentStdin,
ParentStdout,
@@ -45,8 +44,8 @@ pub struct Process {
pub stdin: Option<RawFd>,
pub stdout: Option<RawFd>,
pub stderr: Option<RawFd>,
pub exit_pipe_r: Option<RawFd>,
pub exit_pipe_w: Option<RawFd>,
pub exit_tx: Option<tokio::sync::watch::Sender<bool>>,
pub exit_rx: Option<tokio::sync::watch::Receiver<bool>>,
pub extra_files: Vec<File>,
pub term_master: Option<RawFd>,
pub tty: bool,
@@ -97,14 +96,15 @@ impl Process {
pipe_size: i32,
) -> Result<Self> {
let logger = logger.new(o!("subsystem" => "process"));
let (exit_tx, exit_rx) = tokio::sync::watch::channel(false);
let mut p = Process {
exec_id: String::from(id),
stdin: None,
stdout: None,
stderr: None,
exit_pipe_w: None,
exit_pipe_r: None,
exit_tx: Some(exit_tx),
exit_rx: Some(exit_rx),
extra_files: Vec::new(),
tty: ocip.terminal,
term_master: None,
@@ -152,7 +152,6 @@ impl Process {
StreamType::Stdin => self.stdin,
StreamType::Stdout => self.stdout,
StreamType::Stderr => self.stderr,
StreamType::ExitPipeR => self.exit_pipe_r,
StreamType::TermMaster => self.term_master,
StreamType::ParentStdin => self.parent_stdin,
StreamType::ParentStdout => self.parent_stdout,

View File

@@ -117,28 +117,20 @@ pub async fn write_async(pipe_w: &mut PipeStream, msg_type: i32, data_str: &str)
}
match msg_type {
SYNC_FAILED => match write_count(pipe_w, data_str.as_bytes(), data_str.len()).await {
Ok(_) => pipe_w.shutdown()?,
Err(e) => {
pipe_w.shutdown()?;
SYNC_FAILED => {
if let Err(e) = write_count(pipe_w, data_str.as_bytes(), data_str.len()).await {
return Err(anyhow!(e).context("error in send message to process"));
}
},
}
SYNC_DATA => {
let length: i32 = data_str.len() as i32;
write_count(pipe_w, &length.to_be_bytes(), MSG_SIZE)
.await
.or_else(|e| {
pipe_w.shutdown()?;
Err(anyhow!(e).context("error in send message to process"))
})?;
.map_err(|e| anyhow!(e).context("error in send message to process"))?;
write_count(pipe_w, data_str.as_bytes(), data_str.len())
.await
.or_else(|e| {
pipe_w.shutdown()?;
Err(anyhow!(e).context("error in send message to process"))
})?;
.map_err(|e| anyhow!(e).context("error in send message to process"))?;
}
_ => (),

View File

@@ -6,7 +6,7 @@
use crate::container::Config;
use anyhow::{anyhow, Context, Error, Result};
use nix::errno::Errno;
use oci::{Linux, LinuxIDMapping, LinuxNamespace, Spec};
use oci::{Linux, LinuxIdMapping, LinuxNamespace, Spec};
use std::collections::HashMap;
use std::path::{Component, PathBuf};
@@ -28,16 +28,6 @@ fn contain_namespace(nses: &[LinuxNamespace], key: &str) -> bool {
false
}
fn get_namespace_path(nses: &[LinuxNamespace], key: &str) -> Result<String> {
for ns in nses {
if ns.r#type.as_str() == key {
return Ok(ns.path.clone());
}
}
Err(einval())
}
fn rootfs(root: &str) -> Result<()> {
let path = PathBuf::from(root);
// not absolute path or not exists
@@ -107,7 +97,7 @@ fn security(oci: &Spec) -> Result<()> {
Ok(())
}
fn idmapping(maps: &[LinuxIDMapping]) -> Result<()> {
fn idmapping(maps: &[LinuxIdMapping]) -> Result<()> {
for map in maps {
if map.size > 0 {
return Ok(());
@@ -166,31 +156,6 @@ lazy_static! {
};
}
fn check_host_ns(path: &str) -> Result<()> {
let cpath = PathBuf::from(path);
let hpath = PathBuf::from("/proc/self/ns/net");
let real_hpath = hpath
.read_link()
.context(format!("read link {:?}", hpath))?;
let meta = cpath
.symlink_metadata()
.context(format!("symlink metadata {:?}", cpath))?;
let file_type = meta.file_type();
if !file_type.is_symlink() {
return Ok(());
}
let real_cpath = cpath
.read_link()
.context(format!("read link {:?}", cpath))?;
if real_cpath == real_hpath {
return Err(einval());
}
Ok(())
}
fn sysctl(oci: &Spec) -> Result<()> {
let linux = get_linux(oci)?;
@@ -238,7 +203,7 @@ fn rootless_euid_mapping(oci: &Spec) -> Result<()> {
Ok(())
}
fn has_idmapping(maps: &[LinuxIDMapping], id: u32) -> bool {
fn has_idmapping(maps: &[LinuxIdMapping], id: u32) -> bool {
for map in maps {
if id >= map.container_id && id < map.container_id + map.size {
return true;
@@ -334,19 +299,6 @@ mod tests {
assert_eq!(contain_namespace(&namespaces, ""), false);
assert_eq!(contain_namespace(&namespaces, "Net"), false);
assert_eq!(contain_namespace(&namespaces, "ipc"), false);
assert_eq!(
get_namespace_path(&namespaces, "net").unwrap(),
"/sys/cgroups/net"
);
assert_eq!(
get_namespace_path(&namespaces, "uts").unwrap(),
"/sys/cgroups/uts"
);
get_namespace_path(&namespaces, "").unwrap_err();
get_namespace_path(&namespaces, "Uts").unwrap_err();
get_namespace_path(&namespaces, "ipc").unwrap_err();
}
#[test]
@@ -441,7 +393,7 @@ mod tests {
usernamespace(&spec).unwrap();
let mut linux = Linux::default();
linux.uid_mappings = vec![LinuxIDMapping {
linux.uid_mappings = vec![LinuxIdMapping {
container_id: 0,
host_id: 1000,
size: 0,
@@ -450,7 +402,7 @@ mod tests {
usernamespace(&spec).unwrap_err();
let mut linux = Linux::default();
linux.uid_mappings = vec![LinuxIDMapping {
linux.uid_mappings = vec![LinuxIdMapping {
container_id: 0,
host_id: 1000,
size: 100,
@@ -497,12 +449,12 @@ mod tests {
path: "/sys/cgroups/user".to_owned(),
},
];
linux.uid_mappings = vec![LinuxIDMapping {
linux.uid_mappings = vec![LinuxIdMapping {
container_id: 0,
host_id: 1000,
size: 1000,
}];
linux.gid_mappings = vec![LinuxIDMapping {
linux.gid_mappings = vec![LinuxIdMapping {
container_id: 0,
host_id: 1000,
size: 1000,
@@ -528,12 +480,6 @@ mod tests {
rootless_euid(&spec).unwrap();
}
#[test]
fn test_check_host_ns() {
check_host_ns("/proc/self/ns/net").unwrap_err();
check_host_ns("/proc/sys/net/ipv4/tcp_sack").unwrap();
}
#[test]
fn test_sysctl() {
let mut spec = Spec::default();

View File

@@ -2,13 +2,16 @@
//
// SPDX-License-Identifier: Apache-2.0
//
use crate::tracer;
use anyhow::{anyhow, Result};
use std::env;
use std::fs;
use std::time;
use tracing::instrument;
const DEBUG_CONSOLE_FLAG: &str = "agent.debug_console";
const DEV_MODE_FLAG: &str = "agent.devmode";
const TRACE_MODE_OPTION: &str = "agent.trace";
const LOG_LEVEL_OPTION: &str = "agent.log";
const SERVER_ADDR_OPTION: &str = "agent.server_addr";
const HOTPLUG_TIMOUT_OPTION: &str = "agent.hotplug_timeout";
@@ -26,6 +29,7 @@ const VSOCK_PORT: u16 = 1024;
// Environment variables used for development and testing
const SERVER_ADDR_ENV_VAR: &str = "KATA_AGENT_SERVER_ADDR";
const LOG_LEVEL_ENV_VAR: &str = "KATA_AGENT_LOG_LEVEL";
const TRACE_TYPE_ENV_VAR: &str = "KATA_AGENT_TRACE_TYPE";
const ERR_INVALID_LOG_LEVEL: &str = "invalid log level";
const ERR_INVALID_LOG_LEVEL_PARAM: &str = "invalid log level parameter";
@@ -54,6 +58,7 @@ pub struct AgentConfig {
pub container_pipe_size: i32,
pub server_addr: String,
pub unified_cgroup_hierarchy: bool,
pub tracing: tracer::TraceType,
}
// parse_cmdline_param parse commandline parameters.
@@ -98,9 +103,11 @@ impl AgentConfig {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: format!("{}:{}", VSOCK_ADDR, VSOCK_PORT),
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
}
}
#[instrument]
pub fn parse_cmdline(&mut self, file: &str) -> Result<()> {
let cmdline = fs::read_to_string(file)?;
let params: Vec<&str> = cmdline.split_ascii_whitespace().collect();
@@ -109,6 +116,15 @@ impl AgentConfig {
parse_cmdline_param!(param, DEBUG_CONSOLE_FLAG, self.debug_console);
parse_cmdline_param!(param, DEV_MODE_FLAG, self.dev_mode);
// Support "bare" tracing option for backwards compatibility with
// Kata 1.x.
if param == &TRACE_MODE_OPTION {
self.tracing = tracer::TraceType::Isolated;
continue;
}
parse_cmdline_param!(param, TRACE_MODE_OPTION, self.tracing, get_trace_type);
// parse cmdline options
parse_cmdline_param!(param, LOG_LEVEL_OPTION, self.log_level, get_log_level);
parse_cmdline_param!(
@@ -167,10 +183,17 @@ impl AgentConfig {
}
}
if let Ok(value) = env::var(TRACE_TYPE_ENV_VAR) {
if let Ok(result) = value.parse::<tracer::TraceType>() {
self.tracing = result;
}
}
Ok(())
}
}
#[instrument]
fn get_vsock_port(p: &str) -> Result<i32> {
let fields: Vec<&str> = p.split('=').collect();
if fields.len() != 2 {
@@ -185,6 +208,7 @@ fn get_vsock_port(p: &str) -> Result<i32> {
//
// Note: Logrus names are used for compatability with the previous
// golang-based agent.
#[instrument]
fn logrus_to_slog_level(logrus_level: &str) -> Result<slog::Level> {
let level = match logrus_level {
// Note: different semantics to logrus: log, but don't panic.
@@ -207,6 +231,7 @@ fn logrus_to_slog_level(logrus_level: &str) -> Result<slog::Level> {
Ok(level)
}
#[instrument]
fn get_log_level(param: &str) -> Result<slog::Level> {
let fields: Vec<&str> = param.split('=').collect();
@@ -221,6 +246,28 @@ fn get_log_level(param: &str) -> Result<slog::Level> {
}
}
#[instrument]
fn get_trace_type(param: &str) -> Result<tracer::TraceType> {
if param.is_empty() {
return Err(anyhow!("invalid trace type parameter"));
}
let fields: Vec<&str> = param.split('=').collect();
if fields[0] != TRACE_MODE_OPTION {
return Err(anyhow!("invalid trace type key name"));
}
if fields.len() == 1 {
return Ok(tracer::TraceType::Isolated);
}
let result = fields[1].parse::<tracer::TraceType>()?;
Ok(result)
}
#[instrument]
fn get_hotplug_timeout(param: &str) -> Result<time::Duration> {
let fields: Vec<&str> = param.split('=').collect();
@@ -241,6 +288,7 @@ fn get_hotplug_timeout(param: &str) -> Result<time::Duration> {
Ok(time::Duration::from_secs(value.unwrap()))
}
#[instrument]
fn get_bool_value(param: &str) -> Result<bool> {
let fields: Vec<&str> = param.split('=').collect();
@@ -265,6 +313,7 @@ fn get_bool_value(param: &str) -> Result<bool> {
// - A value can contain any number of equal signs.
// - We could/should maybe check if the name is pure whitespace
// since this is considered to be invalid.
#[instrument]
fn get_string_value(param: &str) -> Result<String> {
let fields: Vec<&str> = param.split('=').collect();
@@ -273,18 +322,19 @@ fn get_string_value(param: &str) -> Result<String> {
}
// We need name (but the value can be blank)
if fields[0] == "" {
if fields[0].is_empty() {
return Err(anyhow!(ERR_INVALID_GET_VALUE_NO_NAME));
}
let value = fields[1..].join("=");
if value == "" {
if value.is_empty() {
return Err(anyhow!(ERR_INVALID_GET_VALUE_NO_VALUE));
}
Ok(value)
}
#[instrument]
fn get_container_pipe_size(param: &str) -> Result<i32> {
let fields: Vec<&str> = param.split('=').collect();
@@ -319,6 +369,10 @@ mod tests {
use std::time;
use tempfile::tempdir;
const ERR_INVALID_TRACE_TYPE_PARAM: &str = "invalid trace type parameter";
const ERR_INVALID_TRACE_TYPE: &str = "invalid trace type";
const ERR_INVALID_TRACE_TYPE_KEY: &str = "invalid trace type key name";
// helper function to make errors less crazy-long
fn make_err(desc: &str) -> Error {
anyhow!(desc.to_string())
@@ -334,7 +388,7 @@ mod tests {
if $expected_result.is_ok() {
let expected_level = $expected_result.as_ref().unwrap();
let actual_level = $actual_result.unwrap();
assert!(*expected_level == actual_level, $msg);
assert!(*expected_level == actual_level, "{}", $msg);
} else {
let expected_error = $expected_result.as_ref().unwrap_err();
let expected_error_msg = format!("{:?}", expected_error);
@@ -342,9 +396,9 @@ mod tests {
if let Err(actual_error) = $actual_result {
let actual_error_msg = format!("{:?}", actual_error);
assert!(expected_error_msg == actual_error_msg, $msg);
assert!(expected_error_msg == actual_error_msg, "{}", $msg);
} else {
assert!(expected_error_msg == "expected error, got OK", $msg);
assert!(expected_error_msg == "expected error, got OK", "{}", $msg);
}
}
};
@@ -374,6 +428,7 @@ mod tests {
container_pipe_size: i32,
server_addr: &'a str,
unified_cgroup_hierarchy: bool,
tracing: tracer::TraceType,
}
let tests = &[
@@ -387,6 +442,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "agent.debug_console agent.devmodex",
@@ -398,6 +454,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "agent.logx=debug",
@@ -409,6 +466,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "agent.log=debug",
@@ -420,6 +478,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "agent.log=debug",
@@ -431,6 +490,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "",
@@ -442,6 +502,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "foo",
@@ -453,6 +514,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "foo bar",
@@ -464,6 +526,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "foo bar",
@@ -475,6 +538,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "foo agent bar",
@@ -486,6 +550,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "foo debug_console agent bar devmode",
@@ -497,6 +562,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "agent.debug_console",
@@ -508,6 +574,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: " agent.debug_console ",
@@ -519,6 +586,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "agent.debug_console foo",
@@ -530,6 +598,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: " agent.debug_console foo",
@@ -541,6 +610,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "foo agent.debug_console bar",
@@ -552,6 +622,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "foo agent.debug_console",
@@ -563,6 +634,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "foo agent.debug_console ",
@@ -574,6 +646,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "agent.devmode",
@@ -585,6 +658,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: " agent.devmode ",
@@ -596,6 +670,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "agent.devmode foo",
@@ -607,6 +682,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: " agent.devmode foo",
@@ -618,6 +694,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "foo agent.devmode bar",
@@ -629,6 +706,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "foo agent.devmode",
@@ -640,6 +718,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "foo agent.devmode ",
@@ -651,6 +730,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "agent.devmode agent.debug_console",
@@ -662,6 +742,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "agent.devmode agent.debug_console agent.hotplug_timeout=100 agent.unified_cgroup_hierarchy=a",
@@ -673,6 +754,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "agent.devmode agent.debug_console agent.hotplug_timeout=0 agent.unified_cgroup_hierarchy=11",
@@ -684,6 +766,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: true,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "agent.devmode agent.debug_console agent.container_pipe_size=2097152 agent.unified_cgroup_hierarchy=false",
@@ -695,6 +778,7 @@ mod tests {
container_pipe_size: 2097152,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "agent.devmode agent.debug_console agent.container_pipe_size=100 agent.unified_cgroup_hierarchy=true",
@@ -706,6 +790,7 @@ mod tests {
container_pipe_size: 100,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: true,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "agent.devmode agent.debug_console agent.container_pipe_size=0 agent.unified_cgroup_hierarchy=0",
@@ -717,6 +802,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "agent.devmode agent.debug_console agent.container_pip_siz=100 agent.unified_cgroup_hierarchy=1",
@@ -728,6 +814,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: true,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "",
@@ -739,6 +826,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "",
@@ -750,6 +838,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: "foo",
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "",
@@ -761,6 +850,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: "=",
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "",
@@ -772,6 +862,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: "=foo",
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "",
@@ -783,6 +874,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: "foo=bar=baz=",
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "",
@@ -794,6 +886,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: "unix:///tmp/foo.socket",
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "",
@@ -805,6 +898,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: "unix://@/tmp/foo.socket",
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "",
@@ -816,6 +910,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "",
@@ -827,6 +922,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "",
@@ -838,6 +934,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "",
@@ -849,6 +946,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "server_addr=unix:///tmp/foo.socket",
@@ -860,6 +958,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "agent.server_address=unix:///tmp/foo.socket",
@@ -871,6 +970,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "agent.server_addr=unix:///tmp/foo.socket",
@@ -882,6 +982,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: "unix:///tmp/foo.socket",
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: " agent.server_addr=unix:///tmp/foo.socket",
@@ -893,6 +994,7 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: "unix:///tmp/foo.socket",
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: " agent.server_addr=unix:///tmp/foo.socket a",
@@ -904,6 +1006,115 @@ mod tests {
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: "unix:///tmp/foo.socket",
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "trace",
env_vars: Vec::new(),
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: ".trace",
env_vars: Vec::new(),
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "agent.tracer",
env_vars: Vec::new(),
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "agent.trac",
env_vars: Vec::new(),
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "agent.trace",
env_vars: Vec::new(),
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Isolated,
},
TestData {
contents: "agent.trace=isolated",
env_vars: Vec::new(),
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Isolated,
},
TestData {
contents: "agent.trace=disabled",
env_vars: Vec::new(),
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
TestData {
contents: "",
env_vars: vec!["KATA_AGENT_TRACE_TYPE=isolated"],
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Isolated,
},
TestData {
contents: "",
env_vars: vec!["KATA_AGENT_TRACE_TYPE=disabled"],
debug_console: false,
dev_mode: false,
log_level: DEFAULT_LOG_LEVEL,
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
server_addr: TEST_SERVER_ADDR,
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
},
];
@@ -958,6 +1169,7 @@ mod tests {
);
assert_eq!(config.container_pipe_size, 0, "{}", msg);
assert_eq!(config.server_addr, TEST_SERVER_ADDR, "{}", msg);
assert_eq!(config.tracing, tracer::TraceType::Disabled, "{}", msg);
let result = config.parse_cmdline(filename);
assert!(result.is_ok(), "{}", msg);
@@ -973,6 +1185,7 @@ mod tests {
assert_eq!(d.hotplug_timeout, config.hotplug_timeout, "{}", msg);
assert_eq!(d.container_pipe_size, config.container_pipe_size, "{}", msg);
assert_eq!(d.server_addr, config.server_addr, "{}", msg);
assert_eq!(d.tracing, config.tracing, "{}", msg);
for v in vars_to_unset {
env::remove_var(v);
@@ -1369,4 +1582,62 @@ mod tests {
assert_result!(d.result, result, msg);
}
}
#[test]
fn test_get_trace_type() {
#[derive(Debug)]
struct TestData<'a> {
param: &'a str,
result: Result<tracer::TraceType>,
}
let tests = &[
TestData {
param: "",
result: Err(make_err(ERR_INVALID_TRACE_TYPE_PARAM)),
},
TestData {
param: "agent.tracer",
result: Err(make_err(ERR_INVALID_TRACE_TYPE_KEY)),
},
TestData {
param: "agent.trac",
result: Err(make_err(ERR_INVALID_TRACE_TYPE_KEY)),
},
TestData {
param: "agent.trace=",
result: Err(make_err(ERR_INVALID_TRACE_TYPE)),
},
TestData {
param: "agent.trace==",
result: Err(make_err(ERR_INVALID_TRACE_TYPE)),
},
TestData {
param: "agent.trace=foo",
result: Err(make_err(ERR_INVALID_TRACE_TYPE)),
},
TestData {
param: "agent.trace",
result: Ok(tracer::TraceType::Isolated),
},
TestData {
param: "agent.trace=isolated",
result: Ok(tracer::TraceType::Isolated),
},
TestData {
param: "agent.trace=disabled",
result: Ok(tracer::TraceType::Disabled),
},
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
let result = get_trace_type(d.param);
let msg = format!("{}: result: {:?}", msg, result);
assert_result!(d.result, result, msg);
}
}
}

295
src/agent/src/console.rs Normal file
View File

@@ -0,0 +1,295 @@
// Copyright (c) 2021 Ant Group
// Copyright (c) 2021 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
use crate::util;
use anyhow::{anyhow, Result};
use nix::fcntl::{self, FcntlArg, FdFlag, OFlag};
use nix::libc::{STDERR_FILENO, STDIN_FILENO, STDOUT_FILENO};
use nix::pty::{openpty, OpenptyResult};
use nix::sys::socket::{self, AddressFamily, SockAddr, SockFlag, SockType};
use nix::sys::stat::Mode;
use nix::sys::wait;
use nix::unistd::{self, close, dup2, fork, setsid, ForkResult, Pid};
use rustjail::pipestream::PipeStream;
use slog::Logger;
use std::ffi::CString;
use std::os::unix::io::{FromRawFd, RawFd};
use std::path::PathBuf;
use std::process::Stdio;
use std::sync::Arc;
use std::sync::Mutex as SyncMutex;
use futures::StreamExt;
use tokio::io::{AsyncRead, AsyncWrite};
use tokio::select;
use tokio::sync::watch::Receiver;
const CONSOLE_PATH: &str = "/dev/console";
lazy_static! {
static ref SHELLS: Arc<SyncMutex<Vec<String>>> = {
let mut v = Vec::new();
if !cfg!(test) {
v.push("/bin/bash".to_string());
v.push("/bin/sh".to_string());
}
Arc::new(SyncMutex::new(v))
};
}
pub fn initialize() {
lazy_static::initialize(&SHELLS);
}
pub async fn debug_console_handler(
logger: Logger,
port: u32,
mut shutdown: Receiver<bool>,
) -> Result<()> {
let logger = logger.new(o!("subsystem" => "debug-console"));
let shells = SHELLS.lock().unwrap().to_vec();
let shell = shells
.into_iter()
.find(|sh| PathBuf::from(sh).exists())
.ok_or_else(|| anyhow!("no shell found to launch debug console"))?;
if port > 0 {
let listenfd = socket::socket(
AddressFamily::Vsock,
SockType::Stream,
SockFlag::SOCK_CLOEXEC,
None,
)?;
let addr = SockAddr::new_vsock(libc::VMADDR_CID_ANY, port);
socket::bind(listenfd, &addr)?;
socket::listen(listenfd, 1)?;
let mut incoming = util::get_vsock_incoming(listenfd);
loop {
select! {
_ = shutdown.changed() => {
info!(logger, "debug console got shutdown request");
break;
}
conn = incoming.next() => {
if let Some(conn) = conn {
// Accept a new connection
match conn {
Ok(stream) => {
let logger = logger.clone();
let shell = shell.clone();
// Do not block(await) here, or we'll never receive the shutdown signal
tokio::spawn(async move {
let _ = run_debug_console_vsock(logger, shell, stream).await;
});
}
Err(e) => {
error!(logger, "{:?}", e);
}
}
} else {
break;
}
}
}
}
} else {
let mut flags = OFlag::empty();
flags.insert(OFlag::O_RDWR);
flags.insert(OFlag::O_CLOEXEC);
let fd = fcntl::open(CONSOLE_PATH, flags, Mode::empty())?;
select! {
_ = shutdown.changed() => {
info!(logger, "debug console got shutdown request");
}
result = run_debug_console_serial(shell.clone(), fd) => {
match result {
Ok(_) => {
info!(logger, "run_debug_console_shell session finished");
}
Err(err) => {
error!(logger, "run_debug_console_shell failed: {:?}", err);
}
}
}
}
};
Ok(())
}
fn run_in_child(slave_fd: libc::c_int, shell: String) -> Result<()> {
// create new session with child as session leader
setsid()?;
// dup stdin, stdout, stderr to let child act as a terminal
dup2(slave_fd, STDIN_FILENO)?;
dup2(slave_fd, STDOUT_FILENO)?;
dup2(slave_fd, STDERR_FILENO)?;
// set tty
unsafe {
libc::ioctl(0, libc::TIOCSCTTY);
}
let cmd = CString::new(shell).unwrap();
let args: Vec<CString> = Vec::new();
// run shell
let _ = unistd::execvp(cmd.as_c_str(), &args).map_err(|e| match e {
nix::Error::Sys(errno) => {
std::process::exit(errno as i32);
}
_ => std::process::exit(-2),
});
Ok(())
}
async fn run_in_parent<T: AsyncRead + AsyncWrite>(
logger: Logger,
stream: T,
pseudo: OpenptyResult,
child_pid: Pid,
) -> Result<()> {
info!(logger, "get debug shell pid {:?}", child_pid);
let master_fd = pseudo.master;
let _ = close(pseudo.slave);
let (mut socket_reader, mut socket_writer) = tokio::io::split(stream);
let (mut master_reader, mut master_writer) = tokio::io::split(PipeStream::from_fd(master_fd));
select! {
res = tokio::io::copy(&mut master_reader, &mut socket_writer) => {
debug!(
logger,
"master closed: {:?}", res
);
}
res = tokio::io::copy(&mut socket_reader, &mut master_writer) => {
info!(
logger,
"socket closed: {:?}", res
);
}
}
let wait_status = wait::waitpid(child_pid, None);
info!(logger, "debug console process exit code: {:?}", wait_status);
Ok(())
}
async fn run_debug_console_vsock<T: AsyncRead + AsyncWrite>(
logger: Logger,
shell: String,
stream: T,
) -> Result<()> {
let logger = logger.new(o!("subsystem" => "debug-console-shell"));
let pseudo = openpty(None, None)?;
let _ = fcntl::fcntl(pseudo.master, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC));
let _ = fcntl::fcntl(pseudo.slave, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC));
let slave_fd = pseudo.slave;
match unsafe { fork() } {
Ok(ForkResult::Child) => run_in_child(slave_fd, shell),
Ok(ForkResult::Parent { child: child_pid }) => {
run_in_parent(logger.clone(), stream, pseudo, child_pid).await
}
Err(err) => Err(anyhow!("fork error: {:?}", err)),
}
}
async fn run_debug_console_serial(shell: String, fd: RawFd) -> Result<()> {
let mut child = match tokio::process::Command::new(shell)
.arg("-i")
.kill_on_drop(true)
.stdin(unsafe { Stdio::from_raw_fd(fd) })
.stdout(unsafe { Stdio::from_raw_fd(fd) })
.stderr(unsafe { Stdio::from_raw_fd(fd) })
.spawn()
{
Ok(c) => c,
Err(_) => return Err(anyhow!("failed to spawn shell")),
};
child.wait().await?;
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::tempdir;
use tokio::sync::watch;
#[tokio::test]
async fn test_setup_debug_console_no_shells() {
{
// Guarantee no shells have been added
// (required to avoid racing with
// test_setup_debug_console_invalid_shell()).
let shells_ref = SHELLS.clone();
let mut shells = shells_ref.lock().unwrap();
shells.clear();
}
let logger = slog_scope::logger();
let (_, rx) = watch::channel(true);
let result = debug_console_handler(logger, 0, rx).await;
assert!(result.is_err());
assert_eq!(
result.unwrap_err().to_string(),
"no shell found to launch debug console"
);
}
#[tokio::test]
async fn test_setup_debug_console_invalid_shell() {
{
let shells_ref = SHELLS.clone();
let mut shells = shells_ref.lock().unwrap();
let dir = tempdir().expect("failed to create tmpdir");
// Add an invalid shell
let shell = dir
.path()
.join("enoent")
.to_str()
.expect("failed to construct shell path")
.to_string();
shells.push(shell);
}
let logger = slog_scope::logger();
let (_, rx) = watch::channel(true);
let result = debug_console_handler(logger, 0, rx).await;
assert!(result.is_err());
assert_eq!(
result.unwrap_err().to_string(),
"no shell found to launch debug console"
);
}
}

View File

@@ -5,6 +5,7 @@
use libc::{c_uint, major, minor};
use nix::sys::stat;
use regex::Regex;
use std::collections::HashMap;
use std::fs;
use std::os::unix::fs::MetadataExt;
@@ -17,10 +18,11 @@ use crate::linux_abi::*;
use crate::mount::{DRIVER_BLK_TYPE, DRIVER_MMIO_BLK_TYPE, DRIVER_NVDIMM_TYPE, DRIVER_SCSI_TYPE};
use crate::pci;
use crate::sandbox::Sandbox;
use crate::{AGENT_CONFIG, GLOBAL_DEVICE_WATCHER};
use crate::uevent::{wait_for_uevent, Uevent, UeventMatcher};
use anyhow::{anyhow, Result};
use oci::{LinuxDeviceCgroup, LinuxResources, Spec};
use protocols::agent::Device;
use tracing::instrument;
// Convenience macro to obtain the scope logger
macro_rules! sl {
@@ -31,17 +33,21 @@ macro_rules! sl {
const VM_ROOTFS: &str = "/";
#[derive(Debug)]
struct DevIndexEntry {
idx: usize,
residx: Vec<usize>,
}
#[derive(Debug)]
struct DevIndex(HashMap<String, DevIndexEntry>);
#[instrument]
pub fn rescan_pci_bus() -> Result<()> {
online_device(SYSFS_PCI_BUS_RESCAN_FILE)
}
#[instrument]
pub fn online_device(path: &str) -> Result<()> {
fs::write(path, "1")?;
Ok(())
@@ -50,6 +56,7 @@ pub fn online_device(path: &str) -> Result<()> {
// pcipath_to_sysfs fetches the sysfs path for a PCI path, relative to
// the sysfs path for the PCI host bridge, based on the PCI path
// provided.
#[instrument]
fn pcipath_to_sysfs(root_bus_sysfs: &str, pcipath: &pci::Path) -> Result<String> {
let mut bus = "0000:00".to_string();
let mut relpath = String::new();
@@ -87,78 +94,123 @@ fn pcipath_to_sysfs(root_bus_sysfs: &str, pcipath: &pci::Path) -> Result<String>
Ok(relpath)
}
async fn get_device_name(sandbox: &Arc<Mutex<Sandbox>>, dev_addr: &str) -> Result<String> {
// Keep the same lock order as uevent::handle_block_add_event(), otherwise it may cause deadlock.
let mut w = GLOBAL_DEVICE_WATCHER.lock().await;
let sb = sandbox.lock().await;
for (key, value) in sb.pci_device_map.iter() {
if key.contains(dev_addr) {
info!(sl!(), "Device {} found in pci device map", dev_addr);
return Ok(format!("{}/{}", SYSTEM_DEV_PATH, value));
}
}
drop(sb);
// If device is not found in the device map, hotplug event has not
// been received yet, create and add channel to the watchers map.
// The key of the watchers map is the device we are interested in.
// Note this is done inside the lock, not to miss any events from the
// global udev listener.
let (tx, rx) = tokio::sync::oneshot::channel::<String>();
w.insert(dev_addr.to_string(), Some(tx));
drop(w);
info!(sl!(), "Waiting on channel for device notification\n");
let hotplug_timeout = AGENT_CONFIG.read().await.hotplug_timeout;
let dev_name = match tokio::time::timeout(hotplug_timeout, rx).await {
Ok(v) => v?,
Err(_) => {
let watcher = GLOBAL_DEVICE_WATCHER.clone();
let mut w = watcher.lock().await;
w.remove_entry(dev_addr);
return Err(anyhow!(
"Timeout reached after {:?} waiting for device {}",
hotplug_timeout,
dev_addr
));
}
};
Ok(format!("{}/{}", SYSTEM_DEV_PATH, &dev_name))
// FIXME: This matcher is only correct if the guest has at most one
// SCSI host.
#[derive(Debug)]
struct ScsiBlockMatcher {
search: String,
}
impl ScsiBlockMatcher {
fn new(scsi_addr: &str) -> ScsiBlockMatcher {
let search = format!(r"/0:0:{}/block/", scsi_addr);
ScsiBlockMatcher { search }
}
}
impl UeventMatcher for ScsiBlockMatcher {
fn is_match(&self, uev: &Uevent) -> bool {
uev.subsystem == "block" && uev.devpath.contains(&self.search) && !uev.devname.is_empty()
}
}
#[instrument]
pub async fn get_scsi_device_name(
sandbox: &Arc<Mutex<Sandbox>>,
scsi_addr: &str,
) -> Result<String> {
let dev_sub_path = format!("{}{}/{}", SCSI_HOST_CHANNEL, scsi_addr, SCSI_BLOCK_SUFFIX);
let matcher = ScsiBlockMatcher::new(scsi_addr);
scan_scsi_bus(scsi_addr)?;
get_device_name(sandbox, &dev_sub_path).await
let uev = wait_for_uevent(sandbox, matcher).await?;
Ok(format!("{}/{}", SYSTEM_DEV_PATH, &uev.devname))
}
pub async fn get_pci_device_name(
#[derive(Debug)]
struct VirtioBlkPciMatcher {
rex: Regex,
}
impl VirtioBlkPciMatcher {
fn new(relpath: &str) -> VirtioBlkPciMatcher {
let root_bus = create_pci_root_bus_path();
let re = format!(r"^{}{}/virtio[0-9]+/block/", root_bus, relpath);
VirtioBlkPciMatcher {
rex: Regex::new(&re).unwrap(),
}
}
}
impl UeventMatcher for VirtioBlkPciMatcher {
fn is_match(&self, uev: &Uevent) -> bool {
uev.subsystem == "block" && self.rex.is_match(&uev.devpath) && !uev.devname.is_empty()
}
}
#[instrument]
pub async fn get_virtio_blk_pci_device_name(
sandbox: &Arc<Mutex<Sandbox>>,
pcipath: &pci::Path,
) -> Result<String> {
let root_bus_sysfs = format!("{}{}", SYSFS_DIR, create_pci_root_bus_path());
let sysfs_rel_path = pcipath_to_sysfs(&root_bus_sysfs, pcipath)?;
let matcher = VirtioBlkPciMatcher::new(&sysfs_rel_path);
rescan_pci_bus()?;
get_device_name(sandbox, &sysfs_rel_path).await
let uev = wait_for_uevent(sandbox, matcher).await?;
Ok(format!("{}/{}", SYSTEM_DEV_PATH, &uev.devname))
}
pub async fn get_pmem_device_name(
sandbox: &Arc<Mutex<Sandbox>>,
pmem_devname: &str,
) -> Result<String> {
let dev_sub_path = format!("/{}/{}", SCSI_BLOCK_SUFFIX, pmem_devname);
get_device_name(sandbox, &dev_sub_path).await
#[derive(Debug)]
struct PmemBlockMatcher {
suffix: String,
}
impl PmemBlockMatcher {
fn new(devname: &str) -> PmemBlockMatcher {
let suffix = format!(r"/block/{}", devname);
PmemBlockMatcher { suffix }
}
}
impl UeventMatcher for PmemBlockMatcher {
fn is_match(&self, uev: &Uevent) -> bool {
uev.subsystem == "block"
&& uev.devpath.starts_with(ACPI_DEV_PATH)
&& uev.devpath.ends_with(&self.suffix)
&& !uev.devname.is_empty()
}
}
#[instrument]
pub async fn wait_for_pmem_device(sandbox: &Arc<Mutex<Sandbox>>, devpath: &str) -> Result<()> {
let devname = match devpath.strip_prefix("/dev/") {
Some(dev) => dev,
None => {
return Err(anyhow!(
"Storage source '{}' must start with /dev/",
devpath
))
}
};
let matcher = PmemBlockMatcher::new(devname);
let uev = wait_for_uevent(sandbox, matcher).await?;
if uev.devname != devname {
return Err(anyhow!(
"Unexpected device name {} for pmem device (expected {})",
uev.devname,
devname
));
}
Ok(())
}
/// Scan SCSI bus for the given SCSI address(SCSI-Id and LUN)
#[instrument]
fn scan_scsi_bus(scsi_addr: &str) -> Result<()> {
let tokens: Vec<&str> = scsi_addr.split(':').collect();
if tokens.len() != 2 {
@@ -193,6 +245,7 @@ fn scan_scsi_bus(scsi_addr: &str) -> Result<()> {
// the same device in the list of devices provided through the OCI spec.
// This is needed to update information about minor/major numbers that cannot
// be predicted from the caller.
#[instrument]
fn update_spec_device_list(device: &Device, spec: &mut Spec, devidx: &DevIndex) -> Result<()> {
let major_id: c_uint;
let minor_id: c_uint;
@@ -269,6 +322,7 @@ fn update_spec_device_list(device: &Device, spec: &mut Spec, devidx: &DevIndex)
// device.Id should be the predicted device name (vda, vdb, ...)
// device.VmPath already provides a way to send it in
#[instrument]
async fn virtiommio_blk_device_handler(
device: &Device,
spec: &mut Spec,
@@ -283,6 +337,7 @@ async fn virtiommio_blk_device_handler(
}
// device.Id should be a PCI path string
#[instrument]
async fn virtio_blk_device_handler(
device: &Device,
spec: &mut Spec,
@@ -290,19 +345,15 @@ async fn virtio_blk_device_handler(
devidx: &DevIndex,
) -> Result<()> {
let mut dev = device.clone();
let pcipath = pci::Path::from_str(&device.id)?;
// When "Id (PCI path)" is not set, we allow to use the predicted
// "VmPath" passed from kata-runtime Note this is a special code
// path for cloud-hypervisor when BDF information is not available
if !device.id.is_empty() {
let pcipath = pci::Path::from_str(&device.id)?;
dev.vm_path = get_pci_device_name(sandbox, &pcipath).await?;
}
dev.vm_path = get_virtio_blk_pci_device_name(sandbox, &pcipath).await?;
update_spec_device_list(&dev, spec, devidx)
}
// device.Id should be the SCSI address of the disk in the format "scsiID:lunID"
#[instrument]
async fn virtio_scsi_device_handler(
device: &Device,
spec: &mut Spec,
@@ -314,6 +365,7 @@ async fn virtio_scsi_device_handler(
update_spec_device_list(&dev, spec, devidx)
}
#[instrument]
async fn virtio_nvdimm_device_handler(
device: &Device,
spec: &mut Spec,
@@ -352,6 +404,7 @@ impl DevIndex {
}
}
#[instrument]
pub async fn add_devices(
devices: &[Device],
spec: &mut Spec,
@@ -366,6 +419,7 @@ pub async fn add_devices(
Ok(())
}
#[instrument]
async fn add_device(
device: &Device,
spec: &mut Spec,
@@ -400,6 +454,7 @@ async fn add_device(
// update_device_cgroup update the device cgroup for container
// to not allow access to the guest root partition. This prevents
// the container from being able to access the VM rootfs.
#[instrument]
pub fn update_device_cgroup(spec: &mut Spec) -> Result<()> {
let meta = fs::metadata(VM_ROOTFS)?;
let rdev = meta.dev();
@@ -430,6 +485,7 @@ pub fn update_device_cgroup(spec: &mut Spec) -> Result<()> {
#[cfg(test)]
mod tests {
use super::*;
use crate::uevent::spawn_test_watcher;
use oci::Linux;
use tempfile::tempdir;
@@ -776,4 +832,107 @@ mod tests {
let relpath = pcipath_to_sysfs(rootbuspath, &path234);
assert_eq!(relpath.unwrap(), "/0000:00:02.0/0000:01:03.0/0000:02:04.0");
}
// We use device specific variants of this for real cases, but
// they have some complications that make them troublesome to unit
// test
async fn example_get_device_name(
sandbox: &Arc<Mutex<Sandbox>>,
relpath: &str,
) -> Result<String> {
let matcher = VirtioBlkPciMatcher::new(relpath);
let uev = wait_for_uevent(sandbox, matcher).await?;
Ok(uev.devname)
}
#[tokio::test]
async fn test_get_device_name() {
let devname = "vda";
let root_bus = create_pci_root_bus_path();
let relpath = "/0000:00:0a.0/0000:03:0b.0";
let devpath = format!("{}{}/virtio4/block/{}", root_bus, relpath, devname);
let mut uev = crate::uevent::Uevent::default();
uev.action = crate::linux_abi::U_EVENT_ACTION_ADD.to_string();
uev.subsystem = "block".to_string();
uev.devpath = devpath.clone();
uev.devname = devname.to_string();
let logger = slog::Logger::root(slog::Discard, o!());
let sandbox = Arc::new(Mutex::new(Sandbox::new(&logger).unwrap()));
let mut sb = sandbox.lock().await;
sb.uevent_map.insert(devpath.clone(), uev);
drop(sb); // unlock
let name = example_get_device_name(&sandbox, relpath).await;
assert!(name.is_ok(), "{}", name.unwrap_err());
assert_eq!(name.unwrap(), devname);
let mut sb = sandbox.lock().await;
let uev = sb.uevent_map.remove(&devpath).unwrap();
drop(sb); // unlock
spawn_test_watcher(sandbox.clone(), uev);
let name = example_get_device_name(&sandbox, relpath).await;
assert!(name.is_ok(), "{}", name.unwrap_err());
assert_eq!(name.unwrap(), devname);
}
#[tokio::test]
async fn test_virtio_blk_matcher() {
let root_bus = create_pci_root_bus_path();
let devname = "vda";
let mut uev_a = crate::uevent::Uevent::default();
let relpath_a = "/0000:00:0a.0";
uev_a.action = crate::linux_abi::U_EVENT_ACTION_ADD.to_string();
uev_a.subsystem = "block".to_string();
uev_a.devname = devname.to_string();
uev_a.devpath = format!("{}{}/virtio4/block/{}", root_bus, relpath_a, devname);
let matcher_a = VirtioBlkPciMatcher::new(&relpath_a);
let mut uev_b = uev_a.clone();
let relpath_b = "/0000:00:0a.0/0000:00:0b.0";
uev_b.devpath = format!("{}{}/virtio0/block/{}", root_bus, relpath_b, devname);
let matcher_b = VirtioBlkPciMatcher::new(&relpath_b);
assert!(matcher_a.is_match(&uev_a));
assert!(matcher_b.is_match(&uev_b));
assert!(!matcher_b.is_match(&uev_a));
assert!(!matcher_a.is_match(&uev_b));
}
#[tokio::test]
async fn test_scsi_block_matcher() {
let root_bus = create_pci_root_bus_path();
let devname = "sda";
let mut uev_a = crate::uevent::Uevent::default();
let addr_a = "0:0";
uev_a.action = crate::linux_abi::U_EVENT_ACTION_ADD.to_string();
uev_a.subsystem = "block".to_string();
uev_a.devname = devname.to_string();
uev_a.devpath = format!(
"{}/0000:00:00.0/virtio0/host0/target0:0:0/0:0:{}/block/sda",
root_bus, addr_a
);
let matcher_a = ScsiBlockMatcher::new(&addr_a);
let mut uev_b = uev_a.clone();
let addr_b = "2:0";
uev_b.devpath = format!(
"{}/0000:00:00.0/virtio0/host0/target0:0:2/0:0:{}/block/sdb",
root_bus, addr_b
);
let matcher_b = ScsiBlockMatcher::new(&addr_b);
assert!(matcher_a.is_match(&uev_a));
assert!(matcher_b.is_match(&uev_b));
assert!(!matcher_b.is_match(&uev_a));
assert!(!matcher_a.is_match(&uev_b));
}
}

View File

@@ -78,11 +78,6 @@ pub const SYSFS_MEMORY_BLOCK_SIZE_PATH: &str = "/sys/devices/system/memory/block
pub const SYSFS_MEMORY_HOTPLUG_PROBE_PATH: &str = "/sys/devices/system/memory/probe";
pub const SYSFS_MEMORY_ONLINE_PATH: &str = "/sys/devices/system/memory";
// Here in "0:0", the first number is the SCSI host number because
// only one SCSI controller has been plugged, while the second number
// is always 0.
pub const SCSI_HOST_CHANNEL: &str = "0:0:";
pub const SCSI_BLOCK_SUFFIX: &str = "block";
pub const SYSFS_SCSI_HOST_PATH: &str = "/sys/class/scsi_host";
pub const SYSFS_CGROUPPATH: &str = "/sys/fs/cgroup";

View File

@@ -5,8 +5,8 @@
#[macro_use]
extern crate lazy_static;
extern crate capctl;
extern crate oci;
extern crate prctl;
extern crate prometheus;
extern crate protocols;
extern crate regex;
@@ -20,27 +20,22 @@ extern crate scopeguard;
extern crate slog;
use anyhow::{anyhow, Context, Result};
use nix::fcntl::{self, OFlag};
use nix::fcntl::{FcntlArg, FdFlag};
use nix::libc::{STDERR_FILENO, STDIN_FILENO, STDOUT_FILENO};
use nix::pty;
use nix::sys::select::{select, FdSet};
use nix::fcntl::OFlag;
use nix::sys::socket::{self, AddressFamily, SockAddr, SockFlag, SockType};
use nix::sys::wait;
use nix::unistd::{self, close, dup, dup2, fork, setsid, ForkResult};
use std::collections::HashMap;
use nix::unistd::{self, dup, Pid};
use std::env;
use std::ffi::{CStr, CString, OsStr};
use std::ffi::OsStr;
use std::fs::{self, File};
use std::io::{Read, Write};
use std::os::unix::ffi::OsStrExt;
use std::os::unix::fs as unixfs;
use std::os::unix::io::AsRawFd;
use std::path::Path;
use std::process::exit;
use std::sync::Arc;
use unistd::Pid;
use tracing::{instrument, span};
mod config;
mod console;
mod device;
mod linux_abi;
mod metrics;
@@ -61,40 +56,32 @@ mod version;
use mount::{cgroups_mount, general_mount};
use sandbox::Sandbox;
use signal::setup_signal_handler;
use slog::Logger;
use slog::{error, info, o, warn, Logger};
use uevent::watch_uevents;
use std::sync::Mutex as SyncMutex;
use futures::future::join_all;
use futures::StreamExt as _;
use rustjail::pipestream::PipeStream;
use tokio::{
io::AsyncWrite,
sync::{
oneshot::Sender,
watch::{channel, Receiver},
Mutex, RwLock,
},
task::JoinHandle,
};
use tokio_vsock::{Incoming, VsockListener, VsockStream};
mod rpc;
mod tracer;
const NAME: &str = "kata-agent";
const KERNEL_CMDLINE_FILE: &str = "/proc/cmdline";
const CONSOLE_PATH: &str = "/dev/console";
const DEFAULT_BUF_SIZE: usize = 8 * 1024;
lazy_static! {
static ref GLOBAL_DEVICE_WATCHER: Arc<Mutex<HashMap<String, Option<Sender<String>>>>> =
Arc::new(Mutex::new(HashMap::new()));
static ref AGENT_CONFIG: Arc<RwLock<AgentConfig>> =
Arc::new(RwLock::new(config::AgentConfig::new()));
}
#[instrument]
fn announce(logger: &Logger, config: &AgentConfig) {
info!(logger, "announce";
"agent-commit" => version::VERSION_COMMIT,
@@ -108,27 +95,6 @@ fn announce(logger: &Logger, config: &AgentConfig) {
);
}
fn set_fd_close_exec(fd: RawFd) -> Result<RawFd> {
if let Err(e) = fcntl::fcntl(fd, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC)) {
return Err(anyhow!("failed to set fd: {} as close-on-exec: {}", fd, e));
}
Ok(fd)
}
fn get_vsock_incoming(fd: RawFd) -> Incoming {
let incoming;
unsafe {
incoming = VsockListener::from_raw_fd(fd).incoming();
}
incoming
}
async fn get_vsock_stream(fd: RawFd) -> Result<VsockStream> {
let stream = get_vsock_incoming(fd).next().await.unwrap().unwrap();
set_fd_close_exec(stream.as_raw_fd())?;
Ok(stream)
}
// Create a thread to handle reading from the logger pipe. The thread will
// output to the vsock port specified, or stdout.
async fn create_logger_task(rfd: RawFd, vsock_port: u32, shutdown: Receiver<bool>) -> Result<()> {
@@ -147,7 +113,7 @@ async fn create_logger_task(rfd: RawFd, vsock_port: u32, shutdown: Receiver<bool
socket::bind(listenfd, &addr).unwrap();
socket::listen(listenfd, 1).unwrap();
writer = Box::new(get_vsock_stream(listenfd).await.unwrap());
writer = Box::new(util::get_vsock_stream(listenfd).await.unwrap());
} else {
writer = Box::new(tokio::io::stdout());
}
@@ -163,7 +129,7 @@ async fn real_main() -> std::result::Result<(), Box<dyn std::error::Error>> {
// List of tasks that need to be stopped for a clean shutdown
let mut tasks: Vec<JoinHandle<Result<()>>> = vec![];
lazy_static::initialize(&SHELLS);
console::initialize();
lazy_static::initialize(&AGENT_CONFIG);
@@ -236,6 +202,17 @@ async fn real_main() -> std::result::Result<(), Box<dyn std::error::Error>> {
ttrpc_log_guard = Ok(slog_stdlog::init().map_err(|e| e)?);
}
if config.tracing != tracer::TraceType::Disabled {
let _ = tracer::setup_tracing(NAME, &logger, &config)?;
}
let root = span!(tracing::Level::TRACE, "root-span", work_units = 2);
// XXX: Start the root trace transaction.
//
// XXX: Note that *ALL* spans needs to start after this point!!
let _enter = root.enter();
// Start the sandbox and wait for its ttRPC server to end
start_sandbox(&logger, &config, init_mode, &mut tasks, shutdown_rx.clone()).await?;
@@ -264,6 +241,10 @@ async fn real_main() -> std::result::Result<(), Box<dyn std::error::Error>> {
}
}
if config.tracing != tracer::TraceType::Disabled {
tracer::end_tracing();
}
eprintln!("{} shutdown complete", NAME);
Ok(())
@@ -285,6 +266,7 @@ fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
}
if args.len() == 2 && args[1] == "init" {
reset_sigpipe();
rustjail::container::init_child();
exit(0);
}
@@ -296,6 +278,7 @@ fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
rt.block_on(real_main())
}
#[instrument]
async fn start_sandbox(
logger: &Logger,
config: &AgentConfig,
@@ -303,26 +286,17 @@ async fn start_sandbox(
tasks: &mut Vec<JoinHandle<Result<()>>>,
shutdown: Receiver<bool>,
) -> Result<()> {
let shells = SHELLS.clone();
let debug_console_vport = config.debug_console_vport as u32;
let shell_handle = if config.debug_console {
let thread_logger = logger.clone();
let shells = shells.lock().unwrap().to_vec();
if config.debug_console {
let debug_console_task = tokio::task::spawn(console::debug_console_handler(
logger.clone(),
debug_console_vport,
shutdown.clone(),
));
let handle = tokio::task::spawn_blocking(move || {
let result = setup_debug_console(&thread_logger, shells, debug_console_vport);
if result.is_err() {
// Report error, but don't fail
warn!(thread_logger, "failed to setup debug console";
"error" => format!("{}", result.unwrap_err()));
}
});
Some(handle)
} else {
None
};
tasks.push(debug_console_task);
}
// Initialize unique sandbox structure.
let s = Sandbox::new(&logger).context("Failed to create sandbox")?;
@@ -351,13 +325,9 @@ async fn start_sandbox(
let mut server = rpc::start(sandbox.clone(), config.server_addr.as_str());
server.start().await?;
let _ = rx.await?;
rx.await?;
server.shutdown().await?;
if let Some(handle) = shell_handle {
handle.await.map_err(|e| anyhow!("{:?}", e))?;
}
Ok(())
}
@@ -395,6 +365,7 @@ fn init_agent_as_init(logger: &Logger, unified_cgroup_hierarchy: bool) -> Result
Ok(())
}
#[instrument]
fn sethostname(hostname: &OsStr) -> Result<()> {
let size = hostname.len() as usize;
@@ -408,284 +379,16 @@ fn sethostname(hostname: &OsStr) -> Result<()> {
}
}
lazy_static! {
static ref SHELLS: Arc<SyncMutex<Vec<String>>> = {
let mut v = Vec::new();
if !cfg!(test) {
v.push("/bin/bash".to_string());
v.push("/bin/sh".to_string());
}
Arc::new(SyncMutex::new(v))
};
// The Rust standard library had suppressed the default SIGPIPE behavior,
// see https://github.com/rust-lang/rust/pull/13158.
// Since the parent's signal handler would be inherited by it's child process,
// thus we should re-enable the standard SIGPIPE behavior as a workaround to
// fix the issue of https://github.com/kata-containers/kata-containers/issues/1887.
fn reset_sigpipe() {
unsafe {
libc::signal(libc::SIGPIPE, libc::SIG_DFL);
}
}
use crate::config::AgentConfig;
use nix::sys::stat::Mode;
use std::os::unix::io::{FromRawFd, RawFd};
use std::path::PathBuf;
use std::process::exit;
fn setup_debug_console(logger: &Logger, shells: Vec<String>, port: u32) -> Result<()> {
let shell = shells
.iter()
.find(|sh| PathBuf::from(sh).exists())
.ok_or_else(|| anyhow!("no shell found to launch debug console"))?;
if port > 0 {
let listenfd = socket::socket(
AddressFamily::Vsock,
SockType::Stream,
SockFlag::SOCK_CLOEXEC,
None,
)?;
let addr = SockAddr::new_vsock(libc::VMADDR_CID_ANY, port);
socket::bind(listenfd, &addr)?;
socket::listen(listenfd, 1)?;
loop {
let f: RawFd = socket::accept4(listenfd, SockFlag::SOCK_CLOEXEC)?;
match run_debug_console_shell(logger, shell, f) {
Ok(_) => {
info!(logger, "run_debug_console_shell session finished");
}
Err(err) => {
error!(logger, "run_debug_console_shell failed: {:?}", err);
}
}
}
} else {
let mut flags = OFlag::empty();
flags.insert(OFlag::O_RDWR);
flags.insert(OFlag::O_CLOEXEC);
loop {
let f: RawFd = fcntl::open(CONSOLE_PATH, flags, Mode::empty())?;
match run_debug_console_shell(logger, shell, f) {
Ok(_) => {
info!(logger, "run_debug_console_shell session finished");
}
Err(err) => {
error!(logger, "run_debug_console_shell failed: {:?}", err);
}
}
}
};
}
fn io_copy<R: ?Sized, W: ?Sized>(reader: &mut R, writer: &mut W) -> std::io::Result<u64>
where
R: Read,
W: Write,
{
let mut buf = [0; DEFAULT_BUF_SIZE];
let buf_len;
match reader.read(&mut buf) {
Ok(0) => return Ok(0),
Ok(len) => buf_len = len,
Err(err) => return Err(err),
};
// write and return
match writer.write_all(&buf[..buf_len]) {
Ok(_) => Ok(buf_len as u64),
Err(err) => Err(err),
}
}
fn run_debug_console_shell(logger: &Logger, shell: &str, socket_fd: RawFd) -> Result<()> {
let pseduo = pty::openpty(None, None)?;
let _ = fcntl::fcntl(pseduo.master, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC));
let _ = fcntl::fcntl(pseduo.slave, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC));
let slave_fd = pseduo.slave;
match fork() {
Ok(ForkResult::Child) => {
// create new session with child as session leader
setsid()?;
// dup stdin, stdout, stderr to let child act as a terminal
dup2(slave_fd, STDIN_FILENO)?;
dup2(slave_fd, STDOUT_FILENO)?;
dup2(slave_fd, STDERR_FILENO)?;
// set tty
unsafe {
libc::ioctl(0, libc::TIOCSCTTY);
}
let cmd = CString::new(shell).unwrap();
let args: Vec<&CStr> = vec![];
// run shell
let _ = unistd::execvp(cmd.as_c_str(), args.as_slice()).map_err(|e| match e {
nix::Error::Sys(errno) => {
std::process::exit(errno as i32);
}
_ => std::process::exit(-2),
});
}
Ok(ForkResult::Parent { child: child_pid }) => {
info!(logger, "get debug shell pid {:?}", child_pid);
let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC)?;
let master_fd = pseduo.master;
let debug_shell_logger = logger.clone();
// channel that used to sync between thread and main process
let (tx, rx) = std::sync::mpsc::channel::<i32>();
// start a thread to do IO copy between socket and pseduo.master
std::thread::spawn(move || {
let mut master_reader = unsafe { File::from_raw_fd(master_fd) };
let mut master_writer = unsafe { File::from_raw_fd(master_fd) };
let mut socket_reader = unsafe { File::from_raw_fd(socket_fd) };
let mut socket_writer = unsafe { File::from_raw_fd(socket_fd) };
loop {
let mut fd_set = FdSet::new();
fd_set.insert(rfd);
fd_set.insert(master_fd);
fd_set.insert(socket_fd);
match select(
Some(fd_set.highest().unwrap() + 1),
&mut fd_set,
None,
None,
None,
) {
Ok(_) => (),
Err(e) => {
if e == nix::Error::from(nix::errno::Errno::EINTR) {
continue;
} else {
error!(debug_shell_logger, "select error {:?}", e);
tx.send(1).unwrap();
break;
}
}
}
if fd_set.contains(rfd) {
info!(
debug_shell_logger,
"debug shell process {} exited", child_pid
);
tx.send(1).unwrap();
break;
}
if fd_set.contains(master_fd) {
match io_copy(&mut master_reader, &mut socket_writer) {
Ok(0) => {
debug!(debug_shell_logger, "master fd closed");
tx.send(1).unwrap();
break;
}
Ok(_) => {}
Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
Err(e) => {
error!(debug_shell_logger, "read master fd error {:?}", e);
tx.send(1).unwrap();
break;
}
}
}
if fd_set.contains(socket_fd) {
match io_copy(&mut socket_reader, &mut master_writer) {
Ok(0) => {
debug!(debug_shell_logger, "socket fd closed");
tx.send(1).unwrap();
break;
}
Ok(_) => {}
Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
Err(e) => {
error!(debug_shell_logger, "read socket fd error {:?}", e);
tx.send(1).unwrap();
break;
}
}
}
}
});
let wait_status = wait::waitpid(child_pid, None);
info!(logger, "debug console process exit code: {:?}", wait_status);
info!(logger, "notify debug monitor thread to exit");
// close pipe to exit select loop
let _ = close(wfd);
// wait for thread exit.
let _ = rx.recv().unwrap();
info!(logger, "debug monitor thread has exited");
// close files
let _ = close(rfd);
let _ = close(master_fd);
let _ = close(slave_fd);
}
Err(err) => {
return Err(anyhow!("fork error: {:?}", err));
}
}
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::tempdir;
#[test]
fn test_setup_debug_console_no_shells() {
// Guarantee no shells have been added
// (required to avoid racing with
// test_setup_debug_console_invalid_shell()).
let shells_ref = SHELLS.clone();
let mut shells = shells_ref.lock().unwrap();
shells.clear();
let logger = slog_scope::logger();
let result = setup_debug_console(&logger, shells.to_vec(), 0);
assert!(result.is_err());
assert_eq!(
result.unwrap_err().to_string(),
"no shell found to launch debug console"
);
}
#[test]
fn test_setup_debug_console_invalid_shell() {
let shells_ref = SHELLS.clone();
let mut shells = shells_ref.lock().unwrap();
let dir = tempdir().expect("failed to create tmpdir");
// Add an invalid shell
let shell = dir
.path()
.join("enoent")
.to_str()
.expect("failed to construct shell path")
.to_string();
shells.push(shell);
let logger = slog_scope::logger();
let result = setup_debug_console(&logger, shells.to_vec(), 0);
assert!(result.is_err());
assert_eq!(
result.unwrap_err().to_string(),
"no shell found to launch debug console"
);
}
}

View File

@@ -8,6 +8,7 @@ extern crate procfs;
use prometheus::{Encoder, Gauge, GaugeVec, IntCounter, TextEncoder};
use anyhow::Result;
use tracing::instrument;
const NAMESPACE_KATA_AGENT: &str = "kata_agent";
const NAMESPACE_KATA_GUEST: &str = "kata_guest";
@@ -68,6 +69,7 @@ lazy_static! {
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"meminfo").as_ref() , "Statistics about memory usage in the system.", &["item"]).unwrap();
}
#[instrument]
pub fn get_metrics(_: &protocols::agent::GetMetricsRequest) -> Result<String> {
AGENT_SCRAPE_COUNT.inc();
@@ -87,6 +89,7 @@ pub fn get_metrics(_: &protocols::agent::GetMetricsRequest) -> Result<String> {
Ok(String::from_utf8(buffer).unwrap())
}
#[instrument]
fn update_agent_metrics() {
let me = procfs::process::Process::myself();
@@ -136,6 +139,7 @@ fn update_agent_metrics() {
}
}
#[instrument]
fn update_guest_metrics() {
// try get load and task info
match procfs::LoadAverage::new() {
@@ -218,6 +222,7 @@ fn update_guest_metrics() {
}
}
#[instrument]
fn set_gauge_vec_meminfo(gv: &prometheus::GaugeVec, meminfo: &procfs::Meminfo) {
gv.with_label_values(&["mem_total"])
.set(meminfo.mem_total as f64);
@@ -332,6 +337,7 @@ fn set_gauge_vec_meminfo(gv: &prometheus::GaugeVec, meminfo: &procfs::Meminfo) {
.set(meminfo.k_reclaimable.unwrap_or(0) as f64);
}
#[instrument]
fn set_gauge_vec_cpu_time(gv: &prometheus::GaugeVec, cpu: &str, cpu_time: &procfs::CpuTime) {
gv.with_label_values(&[cpu, "user"])
.set(cpu_time.user as f64);
@@ -355,6 +361,7 @@ fn set_gauge_vec_cpu_time(gv: &prometheus::GaugeVec, cpu: &str, cpu_time: &procf
.set(cpu_time.guest_nice.unwrap_or(0.0) as f64);
}
#[instrument]
fn set_gauge_vec_diskstat(gv: &prometheus::GaugeVec, diskstat: &procfs::DiskStat) {
gv.with_label_values(&[diskstat.name.as_str(), "reads"])
.set(diskstat.reads as f64);
@@ -393,6 +400,7 @@ fn set_gauge_vec_diskstat(gv: &prometheus::GaugeVec, diskstat: &procfs::DiskStat
}
// set_gauge_vec_netdev set gauge for NetDevLine
#[instrument]
fn set_gauge_vec_netdev(gv: &prometheus::GaugeVec, status: &procfs::net::DeviceStatus) {
gv.with_label_values(&[status.name.as_str(), "recv_bytes"])
.set(status.recv_bytes as f64);
@@ -429,6 +437,7 @@ fn set_gauge_vec_netdev(gv: &prometheus::GaugeVec, status: &procfs::net::DeviceS
}
// set_gauge_vec_proc_status set gauge for ProcStatus
#[instrument]
fn set_gauge_vec_proc_status(gv: &prometheus::GaugeVec, status: &procfs::process::Status) {
gv.with_label_values(&["vmpeak"])
.set(status.vmpeak.unwrap_or(0) as f64);
@@ -469,6 +478,7 @@ fn set_gauge_vec_proc_status(gv: &prometheus::GaugeVec, status: &procfs::process
}
// set_gauge_vec_proc_io set gauge for ProcIO
#[instrument]
fn set_gauge_vec_proc_io(gv: &prometheus::GaugeVec, io_stat: &procfs::process::Io) {
gv.with_label_values(&["rchar"]).set(io_stat.rchar as f64);
gv.with_label_values(&["wchar"]).set(io_stat.wchar as f64);
@@ -483,6 +493,7 @@ fn set_gauge_vec_proc_io(gv: &prometheus::GaugeVec, io_stat: &procfs::process::I
}
// set_gauge_vec_proc_stat set gauge for ProcStat
#[instrument]
fn set_gauge_vec_proc_stat(gv: &prometheus::GaugeVec, stat: &procfs::process::Stat) {
gv.with_label_values(&["utime"]).set(stat.utime as f64);
gv.with_label_values(&["stime"]).set(stat.stime as f64);

View File

@@ -7,7 +7,7 @@ use std::collections::HashMap;
use std::ffi::CString;
use std::fs;
use std::io;
use std::os::unix::fs::PermissionsExt;
use std::os::unix::fs::{MetadataExt, PermissionsExt};
use std::path::Path;
use std::ptr::null;
@@ -17,13 +17,14 @@ use tokio::sync::Mutex;
use libc::{c_void, mount};
use nix::mount::{self, MsFlags};
use nix::unistd::Gid;
use regex::Regex;
use std::fs::File;
use std::io::{BufRead, BufReader};
use crate::device::{
get_pci_device_name, get_pmem_device_name, get_scsi_device_name, online_device,
get_scsi_device_name, get_virtio_blk_pci_device_name, online_device, wait_for_pmem_device,
};
use crate::linux_abi::*;
use crate::pci;
@@ -31,6 +32,7 @@ use crate::protocols::agent::Storage;
use crate::Sandbox;
use anyhow::{anyhow, Context, Result};
use slog::Logger;
use tracing::instrument;
pub const DRIVER_9P_TYPE: &str = "9p";
pub const DRIVER_VIRTIOFS_TYPE: &str = "virtio-fs";
@@ -45,6 +47,9 @@ pub const TYPE_ROOTFS: &str = "rootfs";
pub const MOUNT_GUEST_TAG: &str = "kataShared";
// Allocating an FSGroup that owns the pod's volumes
const FS_GID: &str = "fsgid";
#[rustfmt::skip]
lazy_static! {
pub static ref FLAGS: HashMap<&'static str, (bool, MsFlags)> = {
@@ -152,6 +157,7 @@ pub struct BareMount<'a> {
// * evaluate all symlinks
// * ensure the source exists
impl<'a> BareMount<'a> {
#[instrument]
pub fn new(
s: &'a str,
d: &'a str,
@@ -170,6 +176,7 @@ impl<'a> BareMount<'a> {
}
}
#[instrument]
pub fn mount(&self) -> Result<()> {
let source;
let dest;
@@ -228,6 +235,7 @@ impl<'a> BareMount<'a> {
}
}
#[instrument]
async fn ephemeral_storage_handler(
logger: &Logger,
storage: &Storage,
@@ -241,11 +249,40 @@ async fn ephemeral_storage_handler(
}
fs::create_dir_all(Path::new(&storage.mount_point))?;
common_storage_handler(logger, storage)?;
// By now we only support one option field: "fsGroup" which
// isn't an valid mount option, thus we should remove it when
// do mount.
if storage.options.len() > 0 {
// ephemeral_storage didn't support mount options except fsGroup.
let mut new_storage = storage.clone();
new_storage.options = protobuf::RepeatedField::default();
common_storage_handler(logger, &new_storage)?;
let opts_vec: Vec<String> = storage.options.to_vec();
let opts = parse_options(opts_vec);
if let Some(fsgid) = opts.get(FS_GID) {
let gid = fsgid.parse::<u32>()?;
nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?;
let meta = fs::metadata(&storage.mount_point)?;
let mut permission = meta.permissions();
let o_mode = meta.mode() | 0o2000;
permission.set_mode(o_mode);
fs::set_permissions(&storage.mount_point, permission)?;
}
} else {
common_storage_handler(logger, &storage)?;
}
Ok("".to_string())
}
#[instrument]
async fn local_storage_handler(
_logger: &Logger,
storage: &Storage,
@@ -266,11 +303,24 @@ async fn local_storage_handler(
let opts_vec: Vec<String> = storage.options.to_vec();
let opts = parse_options(opts_vec);
let mode = opts.get("mode");
if let Some(mode) = mode {
let mut need_set_fsgid = false;
if let Some(fsgid) = opts.get(FS_GID) {
let gid = fsgid.parse::<u32>()?;
nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?;
need_set_fsgid = true;
}
if let Some(mode) = opts.get("mode") {
let mut permission = fs::metadata(&storage.mount_point)?.permissions();
let o_mode = u32::from_str_radix(mode, 8)?;
let mut o_mode = u32::from_str_radix(mode, 8)?;
if need_set_fsgid {
// set SetGid mode mask.
o_mode |= 0o2000;
}
permission.set_mode(o_mode);
fs::set_permissions(&storage.mount_point, permission)?;
@@ -279,6 +329,7 @@ async fn local_storage_handler(
Ok("".to_string())
}
#[instrument]
async fn virtio9p_storage_handler(
logger: &Logger,
storage: &Storage,
@@ -288,6 +339,7 @@ async fn virtio9p_storage_handler(
}
// virtiommio_blk_storage_handler handles the storage for mmio blk driver.
#[instrument]
async fn virtiommio_blk_storage_handler(
logger: &Logger,
storage: &Storage,
@@ -298,6 +350,7 @@ async fn virtiommio_blk_storage_handler(
}
// virtiofs_storage_handler handles the storage for virtio-fs.
#[instrument]
async fn virtiofs_storage_handler(
logger: &Logger,
storage: &Storage,
@@ -307,6 +360,7 @@ async fn virtiofs_storage_handler(
}
// virtio_blk_storage_handler handles the storage for blk driver.
#[instrument]
async fn virtio_blk_storage_handler(
logger: &Logger,
storage: &Storage,
@@ -325,14 +379,15 @@ async fn virtio_blk_storage_handler(
}
} else {
let pcipath = pci::Path::from_str(&storage.source)?;
let dev_path = get_pci_device_name(&sandbox, &pcipath).await?;
let dev_path = get_virtio_blk_pci_device_name(&sandbox, &pcipath).await?;
storage.source = dev_path;
}
common_storage_handler(logger, &storage)
}
// virtio_scsi_storage_handler handles the storage for scsi driver.
// virtio_scsi_storage_handler handles the storage for scsi driver.
#[instrument]
async fn virtio_scsi_storage_handler(
logger: &Logger,
storage: &Storage,
@@ -347,6 +402,7 @@ async fn virtio_scsi_storage_handler(
common_storage_handler(logger, &storage)
}
#[instrument]
fn common_storage_handler(logger: &Logger, storage: &Storage) -> Result<String> {
// Mount the storage device.
let mount_point = storage.mount_point.to_string();
@@ -355,32 +411,22 @@ fn common_storage_handler(logger: &Logger, storage: &Storage) -> Result<String>
}
// nvdimm_storage_handler handles the storage for NVDIMM driver.
#[instrument]
async fn nvdimm_storage_handler(
logger: &Logger,
storage: &Storage,
sandbox: Arc<Mutex<Sandbox>>,
) -> Result<String> {
let mut storage = storage.clone();
// If hot-plugged, get the device node path based on the PCI address else
// use the virt path provided in Storage Source
let pmem_devname = match storage.source.strip_prefix("/dev/") {
Some(dev) => dev,
None => {
return Err(anyhow!(
"Storage source '{}' must start with /dev/",
storage.source
))
}
};
let storage = storage.clone();
// Retrieve the device path from NVDIMM address.
let dev_path = get_pmem_device_name(&sandbox, pmem_devname).await?;
storage.source = dev_path;
wait_for_pmem_device(&sandbox, &storage.source).await?;
common_storage_handler(logger, &storage)
}
// mount_storage performs the mount described by the storage structure.
#[instrument]
fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
let logger = logger.new(o!("subsystem" => "mount"));
@@ -388,7 +434,10 @@ fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
// If so, skip doing the mount. This facilitates mounting the sharedfs automatically
// in the guest before the agent service starts.
if storage.source == MOUNT_GUEST_TAG && is_mounted(&storage.mount_point)? {
warn!(logger, "kataShared already mounted, ignoring...");
warn!(
logger,
"{} already mounted on {}, ignoring...", MOUNT_GUEST_TAG, &storage.mount_point
);
return Ok(());
}
@@ -428,6 +477,7 @@ fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
}
/// Looks for `mount_point` entry in the /proc/mounts.
#[instrument]
fn is_mounted(mount_point: &str) -> Result<bool> {
let mount_point = mount_point.trim_end_matches('/');
let found = fs::metadata(mount_point).is_ok()
@@ -445,6 +495,7 @@ fn is_mounted(mount_point: &str) -> Result<bool> {
Ok(found)
}
#[instrument]
fn parse_mount_flags_and_options(options_vec: Vec<&str>) -> (MsFlags, String) {
let mut flags = MsFlags::empty();
let mut options: String = "".to_string();
@@ -473,6 +524,7 @@ fn parse_mount_flags_and_options(options_vec: Vec<&str>) -> (MsFlags, String) {
// associated operations such as waiting for the device to show up, and mount
// it to a specific location, according to the type of handler chosen, and for
// each storage.
#[instrument]
pub async fn add_storages(
logger: Logger,
storages: Vec<Storage>,
@@ -522,6 +574,7 @@ pub async fn add_storages(
Ok(mount_list)
}
#[instrument]
fn mount_to_rootfs(logger: &Logger, m: &InitMount) -> Result<()> {
let options_vec: Vec<&str> = m.options.clone();
@@ -547,6 +600,7 @@ fn mount_to_rootfs(logger: &Logger, m: &InitMount) -> Result<()> {
Ok(())
}
#[instrument]
pub fn general_mount(logger: &Logger) -> Result<()> {
let logger = logger.new(o!("subsystem" => "mount"));
@@ -564,6 +618,7 @@ pub fn get_mount_fs_type(mount_point: &str) -> Result<String> {
// get_mount_fs_type_from_file returns the FS type corresponding to the passed mount point and
// any error ecountered.
#[instrument]
pub fn get_mount_fs_type_from_file(mount_file: &str, mount_point: &str) -> Result<String> {
if mount_point.is_empty() {
return Err(anyhow!("Invalid mount point {}", mount_point));
@@ -594,6 +649,7 @@ pub fn get_mount_fs_type_from_file(mount_file: &str, mount_point: &str) -> Resul
))
}
#[instrument]
pub fn get_cgroup_mounts(
logger: &Logger,
cg_path: &str,
@@ -684,6 +740,7 @@ pub fn get_cgroup_mounts(
Ok(cg_mounts)
}
#[instrument]
pub fn cgroups_mount(logger: &Logger, unified_cgroup_hierarchy: bool) -> Result<()> {
let logger = logger.new(o!("subsystem" => "mount"));
@@ -699,6 +756,7 @@ pub fn cgroups_mount(logger: &Logger, unified_cgroup_hierarchy: bool) -> Result<
Ok(())
}
#[instrument]
pub fn remove_mounts(mounts: &[String]) -> Result<()> {
for m in mounts.iter() {
mount::umount(m.as_str()).context(format!("failed to umount {:?}", m))?;
@@ -708,6 +766,7 @@ pub fn remove_mounts(mounts: &[String]) -> Result<()> {
// ensure_destination_exists will recursively create a given mountpoint. If directories
// are created, their permissions are initialized to mountPerm(0755)
#[instrument]
fn ensure_destination_exists(destination: &str, fs_type: &str) -> Result<()> {
let d = Path::new(destination);
if !d.exists() {
@@ -728,6 +787,7 @@ fn ensure_destination_exists(destination: &str, fs_type: &str) -> Result<()> {
Ok(())
}
#[instrument]
fn parse_options(option_list: Vec<String>) -> HashMap<String, String> {
let mut options = HashMap::new();
for opt in option_list.iter() {
@@ -900,7 +960,7 @@ mod tests {
let msg = format!("{}: result: {:?}", msg, result);
if d.error_contains.is_empty() {
assert!(result.is_ok(), msg);
assert!(result.is_ok(), "{}", msg);
// Cleanup
unsafe {
@@ -912,7 +972,7 @@ mod tests {
let msg = format!("{}: umount result: {:?}", msg, result);
assert!(ret == 0, msg);
assert!(ret == 0, "{}", msg);
};
continue;
@@ -920,7 +980,7 @@ mod tests {
let err = result.unwrap_err();
let error_msg = format!("{}", err);
assert!(error_msg.contains(d.error_contains), msg);
assert!(error_msg.contains(d.error_contains), "{}", msg);
}
}
@@ -1026,13 +1086,13 @@ mod tests {
let msg = format!("{}: result: {:?}", msg, result);
if d.error_contains.is_empty() {
assert!(result.is_ok(), msg);
assert!(result.is_ok(), "{}", msg);
continue;
}
let error_msg = format!("{:#}", result.unwrap_err());
assert!(error_msg.contains(d.error_contains), msg);
assert!(error_msg.contains(d.error_contains), "{}", msg);
}
}
@@ -1108,6 +1168,7 @@ mod tests {
assert!(
format!("{}", err).contains("No such file or directory"),
"{}",
msg
);
}
@@ -1136,13 +1197,13 @@ mod tests {
if d.error_contains.is_empty() {
let fs_type = result.unwrap();
assert!(d.fs_type == fs_type, msg);
assert!(d.fs_type == fs_type, "{}", msg);
continue;
}
let error_msg = format!("{}", result.unwrap_err());
assert!(error_msg.contains(d.error_contains), msg);
assert!(error_msg.contains(d.error_contains), "{}", msg);
}
}
@@ -1291,34 +1352,34 @@ mod tests {
let msg = format!("{}: result: {:?}", msg, result);
if !d.error_contains.is_empty() {
assert!(result.is_err(), msg);
assert!(result.is_err(), "{}", msg);
let error_msg = format!("{}", result.unwrap_err());
assert!(error_msg.contains(d.error_contains), msg);
assert!(error_msg.contains(d.error_contains), "{}", msg);
continue;
}
assert!(result.is_ok(), msg);
assert!(result.is_ok(), "{}", msg);
let mounts = result.unwrap();
let count = mounts.len();
if !d.devices_cgroup {
assert!(count == 0, msg);
assert!(count == 0, "{}", msg);
continue;
}
// get_cgroup_mounts() adds the device cgroup plus two other mounts.
assert!(count == (1 + 2), msg);
assert!(count == (1 + 2), "{}", msg);
// First mount
assert!(mounts[0].eq(&first_mount), msg);
assert!(mounts[0].eq(&first_mount), "{}", msg);
// Last mount
assert!(mounts[2].eq(&last_mount), msg);
assert!(mounts[2].eq(&last_mount), "{}", msg);
// Devices cgroup
assert!(mounts[1].eq(&cg_devices_mount), msg);
assert!(mounts[1].eq(&cg_devices_mount), "{}", msg);
}
}
}

View File

@@ -11,6 +11,7 @@ use std::fmt;
use std::fs;
use std::fs::File;
use std::path::{Path, PathBuf};
use tracing::instrument;
use crate::mount::{BareMount, FLAGS};
use slog::Logger;
@@ -20,6 +21,7 @@ pub const NSTYPEIPC: &str = "ipc";
pub const NSTYPEUTS: &str = "uts";
pub const NSTYPEPID: &str = "pid";
#[instrument]
pub fn get_current_thread_ns_path(ns_type: &str) -> String {
format!(
"/proc/{}/task/{}/ns/{}",
@@ -40,31 +42,35 @@ pub struct Namespace {
}
impl Namespace {
#[instrument]
pub fn new(logger: &Logger) -> Self {
Namespace {
logger: logger.clone(),
path: String::from(""),
persistent_ns_dir: String::from(PERSISTENT_NS_DIR),
ns_type: NamespaceType::IPC,
ns_type: NamespaceType::Ipc,
hostname: None,
}
}
#[instrument]
pub fn get_ipc(mut self) -> Self {
self.ns_type = NamespaceType::IPC;
self.ns_type = NamespaceType::Ipc;
self
}
#[instrument]
pub fn get_uts(mut self, hostname: &str) -> Self {
self.ns_type = NamespaceType::UTS;
self.ns_type = NamespaceType::Uts;
if !hostname.is_empty() {
self.hostname = Some(String::from(hostname));
}
self
}
#[instrument]
pub fn get_pid(mut self) -> Self {
self.ns_type = NamespaceType::PID;
self.ns_type = NamespaceType::Pid;
self
}
@@ -76,12 +82,13 @@ impl Namespace {
// setup creates persistent namespace without switching to it.
// Note, pid namespaces cannot be persisted.
#[instrument]
pub async fn setup(mut self) -> Result<Self> {
fs::create_dir_all(&self.persistent_ns_dir)?;
let ns_path = PathBuf::from(&self.persistent_ns_dir);
let ns_type = self.ns_type;
if ns_type == NamespaceType::PID {
if ns_type == NamespaceType::Pid {
return Err(anyhow!("Cannot persist namespace of PID type"));
}
let logger = self.logger.clone();
@@ -104,7 +111,7 @@ impl Namespace {
unshare(cf)?;
if ns_type == NamespaceType::UTS && hostname.is_some() {
if ns_type == NamespaceType::Uts && hostname.is_some() {
nix::unistd::sethostname(hostname.unwrap())?;
}
// Bind mount the new namespace from the current thread onto the mount point to persist it.
@@ -147,27 +154,27 @@ impl Namespace {
/// Represents the Namespace type.
#[derive(Clone, Copy, PartialEq)]
enum NamespaceType {
IPC,
UTS,
PID,
Ipc,
Uts,
Pid,
}
impl NamespaceType {
/// Get the string representation of the namespace type.
pub fn get(&self) -> &str {
match *self {
Self::IPC => "ipc",
Self::UTS => "uts",
Self::PID => "pid",
Self::Ipc => "ipc",
Self::Uts => "uts",
Self::Pid => "pid",
}
}
/// Get the associate flags with the namespace type.
pub fn get_flags(&self) -> CloneFlags {
match *self {
Self::IPC => CloneFlags::CLONE_NEWIPC,
Self::UTS => CloneFlags::CLONE_NEWUTS,
Self::PID => CloneFlags::CLONE_NEWPID,
Self::Ipc => CloneFlags::CLONE_NEWIPC,
Self::Uts => CloneFlags::CLONE_NEWUTS,
Self::Pid => CloneFlags::CLONE_NEWPID,
}
}
}
@@ -178,12 +185,6 @@ impl fmt::Debug for NamespaceType {
}
}
impl Default for NamespaceType {
fn default() -> Self {
NamespaceType::IPC
}
}
#[cfg(test)]
mod tests {
use super::{Namespace, NamespaceType};
@@ -234,15 +235,15 @@ mod tests {
#[test]
fn test_namespace_type() {
let ipc = NamespaceType::IPC;
let ipc = NamespaceType::Ipc;
assert_eq!("ipc", ipc.get());
assert_eq!(CloneFlags::CLONE_NEWIPC, ipc.get_flags());
let uts = NamespaceType::UTS;
let uts = NamespaceType::Uts;
assert_eq!("uts", uts.get());
assert_eq!(CloneFlags::CLONE_NEWUTS, uts.get_flags());
let pid = NamespaceType::PID;
let pid = NamespaceType::Pid;
assert_eq!("pid", pid.get());
assert_eq!(CloneFlags::CLONE_NEWPID, pid.get_flags());
}

View File

@@ -542,12 +542,10 @@ impl Handle {
ntype: NDA_UNSPEC as u8,
},
nlas: {
let mut nlas = vec![];
nlas.push(Nla::Destination(match ip {
let mut nlas = vec![Nla::Destination(match ip {
IpAddr::V4(v4) => v4.octets().to_vec(),
IpAddr::V6(v6) => v6.octets().to_vec(),
}));
})];
if !neigh.lladdr.is_empty() {
nlas.push(Nla::LinkLocalAddress(

View File

@@ -9,6 +9,7 @@ use nix::fcntl::{self, OFlag};
use nix::sys::stat::Mode;
use std::fs;
use std::os::unix::io::{AsRawFd, FromRawFd};
use tracing::instrument;
pub const RNGDEV: &str = "/dev/random";
pub const RNDADDTOENTCNT: libc::c_int = 0x40045201;
@@ -20,6 +21,7 @@ type IoctlRequestType = libc::c_int;
#[cfg(target_env = "gnu")]
type IoctlRequestType = libc::c_ulong;
#[instrument]
pub fn reseed_rng(data: &[u8]) -> Result<()> {
let len = data.len() as libc::c_long;
fs::write(RNGDEV, data)?;
@@ -37,10 +39,10 @@ pub fn reseed_rng(data: &[u8]) -> Result<()> {
&len as *const libc::c_long,
)
};
let _ = Errno::result(ret).map(drop)?;
Errno::result(ret).map(drop)?;
let ret = unsafe { libc::ioctl(f.as_raw_fd(), RNDRESEEDRNG as IoctlRequestType, 0) };
let _ = Errno::result(ret).map(drop)?;
Errno::result(ret).map(drop)?;
Ok(())
}

View File

@@ -20,9 +20,8 @@ use anyhow::{anyhow, Context, Result};
use oci::{LinuxNamespace, Root, Spec};
use protobuf::{RepeatedField, SingularPtrField};
use protocols::agent::{
AgentDetails, CopyFileRequest, GuestDetailsResponse, Interfaces, ListProcessesResponse,
Metrics, OOMEvent, ReadStreamResponse, Routes, StatsContainerResponse, WaitProcessResponse,
WriteStreamResponse,
AgentDetails, CopyFileRequest, GuestDetailsResponse, Interfaces, Metrics, OOMEvent,
ReadStreamResponse, Routes, StatsContainerResponse, WaitProcessResponse, WriteStreamResponse,
};
use protocols::empty::Empty;
use protocols::health::{
@@ -104,7 +103,7 @@ impl AgentService {
) -> Result<()> {
let cid = req.container_id.clone();
let _ = verify_cid(&cid)?;
verify_cid(&cid)?;
let mut oci_spec = req.OCI.clone();
let use_sandbox_pidns = req.get_sandbox_pidns();
@@ -370,7 +369,6 @@ impl AgentService {
let s = self.sandbox.clone();
let mut resp = WaitProcessResponse::new();
let pid: pid_t;
let stream;
let (exit_send, mut exit_recv) = tokio::sync::mpsc::channel(100);
@@ -381,22 +379,20 @@ impl AgentService {
"exec-id" => eid.clone()
);
{
let exit_rx = {
let mut sandbox = s.lock().await;
let p = find_process(&mut sandbox, cid.as_str(), eid.as_str(), false)?;
stream = p.get_reader(StreamType::ExitPipeR);
p.exit_watchers.push(exit_send);
pid = p.pid;
}
if stream.is_some() {
info!(sl!(), "reading exit pipe");
p.exit_rx.clone()
};
let reader = stream.unwrap();
let mut content: Vec<u8> = vec![0, 1];
let _ = reader.lock().await.read(&mut content).await;
if let Some(mut exit_rx) = exit_rx {
info!(sl!(), "cid {} eid {} waiting for exit signal", &cid, &eid);
while exit_rx.changed().await.is_ok() {}
info!(sl!(), "cid {} eid {} received exit signal", &cid, &eid);
}
let mut sandbox = s.lock().await;
@@ -576,91 +572,6 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))
}
async fn list_processes(
&self,
_ctx: &TtrpcContext,
req: protocols::agent::ListProcessesRequest,
) -> ttrpc::Result<ListProcessesResponse> {
let cid = req.container_id.clone();
let format = req.format.clone();
let mut args = req.args.into_vec();
let mut resp = ListProcessesResponse::new();
let s = Arc::clone(&self.sandbox);
let mut sandbox = s.lock().await;
let ctr = sandbox.get_container(&cid).ok_or_else(|| {
ttrpc_error(
ttrpc::Code::INVALID_ARGUMENT,
"invalid container id".to_string(),
)
})?;
let pids = ctr.processes().unwrap();
match format.as_str() {
"table" => {}
"json" => {
resp.process_list = serde_json::to_vec(&pids).unwrap();
return Ok(resp);
}
_ => {
return Err(ttrpc_error(
ttrpc::Code::INVALID_ARGUMENT,
"invalid format!".to_string(),
));
}
}
// format "table"
if args.is_empty() {
// default argument
args = vec!["-ef".to_string()];
}
let output = tokio::process::Command::new("ps")
.args(args.as_slice())
.stdout(Stdio::piped())
.output()
.await
.expect("ps failed");
let out: String = String::from_utf8(output.stdout).unwrap();
let mut lines: Vec<String> = out.split('\n').map(|v| v.to_string()).collect();
let pid_index = lines[0]
.split_whitespace()
.position(|v| v == "PID")
.unwrap();
let mut result = String::new();
result.push_str(lines[0].as_str());
lines.remove(0);
for line in &lines {
if line.trim().is_empty() {
continue;
}
let fields: Vec<String> = line.split_whitespace().map(|v| v.to_string()).collect();
if fields.len() < pid_index + 1 {
warn!(sl!(), "corrupted output?");
continue;
}
let pid = fields[pid_index].trim().parse::<i32>().unwrap();
for p in &pids {
if pid == *p {
result.push_str(line.as_str());
}
}
}
resp.process_list = Vec::from(result);
Ok(resp)
}
async fn update_container(
&self,
_ctx: &TtrpcContext,
@@ -1015,7 +926,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
}
for m in req.kernel_modules.iter() {
let _ = load_kernel_module(m)
load_kernel_module(m)
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
}
@@ -1618,27 +1529,22 @@ fn setup_bundle(cid: &str, spec: &mut Spec) -> Result<PathBuf> {
fn cleanup_process(p: &mut Process) -> Result<()> {
if p.parent_stdin.is_some() {
p.close_stream(StreamType::ParentStdin);
let _ = unistd::close(p.parent_stdin.unwrap())?;
unistd::close(p.parent_stdin.unwrap())?;
}
if p.parent_stdout.is_some() {
p.close_stream(StreamType::ParentStdout);
let _ = unistd::close(p.parent_stdout.unwrap())?;
unistd::close(p.parent_stdout.unwrap())?;
}
if p.parent_stderr.is_some() {
p.close_stream(StreamType::ParentStderr);
let _ = unistd::close(p.parent_stderr.unwrap())?;
unistd::close(p.parent_stderr.unwrap())?;
}
if p.term_master.is_some() {
p.close_stream(StreamType::TermMaster);
let _ = unistd::close(p.term_master.unwrap())?;
}
if p.exit_pipe_r.is_some() {
p.close_stream(StreamType::ExitPipeR);
let _ = unistd::close(p.exit_pipe_r.unwrap())?;
unistd::close(p.term_master.unwrap())?;
}
p.notify_term_close();
@@ -2018,9 +1924,9 @@ mod tests {
let msg = format!("{}, result: {:?}", msg, result);
if result.is_ok() {
assert!(!d.expect_error, msg);
assert!(!d.expect_error, "{}", msg);
} else {
assert!(d.expect_error, msg);
assert!(d.expect_error, "{}", msg);
}
}
}

View File

@@ -8,6 +8,7 @@ use crate::mount::{get_mount_fs_type, remove_mounts, TYPE_ROOTFS};
use crate::namespace::Namespace;
use crate::netlink::Handle;
use crate::network::Network;
use crate::uevent::{Uevent, UeventMatcher};
use anyhow::{anyhow, Context, Result};
use libc::pid_t;
use oci::{Hook, Hooks};
@@ -25,7 +26,11 @@ use std::path::Path;
use std::sync::Arc;
use std::{thread, time};
use tokio::sync::mpsc::{channel, Receiver, Sender};
use tokio::sync::oneshot;
use tokio::sync::Mutex;
use tracing::instrument;
type UeventWatcher = (Box<dyn UeventMatcher>, oneshot::Sender<Uevent>);
#[derive(Debug)]
pub struct Sandbox {
@@ -36,7 +41,8 @@ pub struct Sandbox {
pub network: Network,
pub mounts: Vec<String>,
pub container_mounts: HashMap<String, Vec<String>>,
pub pci_device_map: HashMap<String, String>,
pub uevent_map: HashMap<String, Uevent>,
pub uevent_watchers: Vec<Option<UeventWatcher>>,
pub shared_utsns: Namespace,
pub shared_ipcns: Namespace,
pub sandbox_pidns: Option<Namespace>,
@@ -51,6 +57,7 @@ pub struct Sandbox {
}
impl Sandbox {
#[instrument]
pub fn new(logger: &Logger) -> Result<Self> {
let fs_type = get_mount_fs_type("/")?;
let logger = logger.new(o!("subsystem" => "sandbox"));
@@ -65,7 +72,8 @@ impl Sandbox {
containers: HashMap::new(),
mounts: Vec::new(),
container_mounts: HashMap::new(),
pci_device_map: HashMap::new(),
uevent_map: HashMap::new(),
uevent_watchers: Vec::new(),
shared_utsns: Namespace::new(&logger),
shared_ipcns: Namespace::new(&logger),
sandbox_pidns: None,
@@ -88,6 +96,7 @@ impl Sandbox {
//
// It's assumed that caller is calling this method after
// acquiring a lock on sandbox.
#[instrument]
pub fn set_sandbox_storage(&mut self, path: &str) -> bool {
match self.storages.get_mut(path) {
None => {
@@ -110,6 +119,7 @@ impl Sandbox {
//
// It's assumed that caller is calling this method after
// acquiring a lock on sandbox.
#[instrument]
pub fn unset_sandbox_storage(&mut self, path: &str) -> Result<bool> {
match self.storages.get_mut(path) {
None => Err(anyhow!("Sandbox storage with path {} not found", path)),
@@ -129,6 +139,7 @@ impl Sandbox {
//
// It's assumed that caller is calling this method after
// acquiring a lock on sandbox.
#[instrument]
pub fn remove_sandbox_storage(&self, path: &str) -> Result<()> {
let mounts = vec![path.to_string()];
remove_mounts(&mounts)?;
@@ -142,6 +153,7 @@ impl Sandbox {
//
// It's assumed that caller is calling this method after
// acquiring a lock on sandbox.
#[instrument]
pub fn unset_and_remove_sandbox_storage(&mut self, path: &str) -> Result<()> {
if self.unset_sandbox_storage(path)? {
return self.remove_sandbox_storage(path);
@@ -150,6 +162,7 @@ impl Sandbox {
Ok(())
}
#[instrument]
pub async fn setup_shared_namespaces(&mut self) -> Result<bool> {
// Set up shared IPC namespace
self.shared_ipcns = Namespace::new(&self.logger)
@@ -168,10 +181,12 @@ impl Sandbox {
Ok(true)
}
#[instrument]
pub fn add_container(&mut self, c: LinuxContainer) {
self.containers.insert(c.id.clone(), c);
}
#[instrument]
pub fn update_shared_pidns(&mut self, c: &LinuxContainer) -> Result<()> {
// Populate the shared pid path only if this is an infra container and
// sandbox_pidns has not been passed in the create_sandbox request.
@@ -195,10 +210,12 @@ impl Sandbox {
Ok(())
}
#[instrument]
pub fn get_container(&mut self, id: &str) -> Option<&mut LinuxContainer> {
self.containers.get_mut(id)
}
#[instrument]
pub fn find_process(&mut self, pid: pid_t) -> Option<&mut Process> {
for (_, c) in self.containers.iter_mut() {
if c.processes.get(&pid).is_some() {
@@ -209,6 +226,7 @@ impl Sandbox {
None
}
#[instrument]
pub async fn destroy(&mut self) -> Result<()> {
for ctr in self.containers.values_mut() {
ctr.destroy().await?;
@@ -216,6 +234,7 @@ impl Sandbox {
Ok(())
}
#[instrument]
pub fn online_cpu_memory(&self, req: &OnlineCPUMemRequest) -> Result<()> {
if req.nb_cpus > 0 {
// online cpus
@@ -259,6 +278,7 @@ impl Sandbox {
Ok(())
}
#[instrument]
pub fn add_hooks(&mut self, dir: &str) -> Result<()> {
let mut hooks = Hooks::default();
if let Ok(hook) = self.find_hooks(dir, "prestart") {
@@ -274,6 +294,7 @@ impl Sandbox {
Ok(())
}
#[instrument]
fn find_hooks(&self, hook_path: &str, hook_type: &str) -> Result<Vec<Hook>> {
let mut hooks = Vec::new();
for entry in fs::read_dir(Path::new(hook_path).join(hook_type))? {
@@ -310,6 +331,7 @@ impl Sandbox {
Ok(hooks)
}
#[instrument]
pub async fn run_oom_event_monitor(&self, mut rx: Receiver<String>, container_id: String) {
let logger = self.logger.clone();
@@ -342,6 +364,7 @@ impl Sandbox {
}
}
#[instrument]
fn online_resources(logger: &Logger, path: &str, pattern: &str, num: i32) -> Result<i32> {
let mut count = 0;
let re = Regex::new(pattern)?;
@@ -387,6 +410,7 @@ fn online_resources(logger: &Logger, path: &str, pattern: &str, num: i32) -> Res
const ONLINE_CPUMEM_WATI_MILLIS: u64 = 50;
const ONLINE_CPUMEM_MAX_RETRIES: u32 = 100;
#[instrument]
fn online_cpus(logger: &Logger, num: i32) -> Result<i32> {
let mut onlined_count: i32 = 0;
@@ -416,6 +440,7 @@ fn online_cpus(logger: &Logger, num: i32) -> Result<i32> {
))
}
#[instrument]
fn online_memory(logger: &Logger) -> Result<()> {
online_resources(logger, SYSFS_MEMORY_ONLINE_PATH, r"memory[0-9]+", -1)?;
Ok(())

View File

@@ -6,10 +6,10 @@
use crate::sandbox::Sandbox;
use anyhow::{anyhow, Result};
use capctl::prctl::set_subreaper;
use nix::sys::wait::WaitPidFlag;
use nix::sys::wait::{self, WaitStatus};
use nix::unistd;
use prctl::set_child_subreaper;
use slog::{error, info, o, Logger};
use std::sync::Arc;
use tokio::select;
@@ -22,6 +22,9 @@ async fn handle_sigchild(logger: Logger, sandbox: Arc<Mutex<Sandbox>>) -> Result
info!(logger, "handling signal"; "signal" => "SIGCHLD");
loop {
// Avoid reaping the undesirable child's signal, e.g., execute_hook's
// The lock should be released immediately.
rustjail::container::WAIT_PID_LOCKER.lock().await;
let result = wait::waitpid(
Some(Pid::from_raw(-1)),
Some(WaitPidFlag::WNOHANG | WaitPidFlag::__WALL),
@@ -55,13 +58,6 @@ async fn handle_sigchild(logger: Logger, sandbox: Arc<Mutex<Sandbox>>) -> Result
}
let mut p = process.unwrap();
if p.exit_pipe_w.is_none() {
info!(logger, "process exit pipe not set");
continue;
}
let pipe_write = p.exit_pipe_w.unwrap();
let ret: i32;
match wait_status {
@@ -75,7 +71,7 @@ async fn handle_sigchild(logger: Logger, sandbox: Arc<Mutex<Sandbox>>) -> Result
}
p.exit_code = ret;
let _ = unistd::close(pipe_write);
let _ = p.exit_tx.take();
info!(logger, "notify term to close");
// close the socket file to notify readStdio to close terminal specifically
@@ -92,7 +88,7 @@ pub async fn setup_signal_handler(
) -> Result<()> {
let logger = logger.new(o!("subsystem" => "signals"));
set_child_subreaper(true)
set_subreaper(true)
.map_err(|err| anyhow!(err).context("failed to setup agent as a child subreaper"))?;
let mut sigchild_stream = signal(SignalKind::child())?;

91
src/agent/src/tracer.rs Normal file
View File

@@ -0,0 +1,91 @@
// Copyright (c) 2020-2021 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
use crate::config::AgentConfig;
use anyhow::Result;
use opentelemetry::{global, sdk::trace::Config, trace::TracerProvider};
use slog::{info, o, Logger};
use std::error::Error;
use std::fmt;
use std::str::FromStr;
use tracing_opentelemetry::OpenTelemetryLayer;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::Registry;
#[derive(Debug, PartialEq)]
pub enum TraceType {
Disabled,
Isolated,
}
#[derive(Debug)]
pub struct TraceTypeError {
details: String,
}
impl TraceTypeError {
fn new(msg: &str) -> TraceTypeError {
TraceTypeError {
details: msg.into(),
}
}
}
impl Error for TraceTypeError {}
impl fmt::Display for TraceTypeError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.details)
}
}
impl FromStr for TraceType {
type Err = TraceTypeError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"isolated" => Ok(TraceType::Isolated),
"disabled" => Ok(TraceType::Disabled),
_ => Err(TraceTypeError::new("invalid trace type")),
}
}
}
pub fn setup_tracing(name: &'static str, logger: &Logger, _agent_cfg: &AgentConfig) -> Result<()> {
let logger = logger.new(o!("subsystem" => "vsock-tracer"));
let exporter = vsock_exporter::Exporter::builder()
.with_logger(&logger)
.init();
let config = Config::default();
let builder = opentelemetry::sdk::trace::TracerProvider::builder()
.with_simple_exporter(exporter)
.with_config(config);
let provider = builder.build();
// We don't need a versioned tracer.
let version = None;
let tracer = provider.get_tracer(name, version);
let _global_provider = global::set_tracer_provider(provider);
let layer = OpenTelemetryLayer::new(tracer);
let subscriber = Registry::default().with(layer);
tracing::subscriber::set_global_default(subscriber)?;
info!(logger, "tracing setup");
Ok(())
}
pub fn end_tracing() {
global::shutdown_tracer_provider();
}

View File

@@ -6,26 +6,39 @@
use crate::device::online_device;
use crate::linux_abi::*;
use crate::sandbox::Sandbox;
use crate::GLOBAL_DEVICE_WATCHER;
use crate::AGENT_CONFIG;
use slog::Logger;
use anyhow::Result;
use anyhow::{anyhow, Result};
use netlink_sys::{protocols, SocketAddr, TokioSocket};
use nix::errno::Errno;
use std::fmt::Debug;
use std::os::unix::io::FromRawFd;
use std::sync::Arc;
use tokio::select;
use tokio::sync::watch::Receiver;
use tokio::sync::Mutex;
use tracing::instrument;
#[derive(Debug, Default)]
struct Uevent {
action: String,
devpath: String,
devname: String,
subsystem: String,
// Convenience macro to obtain the scope logger
macro_rules! sl {
() => {
slog_scope::logger().new(o!("subsystem" => "uevent"))
};
}
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct Uevent {
pub action: String,
pub devpath: String,
pub devname: String,
pub subsystem: String,
seqnum: String,
interface: String,
pub interface: String,
}
pub trait UeventMatcher: Sync + Send + Debug + 'static {
fn is_match(&self, uev: &Uevent) -> bool;
}
impl Uevent {
@@ -52,89 +65,91 @@ impl Uevent {
event
}
// Check whether this is a block device hot-add event.
fn is_block_add_event(&self) -> bool {
let pci_root_bus_path = create_pci_root_bus_path();
self.action == U_EVENT_ACTION_ADD
&& self.subsystem == "block"
&& {
self.devpath.starts_with(pci_root_bus_path.as_str())
|| self.devpath.starts_with(ACPI_DEV_PATH) // NVDIMM/PMEM devices
}
&& !self.devname.is_empty()
}
#[instrument]
async fn process_add(&self, logger: &Logger, sandbox: &Arc<Mutex<Sandbox>>) {
// Special case for memory hot-adds first
let online_path = format!("{}/{}/online", SYSFS_DIR, &self.devpath);
if online_path.starts_with(SYSFS_MEMORY_ONLINE_PATH) {
let _ = online_device(online_path.as_ref()).map_err(|e| {
error!(
*logger,
"failed to online device";
"device" => &self.devpath,
"error" => format!("{}", e),
)
});
return;
}
async fn handle_block_add_event(&self, sandbox: &Arc<Mutex<Sandbox>>) {
let pci_root_bus_path = create_pci_root_bus_path();
// Keep the same lock order as device::get_device_name(), otherwise it may cause deadlock.
let watcher = GLOBAL_DEVICE_WATCHER.clone();
let mut w = watcher.lock().await;
let mut sb = sandbox.lock().await;
// Add the device node name to the pci device map.
sb.pci_device_map
.insert(self.devpath.clone(), self.devname.clone());
// Record the event by sysfs path
sb.uevent_map.insert(self.devpath.clone(), self.clone());
// Notify watchers that are interested in the udev event.
// Close the channel after watcher has been notified.
let devpath = self.devpath.clone();
let empties: Vec<_> = w
.iter_mut()
.filter(|(dev_addr, _)| {
let pci_p = format!("{}/{}", pci_root_bus_path, *dev_addr);
// blk block device
devpath.starts_with(pci_p.as_str()) ||
// scsi block device
{
(*dev_addr).ends_with(SCSI_BLOCK_SUFFIX) &&
devpath.contains(*dev_addr)
} ||
// nvdimm/pmem device
{
let pmem_suffix = format!("/{}/{}", SCSI_BLOCK_SUFFIX, self.devname);
devpath.starts_with(ACPI_DEV_PATH) &&
devpath.ends_with(pmem_suffix.as_str()) &&
dev_addr.ends_with(pmem_suffix.as_str())
for watch in &mut sb.uevent_watchers {
if let Some((matcher, _)) = watch {
if matcher.is_match(&self) {
let (_, sender) = watch.take().unwrap();
let _ = sender.send(self.clone());
}
})
.map(|(k, sender)| {
let devname = self.devname.clone();
let sender = sender.take().unwrap();
let _ = sender.send(devname);
k.clone()
})
.collect();
// Remove notified nodes from the watcher map.
for empty in empties {
w.remove(&empty);
}
}
}
#[instrument]
async fn process(&self, logger: &Logger, sandbox: &Arc<Mutex<Sandbox>>) {
if self.is_block_add_event() {
return self.handle_block_add_event(sandbox).await;
} else if self.action == U_EVENT_ACTION_ADD {
let online_path = format!("{}/{}/online", SYSFS_DIR, &self.devpath);
// It's a memory hot-add event.
if online_path.starts_with(SYSFS_MEMORY_ONLINE_PATH) {
let _ = online_device(online_path.as_ref()).map_err(|e| {
error!(
*logger,
"failed to online device";
"device" => &self.devpath,
"error" => format!("{}", e),
)
});
return;
}
if self.action == U_EVENT_ACTION_ADD {
return self.process_add(logger, sandbox).await;
}
debug!(*logger, "ignoring event"; "uevent" => format!("{:?}", self));
}
}
#[instrument]
pub async fn wait_for_uevent(
sandbox: &Arc<Mutex<Sandbox>>,
matcher: impl UeventMatcher,
) -> Result<Uevent> {
let mut sb = sandbox.lock().await;
for uev in sb.uevent_map.values() {
if matcher.is_match(uev) {
info!(sl!(), "Device {:?} found in pci device map", uev);
return Ok(uev.clone());
}
}
// If device is not found in the device map, hotplug event has not
// been received yet, create and add channel to the watchers map.
// The key of the watchers map is the device we are interested in.
// Note this is done inside the lock, not to miss any events from the
// global udev listener.
let (tx, rx) = tokio::sync::oneshot::channel::<Uevent>();
let idx = sb.uevent_watchers.len();
sb.uevent_watchers.push(Some((Box::new(matcher), tx)));
drop(sb); // unlock
info!(sl!(), "Waiting on channel for uevent notification\n");
let hotplug_timeout = AGENT_CONFIG.read().await.hotplug_timeout;
let uev = match tokio::time::timeout(hotplug_timeout, rx).await {
Ok(v) => v?,
Err(_) => {
let mut sb = sandbox.lock().await;
let matcher = sb.uevent_watchers[idx].take().unwrap().0;
return Err(anyhow!(
"Timeout after {:?} waiting for uevent {:?}",
hotplug_timeout,
&matcher
));
}
};
Ok(uev)
}
#[instrument]
pub async fn watch_uevents(
sandbox: Arc<Mutex<Sandbox>>,
mut shutdown: Receiver<bool>,
@@ -199,3 +214,71 @@ pub async fn watch_uevents(
Ok(())
}
// Used in the device module unit tests
#[cfg(test)]
pub(crate) fn spawn_test_watcher(sandbox: Arc<Mutex<Sandbox>>, uev: Uevent) {
tokio::spawn(async move {
loop {
let mut sb = sandbox.lock().await;
for w in &mut sb.uevent_watchers {
if let Some((matcher, _)) = w {
if matcher.is_match(&uev) {
let (_, sender) = w.take().unwrap();
let _ = sender.send(uev);
return;
}
}
}
drop(sb); // unlock
}
});
}
#[cfg(test)]
mod tests {
use super::*;
#[derive(Debug, Clone, Copy)]
struct AlwaysMatch();
impl UeventMatcher for AlwaysMatch {
fn is_match(&self, _: &Uevent) -> bool {
true
}
}
#[tokio::test]
async fn test_wait_for_uevent() {
let uev = Uevent {
action: crate::linux_abi::U_EVENT_ACTION_ADD.to_string(),
subsystem: "test".to_string(),
devpath: "/test/sysfs/path".to_string(),
devname: "testdevname".to_string(),
..Default::default()
};
let matcher = AlwaysMatch();
let logger = slog::Logger::root(slog::Discard, o!());
let sandbox = Arc::new(Mutex::new(Sandbox::new(&logger).unwrap()));
let mut sb = sandbox.lock().await;
sb.uevent_map.insert(uev.devpath.clone(), uev.clone());
drop(sb); // unlock
let uev2 = wait_for_uevent(&sandbox, matcher).await;
assert!(uev2.is_ok());
assert_eq!(uev2.unwrap(), uev);
let mut sb = sandbox.lock().await;
sb.uevent_map.remove(&uev.devpath).unwrap();
drop(sb); // unlock
spawn_test_watcher(sandbox.clone(), uev.clone());
let uev2 = wait_for_uevent(&sandbox, matcher).await;
assert!(uev2.is_ok());
assert_eq!(uev2.unwrap(), uev);
}
}

View File

@@ -3,10 +3,15 @@
// SPDX-License-Identifier: Apache-2.0
//
use anyhow::Result;
use futures::StreamExt;
use std::io;
use std::io::ErrorKind;
use std::os::unix::io::{FromRawFd, RawFd};
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::sync::watch::Receiver;
use tokio_vsock::{Incoming, VsockListener, VsockStream};
use tracing::instrument;
// Size of I/O read buffer
const BUF_SIZE: usize = 8192;
@@ -52,6 +57,17 @@ where
Ok(total_bytes)
}
#[instrument]
pub fn get_vsock_incoming(fd: RawFd) -> Incoming {
unsafe { VsockListener::from_raw_fd(fd).incoming() }
}
#[instrument]
pub async fn get_vsock_stream(fd: RawFd) -> Result<VsockStream> {
let stream = get_vsock_incoming(fd).next().await.unwrap()?;
Ok(stream)
}
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -0,0 +1,19 @@
[package]
name = "vsock-exporter"
version = "0.1.0"
authors = ["James O. D. Hunt <james.o.hunt@intel.com>"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
nix = "0.20.0"
libc = "0.2.94"
thiserror = "1.0.24"
opentelemetry = { version = "0.14.0", features=["serialize"] }
serde = { version = "1.0.126", features = ["derive"] }
vsock = "0.2.3"
bincode = "1.3.3"
byteorder = "1.4.3"
slog = { version = "2.5.2", features = ["dynamic-keys", "max_level_trace", "release_max_level_info"] }
async-trait = "0.1.50"

View File

@@ -0,0 +1,196 @@
// Copyright (c) 2020-2021 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
// The VSOCK Exporter sends trace spans "out" to the forwarder running on the
// host (which then forwards them on to a trace collector). The data is sent
// via a VSOCK socket that the forwarder process is listening on. To allow the
// forwarder to know how much data to each for each trace span the simplest
// protocol is employed which uses a header packet and the payload (trace
// span) data. The header packet is a simple count of the number of bytes in the
// payload, which allows the forwarder to know how many bytes it must read to
// consume the trace span. The payload is a serialised version of the trace span.
use async_trait::async_trait;
use byteorder::{ByteOrder, NetworkEndian};
use opentelemetry::sdk::export::trace::{ExportResult, SpanData, SpanExporter};
use opentelemetry::sdk::export::ExportError;
use slog::{error, o, Logger};
use std::io::{ErrorKind, Write};
use std::net::Shutdown;
use std::sync::Mutex;
use vsock::{SockAddr, VsockStream};
const ANY_CID: &str = "any";
// Must match the value of the variable of the same name in the trace forwarder.
const HEADER_SIZE_BYTES: u64 = std::mem::size_of::<u64>() as u64;
// By default, the VSOCK exporter should talk "out" to the host where the
// forwarder is running.
const DEFAULT_CID: u32 = libc::VMADDR_CID_HOST;
// The VSOCK port the forwarders listens on by default
const DEFAULT_PORT: u32 = 10240;
#[derive(Debug)]
pub struct Exporter {
port: u32,
cid: u32,
conn: Mutex<VsockStream>,
logger: Logger,
}
impl Exporter {
/// Create a new exporter builder.
pub fn builder() -> Builder {
Builder::default()
}
}
#[derive(thiserror::Error, Debug)]
pub enum Error {
#[error("connection error: {0}")]
ConnectionError(String),
#[error("serialisation error: {0}")]
SerialisationError(#[from] bincode::Error),
#[error("I/O error: {0}")]
IOError(#[from] std::io::Error),
}
impl ExportError for Error {
fn exporter_name(&self) -> &'static str {
"vsock-exporter"
}
}
fn make_io_error(desc: String) -> std::io::Error {
std::io::Error::new(ErrorKind::Other, desc)
}
// Send a trace span to the forwarder running on the host.
fn write_span(writer: &mut dyn Write, span: &SpanData) -> Result<(), std::io::Error> {
let encoded_payload: Vec<u8> =
bincode::serialize(&span).map_err(|e| make_io_error(e.to_string()))?;
let payload_len: u64 = encoded_payload.len() as u64;
let mut payload_len_as_bytes: [u8; HEADER_SIZE_BYTES as usize] =
[0; HEADER_SIZE_BYTES as usize];
// Encode the header
NetworkEndian::write_u64(&mut payload_len_as_bytes, payload_len);
// Send the header
writer
.write_all(&payload_len_as_bytes)
.map_err(|e| make_io_error(format!("failed to write trace header: {:?}", e)))?;
writer
.write_all(&encoded_payload)
.map_err(|e| make_io_error(format!("failed to write trace payload: {:?}", e)))
}
fn handle_batch(writer: &mut dyn Write, batch: Vec<SpanData>) -> ExportResult {
for span_data in batch {
write_span(writer, &span_data).map_err(Error::IOError)?;
}
Ok(())
}
#[async_trait]
impl SpanExporter for Exporter {
async fn export(&mut self, batch: Vec<SpanData>) -> ExportResult {
let conn = self.conn.lock();
match conn {
Ok(mut c) => handle_batch(&mut *c, batch),
Err(e) => {
error!(self.logger, "failed to obtain connection";
"error" => format!("{}", e));
return Err(Error::ConnectionError(e.to_string()).into());
}
}
}
fn shutdown(&mut self) {
let conn = match self.conn.lock() {
Ok(conn) => conn,
Err(e) => {
error!(self.logger, "failed to obtain connection";
"error" => format!("{}", e));
return;
}
};
conn.shutdown(Shutdown::Write)
.expect("failed to shutdown VSOCK connection");
}
}
#[derive(Debug)]
pub struct Builder {
port: u32,
cid: u32,
logger: Logger,
}
impl Default for Builder {
fn default() -> Self {
let logger = Logger::root(slog::Discard, o!());
Builder {
cid: DEFAULT_CID,
port: DEFAULT_PORT,
logger,
}
}
}
impl Builder {
pub fn with_cid(self, cid: u32) -> Self {
Builder { cid, ..self }
}
pub fn with_port(self, port: u32) -> Self {
Builder { port, ..self }
}
pub fn with_logger(self, logger: &Logger) -> Self {
Builder {
logger: logger.new(o!()),
..self
}
}
pub fn init(self) -> Exporter {
let Builder { port, cid, logger } = self;
let sock_addr = SockAddr::new_vsock(self.cid, self.port);
let cid_str: String;
if self.cid == libc::VMADDR_CID_ANY {
cid_str = ANY_CID.to_string();
} else {
cid_str = format!("{}", self.cid);
}
let msg = format!(
"failed to connect to VSOCK server (port: {}, cid: {}) - {}",
self.port, cid_str, "ensure trace forwarder is running on host"
);
let conn = VsockStream::connect(&sock_addr).expect(&msg);
Exporter {
port,
cid,
conn: Mutex::new(conn),
logger: logger.new(o!("cid" => cid_str, "port" => port)),
}
}
}

View File

@@ -166,6 +166,7 @@ DEFAULTEXPFEATURES := []
#Default entropy source
DEFENTROPYSOURCE := /dev/urandom
DEFVALIDENTROPYSOURCES := [\"/dev/urandom\",\"/dev/random\",\"\"]
DEFDISABLEBLOCK := false
DEFSHAREDFS_QEMU_VIRTIOFS := virtio-fs
@@ -454,6 +455,7 @@ USER_VARS += DEFFILEMEMBACKEND
USER_VARS += DEFVALIDFILEMEMBACKENDS
USER_VARS += DEFMSIZE9P
USER_VARS += DEFENTROPYSOURCE
USER_VARS += DEFVALIDENTROPYSOURCES
USER_VARS += DEFSANDBOXCGROUPONLY
USER_VARS += DEFBINDMOUNTS
USER_VARS += FEATURE_SELINUX

View File

@@ -1 +0,0 @@
2.0.0

1
src/runtime/VERSION Symbolic link
View File

@@ -0,0 +1 @@
../../VERSION

View File

@@ -11,3 +11,10 @@ MACHINEACCELERATORS :=
CPUFEATURES :=
QEMUCMD := qemu-system-s390x
# See https://github.com/kata-containers/osbuilder/issues/217
FEDORA_LIKE = $(shell grep -E "\<fedora\>" /etc/os-release 2> /dev/null)
ifneq (,$(FEDORA_LIKE))
CC := gcc
export CC
endif

View File

@@ -150,6 +150,10 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_ACRN@"
#debug_console_enabled = true
# Agent connection dialing timeout value in seconds
# (default: 30)
#dial_timeout = 30
[netmon]
# If enabled, the network monitoring process gets started when the
# sandbox is created. This allows for the detection of some additional

View File

@@ -165,6 +165,10 @@ block_device_driver = "virtio-blk"
#debug_console_enabled = true
# Agent connection dialing timeout value in seconds
# (default: 30)
#dial_timeout = 30
[netmon]
# If enabled, the network monitoring process gets started when the
# sandbox is created. This allows for the detection of some additional

View File

@@ -161,23 +161,23 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_FC@"
# This option changes the default hypervisor and kernel parameters
# to enable debug output where available.
#
#
# Default false
#enable_debug = true
# Disable the customizations done in the runtime when it detects
# that it is running on top a VMM. This will result in the runtime
# behaving as it would when running on bare metal.
#
#
#disable_nesting_checks = true
# This is the msize used for 9p shares. It is the number of bytes
# This is the msize used for 9p shares. It is the number of bytes
# used for 9p packet payload.
#msize_9p = @DEFMSIZE9P@
# VFIO devices are hotplugged on a bridge by default.
# VFIO devices are hotplugged on a bridge by default.
# Enable hotplugging on root bus. This may be required for devices with
# a large PCI bar, as this is a current limitation with hotplugging on
# a large PCI bar, as this is a current limitation with hotplugging on
# a bridge. This value is valid for "pc" machine type.
# Default false
#hotplug_vfio_on_root_bus = true
@@ -194,6 +194,11 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_FC@"
# all practical purposes.
#entropy_source= "@DEFENTROPYSOURCE@"
# List of valid annotations values for entropy_source
# The default if not set is empty (all annotations rejected.)
# Your distribution recommends: @DEFVALIDENTROPYSOURCES@
valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
# Path to OCI hook binaries in the *guest rootfs*.
# This does not affect host-side hooks which must instead be added to
# the OCI spec passed to the runtime.
@@ -282,6 +287,10 @@ kernel_modules=[]
#debug_console_enabled = true
# Agent connection dialing timeout value in seconds
# (default: 30)
#dial_timeout = 30
[netmon]
# If enabled, the network monitoring process gets started when the
# sandbox is created. This allows for the detection of some additional

View File

@@ -16,6 +16,14 @@ kernel = "@KERNELPATH@"
image = "@IMAGEPATH@"
machine_type = "@MACHINETYPE@"
# Enable confidential guest support.
# Toggling that setting may trigger different hardware features, ranging
# from memory encryption to both memory and CPU-state encryption and integrity.
# The Kata Containers runtime dynamically detects the available feature set and
# aims at enabling the largest possible one.
# Default false
# confidential_guest = true
# List of valid annotation names for the hypervisor
# Each member of the list is a regular expression, which is the base name
# of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path"
@@ -296,6 +304,11 @@ pflashes = []
# all practical purposes.
#entropy_source= "@DEFENTROPYSOURCE@"
# List of valid annotations values for entropy_source
# The default if not set is empty (all annotations rejected.)
# Your distribution recommends: @DEFVALIDENTROPYSOURCES@
valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
# Path to OCI hook binaries in the *guest rootfs*.
# This does not affect host-side hooks which must instead be added to
# the OCI spec passed to the runtime.
@@ -432,6 +445,10 @@ kernel_modules=[]
#debug_console_enabled = true
# Agent connection dialing timeout value in seconds
# (default: 30)
#dial_timeout = 30
[netmon]
# If enabled, the network monitoring process gets started when the
# sandbox is created. This allows for the detection of some additional
@@ -527,3 +544,30 @@ experimental=@DEFAULTEXPFEATURES@
# If enabled, user can run pprof tools with shim v2 process through kata-monitor.
# (default: false)
# enable_pprof = true
# WARNING: All the options in the following section have not been implemented yet.
# This section was added as a placeholder. DO NOT USE IT!
[image]
# Container image service.
#
# Offload the CRI image management service to the Kata agent.
# (default: false)
#service_offload = true
# Container image decryption keys provisioning.
# Applies only if service_offload is true.
# Keys can be provisioned locally (e.g. through a special command or
# a local file) or remotely (usually after the guest is remotely attested).
# The provision setting is a complete URL that lets the Kata agent decide
# which method to use in order to fetch the keys.
#
# Keys can be stored in a local file, in a measured and attested initrd:
#provision=data:///local/key/file
#
# Keys could be fetched through a special command or binary from the
# initrd (guest) image, e.g. a firmware call:
#provision=file:///path/to/bin/fetcher/in/guest
#
# Keys can be remotely provisioned. The Kata agent fetches them from e.g.
# a HTTPS URL:
#provision=https://my-key-broker.foo/tenant/<tenant-id>

View File

@@ -1,134 +0,0 @@
// Copyright (c) 2014,2015,2016 Docker, Inc.
// Copyright (c) 2017 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
package main
import (
"fmt"
"io"
"os"
"syscall"
"unsafe"
"golang.org/x/sys/unix"
)
var ptmxPath = "/dev/ptmx"
// Console represents a pseudo TTY.
type Console struct {
io.ReadWriteCloser
master *os.File
slavePath string
}
// isTerminal returns true if fd is a terminal, else false
func isTerminal(fd uintptr) bool {
var termios syscall.Termios
_, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, syscall.TCGETS, uintptr(unsafe.Pointer(&termios)))
return err == 0
}
// ConsoleFromFile creates a console from a file
func ConsoleFromFile(f *os.File) *Console {
return &Console{
master: f,
}
}
// NewConsole returns an initialized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func newConsole() (*Console, error) {
master, err := os.OpenFile(ptmxPath, unix.O_RDWR|unix.O_NOCTTY|unix.O_CLOEXEC, 0)
if err != nil {
return nil, err
}
if err := saneTerminal(master); err != nil {
return nil, err
}
console, err := ptsname(master)
if err != nil {
return nil, err
}
if err := unlockpt(master); err != nil {
return nil, err
}
return &Console{
slavePath: console,
master: master,
}, nil
}
// File returns master
func (c *Console) File() *os.File {
return c.master
}
// Path to slave
func (c *Console) Path() string {
return c.slavePath
}
// Read from master
func (c *Console) Read(b []byte) (int, error) {
return c.master.Read(b)
}
// Write to master
func (c *Console) Write(b []byte) (int, error) {
return c.master.Write(b)
}
// Close master
func (c *Console) Close() error {
if m := c.master; m != nil {
return m.Close()
}
return nil
}
// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f.
// unlockpt should be called before opening the slave side of a pty.
func unlockpt(f *os.File) error {
var u int32
if _, _, err := unix.Syscall(unix.SYS_IOCTL, f.Fd(), unix.TIOCSPTLCK, uintptr(unsafe.Pointer(&u))); err != 0 {
return err
}
return nil
}
// ptsname retrieves the name of the first available pts for the given master.
func ptsname(f *os.File) (string, error) {
var u uint32
if _, _, err := unix.Syscall(unix.SYS_IOCTL, f.Fd(), unix.TIOCGPTN, uintptr(unsafe.Pointer(&u))); err != 0 {
return "", err
}
return fmt.Sprintf("/dev/pts/%d", u), nil
}
// saneTerminal sets the necessary tty_ioctl(4)s to ensure that a pty pair
// created by us acts normally. In particular, a not-very-well-known default of
// Linux unix98 ptys is that they have +onlcr by default. While this isn't a
// problem for terminal emulators, because we relay data from the terminal we
// also relay that funky line discipline.
func saneTerminal(terminal *os.File) error {
// Go doesn't have a wrapper for any of the termios ioctls.
var termios unix.Termios
if _, _, err := unix.Syscall(unix.SYS_IOCTL, terminal.Fd(), unix.TCGETS, uintptr(unsafe.Pointer(&termios))); err != 0 {
return fmt.Errorf("ioctl(tty, tcgets): %s", err.Error())
}
// Set -onlcr so we don't have to deal with \r.
termios.Oflag &^= unix.ONLCR
if _, _, err := unix.Syscall(unix.SYS_IOCTL, terminal.Fd(), unix.TCSETS, uintptr(unsafe.Pointer(&termios))); err != 0 {
return fmt.Errorf("ioctl(tty, tcsets): %s", err.Error())
}
return nil
}

View File

@@ -1,129 +0,0 @@
// Copyright (c) 2017 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
package main
import (
"io/ioutil"
"os"
"testing"
ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils"
"github.com/stretchr/testify/assert"
)
func TestConsoleFromFile(t *testing.T) {
assert := assert.New(t)
console := ConsoleFromFile(os.Stdout)
assert.NotNil(console.File(), "console file is nil")
}
func TestNewConsole(t *testing.T) {
if tc.NotValid(ktu.NeedRoot()) {
t.Skip(testDisabledAsNonRoot)
}
assert := assert.New(t)
console, err := newConsole()
assert.NoError(err, "failed to create a new console: %s", err)
defer console.Close()
assert.NotEmpty(console.Path(), "console path is empty")
assert.NotNil(console.File(), "console file is nil")
}
func TestIsTerminal(t *testing.T) {
if tc.NotValid(ktu.NeedRoot()) {
t.Skip(testDisabledAsNonRoot)
}
assert := assert.New(t)
var fd uintptr = 4
assert.False(isTerminal(fd), "Fd %d is not a terminal", fd)
console, err := newConsole()
assert.NoError(err, "failed to create a new console: %s", err)
defer console.Close()
fd = console.File().Fd()
assert.True(isTerminal(fd), "Fd %d is a terminal", fd)
}
func TestReadWrite(t *testing.T) {
assert := assert.New(t)
// write operation
f, err := ioutil.TempFile(os.TempDir(), ".tty")
assert.NoError(err, "failed to create a temporal file")
defer os.Remove(f.Name())
console := ConsoleFromFile(f)
assert.NotNil(console)
defer console.Close()
msgWrite := "hello"
l, err := console.Write([]byte(msgWrite))
assert.NoError(err, "failed to write message: %s", msgWrite)
assert.Equal(len(msgWrite), l)
console.master.Sync()
console.master.Seek(0, 0)
// Read operation
msgRead := make([]byte, len(msgWrite))
l, err = console.Read(msgRead)
assert.NoError(err, "failed to read message: %s", msgWrite)
assert.Equal(len(msgWrite), l)
assert.Equal(msgWrite, string(msgRead))
}
func TestNewConsoleFail(t *testing.T) {
assert := assert.New(t)
orgPtmxPath := ptmxPath
defer func() { ptmxPath = orgPtmxPath }()
// OpenFile failure
ptmxPath = "/this/file/does/not/exist"
c, err := newConsole()
assert.Error(err)
assert.Nil(c)
// saneTerminal failure
f, err := ioutil.TempFile("", "")
assert.NoError(err)
assert.NoError(f.Close())
defer os.Remove(f.Name())
ptmxPath = f.Name()
c, err = newConsole()
assert.Error(err)
assert.Nil(c)
}
func TestConsoleClose(t *testing.T) {
assert := assert.New(t)
// nil master
c := &Console{}
assert.NoError(c.Close())
f, err := ioutil.TempFile("", "")
assert.NoError(err)
defer os.Remove(f.Name())
c.master = f
assert.NoError(c.Close())
}
func TestConsolePtsnameFail(t *testing.T) {
assert := assert.New(t)
pts, err := ptsname(nil)
assert.Error(err)
assert.Empty(pts)
}

View File

@@ -25,6 +25,7 @@ import (
"strings"
"syscall"
"github.com/containerd/cgroups"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/oci"
@@ -389,13 +390,6 @@ EXAMPLES:
if verbose {
kataLog.Logger.SetLevel(logrus.InfoLevel)
}
ctx, err := cliContextToContext(context)
if err != nil {
return err
}
span, _ := katautils.Trace(ctx, "check")
defer span.End()
if !context.Bool("no-network-checks") && os.Getenv(noNetworkEnvVar) == "" {
cmd := RelCmdCheck
@@ -407,8 +401,7 @@ EXAMPLES:
if os.Geteuid() == 0 {
kataLog.Warn("Not running network checks as super user")
} else {
err = HandleReleaseVersions(cmd, version, context.Bool("include-all-releases"))
err := HandleReleaseVersions(cmd, version, context.Bool("include-all-releases"))
if err != nil {
return err
}
@@ -424,7 +417,12 @@ EXAMPLES:
return errors.New("check: cannot determine runtime config")
}
err = setCPUtype(runtimeConfig.HypervisorType)
// check if cgroup can work use the same logic for creating containers
if _, err := vc.V1Constraints(); err != nil && err == cgroups.ErrMountPointNotExist && !runtimeConfig.SandboxCgroupOnly {
return fmt.Errorf("Cgroup v2 requires the following configuration: `sandbox_cgroup_only=true`.")
}
err := setCPUtype(runtimeConfig.HypervisorType)
if err != nil {
return err
}
@@ -437,7 +435,6 @@ EXAMPLES:
}
err = hostIsVMContainerCapable(details)
if err != nil {
return err
}

View File

@@ -13,7 +13,6 @@ import (
"strings"
"github.com/BurntSushi/toml"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
"github.com/kata-containers/kata-containers/src/runtime/pkg/utils"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
exp "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/experimental"
@@ -448,14 +447,6 @@ var kataEnvCLICommand = cli.Command{
},
},
Action: func(context *cli.Context) error {
ctx, err := cliContextToContext(context)
if err != nil {
return err
}
span, _ := katautils.Trace(ctx, "kata-env")
defer span.End()
return handleSettings(defaultOutputFile, context)
},
}

View File

@@ -1,91 +1,13 @@
// Copyright (c) 2018 IBM
// Copyright (c) 2021 IBM
//
// SPDX-License-Identifier: Apache-2.0
//
package main
import (
"fmt"
vcUtils "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
"path/filepath"
goruntime "runtime"
)
func getExpectedHostDetails(tmpdir string) (HostInfo, error) {
type filesToCreate struct {
file string
contents string
}
const expectedKernelVersion = "99.1"
const expectedArch = goruntime.GOARCH
expectedDistro := DistroInfo{
Name: "Foo",
Version: "42",
}
expectedCPU := CPUInfo{
Vendor: "moi",
Model: "awesome XI",
}
expectedHostDetails := HostInfo{
Kernel: expectedKernelVersion,
Architecture: expectedArch,
Distro: expectedDistro,
CPU: expectedCPU,
VMContainerCapable: true,
SupportVSocks: vcUtils.SupportsVsocks(),
}
testProcCPUInfo := filepath.Join(tmpdir, "cpuinfo")
testOSRelease := filepath.Join(tmpdir, "os-release")
// XXX: This file is *NOT* created by this function on purpose
// (to ensure the only file checked by the tests is
// testOSRelease). osReleaseClr handling is tested in
// utils_test.go.
testOSReleaseClr := filepath.Join(tmpdir, "os-release-clr")
testProcVersion := filepath.Join(tmpdir, "proc-version")
// override
procVersion = testProcVersion
osRelease = testOSRelease
osReleaseClr = testOSReleaseClr
procCPUInfo = testProcCPUInfo
procVersionContents := fmt.Sprintf("Linux version %s a b c",
expectedKernelVersion)
osReleaseContents := fmt.Sprintf(`
NAME="%s"
VERSION_ID="%s"
`, expectedDistro.Name, expectedDistro.Version)
procCPUInfoContents := fmt.Sprintf(`
%s : %s
processor 0: version = 00, identification = 3929E7, %s = %s
`,
archCPUVendorField,
expectedCPU.Vendor,
archCPUModelField,
expectedCPU.Model)
data := []filesToCreate{
{procVersion, procVersionContents},
{osRelease, osReleaseContents},
{procCPUInfo, procCPUInfoContents},
}
for _, d := range data {
err := createFile(d.file, d.contents)
if err != nil {
return HostInfo{}, err
}
}
return expectedHostDetails, nil
expectedVendor := "moi"
expectedModel := "awesome XI"
expectedVMContainerCapable := true
return genericGetExpectedHostDetails(tmpdir, expectedVendor, expectedModel, expectedVMContainerCapable)
}

View File

@@ -161,7 +161,7 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC
return "", oci.RuntimeConfig{}, err
}
_, config, err = katautils.LoadConfiguration(configFile, true, false)
_, config, err = katautils.LoadConfiguration(configFile, true)
if err != nil {
return "", oci.RuntimeConfig{}, err
}

View File

@@ -14,7 +14,6 @@ import (
"net/http"
"net/url"
"os"
"path/filepath"
"strings"
"sync"
@@ -26,7 +25,6 @@ import (
clientUtils "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/client"
"github.com/pkg/errors"
"github.com/urfave/cli"
"go.opentelemetry.io/otel/label"
)
const (
@@ -38,10 +36,8 @@ const (
subCommandName = "exec"
// command-line parameters name
paramRuntimeNamespace = "runtime-namespace"
paramDebugConsolePort = "kata-debug-port"
defaultKernelParamDebugConsoleVPortValue = 1026
defaultRuntimeNamespace = "k8s.io"
)
var (
@@ -57,34 +53,16 @@ var kataExecCLICommand = cli.Command{
Name: subCommandName,
Usage: "Enter into guest by debug console",
Flags: []cli.Flag{
cli.StringFlag{
Name: paramRuntimeNamespace,
Usage: "Namespace that containerd or CRI-O are using for containers. (Default: k8s.io, only works for containerd)",
},
cli.Uint64Flag{
Name: paramDebugConsolePort,
Usage: "Port that debug console is listening on. (Default: 1026)",
},
},
Action: func(context *cli.Context) error {
ctx, err := cliContextToContext(context)
if err != nil {
return err
}
span, _ := katautils.Trace(ctx, subCommandName)
defer span.End()
namespace := context.String(paramRuntimeNamespace)
if namespace == "" {
namespace = defaultRuntimeNamespace
}
span.SetAttributes(label.Key("namespace").String(namespace))
port := context.Uint64(paramDebugConsolePort)
if port == 0 {
port = defaultKernelParamDebugConsoleVPortValue
}
span.SetAttributes(label.Key("port").Uint64(port))
sandboxID := context.Args().Get(0)
@@ -92,9 +70,8 @@ var kataExecCLICommand = cli.Command{
return err
}
span.SetAttributes(label.Key("sandbox").String(sandboxID))
conn, err := getConn(sandboxID, port)
conn, err := getConn(namespace, sandboxID, port)
if err != nil {
return err
}
@@ -177,9 +154,8 @@ func (s *iostream) Read(data []byte) (n int, err error) {
return s.conn.Read(data)
}
func getConn(namespace, sandboxID string, port uint64) (net.Conn, error) {
socketAddr := filepath.Join(string(filepath.Separator), "containerd-shim", namespace, sandboxID, "shim-monitor.sock")
client, err := kataMonitor.BuildUnixSocketClient(socketAddr, defaultTimeout)
func getConn(sandboxID string, port uint64) (net.Conn, error) {
client, err := kataMonitor.BuildShimClient(sandboxID, defaultTimeout)
if err != nil {
return nil, err
}
@@ -190,7 +166,7 @@ func getConn(namespace, sandboxID string, port uint64) (net.Conn, error) {
}
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("Failed to get %s: %d", socketAddr, resp.StatusCode)
return nil, fmt.Errorf("Failure from %s shim-monitor: %d", sandboxID, resp.StatusCode)
}
defer resp.Body.Close()

View File

@@ -0,0 +1,38 @@
// Copyright (c) 2021 Apple Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
package main
import (
"fmt"
kataMonitor "github.com/kata-containers/kata-containers/src/runtime/pkg/kata-monitor"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
"github.com/urfave/cli"
)
var kataMetricsCLICommand = cli.Command{
Name: "metrics",
Usage: "gather metrics associated with infrastructure used to run a sandbox",
UsageText: "metrics <sandbox id>",
Action: func(context *cli.Context) error {
sandboxID := context.Args().Get(0)
if err := katautils.VerifyContainerID(sandboxID); err != nil {
return err
}
// Get the metrics!
metrics, err := kataMonitor.GetSandboxMetrics(sandboxID)
if err != nil {
return err
}
fmt.Printf("%s\n", metrics)
return nil
},
}

View File

@@ -22,14 +22,12 @@ import (
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
exp "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/experimental"
vf "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/factory"
tl "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/factory/template"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/oci"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
"github.com/urfave/cli"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/label"
otelTrace "go.opentelemetry.io/otel/trace"
)
// specConfig is the name of the file holding the containers configuration
@@ -125,6 +123,7 @@ var runtimeCommands = []cli.Command{
kataCheckCLICommand,
kataEnvCLICommand,
kataExecCLICommand,
kataMetricsCLICommand,
factoryCLICommand,
}
@@ -132,10 +131,6 @@ var runtimeCommands = []cli.Command{
// parsing occurs.
var runtimeBeforeSubcommands = beforeSubcommands
// runtimeAfterSubcommands is the function to run after the command-line
// has been parsed.
var runtimeAfterSubcommands = afterSubcommands
// runtimeCommandNotFound is the function to handle an invalid sub-command.
var runtimeCommandNotFound = commandNotFound
@@ -168,10 +163,6 @@ func init() {
// setupSignalHandler sets up signal handling, starting a go routine to deal
// with signals as they arrive.
//
// Note that the specified context is NOT used to create a trace span (since the
// first (root) span must be created in beforeSubcommands()): it is simply
// used to pass to the crash handling functions to finalise tracing.
func setupSignalHandler(ctx context.Context) {
signals.SetLogger(kataLog)
@@ -181,10 +172,6 @@ func setupSignalHandler(ctx context.Context) {
signal.Notify(sigCh, sig)
}
dieCb := func() {
katautils.StopTracing(ctx)
}
go func() {
for {
sig := <-sigCh
@@ -198,7 +185,6 @@ func setupSignalHandler(ctx context.Context) {
if signals.FatalSignal(nativeSignal) {
kataLog.WithField("signal", sig).Error("received fatal signal")
signals.Die(dieCb)
} else if debug && signals.NonFatalSignal(nativeSignal) {
kataLog.WithField("signal", sig).Debug("handling signal")
signals.Backtrace()
@@ -210,24 +196,15 @@ func setupSignalHandler(ctx context.Context) {
// setExternalLoggers registers the specified logger with the external
// packages which accept a logger to handle their own logging.
func setExternalLoggers(ctx context.Context, logger *logrus.Entry) {
var span otelTrace.Span
// Only create a new span if a root span already exists. This is
// required to ensure that this function will not disrupt the root
// span logic by creating a span before the proper root span has been
// created.
if otelTrace.SpanFromContext(ctx) != nil {
span, ctx = katautils.Trace(ctx, "setExternalLoggers")
defer span.End()
}
// Set virtcontainers logger.
vci.SetLogger(ctx, logger)
// Set vm factory logger.
vf.SetLogger(ctx, logger)
// Set vm factory template logger.
tl.SetLogger(ctx, logger)
// Set the OCI package logger.
oci.SetLogger(ctx, logger)
@@ -244,7 +221,6 @@ func beforeSubcommands(c *cli.Context) error {
var configFile string
var runtimeConfig oci.RuntimeConfig
var err error
var traceFlushFunc func()
katautils.SetConfigOptions(name, defaultRuntimeConfiguration, defaultSysConfRuntimeConfiguration)
@@ -270,7 +246,6 @@ func beforeSubcommands(c *cli.Context) error {
// Issue: https://github.com/kata-containers/runtime/issues/2428
ignoreConfigLogs := false
var traceRootSpan string
subCmdIsCheckCmd := (c.NArg() >= 1 && ((c.Args()[0] == "kata-check") || (c.Args()[0] == "check")))
if subCmdIsCheckCmd {
@@ -302,16 +277,13 @@ func beforeSubcommands(c *cli.Context) error {
cmdName := c.Args().First()
if c.App.Command(cmdName) != nil {
kataLog = kataLog.WithField("command", cmdName)
// Name for the root span (used for tracing) now the
// sub-command name is known.
traceRootSpan = name + " " + cmdName
}
// Since a context is required, pass a new (throw-away) one - we
// cannot use the main context as tracing hasn't been enabled yet
// (meaning any spans created at this point will be silently ignored).
setExternalLoggers(context.Background(), kataLog)
ctx, err := cliContextToContext(c)
if err != nil {
return err
}
setExternalLoggers(ctx, kataLog)
if c.NArg() == 1 && (c.Args()[0] == "kata-env" || c.Args()[0] == "env") {
// simply report the logging setup
@@ -319,26 +291,12 @@ func beforeSubcommands(c *cli.Context) error {
}
}
configFile, runtimeConfig, err = katautils.LoadConfiguration(c.GlobalString("kata-config"), ignoreConfigLogs, false)
configFile, runtimeConfig, err = katautils.LoadConfiguration(c.GlobalString("kata-config"), ignoreConfigLogs)
if err != nil {
fatal(err)
}
if !subCmdIsCheckCmd {
debug = runtimeConfig.Debug
if traceRootSpan != "" {
// Create the tracer.
//
// Note: no spans are created until the command-line has been parsed.
// This delays collection of trace data slightly but benefits the user by
// ensuring the first span is the name of the sub-command being
// invoked from the command-line.
traceFlushFunc, err = setupTracing(c, traceRootSpan, &runtimeConfig)
if err != nil {
return err
}
defer traceFlushFunc()
}
}
args := strings.Join(c.Args(), " ")
@@ -377,36 +335,6 @@ func handleShowConfig(context *cli.Context) {
}
}
func setupTracing(context *cli.Context, rootSpanName string, config *oci.RuntimeConfig) (func(), error) {
flush, err := katautils.CreateTracer(name, config)
if err != nil {
return nil, err
}
ctx, err := cliContextToContext(context)
if err != nil {
return nil, err
}
// Create the root span now that the sub-command name is
// known.
//
// Note that this "Before" function is called (and returns)
// before the subcommand handler is called. As such, we cannot
// "Finish()" the span here - that is handled in the .After
// function.
tracer := otel.Tracer("kata")
newCtx, span := tracer.Start(ctx, rootSpanName)
span.SetAttributes(label.Key("subsystem").String("runtime"))
// Add tracer to metadata and update the context
context.App.Metadata["tracer"] = tracer
context.App.Metadata["context"] = newCtx
return flush, nil
}
// add supported experimental features in context
func addExpFeatures(clictx *cli.Context, runtimeConfig oci.RuntimeConfig) error {
ctx, err := cliContextToContext(clictx)
@@ -420,22 +348,11 @@ func addExpFeatures(clictx *cli.Context, runtimeConfig oci.RuntimeConfig) error
}
ctx = exp.ContextWithExp(ctx, exps)
// Add tracer to metadata and update the context
// Add experimental features to metadata and update the context
clictx.App.Metadata["context"] = ctx
return nil
}
func afterSubcommands(c *cli.Context) error {
ctx, err := cliContextToContext(c)
if err != nil {
return err
}
katautils.StopTracing(ctx)
return nil
}
// function called when an invalid command is specified which causes the
// runtime to error.
func commandNotFound(c *cli.Context, command string) {
@@ -502,7 +419,6 @@ func createRuntimeApp(ctx context.Context, args []string) error {
app.Flags = runtimeFlags
app.Commands = runtimeCommands
app.Before = runtimeBeforeSubcommands
app.After = runtimeAfterSubcommands
app.EnableBashCompletion = true
// allow sub-commands to access context
@@ -578,12 +494,5 @@ func cliContextToContext(c *cli.Context) (context.Context, error) {
func main() {
// create a new empty context
ctx := context.Background()
dieCb := func() {
katautils.StopTracing(ctx)
}
defer signals.HandlePanic(dieCb)
createRuntime(ctx)
}

View File

@@ -20,7 +20,6 @@ import (
"strings"
"testing"
"github.com/dlespiau/covertool/pkg/cover"
ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
"github.com/kata-containers/kata-containers/src/runtime/pkg/utils"
@@ -152,16 +151,6 @@ func runUnitTests(m *testing.M) {
// TestMain is the common main function used by ALL the test functions
// for this package.
func TestMain(m *testing.M) {
// Parse the command line using the stdlib flag package so the flags defined
// in the testing package get populated.
cover.ParseAndStripTestFlags()
// Make sure we have the opportunity to flush the coverage report to disk when
// terminating the process.
defer func() {
cover.FlushProfiles()
}()
// If the test binary name is kata-runtime.coverage, we've are being asked to
// run the coverage-instrumented kata-runtime.
if path.Base(os.Args[0]) == name+".coverage" ||
@@ -869,7 +858,7 @@ func TestMainCreateRuntime(t *testing.T) {
assert := assert.New(t)
const cmd = "foo"
const msg = "moo FAILURE"
const msg = "moo message"
resetCLIGlobals()
@@ -942,7 +931,7 @@ func TestMainFatalWriter(t *testing.T) {
assert := assert.New(t)
const cmd = "foo"
const msg = "moo FAILURE"
const msg = "moo message"
// create buffer to save logger output
buf := &bytes.Buffer{}

View File

@@ -6,7 +6,6 @@
package main
import (
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
"github.com/urfave/cli"
)
@@ -14,14 +13,6 @@ var versionCLICommand = cli.Command{
Name: "version",
Usage: "display version details",
Action: func(context *cli.Context) error {
ctx, err := cliContextToContext(context)
if err != nil {
return err
}
span, _ := katautils.Trace(ctx, "version")
defer span.End()
cli.VersionPrinter(context)
return nil
},

View File

@@ -23,6 +23,7 @@ import (
// only register the proto type
_ "github.com/containerd/containerd/runtime/linux/runctypes"
_ "github.com/containerd/containerd/runtime/v2/runc/options"
crioption "github.com/containerd/cri-containerd/pkg/api/runtimeoptions/v1"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
@@ -191,7 +192,7 @@ func loadRuntimeConfig(s *service, r *taskAPI.CreateTaskRequest, anno map[string
configPath = os.Getenv("KATA_CONF_FILE")
}
_, runtimeConfig, err := katautils.LoadConfiguration(configPath, false, true)
_, runtimeConfig, err := katautils.LoadConfiguration(configPath, false)
if err != nil {
return nil, err
}

View File

@@ -294,8 +294,7 @@ func trace(ctx context.Context, name string) (otelTrace.Span, context.Context) {
ctx = context.Background()
}
tracer := otel.Tracer("kata")
ctx, span := tracer.Start(ctx, name)
span.SetAttributes([]label.KeyValue{label.Key("source").String("runtime"), label.Key("package").String("containerdshim")}...)
ctx, span := tracer.Start(ctx, name, otelTrace.WithAttributes(label.String("source", "runtime"), label.String("package", "containerdshim")))
return span, ctx
}

View File

@@ -16,7 +16,6 @@ import (
"strconv"
"strings"
"github.com/containerd/containerd/namespaces"
cdshim "github.com/containerd/containerd/runtime/v2/shim"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
vcAnnotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations"
@@ -129,11 +128,7 @@ func decodeAgentMetrics(body string) []*dto.MetricFamily {
func (s *service) startManagementServer(ctx context.Context, ociSpec *specs.Spec) {
// metrics socket will under sandbox's bundle path
metricsAddress, err := socketAddress(ctx, s.id)
if err != nil {
shimMgtLog.WithError(err).Error("failed to create socket address")
return
}
metricsAddress := SocketAddress(s.id)
listener, err := cdshim.NewSocket(metricsAddress)
if err != nil {
@@ -166,7 +161,7 @@ func (s *service) startManagementServer(ctx context.Context, ociSpec *specs.Spec
svr.Serve(listener)
}
// mountServeDebug provides a debug endpoint
// mountPprofHandle provides a debug endpoint
func (s *service) mountPprofHandle(m *http.ServeMux, ociSpec *specs.Spec) {
// return if not enabled
@@ -188,10 +183,8 @@ func (s *service) mountPprofHandle(m *http.ServeMux, ociSpec *specs.Spec) {
m.Handle("/debug/pprof/trace", http.HandlerFunc(pprof.Trace))
}
func socketAddress(ctx context.Context, id string) (string, error) {
ns, err := namespaces.NamespaceRequired(ctx)
if err != nil {
return "", err
}
return filepath.Join(string(filepath.Separator), "containerd-shim", ns, id, "shim-monitor.sock"), nil
// SocketAddress returns the address of the abstract domain socket for communicating with the
// shim management endpoint
func SocketAddress(id string) string {
return filepath.Join(string(filepath.Separator), "run", "vc", id, "shim-monitor")
}

View File

@@ -14,7 +14,7 @@ import (
)
func startContainer(ctx context.Context, s *service, c *container) error {
//start a container
// start a container
if c.cType == "" {
err := fmt.Errorf("Bug, the container %s type is empty", c.id)
return err
@@ -37,8 +37,8 @@ func startContainer(ctx context.Context, s *service, c *container) error {
}
go watchSandbox(ctx, s)
// We don't rely on the context passed to startContainer as it can be cancelled after
// this rpc call.
// We use s.ctx(`ctx` derived from `s.ctx`) to check for cancellation of the
// shim context and the context passed to startContainer for tracing.
go watchOOMEvents(ctx, s)
} else {
_, err := s.sandbox.StartContainer(ctx, c.id)
@@ -74,10 +74,10 @@ func startContainer(ctx context.Context, s *service, c *container) error {
c.ttyio = tty
go ioCopy(c.exitIOch, c.stdinCloser, tty, stdin, stdout, stderr)
} else {
//close the io exit channel, since there is no io for this container,
//otherwise the following wait goroutine will hang on this channel.
// close the io exit channel, since there is no io for this container,
// otherwise the following wait goroutine will hang on this channel.
close(c.exitIOch)
//close the stdin closer channel to notify that it's safe to close process's
// close the stdin closer channel to notify that it's safe to close process's
// io.
close(c.stdinCloser)
}
@@ -88,7 +88,7 @@ func startContainer(ctx context.Context, s *service, c *container) error {
}
func startExec(ctx context.Context, s *service, containerID, execID string) (*exec, error) {
//start an exec
// start an exec
c, err := s.getContainer(containerID)
if err != nil {
return nil, err

View File

@@ -87,7 +87,6 @@ func newTtyIO(ctx context.Context, stdin, stdout, stderr string, console bool) (
func ioCopy(exitch, stdinCloser chan struct{}, tty *ttyIO, stdinPipe io.WriteCloser, stdoutPipe, stderrPipe io.Reader) {
var wg sync.WaitGroup
var closeOnce sync.Once
if tty.Stdin != nil {
wg.Add(1)
@@ -109,7 +108,11 @@ func ioCopy(exitch, stdinCloser chan struct{}, tty *ttyIO, stdinPipe io.WriteClo
defer bufPool.Put(p)
io.CopyBuffer(tty.Stdout, stdoutPipe, *p)
wg.Done()
closeOnce.Do(tty.close)
if tty.Stdin != nil {
// close stdin to make the other routine stop
tty.Stdin.Close()
tty.Stdin = nil
}
}()
}
@@ -124,6 +127,6 @@ func ioCopy(exitch, stdinCloser chan struct{}, tty *ttyIO, stdinPipe io.WriteClo
}
wg.Wait()
closeOnce.Do(tty.close)
tty.close()
close(exitch)
}

View File

@@ -142,7 +142,7 @@ func watchOOMEvents(ctx context.Context, s *service) {
for {
select {
case <-ctx.Done():
case <-s.ctx.Done():
return
default:
containerID, err := s.sandbox.GetOOMEvent(ctx)

View File

@@ -18,7 +18,6 @@ require (
github.com/containerd/typeurl v1.0.1-0.20190228175220-2a93cfde8c20
github.com/containernetworking/plugins v0.8.2
github.com/cri-o/cri-o v1.0.0-rc2.0.20170928185954-3394b3b2d6af
github.com/dlespiau/covertool v0.0.0-20180314162135-b0c4c6d0583a
github.com/docker/distribution v2.7.1+incompatible // indirect
github.com/docker/docker v1.13.1 // indirect
github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c // indirect
@@ -31,7 +30,7 @@ require (
github.com/gogo/googleapis v1.4.0 // indirect
github.com/gogo/protobuf v1.3.1
github.com/hashicorp/go-multierror v1.0.0
github.com/kata-containers/govmm v0.0.0-20210112013750-7d320e8f5dca
github.com/kata-containers/govmm v0.0.0-20210520142420-eb57f004d89f
github.com/mdlayher/vsock v0.0.0-20191108225356-d9c65923cb8f
github.com/opencontainers/image-spec v1.0.1 // indirect
github.com/opencontainers/runc v1.0.0-rc9.0.20200102164712-2b52db75279c

View File

@@ -108,8 +108,6 @@ github.com/d2g/hardwareaddr v0.0.0-20190221164911-e7d9fbe030e4/go.mod h1:bMl4RjI
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dlespiau/covertool v0.0.0-20180314162135-b0c4c6d0583a h1:+cYgqwB++gEE09SluRYGqJyDhWmLmdWZ2cXlOXSGV8w=
github.com/dlespiau/covertool v0.0.0-20180314162135-b0c4c6d0583a/go.mod h1:/eQMcW3eA1bzKx23ZYI2H3tXPdJB5JWYTHzoUPBvQY4=
github.com/docker/distribution v2.7.1+incompatible h1:a5mlkVzth6W5A4fOsS3D2EO5BUmsJpcB+cRlLU7cSug=
github.com/docker/distribution v2.7.1+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w=
github.com/docker/docker v1.13.1 h1:IkZjBSIc8hBjLpqeAbeE5mca5mNgeatLHBy3GO78BWo=
@@ -200,7 +198,6 @@ github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt
github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4=
github.com/golang/protobuf v0.0.0-20161109072736-4bd1920723d7/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
@@ -218,9 +215,7 @@ github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Z
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1 h1:Xye71clBPdm5HgqGwUkwhbynsUJZhDbS20FvLhQ2izg=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
@@ -242,7 +237,6 @@ github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hf
github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.1.0 h1:Jf4mxPC/ziBnoPIdpQdPJ9OeiomAUHLvxmPRSPH9m4s=
github.com/google/uuid v1.1.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.1.1 h1:Gkbcsh/GbpXz7lPftLA3P6TYMwjCLYm83jiFQZF/3gY=
github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
@@ -274,8 +268,10 @@ github.com/juju/errors v0.0.0-20180806074554-22422dad46e1/go.mod h1:W54LbzXuIE0b
github.com/juju/loggo v0.0.0-20190526231331-6e530bcce5d8/go.mod h1:vgyd7OREkbtVEN/8IXZe5Ooef3LQePvuBm9UWj6ZL8U=
github.com/juju/testing v0.0.0-20190613124551-e81189438503/go.mod h1:63prj8cnj0tU0S9OHjGJn+b1h0ZghCndfnbQolrYTwA=
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
github.com/kata-containers/govmm v0.0.0-20210112013750-7d320e8f5dca h1:UdXFthwasAPnmv37gLJUEFsW9FaabYA+mM6FoSi8kzU=
github.com/kata-containers/govmm v0.0.0-20210112013750-7d320e8f5dca/go.mod h1:VmAHbsL5lLfzHW/MNL96NVLF840DNEV5i683kISgFKk=
github.com/kata-containers/govmm v0.0.0-20210428163604-f0e9a35308ee h1:M4N7AdSHgWz/ubV5AZQdeqmK+9Ztpea6oqeXgk8GCHk=
github.com/kata-containers/govmm v0.0.0-20210428163604-f0e9a35308ee/go.mod h1:VmAHbsL5lLfzHW/MNL96NVLF840DNEV5i683kISgFKk=
github.com/kata-containers/govmm v0.0.0-20210520142420-eb57f004d89f h1:jXMZY7GIz5kSv3/Rdiesg1WMvgXJKNOk3KxwxgNWAVk=
github.com/kata-containers/govmm v0.0.0-20210520142420-eb57f004d89f/go.mod h1:VmAHbsL5lLfzHW/MNL96NVLF840DNEV5i683kISgFKk=
github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/konsorten/go-windows-terminal-sequences v1.0.1 h1:mweAR1A6xJ3oS2pRaGiHgQ4OO8tzTaLawm8vnODuwDk=
@@ -303,15 +299,12 @@ github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3Rllmb
github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw=
github.com/onsi/ginkgo v0.0.0-20151202141238-7f8ab55aaf3b h1:Ey6yH0acn50T/v6CB75bGP4EMJqnv9WvnjN7oZaj+xE=
github.com/onsi/ginkgo v0.0.0-20151202141238-7f8ab55aaf3b/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/ginkgo v0.0.0-20170829012221-11459a886d9c/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/ginkgo v1.10.1 h1:q/mM8GF/n0shIN8SaAZ0V+jnLPzen6WIVZdiwrRlMlo=
github.com/onsi/ginkgo v1.10.1/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/ginkgo v1.11.0 h1:JAKSXpt1YjtLA7YpPiqO9ss6sNXEsPfSGdwN0UHqzrw=
github.com/onsi/ginkgo v1.11.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/gomega v0.0.0-20151007035656-2152b45fa28a h1:KfNOeFvoAssuZLT7IntKZElKwi/5LRuxY71k+t6rfaM=
github.com/onsi/gomega v0.0.0-20151007035656-2152b45fa28a/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA=
github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA=
github.com/onsi/gomega v1.7.0 h1:XPnZz8VVBHjVsy1vzJmRwIcSwiUO+JFfrv/xGiigmME=
@@ -331,7 +324,6 @@ github.com/pborman/uuid v1.2.0 h1:J7Q5mO4ysT1dv8hyrUGHb9+ooztCXu1D8MY8DZYsu3g=
github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1-0.20171018195549-f15c970de5b7/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
@@ -375,7 +367,6 @@ github.com/stretchr/objx v0.1.1 h1:2vfRuCMp5sSVIDSqO8oNnWJq7mPa6KVP3iPIwFBuy8A=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
@@ -427,7 +418,6 @@ golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTk
golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/lint v0.0.0-20190930215403-16217165b5de h1:5hukYrvBGR8/eNkX5mdUezrA6JiaEZDtJb9Ei+1LlBs=
golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs=
golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
@@ -459,7 +449,6 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL
golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20191004110552-13f9640d40b9/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20191108221443-4ba9e2ef068c h1:SRpq/kuj/xNci/RdvEs+RSvpfxqvLAzTKuKGlzoGdZQ=
golang.org/x/net v0.0.0-20191108221443-4ba9e2ef068c/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
@@ -473,7 +462,6 @@ golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/
golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200707034311-ab3426394381 h1:VXak5I6aEWmAXeQjA+QSZzlgNrpq9mjcfDemuexIKsU=
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200822124328-c89045814202 h1:VvcQYSHwXgi7W+TpUR6A9g6Up98WAHf3f/ulnJ62IyA=
golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
@@ -489,7 +477,6 @@ golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e h1:vcxGaoTs7kV8m5Np9uUNQin4BrLOthgV7252N8V+FwY=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208 h1:qwRHBd0NqMbJxfbotnDhm2ByMI1Shq4Y6oRJo21SGJA=
@@ -531,17 +518,14 @@ golang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1 h1:ogLJMz+qpzav7lGMh10LMvAkM/fAoGlaiiHYiFYdm80=
golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f h1:Fqb3ao1hUmOR3GkUOg/Y+BadLwykBIzs5q8Ez2SbHyc=
golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.0.0-20160726164857-2910a502d2bf/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
@@ -593,7 +577,6 @@ golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc
golang.org/x/tools v0.0.0-20200904185747-39188db58858/go.mod h1:Cj7w3i3Rnn0Xh82ur9kSqwfTHTeVxaDqrfMjpcNT6bE=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@@ -616,7 +599,6 @@ google.golang.org/api v0.30.0/go.mod h1:QGmEvQ87FHZNiUVJkT14jQNYJ4ZJjdRF23ZXz513
google.golang.org/api v0.32.0 h1:Le77IccnTqEa8ryp9wIpX5W3zYm7Gf9LhOp9PHcwFts=
google.golang.org/api v0.32.0/go.mod h1:/XrVsuzM0rZmrsbjJutiuftIzeuTQcEeaYcSk/mQ1dg=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.4.0 h1:/wp5JvzpHIxhs/dumFmF7BXTf3Z+dd4uXta4kVyO508=
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0=
@@ -633,14 +615,12 @@ google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQ
google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.23.0 h1:4MY060fB1DLGMB/7MBTLnwQUY6+F09GEiz6SsrNqyzM=
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.25.0 h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4c=
google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
gopkg.in/airbrake/gobrake.v2 v2.0.9/go.mod h1:/h5ZAUhDkGaJfjzjKLSjv6zCL6O0LLBxU4K+aSYdM/U=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
@@ -654,10 +634,8 @@ gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce/go.mod h1:yeKp02qBN3iKW1OzL3M
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.5 h1:ymVxjfMaHvXD8RqPRmzHHsB3VvucivSkIAvJFDI5O3c=
gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=

View File

@@ -176,7 +176,7 @@ func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error {
for sandboxID, namespace := range sandboxes {
wg.Add(1)
go func(sandboxID, namespace string, results chan<- []*dto.MetricFamily) {
sandboxMetrics, err := km.getSandboxMetrics(sandboxID, namespace)
sandboxMetrics, err := getParsedMetrics(sandboxID)
if err != nil {
monitorLog.WithError(err).WithField("sandbox_id", sandboxID).Errorf("failed to get metrics for sandbox")
}
@@ -229,13 +229,12 @@ func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error {
return err
}
}
return nil
}
// getSandboxMetrics will get sandbox's metrics from shim
func (km *KataMonitor) getSandboxMetrics(sandboxID, namespace string) ([]*dto.MetricFamily, error) {
body, err := km.doGet(sandboxID, namespace, defaultTimeout, "metrics")
func getParsedMetrics(sandboxID string) ([]*dto.MetricFamily, error) {
body, err := doGet(sandboxID, defaultTimeout, "metrics")
if err != nil {
return nil, err
}
@@ -243,6 +242,16 @@ func (km *KataMonitor) getSandboxMetrics(sandboxID, namespace string) ([]*dto.Me
return parsePrometheusMetrics(sandboxID, body)
}
// GetSandboxMetrics will get sandbox's metrics from shim
func GetSandboxMetrics(sandboxID string) (string, error) {
body, err := doGet(sandboxID, defaultTimeout, "metrics")
if err != nil {
return "", err
}
return string(body), nil
}
// parsePrometheusMetrics will decode metrics from Prometheus text format
// and return array of *dto.MetricFamily with an ASC order
func parsePrometheusMetrics(sandboxID string, body []byte) ([]*dto.MetricFamily, error) {

View File

@@ -87,13 +87,8 @@ func (km *KataMonitor) GetAgentURL(w http.ResponseWriter, r *http.Request) {
commonServeError(w, http.StatusBadRequest, err)
return
}
namespace, err := km.getSandboxNamespace(sandboxID)
if err != nil {
commonServeError(w, http.StatusBadRequest, err)
return
}
data, err := km.doGet(sandboxID, namespace, defaultTimeout, "agent-url")
data, err := doGet(sandboxID, defaultTimeout, "agent-url")
if err != nil {
commonServeError(w, http.StatusBadRequest, err)
return

View File

@@ -11,6 +11,8 @@ import (
"net"
"net/http"
"time"
shim "github.com/kata-containers/kata-containers/src/runtime/containerd-shim-v2"
)
const (
@@ -33,16 +35,13 @@ func getSandboxIDFromReq(r *http.Request) (string, error) {
return "", fmt.Errorf("sandbox not found in %+v", r.URL.Query())
}
func (km *KataMonitor) buildShimClient(sandboxID, namespace string, timeout time.Duration) (*http.Client, error) {
socketAddr, err := km.getMonitorAddress(sandboxID, namespace)
if err != nil {
return nil, err
}
return BuildUnixSocketClient(socketAddr, timeout)
// BuildShimClient builds and returns an http client for communicating with the provided sandbox
func BuildShimClient(sandboxID string, timeout time.Duration) (*http.Client, error) {
return buildUnixSocketClient(shim.SocketAddress(sandboxID), timeout)
}
// BuildUnixSocketClient build http client for Unix socket
func BuildUnixSocketClient(socketAddr string, timeout time.Duration) (*http.Client, error) {
// buildUnixSocketClient build http client for Unix socket
func buildUnixSocketClient(socketAddr string, timeout time.Duration) (*http.Client, error) {
transport := &http.Transport{
DisableKeepAlives: true,
Dial: func(proto, addr string) (conn net.Conn, err error) {
@@ -61,8 +60,8 @@ func BuildUnixSocketClient(socketAddr string, timeout time.Duration) (*http.Clie
return client, nil
}
func (km *KataMonitor) doGet(sandboxID, namespace string, timeoutInSeconds time.Duration, urlPath string) ([]byte, error) {
client, err := km.buildShimClient(sandboxID, namespace, timeoutInSeconds)
func doGet(sandboxID string, timeoutInSeconds time.Duration, urlPath string) ([]byte, error) {
client, err := BuildShimClient(sandboxID, timeoutInSeconds)
if err != nil {
return nil, err
}

View File

@@ -54,6 +54,7 @@ const defaultDisableImageNvdimm = false
const defaultVhostUserStorePath string = "/var/run/kata-containers/vhost-user/"
const defaultRxRateLimiterMaxRate = uint64(0)
const defaultTxRateLimiterMaxRate = uint64(0)
const defaultConfidentialGuest = false
var defaultSGXEPCSize = int64(0)

View File

@@ -1,4 +1,4 @@
// Copyright (c) 2018 Intel Corporation
// Copyright (c) 2018-2021 Intel Corporation
// Copyright (c) 2018 HyperHQ Inc.
//
// SPDX-License-Identifier: Apache-2.0
@@ -61,6 +61,12 @@ type tomlConfig struct {
Runtime runtime
Factory factory
Netmon netmon
Image image
}
type image struct {
ServiceOffload bool `toml:"service_offload"`
Provision string `toml:"provision"`
}
type factory struct {
@@ -99,6 +105,7 @@ type hypervisor struct {
PFlashList []string `toml:"pflashes"`
VhostUserStorePathList []string `toml:"valid_vhost_user_store_paths"`
FileBackedMemRootList []string `toml:"valid_file_mem_backends"`
EntropySourceList []string `toml:"valid_entropy_sources"`
EnableAnnotations []string `toml:"enable_annotations"`
RxRateLimiterMaxRate uint64 `toml:"rx_rate_limiter_max_rate"`
TxRateLimiterMaxRate uint64 `toml:"tx_rate_limiter_max_rate"`
@@ -129,6 +136,7 @@ type hypervisor struct {
HotplugVFIOOnRootBus bool `toml:"hotplug_vfio_on_root_bus"`
DisableVhostNet bool `toml:"disable_vhost_net"`
GuestMemoryDumpPaging bool `toml:"guest_memory_dump_paging"`
ConfidentialGuest bool `toml:"confidential_guest"`
}
type runtime struct {
@@ -153,6 +161,7 @@ type agent struct {
Debug bool `toml:"enable_debug"`
Tracing bool `toml:"enable_tracing"`
DebugConsoleEnabled bool `toml:"debug_console_enabled"`
DialTimeout uint32 `toml:"dial_timeout"`
}
type netmon struct {
@@ -470,6 +479,10 @@ func (a agent) debugConsoleEnabled() bool {
return a.DebugConsoleEnabled
}
func (a agent) dialTimout() uint32 {
return a.DialTimeout
}
func (a agent) debug() bool {
return a.Debug
}
@@ -557,6 +570,7 @@ func newFirecrackerHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
MemorySize: h.defaultMemSz(),
MemSlots: h.defaultMemSlots(),
EntropySource: h.GetEntropySource(),
EntropySourceList: h.EntropySourceList,
DefaultBridges: h.defaultBridges(),
DisableBlockDeviceUse: h.DisableBlockDeviceUse,
HugePages: h.HugePages,
@@ -663,6 +677,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
MemOffset: h.defaultMemOffset(),
VirtioMem: h.VirtioMem,
EntropySource: h.GetEntropySource(),
EntropySourceList: h.EntropySourceList,
DefaultBridges: h.defaultBridges(),
DisableBlockDeviceUse: h.DisableBlockDeviceUse,
SharedFS: sharedFS,
@@ -699,6 +714,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
EnableAnnotations: h.EnableAnnotations,
GuestMemoryDumpPath: h.GuestMemoryDumpPath,
GuestMemoryDumpPaging: h.GuestMemoryDumpPaging,
ConfidentialGuest: h.ConfidentialGuest,
}, nil
}
@@ -754,6 +770,7 @@ func newAcrnHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
MemorySize: h.defaultMemSz(),
MemSlots: h.defaultMemSlots(),
EntropySource: h.GetEntropySource(),
EntropySourceList: h.EntropySourceList,
DefaultBridges: h.defaultBridges(),
HugePages: h.HugePages,
Mlock: !h.Swap,
@@ -830,6 +847,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
MemOffset: h.defaultMemOffset(),
VirtioMem: h.VirtioMem,
EntropySource: h.GetEntropySource(),
EntropySourceList: h.EntropySourceList,
DefaultBridges: h.defaultBridges(),
DisableBlockDeviceUse: h.DisableBlockDeviceUse,
SharedFS: sharedFS,
@@ -905,7 +923,7 @@ func updateRuntimeConfigHypervisor(configPath string, tomlConf tomlConfig, confi
return nil
}
func updateRuntimeConfigAgent(configPath string, tomlConf tomlConfig, config *oci.RuntimeConfig, builtIn bool) error {
func updateRuntimeConfigAgent(configPath string, tomlConf tomlConfig, config *oci.RuntimeConfig) error {
for _, agent := range tomlConf.Agent {
config.AgentConfig = vc.KataAgentConfig{
LongLiveConn: true,
@@ -915,6 +933,7 @@ func updateRuntimeConfigAgent(configPath string, tomlConf tomlConfig, config *oc
TraceType: agent.traceType(),
KernelModules: agent.kernelModules(),
EnableDebugConsole: agent.debugConsoleEnabled(),
DialTimeout: agent.dialTimout(),
}
}
@@ -980,12 +999,12 @@ func SetKernelParams(runtimeConfig *oci.RuntimeConfig) error {
return nil
}
func updateRuntimeConfig(configPath string, tomlConf tomlConfig, config *oci.RuntimeConfig, builtIn bool) error {
func updateRuntimeConfig(configPath string, tomlConf tomlConfig, config *oci.RuntimeConfig) error {
if err := updateRuntimeConfigHypervisor(configPath, tomlConf, config); err != nil {
return err
}
if err := updateRuntimeConfigAgent(configPath, tomlConf, config, builtIn); err != nil {
if err := updateRuntimeConfigAgent(configPath, tomlConf, config); err != nil {
return err
}
@@ -1050,6 +1069,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
RxRateLimiterMaxRate: defaultRxRateLimiterMaxRate,
TxRateLimiterMaxRate: defaultTxRateLimiterMaxRate,
SGXEPCSize: defaultSGXEPCSize,
ConfidentialGuest: defaultConfidentialGuest,
}
}
@@ -1076,7 +1096,7 @@ func initConfig() (config oci.RuntimeConfig, err error) {
//
// All paths are resolved fully meaning if this function does not return an
// error, all paths are valid at the time of the call.
func LoadConfiguration(configPath string, ignoreLogging, builtIn bool) (resolvedConfigPath string, config oci.RuntimeConfig, err error) {
func LoadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPath string, config oci.RuntimeConfig, err error) {
config, err = initConfig()
if err != nil {
@@ -1118,7 +1138,7 @@ func LoadConfiguration(configPath string, ignoreLogging, builtIn bool) (resolved
}).Info("loaded configuration")
}
if err := updateRuntimeConfig(resolved, tomlConf, &config, builtIn); err != nil {
if err := updateRuntimeConfig(resolved, tomlConf, &config); err != nil {
return "", config, err
}

View File

@@ -260,7 +260,7 @@ func testLoadConfiguration(t *testing.T, dir string,
assert.NoError(t, err)
}
resolvedConfigPath, config, err := LoadConfiguration(file, ignoreLogging, false)
resolvedConfigPath, config, err := LoadConfiguration(file, ignoreLogging)
if expectFail {
assert.Error(t, err)
@@ -566,7 +566,7 @@ func TestMinimalRuntimeConfig(t *testing.T) {
t.Error(err)
}
_, config, err := LoadConfiguration(configPath, false, false)
_, config, err := LoadConfiguration(configPath, false)
if err != nil {
t.Fatal(err)
}
@@ -1398,7 +1398,7 @@ func TestUpdateRuntimeConfigurationVMConfig(t *testing.T) {
},
}
err := updateRuntimeConfig("", tomlConf, &config, false)
err := updateRuntimeConfig("", tomlConf, &config)
assert.NoError(err)
assert.Equal(expectedVMConfig, config.HypervisorConfig.MemorySize)
@@ -1416,7 +1416,7 @@ func TestUpdateRuntimeConfigurationFactoryConfig(t *testing.T) {
tomlConf := tomlConfig{Factory: factory{Template: true}}
err := updateRuntimeConfig("", tomlConf, &config, false)
err := updateRuntimeConfig("", tomlConf, &config)
assert.NoError(err)
assert.Equal(expectedFactoryConfig, config.FactoryConfig)
@@ -1443,7 +1443,7 @@ func TestUpdateRuntimeConfigurationInvalidKernelParams(t *testing.T) {
}
}
err := updateRuntimeConfig("", tomlConf, &config, false)
err := updateRuntimeConfig("", tomlConf, &config)
assert.EqualError(err, "Empty kernel parameter")
}

View File

@@ -104,7 +104,7 @@ func SetEphemeralStorageType(ociSpec specs.Spec) specs.Spec {
// CreateSandbox create a sandbox container
func CreateSandbox(ctx context.Context, vci vc.VC, ociSpec specs.Spec, runtimeConfig oci.RuntimeConfig, rootFs vc.RootFs,
containerID, bundlePath, console string, disableOutput, systemdCgroup bool) (_ vc.VCSandbox, _ vc.Process, err error) {
span, ctx := Trace(ctx, "createSandbox")
span, ctx := Trace(ctx, "CreateSandbox", []label.KeyValue{label.Key("source").String("runtime"), label.Key("package").String("katautils"), label.Key("subsystem").String("sandbox"), label.Key("container_id").String(containerID)}...)
defer span.End()
sandboxConfig, err := oci.SandboxConfig(ociSpec, runtimeConfig, bundlePath, containerID, console, disableOutput, systemdCgroup)
@@ -159,7 +159,7 @@ func CreateSandbox(ctx context.Context, vci vc.VC, ociSpec specs.Spec, runtimeCo
sid := sandbox.ID()
kataUtilsLogger = kataUtilsLogger.WithField("sandbox", sid)
span.SetAttributes(label.Key("sandbox").String(sid))
span.SetAttributes(label.Key("sandbox_id").String(sid))
containers := sandbox.GetAllContainers()
if len(containers) != 1 {
@@ -202,7 +202,7 @@ func checkForFIPS(sandboxConfig *vc.SandboxConfig) error {
func CreateContainer(ctx context.Context, sandbox vc.VCSandbox, ociSpec specs.Spec, rootFs vc.RootFs, containerID, bundlePath, console string, disableOutput bool) (vc.Process, error) {
var c vc.VCContainer
span, ctx := Trace(ctx, "createContainer")
span, ctx := Trace(ctx, "CreateContainer", []label.KeyValue{label.Key("source").String("runtime"), label.Key("package").String("katautils"), label.Key("subsystem").String("sandbox"), label.Key("container_id").String(containerID)}...)
defer span.End()
ociSpec = SetEphemeralStorageType(ociSpec)
@@ -228,7 +228,7 @@ func CreateContainer(ctx context.Context, sandbox vc.VCSandbox, ociSpec specs.Sp
return vc.Process{}, err
}
span.SetAttributes(label.Key("sandbox").String(sandboxID))
span.SetAttributes(label.Key("sandbox_id").String(sandboxID))
c, err = sandbox.CreateContainer(ctx, contConfig)
if err != nil {

View File

@@ -26,11 +26,9 @@ func hookLogger() *logrus.Entry {
}
func runHook(ctx context.Context, hook specs.Hook, cid, bundlePath string) error {
span, _ := Trace(ctx, "hook")
span, _ := Trace(ctx, "runHook", []label.KeyValue{label.Key("source").String("runtime"), label.Key("package").String("katautils"), label.Key("subsystem").String("hook")}...)
defer span.End()
span.SetAttributes(label.Key("subsystem").String("runHook"))
// FIXME
// span.LogFields(
// log.String("hook-name", hook.Path),
@@ -90,11 +88,9 @@ func runHook(ctx context.Context, hook specs.Hook, cid, bundlePath string) error
}
func runHooks(ctx context.Context, hooks []specs.Hook, cid, bundlePath, hookType string) error {
span, _ := Trace(ctx, "hooks")
span, _ := Trace(ctx, "runHooks", []label.KeyValue{label.Key("source").String("runtime"), label.Key("package").String("katautils"), label.Key("subsystem").String("hook"), label.Key("type").String(hookType)}...)
defer span.End()
span.SetAttributes(label.Key("subsystem").String(hookType))
for _, hook := range hooks {
if err := runHook(ctx, hook, cid, bundlePath); err != nil {
hookLogger().WithFields(logrus.Fields{

View File

@@ -30,7 +30,7 @@ var _ export.SpanExporter = (*kataSpanExporter)(nil)
// ExportSpans exports SpanData to Jaeger.
func (e *kataSpanExporter) ExportSpans(ctx context.Context, spans []*export.SpanData) error {
for _, span := range spans {
kataUtilsLogger.Infof("Reporting span %+v", span)
kataUtilsLogger.Tracef("Reporting span %+v", span)
}
return nil
}
@@ -110,12 +110,11 @@ func StopTracing(ctx context.Context) {
}
// Trace creates a new tracing span based on the specified name and parent
// context.
func Trace(parent context.Context, name string) (otelTrace.Span, context.Context) {
// context and an opentelemetry label.KeyValue slice for span attributes.
func Trace(parent context.Context, name string, tags ...label.KeyValue) (otelTrace.Span, context.Context) {
tracer := otel.Tracer("kata")
ctx, span := tracer.Start(parent, name)
span.SetAttributes(label.Key("source").String("runtime"))
ctx, span := tracer.Start(parent, name, otelTrace.WithAttributes(tags...))
// This is slightly confusing: when tracing is disabled, trace spans
// are still created - but the tracer used is a NOP. Therefore, only

Some files were not shown because too many files have changed in this diff Show More