Merge pull request #2015 from fidencio/2.2.0-alpha0-branch-bump

# Kata Containers 2.2.0-alpha0
Merge pull request #1995 from GabyCT/topic/removetravisreference
2026-03-17 02:02:34 +00:00 · 2021-06-11 18:51:08 +02:00 · 2021-06-11 09:23:47 -05:00 · 2021-06-11 16:10:01 +02:00 · 2021-06-11 00:31:51 +02:00 · 2021-06-11 00:31:34 +02:00
316 changed files with 14373 additions and 6500 deletions
--- a/.github/workflows/kata-deploy-test.yaml
+++ b/.github/workflows/kata-deploy-test.yaml
@@ -1,9 +1,12 @@
-on: issue_comment
+on:
+  issue_comment:
+    types: [created, edited]
+
 name: test-kata-deploy
+
 jobs:
  check_comments:
    if: ${{ github.event.issue.pull_request }}
-    types: [created, edited]
    runs-on: ubuntu-latest
    steps:
      - name: Check for Command
@@ -11,7 +14,7 @@ jobs:
        uses: kata-containers/slash-command-action@v1
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
-          command: "test-kata-deploy"
+          command: "test_kata_deploy"
          reaction: "true"
          reaction-type: "eyes"
          allow-edits: "false"
@@ -19,6 +22,7 @@ jobs:
      - name: verify command arg is kata-deploy
        run: |
           echo "The command was '${{ steps.command.outputs.command-name }}' with arguments '${{ steps.command.outputs.command-arguments }}'"
+
  create-and-test-container:
    needs: check_comments
    runs-on: ubuntu-latest
@@ -29,22 +33,26 @@ jobs:
            ref=$(cat $GITHUB_EVENT_PATH | jq -r '.issue.pull_request.url' | sed  's#^.*\/pulls#refs\/pull#' | sed 's#$#\/merge#')
            echo "reference for PR: " ${ref}
            echo "##[set-output name=pr-ref;]${ref}"
-      - uses: actions/checkout@v2-beta
+
+      - name: check out
+        uses: actions/checkout@v2
        with:
-          ref: ${{ steps.get-PR-ref.outputs.pr-ref }}
+           ref: ${{ steps.get-PR-ref.outputs.pr-ref }}
+
      - name: build-container-image
        id: build-container-image
        run: |
            PR_SHA=$(git log --format=format:%H -n1)
-            VERSION=$(curl https://raw.githubusercontent.com/kata-containers/kata-containers/main/VERSION)
+            VERSION="2.0.0"
            ARTIFACT_URL="https://github.com/kata-containers/kata-containers/releases/download/${VERSION}/kata-static-${VERSION}-x86_64.tar.xz"
-            wget "${ARTIFACT_URL}" -O ./kata-deploy/kata-static.tar.xz
-            docker build --build-arg KATA_ARTIFACTS=kata-static.tar.xz -t katadocker/kata-deploy-ci:${PR_SHA} ./kata-deploy
+            wget "${ARTIFACT_URL}" -O tools/packaging/kata-deploy/kata-static.tar.xz
+            docker build --build-arg KATA_ARTIFACTS=kata-static.tar.xz -t katadocker/kata-deploy-ci:${PR_SHA} ./tools/packaging/kata-deploy
            docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }}
            docker push katadocker/kata-deploy-ci:$PR_SHA
            echo "##[set-output name=pr-sha;]${PR_SHA}"
+
      - name: test-kata-deploy-ci-in-aks
-        uses: ./kata-deploy/action
+        uses: ./tools/packaging/kata-deploy/action
        with:
          packaging-sha: ${{ steps.build-container-image.outputs.pr-sha }}
        env:
--- a/.github/workflows/require-pr-porting-labels.yaml
+++ b/.github/workflows/require-pr-porting-labels.yaml
@@ -12,6 +12,9 @@ on:
      - reopened
      - labeled
      - unlabeled
+   pull_request:
+     branches:
+      - main

 jobs:
  check-pr-porting-labels:
--- a/.github/workflows/snap-release.yaml
+++ b/.github/workflows/snap-release.yaml
@@ -33,5 +33,5 @@ jobs:
          snap_file="kata-containers_${snap_version}_amd64.snap"
          # Upload the snap if it exists
          if [ -f ${snap_file} ]; then
-            snapcraft upload --release=candidate ${snap_file}
+            snapcraft upload --release=stable ${snap_file}
          fi
--- a/2
+++ b/2
@@ -1 +1 @@
-2.1-alpha1
+2.2.0-alpha0
--- a/ci/install_yq.sh
+++ b/ci/install_yq.sh
@@ -18,7 +18,9 @@ function install_yq() {
 	GOPATH=${GOPATH:-${HOME}/go}
 	local yq_path="${GOPATH}/bin/yq"
 	local yq_pkg="github.com/mikefarah/yq"
-	[ -x  "${GOPATH}/bin/yq" ] && return
+	local yq_version=3.4.1
+
+	[ -x  "${GOPATH}/bin/yq" ] && [ "`${GOPATH}/bin/yq --version`"X == "yq version ${yq_version}"X ] && return

 	read -r -a sysInfo <<< "$(uname -sm)"

@@ -56,8 +58,6 @@ function install_yq() {
 		die "Please install curl"
 	fi

-	local yq_version=3.4.1
-
 	## NOTE: ${var,,} => gives lowercase value of var
 	local yq_url="https://${yq_pkg}/releases/download/${yq_version}/yq_${goos,,}_${goarch}"
 	curl -o "${yq_path}" -LSsf "${yq_url}"
--- a/ci/lib.sh
+++ b/ci/lib.sh
@@ -7,16 +7,25 @@ export tests_repo="${tests_repo:-github.com/kata-containers/tests}"
 export tests_repo_dir="$GOPATH/src/$tests_repo"
 export branch="${branch:-main}"

+# Clones the tests repository and checkout to the branch pointed out by
+# the global $branch variable.
+# If the clone exists and `CI` is exported then it does nothing. Otherwise
+# it will clone the repository or `git pull` the latest code.
+#
 clone_tests_repo()
 {
-	if [ -d "$tests_repo_dir" -a -n "$CI" ]
-	then
-		return
+	if [ -d "$tests_repo_dir" ]; then
+		[ -n "$CI" ] && return
+		pushd "${tests_repo_dir}"
+		git checkout "${branch}"
+		git pull
+		popd
+	else
+		git clone -q "https://${tests_repo}" "$tests_repo_dir"
+		pushd "${tests_repo_dir}"
+		git checkout "${branch}"
+		popd
 	fi
-
-	go get -d -u "$tests_repo" || true
-
-	pushd "${tests_repo_dir}" && git checkout "${branch}" && popd
 }

 run_static_checks()
--- a/ci/openshift-ci/images/Dockerfile.buildroot
+++ b/ci/openshift-ci/images/Dockerfile.buildroot
@@ -0,0 +1,9 @@
+# Copyright (c) 2021 Red Hat, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# This is the build root image for Kata Containers on OpenShift CI.
+#
+FROM centos:8
+
+RUN yum -y update && yum -y install git sudo wget
--- a/docs/Developer-Guide.md
+++ b/docs/Developer-Guide.md
@@ -1,54 +1,55 @@
-* [Warning](#warning)
-* [Assumptions](#assumptions)
-* [Initial setup](#initial-setup)
-* [Requirements to build individual components](#requirements-to-build-individual-components)
-* [Build and install the Kata Containers runtime](#build-and-install-the-kata-containers-runtime)
-* [Check hardware requirements](#check-hardware-requirements)
-    * [Configure to use initrd or rootfs image](#configure-to-use-initrd-or-rootfs-image)
-    * [Enable full debug](#enable-full-debug)
-        * [debug logs and shimv2](#debug-logs-and-shimv2)
-            * [Enabling full `containerd` debug](#enabling-full-containerd-debug)
-            * [Enabling just `containerd shim` debug](#enabling-just-containerd-shim-debug)
-            * [Enabling `CRI-O` and `shimv2` debug](#enabling-cri-o-and-shimv2-debug)
-        * [journald rate limiting](#journald-rate-limiting)
-            * [`systemd-journald` suppressing messages](#systemd-journald-suppressing-messages)
-            * [Disabling `systemd-journald` rate limiting](#disabling-systemd-journald-rate-limiting)
-* [Create and install rootfs and initrd image](#create-and-install-rootfs-and-initrd-image)
-    * [Build a custom Kata agent - OPTIONAL](#build-a-custom-kata-agent---optional)
-    * [Get the osbuilder](#get-the-osbuilder)
-    * [Create a rootfs image](#create-a-rootfs-image)
-        * [Create a local rootfs](#create-a-local-rootfs)
-        * [Add a custom agent to the image - OPTIONAL](#add-a-custom-agent-to-the-image---optional)
-        * [Build a rootfs image](#build-a-rootfs-image)
-        * [Install the rootfs image](#install-the-rootfs-image)
-    * [Create an initrd image - OPTIONAL](#create-an-initrd-image---optional)
-        * [Create a local rootfs for initrd image](#create-a-local-rootfs-for-initrd-image)
-        * [Build an initrd image](#build-an-initrd-image)
-        * [Install the initrd image](#install-the-initrd-image)
-* [Install guest kernel images](#install-guest-kernel-images)
-* [Install a hypervisor](#install-a-hypervisor)
-    * [Build a custom QEMU](#build-a-custom-qemu)
-        * [Build a custom QEMU for aarch64/arm64 - REQUIRED](#build-a-custom-qemu-for-aarch64arm64---required)
-* [Run Kata Containers with Containerd](#run-kata-containers-with-containerd)
-* [Run Kata Containers with Kubernetes](#run-kata-containers-with-kubernetes)
-* [Troubleshoot Kata Containers](#troubleshoot-kata-containers)
-* [Appendices](#appendices)
-    * [Checking Docker default runtime](#checking-docker-default-runtime)
-    * [Set up a debug console](#set-up-a-debug-console)
-      * [Simple debug console setup](#simple-debug-console-setup)
-          * [Enable agent debug console](#enable-agent-debug-console)
-          * [Connect to debug console](#connect-to-debug-console)
-      * [Traditional debug console setup](#traditional-debug-console-setup)
-          * [Create a custom image containing a shell](#create-a-custom-image-containing-a-shell)
-          * [Build the debug image](#build-the-debug-image)
-          * [Configure runtime for custom debug image](#configure-runtime-for-custom-debug-image)
-          * [Connect to the virtual machine using the debug console](#connect-to-the-virtual-machine-using-the-debug-console)
-              * [Enabling debug console for QEMU](#enabling-debug-console-for-qemu)
-              * [Enabling debug console for cloud-hypervisor / firecracker](#enabling-debug-console-for-cloud-hypervisor--firecracker)
-          * [Create a container](#create-a-container)
-          * [Connect to the virtual machine using the debug console](#connect-to-the-virtual-machine-using-the-debug-console)
-    * [Obtain details of the image](#obtain-details-of-the-image)
-    * [Capturing kernel boot logs](#capturing-kernel-boot-logs)
+- [Warning](#warning)
+- [Assumptions](#assumptions)
+- [Initial setup](#initial-setup)
+- [Requirements to build individual components](#requirements-to-build-individual-components)
+- [Build and install the Kata Containers runtime](#build-and-install-the-kata-containers-runtime)
+- [Check hardware requirements](#check-hardware-requirements)
+  - [Configure to use initrd or rootfs image](#configure-to-use-initrd-or-rootfs-image)
+  - [Enable full debug](#enable-full-debug)
+    - [debug logs and shimv2](#debug-logs-and-shimv2)
+      - [Enabling full `containerd` debug](#enabling-full-containerd-debug)
+      - [Enabling just `containerd shim` debug](#enabling-just-containerd-shim-debug)
+      - [Enabling `CRI-O` and `shimv2` debug](#enabling-cri-o-and-shimv2-debug)
+    - [journald rate limiting](#journald-rate-limiting)
+      - [`systemd-journald` suppressing messages](#systemd-journald-suppressing-messages)
+      - [Disabling `systemd-journald` rate limiting](#disabling-systemd-journald-rate-limiting)
+- [Create and install rootfs and initrd image](#create-and-install-rootfs-and-initrd-image)
+  - [Build a custom Kata agent - OPTIONAL](#build-a-custom-kata-agent---optional)
+  - [Get the osbuilder](#get-the-osbuilder)
+  - [Create a rootfs image](#create-a-rootfs-image)
+    - [Create a local rootfs](#create-a-local-rootfs)
+    - [Add a custom agent to the image - OPTIONAL](#add-a-custom-agent-to-the-image---optional)
+    - [Build a rootfs image](#build-a-rootfs-image)
+    - [Install the rootfs image](#install-the-rootfs-image)
+  - [Create an initrd image - OPTIONAL](#create-an-initrd-image---optional)
+    - [Create a local rootfs for initrd image](#create-a-local-rootfs-for-initrd-image)
+    - [Build an initrd image](#build-an-initrd-image)
+    - [Install the initrd image](#install-the-initrd-image)
+- [Install guest kernel images](#install-guest-kernel-images)
+- [Install a hypervisor](#install-a-hypervisor)
+  - [Build a custom QEMU](#build-a-custom-qemu)
+    - [Build a custom QEMU for aarch64/arm64 - REQUIRED](#build-a-custom-qemu-for-aarch64arm64---required)
+- [Run Kata Containers with Containerd](#run-kata-containers-with-containerd)
+- [Run Kata Containers with Kubernetes](#run-kata-containers-with-kubernetes)
+- [Troubleshoot Kata Containers](#troubleshoot-kata-containers)
+- [Appendices](#appendices)
+  - [Checking Docker default runtime](#checking-docker-default-runtime)
+  - [Set up a debug console](#set-up-a-debug-console)
+    - [Simple debug console setup](#simple-debug-console-setup)
+      - [Enable agent debug console](#enable-agent-debug-console)
+      - [Start `kata-monitor` - ONLY NEEDED FOR 2.0.x](#start-kata-monitor---only-needed-for-20x)
+      - [Connect to debug console](#connect-to-debug-console)
+    - [Traditional debug console setup](#traditional-debug-console-setup)
+      - [Create a custom image containing a shell](#create-a-custom-image-containing-a-shell)
+      - [Build the debug image](#build-the-debug-image)
+      - [Configure runtime for custom debug image](#configure-runtime-for-custom-debug-image)
+      - [Create a container](#create-a-container)
+      - [Connect to the virtual machine using the debug console](#connect-to-the-virtual-machine-using-the-debug-console)
+        - [Enabling debug console for QEMU](#enabling-debug-console-for-qemu)
+        - [Enabling debug console for cloud-hypervisor / firecracker](#enabling-debug-console-for-cloud-hypervisor--firecracker)
+        - [Connecting to the debug console](#connecting-to-the-debug-console)
+  - [Obtain details of the image](#obtain-details-of-the-image)
+  - [Capturing kernel boot logs](#capturing-kernel-boot-logs)

 # Warning

@@ -304,7 +305,7 @@ You MUST choose one of `alpine`, `centos`, `clearlinux`, `debian`, `euleros`, `f
 > - You should only do this step if you are testing with the latest version of the agent.

 ```
-$ sudo install -o root -g root -m 0550 -t ${ROOTFS_DIR}/bin ../../../src/agent/target/x86_64-unknown-linux-musl/release/kata-agent
+$ sudo install -o root -g root -m 0550 -t ${ROOTFS_DIR}/usr/bin ../../../src/agent/target/x86_64-unknown-linux-musl/release/kata-agent
 $ sudo install -o root -g root -m 0440 ../../../src/agent/kata-agent.service ${ROOTFS_DIR}/usr/lib/systemd/system/
 $ sudo install -o root -g root -m 0440 ../../../src/agent/kata-containers.target ${ROOTFS_DIR}/usr/lib/systemd/system/
 ```
@@ -384,31 +385,56 @@ You can build and install the guest kernel image as shown [here](../tools/packag

 # Install a hypervisor

-When setting up Kata using a [packaged installation method](install/README.md#installing-on-a-linux-system), the `qemu-lite` hypervisor is installed automatically. For other installation methods, you will need to manually install a suitable hypervisor.
+When setting up Kata using a [packaged installation method](install/README.md#installing-on-a-linux-system), the
+`QEMU` VMM is installed automatically. Cloud-Hypervisor and Firecracker VMMs are available from the [release tarballs](https://github.com/kata-containers/kata-containers/releases), as well as through [`kata-deploy`](../tools/packaging/kata-deploy/README.md).
+You may choose to manually build your VMM/hypervisor.

 ## Build a custom QEMU

-Your QEMU directory need to be prepared with source code. Alternatively, you can use the [Kata containers QEMU](https://github.com/kata-containers/qemu/tree/master) and checkout the recommended branch:
+Kata Containers makes use of an upstream QEMU branch. The exact version
+and repository utilized can be found by looking at the [versions file](../versions.yaml).

+Find the correct version of QEMU from the versions file:
 ```
-$ go get -d github.com/kata-containers/qemu
-$ qemu_branch=$(grep qemu-lite- ${GOPATH}/src/github.com/kata-containers/kata-containers/versions.yaml | cut -d '"' -f2)
-$ cd ${GOPATH}/src/github.com/kata-containers/qemu
-$ git checkout -b $qemu_branch remotes/origin/$qemu_branch
-$ your_qemu_directory=${GOPATH}/src/github.com/kata-containers/qemu
+$ source ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/scripts/lib.sh
+$ qemu_version=$(get_from_kata_deps "assets.hypervisor.qemu.version")
+$ echo ${qemu_version}
+```
+Get source from the matching branch of QEMU:
+```
+$ go get -d github.com/qemu/qemu
+$ cd ${GOPATH}/src/github.com/qemu/qemu
+$ git checkout ${qemu_version}
+$ your_qemu_directory=${GOPATH}/src/github.com/qemu/qemu
 ```

-To build a version of QEMU using the same options as the default `qemu-lite` version , you could use the `configure-hypervisor.sh` script:
-
+There are scripts to manage the build and packaging of QEMU. For the examples below, set your
+environment as:
+```
+$ go get -d github.com/kata-containers/kata-containers
+$ packaging_dir="${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging"
+```
+
+Kata often utilizes patches for not-yet-upstream and/or backported fixes for components,
+including QEMU. These can be found in the [packaging/QEMU directory](../tools/packaging/qemu/patches),
+and it's *recommended* that you apply them. For example, suppose that you are going to build QEMU
+version 5.2.0, do:
 ```
-$ go get -d github.com/kata-containers/kata-containers/tools/packaging
 $ cd $your_qemu_directory
-$ ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/scripts/configure-hypervisor.sh kata-qemu > kata.cfg
+$ $packaging_dir/scripts/apply_patches.sh $packaging_dir/qemu/patches/5.2.x/
+```
+
+To build utilizing the same options as Kata, you should make use of the `configure-hypervisor.sh` script. For example:
+```
+$ cd $your_qemu_directory
+$ $packaging_dir/scripts/configure-hypervisor.sh kata-qemu > kata.cfg
 $ eval ./configure "$(cat kata.cfg)"
 $ make -j $(nproc)
 $ sudo -E make install
 ```

+See the [static-build script for QEMU](../tools/packaging/static-build/qemu/build-static-qemu.sh) for a reference on how to get, setup, configure and build QEMU for Kata.
+
 ### Build a custom QEMU for aarch64/arm64 - REQUIRED
 > **Note:**
 >
@@ -476,6 +502,16 @@ debug_console_enabled = true

 This will pass `agent.debug_console agent.debug_console_vport=1026` to agent as kernel parameters, and sandboxes created using this parameters will start a shell in guest if new connection is accept from VSOCK.

+#### Start `kata-monitor` - ONLY NEEDED FOR 2.0.x
+
+For Kata Containers `2.0.x` releases, the `kata-runtime exec` command depends on `kata-monitor` running in order to get the sandbox's `vsock` address to connect to. Thus, first start the `kata-monitor` process.
+
+```
+$ sudo kata-monitor
+```
+
+`kata-monitor` will serve at `localhost:8090` by default.
+
 #### Connect to debug console

 Command `kata-runtime exec` is used to connect to the debug console.
@@ -613,8 +649,11 @@ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.debug_cons
 > **Note** Ports 1024 and 1025 are reserved for communication with the agent
 > and gathering of agent logs respectively. 

-Next, connect to the debug console. The VSOCKS paths vary slightly between
-cloud-hypervisor and firecracker.
+##### Connecting to the debug console
+
+Next, connect to the debug console. The VSOCKS paths vary slightly between each
+VMM solution.
+
 In case of cloud-hypervisor, connect to the `vsock` as shown:
 ```
 $ sudo su -c 'cd /var/run/vc/vm/{sandbox_id}/root/ && socat stdin unix-connect:clh.sock'
@@ -631,6 +670,12 @@ CONNECT 1026

 **Note**: You need to press the `RETURN` key to see the shell prompt.

+
+For QEMU, connect to the `vsock` as shown:
+```
+$ sudo su -c 'cd /var/run/vc/vm/{sandbox_id} && socat "stdin,raw,echo=0,escape=0x11" "unix-connect:console.sock"'
+```
+
 To disconnect from the virtual machine, type `CONTROL+q` (hold down the
 `CONTROL` key and press `q`).

--- a/docs/Limitations.md
+++ b/docs/Limitations.md
@@ -19,6 +19,8 @@
        * [Support for joining an existing VM network](#support-for-joining-an-existing-vm-network)
        * [docker --net=host](#docker---nethost)
        * [docker run --link](#docker-run---link)
+    * [Storage limitations](#storage-limitations)
+        * [Kubernetes `volumeMounts.subPaths`](#kubernetes-volumemountssubpaths)
    * [Host resource sharing](#host-resource-sharing)
        * [docker run --privileged](#docker-run---privileged)
 * [Miscellaneous](#miscellaneous)
@@ -26,7 +28,7 @@
 * [Appendices](#appendices)
    * [The constraints challenge](#the-constraints-challenge)

---
+***

 # Overview

@@ -92,7 +94,9 @@ This section lists items that might be possible to fix.
 ### checkpoint and restore

 The runtime does not provide `checkpoint` and `restore` commands. There
-are discussions about using VM save and restore to give [`criu`](https://github.com/checkpoint-restore/criu)-like functionality, which might provide a solution.
+are discussions about using VM save and restore to give us a
+[`criu`](https://github.com/checkpoint-restore/criu)-like functionality,
+which might provide a solution.

 Note that the OCI standard does not specify `checkpoint` and `restore`
 commands.
@@ -216,6 +220,17 @@ Equivalent functionality can be achieved with the newer docker networking comman
 See more documentation at
 [docs.docker.com](https://docs.docker.com/engine/userguide/networking/default_network/dockerlinks/).

+## Storage limitations
+
+### Kubernetes `volumeMounts.subPaths`
+
+Kubernetes `volumeMounts.subPath` is not supported by Kata Containers at the
+moment.
+
+See [this issue](https://github.com/kata-containers/runtime/issues/2812) for more details.
+[Another issue](https://github.com/kata-containers/kata-containers/issues/1728) focuses on the case of `emptyDir`.
+
+
 ## Host resource sharing

 ### docker run --privileged
@@ -224,7 +239,7 @@ Privileged support in Kata is essentially different from `runc` containers.
 Kata does support `docker run --privileged` command, but in this case full access
 to the guest VM is provided in addition to some host access.

-The container runs with elevated capabilities within the guest and is granted 
+The container runs with elevated capabilities within the guest and is granted
 access to guest devices instead of the host devices.
 This is also true with using `securityContext privileged=true` with Kubernetes.

--- a/docs/Release-Process.md
+++ b/docs/Release-Process.md
@@ -18,8 +18,7 @@
 ## Requirements

 - [hub](https://github.com/github/hub)
-
- OBS account with permissions on [`/home:katacontainers`](https://build.opensuse.org/project/subprojects/home:katacontainers)
+  * Using an [application token](https://github.com/settings/tokens) is required for hub.

 - GitHub permissions to push tags and create releases in Kata repositories.

@@ -30,16 +29,12 @@

 ## Release Process

+
 ### Bump all Kata repositories

-  - We have set up a Jenkins job to bump the version in the `VERSION` file in all Kata repositories. Go to the [Jenkins bump-job page](http://jenkins.katacontainers.io/job/release/build) to trigger a new job.
-  - Start a new job with variables for the job passed as:
-     - `BRANCH=<the-branch-you-want-to-bump>`
-     - `NEW_VERSION=<the-new-kata-version>`
-
-     For example, in the case where you want to make a patch release `1.10.2`, the variable `NEW_VERSION` should be `1.10.2` and `BRANCH` should point to  `stable-1.10`. In case of an alpha or release candidate release, `BRANCH` should point to `master` branch.
-
-  Alternatively, you can also bump the repositories using a script in the Kata packaging repo
+  Bump the repositories using a script in the Kata packaging repo, where:
+  - `BRANCH=<the-branch-you-want-to-bump>`
+  - `NEW_VERSION=<the-new-kata-version>`
  ```
  $ cd ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/release
  $ export NEW_VERSION=<the-new-kata-version>
@@ -47,6 +42,23 @@
  $ ./update-repository-version.sh -p "$NEW_VERSION" "$BRANCH"
  ```

+### Point tests repository to stable branch
+
+  If you create a new stable branch, i.e. if your release changes a major or minor version number (not a patch release), then
+  you should modify the `tests` repository to point to that newly created stable branch and not the `main` branch.
+  The objective is that changes in the CI on the main branch will not impact the stable branch.
+
+  In the test directory, change references to the main branch in:
+  * `README.md`
+  * `versions.yaml`
+  * `cmd/github-labels/labels.yaml.in`
+  * `cmd/pmemctl/pmemctl.sh`
+  * `.ci/lib.sh`
+  * `.ci/static-checks.sh`
+
+  See the commits in [the corresponding PR for stable-2.1](https://github.com/kata-containers/tests/pull/3504) for an example of the changes.
+
+
 ### Merge all bump version Pull requests

  - The above step will create a GitHub pull request in the Kata projects. Trigger the CI using `/test` command on each bump Pull request.
@@ -56,7 +68,7 @@
 ### Tag all Kata repositories

  Once all the pull requests to bump versions in all Kata repositories are merged,
-  tag all the repositories as shown below.  
+  tag all the repositories as shown below.
  ```
  $ cd ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/release
  $ git checkout  <kata-branch-to-release>
@@ -66,7 +78,7 @@

 ### Check Git-hub Actions

-  We make use of [GitHub actions](https://github.com/features/actions) in this [file](https://github.com/kata-containers/kata-containers/blob/master/.github/workflows/main.yaml) in the `kata-containers/kata-containers` repository to build and upload release artifacts. This action is auto triggered with the above step when a new tag is pushed to the `kata-containers/kata-conatiners` repository.
+  We make use of [GitHub actions](https://github.com/features/actions) in this [file](https://github.com/kata-containers/kata-containers/blob/main/.github/workflows/main.yaml) in the `kata-containers/kata-containers` repository to build and upload release artifacts. This action is auto triggered with the above step when a new tag is pushed to the `kata-containers/kata-containers` repository.

  Check the [actions status page](https://github.com/kata-containers/kata-containers/actions) to verify all steps in the actions workflow have completed successfully. On success, a static tarball containing Kata release artifacts will be uploaded to the [Release page](https://github.com/kata-containers/kata-containers/releases).

--- a/docs/Stable-Branch-Strategy.md
+++ b/docs/Stable-Branch-Strategy.md
@@ -32,16 +32,16 @@ provides additional information regarding release `99.123.77` in the previous ex
  changing the existing behavior*.

 - When `MAJOR` increases, the new release adds **new features, bug fixes, or
-  both** and which *changes the behavior from the previous release* (incompatible with previous releases).
+  both** and which **changes the behavior from the previous release** (incompatible with previous releases).

  A major release will also likely require a change of the container manager version used, 
-for example Docker\*. Please refer to the release notes for further details.
+for example Containerd or CRI-O. Please refer to the release notes for further details.

 ## Release Strategy

 Any new features added since the last release will be available in the next minor
 release. These will include bug fixes as well. To facilitate a stable user environment, 
-Kata provides stable branch-based releases and a master branch release.
+Kata provides stable branch-based releases and a main branch release.

 ## Stable branch patch criteria

@@ -49,9 +49,10 @@ No new features should be introduced to stable branches.  This is intended to li
 providing only bug and security fixes.

 ## Branch Management
-Kata Containers will maintain two stable release branches in addition to the master branch.
-Once a new MAJOR or MINOR release is created from master, a new stable branch is created for
-the prior MAJOR or MINOR release and the older stable branch is no longer maintained. End of
+Kata Containers will maintain **one** stable release branch, in addition to the main branch, for
+each active major release.
+Once a new MAJOR or MINOR release is created from main, a new stable branch is created for
+the prior MAJOR or MINOR release and the previous stable branch is no longer maintained. End of
 maintenance for a branch is announced on the Kata Containers mailing list.  Users can determine
 the version currently installed by running `kata-runtime kata-env`. It is recommended to use the
 latest stable branch available.
@@ -61,59 +62,59 @@ A couple of examples follow to help clarify this process.
 ### New bug fix introduced

 A bug fix is submitted against the runtime which does not introduce new inter-component dependencies.
-This fix is applied to both the master and stable branches, and there is no need to create a new
+This fix is applied to both the main and stable branches, and there is no need to create a new
 stable branch.

 | Branch | Original version | New version |
 |--|--|--|
-| `master` | `1.3.0-rc0` | `1.3.0-rc1` |
-| `stable-1.2` | `1.2.0` | `1.2.1` |
-| `stable-1.1` | `1.1.2` | `1.1.3` |
+| `main` | `2.3.0-rc0` | `2.3.0-rc1` |
+| `stable-2.2` | `2.2.0` | `2.2.1` |
+| `stable-2.1` | (unmaintained) | (unmaintained) |


 ### New release made feature or change adding new inter-component dependency

 A new feature is introduced, which adds a new inter-component dependency. In this case a new stable
-branch is created (stable-1.3) starting from master and the older stable branch (stable-1.1)
+branch is created (stable-2.3) starting from main and the previous stable branch (stable-2.2)
 is dropped from maintenance.


 | Branch | Original version | New version |
 |--|--|--|
-| `master` | `1.3.0-rc1` | `1.3.0` |
-| `stable-1.3` | N/A| `1.3.0` |
-| `stable-1.2` | `1.2.1` | `1.2.2` |
-| `stable-1.1` | `1.1.3` | (unmaintained) |
+| `main` | `2.3.0-rc1` | `2.3.0` |
+| `stable-2.3` | N/A| `2.3.0` |
+| `stable-2.2` | `2.2.1` | (unmaintained) |
+| `stable-2.1` | (unmaintained) | (unmaintained) |

-Note, the stable-1.1 branch will still exist with tag 1.1.3, but under current plans it is
-not maintained further. The next tag applied to master will be 1.4.0-alpha0. We would then
+Note, the stable-2.2 branch will still exist with tag 2.2.1, but under current plans it is
+not maintained further. The next tag applied to main will be 2.4.0-alpha0. We would then
 create a couple of alpha releases gathering features targeted for that particular release (in
-this case 1.4.0), followed by a release candidate. The release candidate marks a feature freeze.
+this case 2.4.0), followed by a release candidate. The release candidate marks a feature freeze.
 A new stable branch is created for the release candidate. Only bug fixes and any security issues
-are added to the branch going forward until release 1.4.0 is made.
+are added to the branch going forward until release 2.4.0 is made.
   
 ## Backporting Process 

-Development that occurs against the master branch and applicable code commits should also be submitted
+Development that occurs against the main branch and applicable code commits should also be submitted
 against the stable branches. Some guidelines for this process follow::
  1. Only bug and security fixes which do not introduce inter-component dependencies are
 candidates for stable branches. These PRs should be marked with "bug" in GitHub.
-  2. Once a PR is created against master which meets requirement of (1), a comparable one
+  2. Once a PR is created against main which meets requirement of (1), a comparable one
 should also be submitted against the stable branches. It is the responsibility of the submitter
 to apply their pull request against stable, and it is the responsibility of the
 reviewers to help identify stable-candidate pull requests.
 
 ## Continuous Integration Testing

-The test repository is forked to create stable branches from master. Full CI
-runs on each stable and master PR using its respective tests repository branch.
+The test repository is forked to create stable branches from main. Full CI
+runs on each stable and main PR using its respective tests repository branch.

 ### An alternative method for CI testing:

-Ideally, the continuous integration infrastructure will run the same test suite on both master
+Ideally, the continuous integration infrastructure will run the same test suite on both main
 and the stable branches.  When tests are modified or new feature tests are introduced, explicit
 logic should exist within the testing CI to make sure only applicable tests are executed against
-stable and master. While this is not in place currently, it should be considered in the long term.
+stable and main. While this is not in place currently, it should be considered in the long term.

 ## Release Management

@@ -121,7 +122,7 @@ stable and master. While this is not in place currently, it should be considered

 Releases are made every three weeks, which include a GitHub release as
 well as binary packages. These patch releases are made for both stable branches, and a "release candidate"
-for the next `MAJOR` or `MINOR` is created from master. If there are no changes across all the repositories, no
+for the next `MAJOR` or `MINOR` is created from main. If there are no changes across all the repositories, no
 release is created and an announcement is made on the developer mailing list to highlight this.
 If a release is being made, each repository is tagged for this release, regardless
 of whether changes are introduced. The release schedule can be seen on the
@@ -142,10 +143,10 @@ maturity, we have increased the cadence from six weeks to twelve weeks. The rele
 ### Compatibility
 Kata guarantees compatibility between components that are within one minor release of each other. 
 
-This is critical for dependencies which cross between host (runtime, shim, proxy) and
+This is critical for dependencies which cross between host (shimv2 runtime) and
 the guest (hypervisor, rootfs and agent).  For example, consider a cluster with a long-running
-deployment, workload-never-dies, all on Kata version 1.1.3 components. If the operator updates
-the Kata components to the next new minor release (i.e. 1.2.0), we need to guarantee that the 1.2.0
-runtime still communicates with 1.1.3 agent within workload-never-dies.
+deployment, workload-never-dies, all on Kata version 2.1.3 components. If the operator updates
+the Kata components to the next new minor release (i.e. 2.2.0), we need to guarantee that the 2.2.0
+shimv2 runtime still communicates with 2.1.3 agent within workload-never-dies.

 Handling live-update is out of the scope of this document. See this [`kata-runtime` issue](https://github.com/kata-containers/runtime/issues/492) for details.
--- a/docs/how-to/README.md
+++ b/docs/how-to/README.md
@@ -37,3 +37,4 @@
 - [How to use Kata Containers with `virtio-mem`](how-to-use-virtio-mem-with-kata.md)
 - [How to set sandbox Kata Containers configurations with pod annotations](how-to-set-sandbox-config-kata.md)
 - [How to monitor Kata Containers in K8s](how-to-set-prometheus-in-k8s.md)
+- [How to use hotplug memory on arm64 in Kata Containers](how-to-hotplug-memory-arm64.md)
--- a/docs/how-to/how-to-hotplug-memory-arm64.md
+++ b/docs/how-to/how-to-hotplug-memory-arm64.md
@@ -0,0 +1,32 @@
+# How to use memory hotplug feature in Kata Containers on arm64
+
+- [Introduction](#introduction)
+- [Install UEFI ROM](#install-uefi-rom)
+- [Run for test](#run-for-test)
+
+## Introduction
+
+Memory hotplug is a key feature for containers to allocate memory dynamically in deployment.
+As Kata Containers is based on VMs, this feature needs support from both the VMM and the guest kernel. Luckily, it is fully supported by the current default versions of QEMU and the guest kernel used by Kata on arm64. For other VMMs, e.g. Cloud Hypervisor, the enablement work is in progress. Apart from the VMM and guest kernel, memory hotplug also depends on ACPI, which in turn depends on firmware. On x86, you can boot a VM using QEMU with ACPI enabled directly, because it boots up with firmware implicitly. For arm64, however, you need to specify the firmware explicitly. That is to say, if you are ready to run a normal Kata Container on arm64, the only extra thing you need to do is install the UEFI ROM before using the memory hotplug feature.
+
+## Install UEFI ROM
+
+We have offered a helper script for you to install the UEFI ROM. If you have installed Kata normally on your host, you just need to run the script as follows:
+
+```bash
+$ pushd $GOPATH/src/github.com/kata-containers/tests
+$ sudo .ci/aarch64/install_rom_aarch64.sh
+$ popd
+```
+
+## Run for test
+
+Let's test if memory hotplug is ready for Kata after installing the UEFI ROM. Make sure containerd is ready to run Kata before testing.
+
+```bash
+$ sudo ctr image pull docker.io/library/ubuntu:latest
+$ sudo ctr run --runtime io.containerd.run.kata.v2 -t --rm docker.io/library/ubuntu:latest hello sh -c "free -h"
+$ sudo ctr run --runtime io.containerd.run.kata.v2 -t --memory-limit 536870912 --rm docker.io/library/ubuntu:latest hello sh -c "free -h"
+```
+
+Compare the results between the two tests. If the latter is 0.5G larger than the former, memory hotplug is working as expected. Congratulations!
--- a/docs/how-to/how-to-set-sandbox-config-kata.md
+++ b/docs/how-to/how-to-set-sandbox-config-kata.md
@@ -26,6 +26,7 @@ There are several kinds of Kata configurations and they are listed below.
 | `io.katacontainers.config.runtime.disable_new_netns` | `boolean` | determines if a new netns is created for the hypervisor process |
 | `io.katacontainers.config.runtime.internetworking_model` | string| determines how the VM should be connected to the container network interface. Valid values are `macvtap`, `tcfilter` and `none` |
 | `io.katacontainers.config.runtime.sandbox_cgroup_only`| `boolean` | determines if Kata processes are managed only in sandbox cgroup |
+| `io.katacontainers.config.runtime.enable_pprof` | `boolean` | enables Golang `pprof` for `containerd-shim-kata-v2` process |

 ## Agent Options
 | Key | Value Type | Comments |
@@ -60,7 +61,7 @@ There are several kinds of Kata configurations and they are listed below.
 | `io.katacontainers.config.hypervisor.enable_swap` | `boolean` | enable swap of VM memory |
 | `io.katacontainers.config.hypervisor.enable_vhost_user_store` | `boolean` | enable vhost-user storage device (QEMU) |
 | `io.katacontainers.config.hypervisor.enable_virtio_mem` | `boolean` | enable virtio-mem (QEMU) |
-| `io.katacontainers.config.hypervisor.entropy_source` | string| the path to a host source of entropy (`/dev/random`, `/dev/urandom` or real hardware RNG device) |
+| `io.katacontainers.config.hypervisor.entropy_source` (R) | string| the path to a host source of entropy (`/dev/random`, `/dev/urandom` or real hardware RNG device) |
 | `io.katacontainers.config.hypervisor.file_mem_backend` (R) | string | file based memory backend root directory |
 | `io.katacontainers.config.hypervisor.firmware_hash` | string | container firmware SHA-512 hash value |
 | `io.katacontainers.config.hypervisor.firmware` | string | the guest firmware that will run the container VM |
@@ -95,6 +96,8 @@ There are several kinds of Kata configurations and they are listed below.

 In case of CRI-O, all annotations specified in the pod spec are passed down to Kata.

+# containerd Configuration
+
 For containerd, annotations specified in the pod spec are passed down to Kata
 starting with version `1.3.0` of containerd. Additionally, extra configuration is
 needed for containerd, by providing a `pod_annotations` field in the containerd config
@@ -107,11 +110,9 @@ for passing annotations to Kata from containerd:
 $ cat /etc/containerd/config
 ....

-[plugins.cri.containerd.runtimes.kata]
-           runtime_type = "io.containerd.runc.v1"
+         [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.kata]
+           runtime_type = "io.containerd.kata.v2"
           pod_annotations = ["io.katacontainers.*"]
-           [plugins.cri.containerd.runtimes.kata.options]
-             BinaryName = "/usr/bin/kata-runtime"
 ....

 ```
@@ -197,6 +198,7 @@ the configuration entry:
 | Key | Config file entry | Comments |
 |-------| ----- | ----- |
 | `ctlpath`  | `valid_ctlpaths` | Valid paths for `acrnctl` binary |
+| `entropy_source` | `valid_entropy_sources` | Valid entropy sources, e.g. `/dev/random` |
 | `file_mem_backend`  | `valid_file_mem_backends` | Valid locations for the file-based memory backend root directory |
 | `jailer_path`  | `valid_jailer_paths`| Valid paths for the jailer constraining the container VM (Firecracker) |
 | `path`  | `valid_hypervisor_paths` | Valid hypervisors to run the container VM |
--- a/docs/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md
+++ b/docs/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md
@@ -7,9 +7,10 @@
    * [Configure Kubelet to use containerd](#configure-kubelet-to-use-containerd)
    * [Configure HTTP proxy - OPTIONAL](#configure-http-proxy---optional)
 * [Start Kubernetes](#start-kubernetes)
-* [Install a Pod Network](#install-a-pod-network)
+* [Configure Pod Network](#configure-pod-network)
 * [Allow pods to run in the master node](#allow-pods-to-run-in-the-master-node)
-* [Create an untrusted pod using Kata Containers](#create-an-untrusted-pod-using-kata-containers)
+* [Create runtime class for Kata Containers](#create-runtime-class-for-kata-containers)
+* [Run pod in Kata Containers](#run-pod-in-kata-containers)
 * [Delete created pod](#delete-created-pod)

 This document describes how to set up a single-machine Kubernetes (k8s) cluster.
@@ -18,9 +19,6 @@ The Kubernetes cluster will use the
 [CRI containerd plugin](https://github.com/containerd/cri) and
 [Kata Containers](https://katacontainers.io) to launch untrusted workloads.

-For Kata Containers 1.5.0-rc2 and above, we will use `containerd-shim-kata-v2` (short as `shimv2` in this documentation)
-to launch Kata Containers. For the previous version of Kata Containers, the Pods are launched with `kata-runtime`.
-
 ## Requirements

 - Kubernetes, Kubelet, `kubeadm`
@@ -125,43 +123,33 @@ $ sudo systemctl daemon-reload
  $ sudo -E kubectl get pods
  ```

-## Install a Pod Network
+## Configure Pod Network

 A pod network plugin is needed to allow pods to communicate with each other.
+You can find more about CNI plugins from the [Creating a cluster with `kubeadm`](https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/#instructions) guide.

- Install the `flannel` plugin by following the
-  [Using `kubeadm` to Create a Cluster](https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/#instructions)
-  guide, starting from the **Installing a pod network** section.
-
- Create a pod network using flannel
-
-  > **Note:** There is no known way to determine programmatically the best version (commit) to use.
-  > See https://github.com/coreos/flannel/issues/995.
+By default the CNI plugin binaries are installed under `/opt/cni/bin` (in package `kubernetes-cni`), so you only need to create a configuration file for the CNI plugin.

  ```bash
-  $ sudo -E kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
-  ```
+  $ sudo -E mkdir -p /etc/cni/net.d

- Wait for the pod network to become available
-
-  ```bash
-  # number of seconds to wait for pod network to become available
-  $ timeout_dns=420
-
-  $ while [ "$timeout_dns" -gt 0 ]; do
-      if sudo -E kubectl get pods --all-namespaces | grep dns | grep Running; then
-          break
-      fi
-
-      sleep 1s
-      ((timeout_dns--))
-   done
-  ```
-
- Check the pod network is running
-
-  ```bash
-  $ sudo -E kubectl get pods --all-namespaces | grep dns | grep Running && echo "OK" || ( echo "FAIL" && false )
+  $ sudo -E cat > /etc/cni/net.d/10-mynet.conf <<EOF
+  {
+    "cniVersion": "0.2.0",
+    "name": "mynet",
+    "type": "bridge",
+    "bridge": "cni0",
+    "isGateway": true,
+    "ipMasq": true,
+    "ipam": {
+      "type": "host-local",
+      "subnet": "172.19.0.0/24",
+      "routes": [
+        { "dst": "0.0.0.0/0" }
+      ]
+    }
+  }
+  EOF
  ```

 ## Allow pods to run in the master node
@@ -172,24 +160,38 @@ By default, the cluster will not schedule pods in the master node. To enable mas
 $ sudo -E kubectl taint nodes --all node-role.kubernetes.io/master-
 ```

-## Create an untrusted pod using Kata Containers
+## Create runtime class for Kata Containers

 By default, all pods are created with the default runtime configured in CRI containerd plugin.
+From Kubernetes v1.12, users can use [`RuntimeClass`](https://kubernetes.io/docs/concepts/containers/runtime-class/#runtime-class) to specify a different runtime for Pods.

-If a pod has the `io.kubernetes.cri.untrusted-workload` annotation set to `"true"`, the CRI plugin runs the pod with the
+```bash
+$ cat > runtime.yaml <<EOF
+apiVersion: node.k8s.io/v1beta1
+kind: RuntimeClass
+metadata:
+  name: kata
+handler: kata
+EOF
+
+$ sudo -E kubectl apply -f runtime.yaml
+```
+
+## Run pod in Kata Containers
+
+If a pod has the `runtimeClassName` set to `kata`, the CRI plugin runs the pod with the
 [Kata Containers runtime](../../src/runtime/README.md).

- Create an untrusted pod configuration
+- Create a pod configuration that uses the Kata Containers runtime

  ```bash
-  $ cat << EOT | tee nginx-untrusted.yaml
+  $ cat << EOT | tee nginx-kata.yaml
  apiVersion: v1
  kind: Pod
  metadata:
-    name: nginx-untrusted
-    annotations:
-      io.kubernetes.cri.untrusted-workload: "true"
+    name: nginx-kata
  spec:
+    runtimeClassName: kata
    containers:
    - name: nginx
      image: nginx
@@ -197,9 +199,9 @@ If a pod has the `io.kubernetes.cri.untrusted-workload` annotation set to `"true
  EOT
  ```

- Create an untrusted pod
+- Create the pod
  ```bash
-  $ sudo -E kubectl apply -f nginx-untrusted.yaml
+  $ sudo -E kubectl apply -f nginx-kata.yaml
  ```

 - Check pod is running
@@ -216,5 +218,5 @@ If a pod has the `io.kubernetes.cri.untrusted-workload` annotation set to `"true
 ## Delete created pod

 ```bash
-$ sudo -E kubectl delete -f nginx-untrusted.yaml
+$ sudo -E kubectl delete -f nginx-kata.yaml
 ```
--- a/docs/how-to/how-to-use-virtio-mem-with-kata.md
+++ b/docs/how-to/how-to-use-virtio-mem-with-kata.md
@@ -13,26 +13,23 @@ Kata Containers with `virtio-mem` supports memory resize.

 ## Requisites

-Kata Containers with `virtio-mem` requires Linux and the QEMU that support `virtio-mem`.
-The Linux kernel and QEMU upstream version still not support `virtio-mem`.  @davidhildenbrand is working on them.
-Please use following unofficial version of the Linux kernel and QEMU that support `virtio-mem` with Kata Containers.
+Kata Containers only supports `virtio-mem` with QEMU.
+Install and setup Kata Containers as shown [here](../install/README.md).

-The Linux kernel is at https://github.com/davidhildenbrand/linux/tree/virtio-mem-rfc-v4.
-The Linux kernel config that can work with Kata Containers is at https://gist.github.com/teawater/016194ee84748c768745a163d08b0fb9.
-
-The QEMU is at https://github.com/teawater/qemu/tree/kata-virtio-mem. (The original source is at https://github.com/davidhildenbrand/qemu/tree/virtio-mem.  Its base version of QEMU cannot work with Kata Containers.  So merge the commit of `virtio-mem` to upstream QEMU.)
-
-Set Linux and the QEMU that support `virtio-mem` with following line in the Kata Containers QEMU configuration `configuration-qemu.toml`:
-```toml
-[hypervisor.qemu]
-path = "qemu-dir"
-kernel = "vmlinux-dir"
+### With x86_64
+The `virtio-mem` config of the x86_64 Kata Linux kernel is enabled.
+Enable `virtio-mem` as follows:
+```
+$ sudo sed -i -e 's/^#enable_virtio_mem.*$/enable_virtio_mem = true/g' /etc/kata-containers/configuration.toml
 ```

-Enable `virtio-mem` with following line in the Kata Containers configuration:
-```toml
-enable_virtio_mem = true
+### With other architectures
+The `virtio-mem` config of the Kata Linux kernel for other architectures is not enabled.
+You can enable the `virtio-mem` config as follows:
 ```
+CONFIG_VIRTIO_MEM=y
+```
+Then you can build and install the guest kernel image as shown [here](../../tools/packaging/kernel/README.md#build-kata-containers-kernel).

 ## Run a Kata Container utilizing `virtio-mem`

@@ -41,13 +38,35 @@ Use following command to enable memory overcommitment of a Linux kernel.  Becaus
 $ echo 1 | sudo tee /proc/sys/vm/overcommit_memory
 ```

-Use following command start a Kata Container.
+Use the following command to start a Kata Container.
 ```
-$ docker run --rm -it --runtime=kata --name test busybox
+$ pod_yaml=pod.yaml
+$ container_yaml=${REPORT_DIR}/container.yaml
+$ image="quay.io/prometheus/busybox:latest"
+$ cat << EOF > "${pod_yaml}"
+metadata:
+  name: busybox-sandbox1
+EOF
+$ cat << EOF > "${container_yaml}"
+metadata:
+  name: busybox-killed-vmm
+image:
+  image: "$image"
+command:
+- top
+EOF
+$ sudo crictl pull $image
+$ podid=$(sudo crictl runp $pod_yaml)
+$ cid=$(sudo crictl create $podid $container_yaml $pod_yaml)
+$ sudo crictl start $cid
 ```

-Use following command set the memory size of test to default_memory + 512m.
+Use the following command to set the container memory limit to 2g and the memory size of the VM to its default_memory + 2g.
 ```
-$ docker update -m 512m --memory-swap -1 test
+$ sudo crictl update --memory $((2*1024*1024*1024)) $cid
 ```

+Use the following command to set the container memory limit to 1g and the memory size of the VM to its default_memory + 1g.
+```
+$ sudo crictl update --memory $((1*1024*1024*1024)) $cid
+```
--- a/docs/how-to/run-kata-with-k8s.md
+++ b/docs/how-to/run-kata-with-k8s.md
@@ -171,10 +171,10 @@ $ sudo systemctl daemon-reload
 $ sudo systemctl restart kubelet

 # If using CRI-O
-$ sudo kubeadm init --skip-preflight-checks --cri-socket /var/run/crio/crio.sock --pod-network-cidr=10.244.0.0/16
+$ sudo kubeadm init --ignore-preflight-errors=all --cri-socket /var/run/crio/crio.sock --pod-network-cidr=10.244.0.0/16

 # If using CRI-containerd
-$ sudo kubeadm init --skip-preflight-checks --cri-socket /run/containerd/containerd.sock --pod-network-cidr=10.244.0.0/16
+$ sudo kubeadm init --ignore-preflight-errors=all --cri-socket /run/containerd/containerd.sock --pod-network-cidr=10.244.0.0/16

 $ export KUBECONFIG=/etc/kubernetes/admin.conf
 ```
--- a/docs/install/README.md
+++ b/docs/install/README.md
@@ -50,7 +50,7 @@ Kata packages are provided by official distribution repositories for:
 | Distribution (link to installation guide)                | Minimum versions                                                               |
 |----------------------------------------------------------|--------------------------------------------------------------------------------|
 | [CentOS](centos-installation-guide.md)                   | 8                                                                              |
-| [Fedora](fedora-installation-guide.md)                   | 32, Rawhide                                                                    |
+| [Fedora](fedora-installation-guide.md)                   | 34                                                                             |

 > **Note::**
 >
--- a/docs/install/centos-installation-guide.md
+++ b/docs/install/centos-installation-guide.md
@@ -3,15 +3,9 @@
 1. Install the Kata Containers components with the following commands:

   ```bash
+   $ sudo -E dnf install -y centos-release-advanced-virtualization
+   $ sudo -E dnf module disable -y virt:rhel
   $ source /etc/os-release
-   $ cat <<EOF | sudo -E tee /etc/yum.repos.d/advanced-virt.repo
-     [advanced-virt]
-     name=Advanced Virtualization
-     baseurl=http://mirror.centos.org/\$contentdir/\$releasever/virt/\$basearch/advanced-virtualization
-     enabled=1
-     gpgcheck=1
-     skip_if_unavailable=1
-     EOF
   $ cat <<EOF | sudo -E tee /etc/yum.repos.d/kata-containers.repo
     [kata-containers]
     name=Kata Containers
@@ -20,8 +14,7 @@
     gpgcheck=1
     skip_if_unavailable=1
     EOF
-   $ sudo -E dnf module disable -y virt:rhel
-   $ sudo -E dnf install -y kata-runtime
+   $ sudo -E dnf install -y kata-containers
   ```

 2. Decide which container manager to use and select the corresponding link that follows:
--- a/docs/install/fedora-installation-guide.md
+++ b/docs/install/fedora-installation-guide.md
@@ -3,7 +3,7 @@
 1. Install the Kata Containers components with the following commands:

   ```bash
-   $ sudo -E dnf -y install kata-runtime
+   $ sudo -E dnf -y install kata-containers
   ```

 2. Decide which container manager to use and select the corresponding link that follows:
--- a/docs/install/snap-installation-guide.md
+++ b/docs/install/snap-installation-guide.md
@@ -2,9 +2,6 @@

 * [Install Kata Containers](#install-kata-containers)
 * [Configure Kata Containers](#configure-kata-containers)
-* [Integration with non-compatible shim v2 Container Engines](#integration-with-non-compatible-shim-v2-container-engines)
-    * [Integration with Docker](#integration-with-docker)
-    * [Integration with Podman](#integration-with-podman)
 * [Integration with shim v2 Container Engines](#integration-with-shim-v2-container-engines)
 * [Remove Kata Containers snap package](#remove-kata-containers-snap-package)

@@ -14,23 +11,10 @@
 Kata Containers can be installed in any Linux distribution that supports
 [snapd](https://docs.snapcraft.io/installing-snapd).

-> NOTE: From Kata Containers 2.x, only the [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2)
-> is supported, note that some container engines (`docker`, `podman`, etc) may not
-> be able to run Kata Containers 2.x.
-
-Kata Containers 1.x is released through the *stable* channel while Kata Containers
-2.x is available in the *candidate* channel.
-
-Run the following command to install **Kata Containers 1.x**:
+Run the following command to install **Kata Containers**:

 ```sh
-$ sudo snap install kata-containers --classic
-```
-
-Run the following command to install **Kata Containers 2.x**:
-
-```sh
-$ sudo snap install kata-containers --candidate --classic
+$ sudo snap install kata-containers --stable --classic
 ```

 ## Configure Kata Containers
@@ -46,55 +30,6 @@ $ sudo cp /snap/kata-containers/current/usr/share/defaults/kata-containers/confi
 $ $EDITOR /etc/kata-containers/configuration.toml
 ```

-## Integration with non-compatible shim v2 Container Engines
-
-At the time of writing this document, `docker` and `podman` **do not support Kata
-Containers 2.x, therefore Kata Containers 1.x must be used instead.**
-
-The path to the runtime provided by the Kata Containers 1.x snap package is
-`/snap/bin/kata-containers.runtime`, it should be used to run Kata Containers 1.x.
-
-### Integration with Docker
-
-`/etc/docker/daemon.json` is the configuration file for `docker`, use the
-following configuration to add a new runtime (`kata`) to `docker`.
-
-```json
-{
-  "runtimes": {
-    "kata": {
-      "path": "/snap/bin/kata-containers.runtime"
-    }
-  }
-}
-```
-
-Once the above configuration has been applied, use the
-following commands to restart `docker` and run Kata Containers 1.x.
-
-```sh
-$ sudo systemctl restart docker
-$ docker run -ti --runtime kata busybox sh
-```
-
-### Integration with Podman
-
-`/usr/share/containers/containers.conf` is the configuration file for `podman`,
-add the following configuration in the `[engine.runtimes]` section.
-
-```toml
-kata = [
-   "/snap/bin/kata-containers.runtime"
-]
-```
-
-Once the above configuration has been applied, use the following command to run
-Kata Containers 1.x with `podman`
-
-```sh
-$ sudo podman run -ti --runtime kata docker.io/library/busybox sh
-```
-
 ## Integration with shim v2 Container Engines

 The Container engine daemon (`cri-o`, `containerd`, etc) needs to be able to find the
--- a/docs/install/ubuntu-installation-guide.md
+++ b/docs/install/ubuntu-installation-guide.md
@@ -1,15 +0,0 @@
-# Install Kata Containers on Ubuntu
-
-1. Install the Kata Containers components with the following commands:
-
-   ```bash
-   $ ARCH=$(arch)
-   $ BRANCH="${BRANCH:-master}"
-   $ sudo sh -c "echo 'deb http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/xUbuntu_$(lsb_release -rs)/ /' > /etc/apt/sources.list.d/kata-containers.list"
-   $ curl -sL  http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/xUbuntu_$(lsb_release -rs)/Release.key | sudo apt-key add -
-   $ sudo -E apt-get update
-   $ sudo -E apt-get -y install kata-runtime kata-proxy kata-shim
-   ```
-
-2. Decide which container manager to use and select the corresponding link that follows:
-   - [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)
--- a/docs/use-cases/zun_kata.md
+++ b/docs/use-cases/zun_kata.md
@@ -10,9 +10,6 @@ Currently, the instructions are based on the following links:

 - https://docs.openstack.org/zun/latest/admin/clear-containers.html

- ../install/ubuntu-installation-guide.md
-
-
 ## Install Git to use with DevStack

 ```sh
@@ -54,7 +51,7 @@ $ zun delete test

 ## Install Kata Containers

-Follow [these instructions](../install/ubuntu-installation-guide.md)
+Follow [these instructions](../install/README.md)
 to install the Kata Containers components.

 ## Update Docker with new Kata Containers runtime
--- a/src/agent/Cargo.lock
+++ b/src/agent/Cargo.lock
--- a/src/agent/Cargo.toml
+++ b/src/agent/Cargo.toml
@@ -13,8 +13,8 @@ lazy_static = "1.3.0"
 ttrpc = { version = "0.5.0", features = ["async", "protobuf-codec"], default-features = false }
 protobuf = "=2.14.0"
 libc = "0.2.58"
-nix = "0.17.0"
-prctl = "1.0.0"
+nix = "0.21.0"
+capctl = "0.2.0"
 serde_json = "1.0.39"
 scan_fmt = "0.2.3"
 scopeguard = "1.0.0"
@@ -24,8 +24,11 @@ async-trait = "0.1.42"
 tokio = { version = "1.2.0", features = ["rt", "rt-multi-thread", "sync", "macros", "io-util", "time", "signal", "io-std", "process", "fs"] }
 futures = "0.3.12"
 netlink-sys = { version = "0.6.0", features = ["tokio_socket",]}
-tokio-vsock = "0.3.0"
-rtnetlink = "0.7.0"
+tokio-vsock = "0.3.1"
+# Because the author has no time to maintain the crate, we switch the dependency to GitHub.
+# Once a new version is released on crates.io, we will switch it back.
+# https://github.com/little-dude/netlink/issues/161
+rtnetlink = { git = "https://github.com/little-dude/netlink", rev = "a9367bc4700496ddebc088110c28f40962923326" }
 netlink-packet-utils = "0.4.0"
 ipnetwork = "0.17.0"

@@ -40,12 +43,20 @@ slog-scope = "4.1.2"
 slog-stdlog = "4.0.0"
 log = "0.4.11"

-# for testing
-tempfile = "3.1.0"
 prometheus = { version = "0.9.0", features = ["process"] }
 procfs = "0.7.9"
 anyhow = "1.0.32"
-cgroups = { package = "cgroups-rs", version = "0.2.2" }
+cgroups = { package = "cgroups-rs", version = "0.2.5" }
+
+# Tracing
+tracing = "0.1.26"
+tracing-subscriber = "0.2.18"
+tracing-opentelemetry = "0.13.0"
+opentelemetry = "0.14.0"
+vsock-exporter = { path = "vsock-exporter" }
+
+[dev-dependencies]
+tempfile = "3.1.0"

 [workspace]
 members = [
--- a/src/agent/VERSION
+++ b/src/agent/VERSION
@@ -1 +0,0 @@
-2.0.0
--- a/src/agent/VERSION
+++ b/src/agent/VERSION
@@ -0,0 +1 @@
+../../VERSION
--- a/src/agent/kata-agent.service.in
+++ b/src/agent/kata-agent.service.in
@@ -15,7 +15,7 @@ Wants=kata-containers.target
 StandardOutput=tty
 Type=simple
 ExecStart=@BINDIR@/@AGENT_NAME@
-LimitNOFILE=infinity
+LimitNOFILE=1048576
 # ExecStop is required for static agent tracing; in all other scenarios
 # the runtime handles shutting down the VM.
 ExecStop=/bin/sync ; /usr/bin/systemctl --force poweroff
--- a/src/agent/oci/src/lib.rs
+++ b/src/agent/oci/src/lib.rs
@@ -8,7 +8,7 @@ extern crate serde;
 extern crate serde_derive;
 extern crate serde_json;

-use libc::mode_t;
+use libc::{self, mode_t};
 use std::collections::HashMap;

 mod serialize;
@@ -27,6 +27,10 @@ where
    *d == T::default()
 }

+fn default_seccomp_errno() -> u32 {
+    libc::EPERM as u32
+}
+
 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
 pub struct Spec {
    #[serde(
@@ -54,7 +58,7 @@ pub struct Spec {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub windows: Option<Windows<String>>,
    #[serde(skip_serializing_if = "Option::is_none")]
-    pub vm: Option<VM>,
+    pub vm: Option<Vm>,
 }

 impl Spec {
@@ -67,7 +71,7 @@ impl Spec {
    }
 }

-pub type LinuxRlimit = POSIXRlimit;
+pub type LinuxRlimit = PosixRlimit;

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
 pub struct Process {
@@ -89,7 +93,7 @@ pub struct Process {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub capabilities: Option<LinuxCapabilities>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
-    pub rlimits: Vec<POSIXRlimit>,
+    pub rlimits: Vec<PosixRlimit>,
    #[serde(default, rename = "noNewPrivileges")]
    pub no_new_privileges: bool,
    #[serde(
@@ -195,9 +199,9 @@ pub struct Hooks {
 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
 pub struct Linux {
    #[serde(default, rename = "uidMappings", skip_serializing_if = "Vec::is_empty")]
-    pub uid_mappings: Vec<LinuxIDMapping>,
+    pub uid_mappings: Vec<LinuxIdMapping>,
    #[serde(default, rename = "gidMappings", skip_serializing_if = "Vec::is_empty")]
-    pub gid_mappings: Vec<LinuxIDMapping>,
+    pub gid_mappings: Vec<LinuxIdMapping>,
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub sysctl: HashMap<String, String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
@@ -257,7 +261,7 @@ pub const UTSNAMESPACE: &str = "uts";
 pub const CGROUPNAMESPACE: &str = "cgroup";

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct LinuxIDMapping {
+pub struct LinuxIdMapping {
    #[serde(default, rename = "containerID")]
    pub container_id: u32,
    #[serde(default, rename = "hostID")]
@@ -267,7 +271,7 @@ pub struct LinuxIDMapping {
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct POSIXRlimit {
+pub struct PosixRlimit {
    #[serde(default)]
    pub r#type: String,
    #[serde(default)]
@@ -293,7 +297,7 @@ pub struct LinuxInterfacePriority {
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct LinuxBlockIODevice {
+pub struct LinuxBlockIoDevice {
    #[serde(default)]
    pub major: i64,
    #[serde(default)]
@@ -303,7 +307,7 @@ pub struct LinuxBlockIODevice {
 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
 pub struct LinuxWeightDevice {
    #[serde(flatten)]
-    pub blk: LinuxBlockIODevice,
+    pub blk: LinuxBlockIoDevice,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub weight: Option<u16>,
    #[serde(
@@ -317,13 +321,13 @@ pub struct LinuxWeightDevice {
 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
 pub struct LinuxThrottleDevice {
    #[serde(flatten)]
-    pub blk: LinuxBlockIODevice,
+    pub blk: LinuxBlockIoDevice,
    #[serde(default)]
    pub rate: u64,
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct LinuxBlockIO {
+pub struct LinuxBlockIo {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub weight: Option<u16>,
    #[serde(
@@ -387,7 +391,7 @@ pub struct LinuxMemory {
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct LinuxCPU {
+pub struct LinuxCpu {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub shares: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
@@ -449,11 +453,11 @@ pub struct LinuxResources {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub memory: Option<LinuxMemory>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub cpu: Option<LinuxCPU>,
+    pub cpu: Option<LinuxCpu>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub pids: Option<LinuxPids>,
    #[serde(skip_serializing_if = "Option::is_none", rename = "blockIO")]
-    pub block_io: Option<LinuxBlockIO>,
+    pub block_io: Option<LinuxBlockIo>,
    #[serde(
        default,
        skip_serializing_if = "Vec::is_empty",
@@ -513,7 +517,7 @@ pub struct Solaris {
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub anet: Vec<SolarisAnet>,
    #[serde(default, skip_serializing_if = "Option::is_none", rename = "cappedCPU")]
-    pub capped_cpu: Option<SolarisCappedCPU>,
+    pub capped_cpu: Option<SolarisCappedCpu>,
    #[serde(
        default,
        skip_serializing_if = "Option::is_none",
@@ -523,7 +527,7 @@ pub struct Solaris {
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct SolarisCappedCPU {
+pub struct SolarisCappedCpu {
    #[serde(default, skip_serializing_if = "String::is_empty")]
    pub ncpus: String,
 }
@@ -601,7 +605,7 @@ pub struct WindowsResources {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub memory: Option<WindowsMemoryResources>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub cpu: Option<WindowsCPUResources>,
+    pub cpu: Option<WindowsCpuResources>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub storage: Option<WindowsStorageResources>,
 }
@@ -613,7 +617,7 @@ pub struct WindowsMemoryResources {
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct WindowsCPUResources {
+pub struct WindowsCpuResources {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub count: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
@@ -671,14 +675,14 @@ pub struct WindowsHyperV {
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct VM {
-    pub hypervisor: VMHypervisor,
-    pub kernel: VMKernel,
-    pub image: VMImage,
+pub struct Vm {
+    pub hypervisor: VmHypervisor,
+    pub kernel: VmKernel,
+    pub image: VmImage,
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct VMHypervisor {
+pub struct VmHypervisor {
    #[serde(default)]
    pub path: String,
    #[serde(default, skip_serializing_if = "String::is_empty")]
@@ -686,7 +690,7 @@ pub struct VMHypervisor {
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct VMKernel {
+pub struct VmKernel {
    #[serde(default)]
    pub path: String,
    #[serde(default, skip_serializing_if = "String::is_empty")]
@@ -696,7 +700,7 @@ pub struct VMKernel {
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct VMImage {
+pub struct VmImage {
    #[serde(default)]
    pub path: String,
    #[serde(default)]
@@ -710,6 +714,8 @@ pub struct LinuxSeccomp {
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub architectures: Vec<Arch>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub flags: Vec<LinuxSeccompFlag>,
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub syscalls: Vec<LinuxSyscall>,
 }

@@ -733,14 +739,20 @@ pub const ARCHS390: &str = "SCMP_ARCH_S390";
 pub const ARCHS390X: &str = "SCMP_ARCH_S390X";
 pub const ARCHPARISC: &str = "SCMP_ARCH_PARISC";
 pub const ARCHPARISC64: &str = "SCMP_ARCH_PARISC64";
+pub const ARCHRISCV64: &str = "SCMP_ARCH_RISCV64";
+
+pub type LinuxSeccompFlag = String;

 pub type LinuxSeccompAction = String;

 pub const ACTKILL: &str = "SCMP_ACT_KILL";
+pub const ACTKILLPROCESS: &str = "SCMP_ACT_KILL_PROCESS";
+pub const ACTKILLTHREAD: &str = "SCMP_ACT_KILL_THREAD";
 pub const ACTTRAP: &str = "SCMP_ACT_TRAP";
 pub const ACTERRNO: &str = "SCMP_ACT_ERRNO";
 pub const ACTTRACE: &str = "SCMP_ACT_TRACE";
 pub const ACTALLOW: &str = "SCMP_ACT_ALLOW";
+pub const ACTLOG: &str = "SCMP_ACT_LOG";

 pub type LinuxSeccompOperator = String;

@@ -770,6 +782,8 @@ pub struct LinuxSyscall {
    pub names: Vec<String>,
    #[serde(default, skip_serializing_if = "String::is_empty")]
    pub action: LinuxSeccompAction,
+    #[serde(default = "default_seccomp_errno", rename = "errnoRet")]
+    pub errno_ret: u32,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub args: Vec<LinuxSeccompArg>,
 }
@@ -787,11 +801,11 @@ pub struct LinuxIntelRdt {
 #[derive(Debug, Serialize, Deserialize, Copy, Clone, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum ContainerState {
-    CREATING,
-    CREATED,
-    RUNNING,
-    STOPPED,
-    PAUSED,
+    Creating,
+    Created,
+    Running,
+    Stopped,
+    Paused,
 }

 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
@@ -832,7 +846,7 @@ mod tests {
        let expected = State {
            version: "0.2.0".to_string(),
            id: "oci-container1".to_string(),
-            status: ContainerState::RUNNING,
+            status: ContainerState::Running,
            pid: 4422,
            bundle: "/containers/redis".to_string(),
            annotations: [("myKey".to_string(), "myValue".to_string())]
@@ -1257,12 +1271,12 @@ mod tests {
                    ambient: vec!["CAP_NET_BIND_SERVICE".to_string()],
                }),
                rlimits: vec![
-                    crate::POSIXRlimit {
+                    crate::PosixRlimit {
                        r#type: "RLIMIT_CORE".to_string(),
                        hard: 1024,
                        soft: 1024,
                    },
-                    crate::POSIXRlimit {
+                    crate::PosixRlimit {
                        r#type: "RLIMIT_NOFILE".to_string(),
                        hard: 1024,
                        soft: 1024,
@@ -1394,12 +1408,12 @@ mod tests {
            .cloned()
            .collect(),
            linux: Some(crate::Linux {
-                uid_mappings: vec![crate::LinuxIDMapping {
+                uid_mappings: vec![crate::LinuxIdMapping {
                    container_id: 0,
                    host_id: 1000,
                    size: 32000,
                }],
-                gid_mappings: vec![crate::LinuxIDMapping {
+                gid_mappings: vec![crate::LinuxIdMapping {
                    container_id: 0,
                    host_id: 1000,
                    size: 32000,
@@ -1444,7 +1458,7 @@ mod tests {
                        swappiness: Some(0),
                        disable_oom_killer: Some(false),
                    }),
-                    cpu: Some(crate::LinuxCPU {
+                    cpu: Some(crate::LinuxCpu {
                        shares: Some(1024),
                        quota: Some(1000000),
                        period: Some(500000),
@@ -1454,17 +1468,17 @@ mod tests {
                        mems: "0-7".to_string(),
                    }),
                    pids: Some(crate::LinuxPids { limit: 32771 }),
-                    block_io: Some(crate::LinuxBlockIO {
+                    block_io: Some(crate::LinuxBlockIo {
                        weight: Some(10),
                        leaf_weight: Some(10),
                        weight_device: vec![
                            crate::LinuxWeightDevice {
-                                blk: crate::LinuxBlockIODevice { major: 8, minor: 0 },
+                                blk: crate::LinuxBlockIoDevice { major: 8, minor: 0 },
                                weight: Some(500),
                                leaf_weight: Some(300),
                            },
                            crate::LinuxWeightDevice {
-                                blk: crate::LinuxBlockIODevice {
+                                blk: crate::LinuxBlockIoDevice {
                                    major: 8,
                                    minor: 16,
                                },
@@ -1473,13 +1487,13 @@ mod tests {
                            },
                        ],
                        throttle_read_bps_device: vec![crate::LinuxThrottleDevice {
-                            blk: crate::LinuxBlockIODevice { major: 8, minor: 0 },
+                            blk: crate::LinuxBlockIoDevice { major: 8, minor: 0 },
                            rate: 600,
                        }],
                        throttle_write_bps_device: vec![],
                        throttle_read_iops_device: vec![],
                        throttle_write_iops_device: vec![crate::LinuxThrottleDevice {
-                            blk: crate::LinuxBlockIODevice {
+                            blk: crate::LinuxBlockIoDevice {
                                major: 8,
                                minor: 16,
                            },
@@ -1565,9 +1579,11 @@ mod tests {
                seccomp: Some(crate::LinuxSeccomp {
                    default_action: "SCMP_ACT_ALLOW".to_string(),
                    architectures: vec!["SCMP_ARCH_X86".to_string(), "SCMP_ARCH_X32".to_string()],
+                    flags: vec![],
                    syscalls: vec![crate::LinuxSyscall {
                        names: vec!["getcwd".to_string(), "chmod".to_string()],
                        action: "SCMP_ACT_ERRNO".to_string(),
+                        errno_ret: crate::default_seccomp_errno(),
                        args: vec![],
                    }],
                }),
--- a/src/agent/protocols/hack/update-generated-proto.sh
+++ b/src/agent/protocols/hack/update-generated-proto.sh
@@ -65,7 +65,7 @@ $GOPATH/src/github.com/kata-containers/kata-containers/src/agent/protocols/proto
 }

 if [ "$(basename $(pwd))" != "agent" ]; then
-	die "Please go to directory of protocols before execute this shell"
+	die "Please go to root directory of agent before execute this shell"
 fi

 # Protocol buffer files required to generate golang/rust bindings.
--- a/src/agent/protocols/protos/agent.proto
+++ b/src/agent/protocols/protos/agent.proto
@@ -32,7 +32,6 @@ service AgentService {
 	rpc ExecProcess(ExecProcessRequest) returns (google.protobuf.Empty);
 	rpc SignalProcess(SignalProcessRequest) returns (google.protobuf.Empty);
 	rpc WaitProcess(WaitProcessRequest) returns (WaitProcessResponse); // wait & reap like waitpid(2)
-	rpc ListProcesses(ListProcessesRequest) returns (ListProcessesResponse);
 	rpc UpdateContainer(UpdateContainerRequest) returns (google.protobuf.Empty);
 	rpc StatsContainer(StatsContainerRequest) returns (StatsContainerResponse);
 	rpc PauseContainer(PauseContainerRequest) returns (google.protobuf.Empty);
@@ -126,18 +125,6 @@ message WaitProcessResponse {
 	int32 status = 1;
 }

-// ListProcessesRequest contains the options used to list running processes inside the container
-message ListProcessesRequest {
-	string container_id = 1;
-	string format = 2;
-	repeated string args = 3;
-}
-
-// ListProcessesResponse represents the list of running processes inside the container
-message ListProcessesResponse {
-	bytes process_list = 1;
-}
-
 message UpdateContainerRequest {
 	string container_id = 1;
 	LinuxResources resources = 2;
--- a/src/agent/protocols/protos/oci.proto
+++ b/src/agent/protocols/protos/oci.proto
@@ -441,7 +441,8 @@ message LinuxInterfacePriority {
 message LinuxSeccomp {
 	string DefaultAction = 1;
 	repeated string Architectures = 2;
-	repeated LinuxSyscall Syscalls = 3  [(gogoproto.nullable) = false];
+	repeated string Flags = 3;
+	repeated LinuxSyscall Syscalls = 4  [(gogoproto.nullable) = false];
 }

 message LinuxSeccompArg {
@@ -454,7 +455,10 @@ message LinuxSeccompArg {
 message LinuxSyscall {
 	repeated string Names = 1;
 	string Action = 2;
-	repeated LinuxSeccompArg Args = 3  [(gogoproto.nullable) = false];
+	oneof ErrnoRet {
+		uint32 errnoret = 3;
+	}
+	repeated LinuxSeccompArg Args = 4  [(gogoproto.nullable) = false];
 }

 message LinuxIntelRdt {
--- a/src/agent/rustjail/Cargo.toml
+++ b/src/agent/rustjail/Cargo.toml
@@ -11,9 +11,9 @@ serde_derive = "1.0.91"
 oci = { path = "../oci" }
 protocols = { path ="../protocols" }
 caps = "0.5.0"
-nix = "0.17.0"
+nix = "0.21.0"
 scopeguard = "1.0.0"
-prctl = "1.0.0"
+capctl = "0.2.0"
 lazy_static = "1.3.0"
 libc = "0.2.58"
 protobuf = "=2.14.0"
@@ -23,8 +23,7 @@ scan_fmt = "0.2"
 regex = "1.1"
 path-absolutize = "1.2.0"
 anyhow = "1.0.32"
-cgroups = { package = "cgroups-rs", version = "0.2.1" }
-tempfile = "3.1.0"
+cgroups = { package = "cgroups-rs", version = "0.2.5" }
 rlimit = "0.5.3"

 tokio = { version = "1.2.0", features = ["sync", "io-util", "process", "time", "macros"] }
@@ -34,3 +33,4 @@ inotify = "0.9.2"

 [dev-dependencies]
 serial_test = "0.5.0"
+tempfile = "3.1.0"
--- a/src/agent/rustjail/src/cgroups/fs/mod.rs
+++ b/src/agent/rustjail/src/cgroups/fs/mod.rs
@@ -24,7 +24,7 @@ use anyhow::{anyhow, Context, Result};
 use libc::{self, pid_t};
 use nix::errno::Errno;
 use oci::{
-    LinuxBlockIO, LinuxCPU, LinuxDevice, LinuxDeviceCgroup, LinuxHugepageLimit, LinuxMemory,
+    LinuxBlockIo, LinuxCpu, LinuxDevice, LinuxDeviceCgroup, LinuxHugepageLimit, LinuxMemory,
    LinuxNetwork, LinuxPids, LinuxResources,
 };

@@ -272,7 +272,7 @@ fn set_hugepages_resources(

 fn set_block_io_resources(
    _cg: &cgroups::Cgroup,
-    blkio: &LinuxBlockIO,
+    blkio: &LinuxBlockIo,
    res: &mut cgroups::Resources,
 ) {
    info!(sl!(), "cgroup manager set block io");
@@ -302,7 +302,7 @@ fn set_block_io_resources(
        build_blk_io_device_throttle_resource(&blkio.throttle_write_iops_device);
 }

-fn set_cpu_resources(cg: &cgroups::Cgroup, cpu: &LinuxCPU) -> Result<()> {
+fn set_cpu_resources(cg: &cgroups::Cgroup, cpu: &LinuxCpu) -> Result<()> {
    info!(sl!(), "cgroup manager set cpu");

    let cpuset_controller: &CpuSetController = cg.controller_of().unwrap();
@@ -349,14 +349,34 @@ fn set_memory_resources(cg: &cgroups::Cgroup, memory: &LinuxMemory, update: bool
        mem_controller.set_kmem_limit(-1)?;
    }

-    set_resource!(mem_controller, set_limit, memory, limit);
-    set_resource!(mem_controller, set_soft_limit, memory, reservation);
-    set_resource!(mem_controller, set_kmem_limit, memory, kernel);
-    set_resource!(mem_controller, set_tcp_limit, memory, kernel_tcp);
+    // If the memory limit is being set to -1 (unlimited memory),
+    // swap must also be set to -1.
+    let mut swap = memory.swap.unwrap_or(0);
+    if memory.limit == Some(-1) {
+        swap = -1;
+    }

-    if let Some(swap) = memory.swap {
-        // set memory swap
-        let swap = if cg.v2() {
+    if memory.limit.is_some() && swap != 0 {
+        let memstat = get_memory_stats(cg)
+            .into_option()
+            .ok_or_else(|| anyhow!("failed to get the cgroup memory stats"))?;
+        let memusage = memstat.get_usage();
+
+        // When updating the memory limit, the kernel checks the current memory limit
+        // against the new swap setting. If the current memory limit is larger than
+        // the new swap, set the limit first, otherwise the kernel would complain and
+        // refuse to set it; on the other hand, if the current memory limit is smaller than
+        // the new swap, then we should set the swap first and then set the memory limit.
+        if swap == -1 || memusage.get_limit() < swap as u64 {
+            mem_controller.set_memswap_limit(swap)?;
+            set_resource!(mem_controller, set_limit, memory, limit);
+        } else {
+            set_resource!(mem_controller, set_limit, memory, limit);
+            mem_controller.set_memswap_limit(swap)?;
+        }
+    } else {
+        set_resource!(mem_controller, set_limit, memory, limit);
+        swap = if cg.v2() {
            convert_memory_swap_to_v2_value(swap, memory.limit.unwrap_or(0))?
        } else {
            swap
@@ -366,6 +386,10 @@ fn set_memory_resources(cg: &cgroups::Cgroup, memory: &LinuxMemory, update: bool
        }
    }

+    set_resource!(mem_controller, set_soft_limit, memory, reservation);
+    set_resource!(mem_controller, set_kmem_limit, memory, kernel);
+    set_resource!(mem_controller, set_tcp_limit, memory, kernel_tcp);
+
    if let Some(swappiness) = memory.swappiness {
        if (0..=100).contains(&swappiness) {
            mem_controller.set_swappiness(swappiness as u64)?;
@@ -489,63 +513,61 @@ lazy_static! {
    };

    pub static ref DEFAULT_ALLOWED_DEVICES: Vec<LinuxDeviceCgroup> = {
-        let mut v = Vec::new();
+        vec![
+            // all mknod to all char devices
+            LinuxDeviceCgroup {
+                allow: true,
+                r#type: "c".to_string(),
+                major: Some(WILDCARD),
+                minor: Some(WILDCARD),
+                access: "m".to_string(),
+            },

-        // all mknod to all char devices
-        v.push(LinuxDeviceCgroup {
-            allow: true,
-            r#type: "c".to_string(),
-            major: Some(WILDCARD),
-            minor: Some(WILDCARD),
-            access: "m".to_string(),
-        });
+            // all mknod to all block devices
+            LinuxDeviceCgroup {
+                allow: true,
+                r#type: "b".to_string(),
+                major: Some(WILDCARD),
+                minor: Some(WILDCARD),
+                access: "m".to_string(),
+            },

-        // all mknod to all block devices
-        v.push(LinuxDeviceCgroup {
-            allow: true,
-            r#type: "b".to_string(),
-            major: Some(WILDCARD),
-            minor: Some(WILDCARD),
-            access: "m".to_string(),
-        });
+            // all read/write/mknod to char device /dev/console
+            LinuxDeviceCgroup {
+                allow: true,
+                r#type: "c".to_string(),
+                major: Some(5),
+                minor: Some(1),
+                access: "rwm".to_string(),
+            },

-        // all read/write/mknod to char device /dev/console
-        v.push(LinuxDeviceCgroup {
-            allow: true,
-            r#type: "c".to_string(),
-            major: Some(5),
-            minor: Some(1),
-            access: "rwm".to_string(),
-        });
+            // all read/write/mknod to char device /dev/pts/<N>
+            LinuxDeviceCgroup {
+                allow: true,
+                r#type: "c".to_string(),
+                major: Some(136),
+                minor: Some(WILDCARD),
+                access: "rwm".to_string(),
+            },

-        // all read/write/mknod to char device /dev/pts/<N>
-        v.push(LinuxDeviceCgroup {
-            allow: true,
-            r#type: "c".to_string(),
-            major: Some(136),
-            minor: Some(WILDCARD),
-            access: "rwm".to_string(),
-        });
+            // all read/write/mknod to char device /dev/ptmx
+            LinuxDeviceCgroup {
+                allow: true,
+                r#type: "c".to_string(),
+                major: Some(5),
+                minor: Some(2),
+                access: "rwm".to_string(),
+            },

-        // all read/write/mknod to char device /dev/ptmx
-        v.push(LinuxDeviceCgroup {
-            allow: true,
-            r#type: "c".to_string(),
-            major: Some(5),
-            minor: Some(2),
-            access: "rwm".to_string(),
-        });
-
-        // all read/write/mknod to char device /dev/net/tun
-        v.push(LinuxDeviceCgroup {
-            allow: true,
-            r#type: "c".to_string(),
-            major: Some(10),
-            minor: Some(200),
-            access: "rwm".to_string(),
-        });
-
-        v
+            // all read/write/mknod to char device /dev/net/tun
+            LinuxDeviceCgroup {
+                allow: true,
+                r#type: "c".to_string(),
+                major: Some(10),
+                minor: Some(200),
+                access: "rwm".to_string(),
+            },
+        ]
    };
 }

--- a/src/agent/rustjail/src/cgroups/notifier.rs
+++ b/src/agent/rustjail/src/cgroups/notifier.rs
@@ -8,7 +8,7 @@ use eventfd::{eventfd, EfdFlags};
 use nix::sys::eventfd;
 use std::fs::{self, File};
 use std::os::unix::io::{AsRawFd, FromRawFd};
-use std::path::{Path, PathBuf};
+use std::path::Path;

 use crate::pipestream::PipeStream;
 use futures::StreamExt as _;
@@ -35,7 +35,7 @@ pub async fn notify_oom(cid: &str, cg_dir: String) -> Result<Receiver<String>> {
 // Flat keyed file format:
 //   KEY0 VAL0\n
 //   KEY1 VAL1\n
-fn get_value_from_cgroup(path: &PathBuf, key: &str) -> Result<i64> {
+fn get_value_from_cgroup(path: &Path, key: &str) -> Result<i64> {
    let content = fs::read_to_string(path)?;
    info!(
        sl!(),
@@ -117,12 +117,12 @@ async fn register_memory_event_v2(
                    return;
                }
            }
-        }

-        // When a cgroup is destroyed, an event is sent to eventfd.
-        // So if the control path is gone, return instead of notifying.
-        if !Path::new(&event_control_path).exists() {
-            return;
+            // When a cgroup is destroyed, an event is sent to eventfd.
+            // So if the control path is gone, return instead of notifying.
+            if !Path::new(&event_control_path).exists() {
+                return;
+            }
        }
    });

--- a/src/agent/rustjail/src/configs/device.rs
+++ b/src/agent/rustjail/src/configs/device.rs
@@ -1,56 +0,0 @@
-// Copyright (c) 2019 Ant Financial
-//
-// SPDX-License-Identifier: Apache-2.0
-//
-
-use libc::*;
-use serde;
-#[macro_use]
-use serde_derive;
-use serde_json;
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Device {
-    #[serde(default)]
-    r#type: char,
-    #[serde(default)]
-    path: String,
-    #[serde(default)]
-    major: i64,
-    #[serde(default)]
-    minor: i64,
-    #[serde(default)]
-    permissions: String,
-    #[serde(default)]
-    file_mode: mode_t,
-    #[serde(default)]
-    uid: i32,
-    #[serde(default)]
-    gid: i32,
-    #[serde(default)]
-    allow: bool,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct BlockIODevice {
-    #[serde(default)]
-    major: i64,
-    #[serde(default)]
-    minor: i64,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct WeightDevice {
-    block: BlockIODevice,
-    #[serde(default)]
-    weight: u16,
-    #[serde(default, rename = "leafWeight")]
-    leaf_weight: u16,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct ThrottleDevice {
-    block: BlockIODevice,
-    #[serde(default)]
-    rate: u64,
-}
--- a/src/agent/rustjail/src/configs/mod.rs
+++ b/src/agent/rustjail/src/configs/mod.rs
@@ -1,368 +0,0 @@
-// Copyright (c) 2019 Ant Financial
-//
-// SPDX-License-Identifier: Apache-2.0
-//
-
-use serde;
-#[macro_use]
-use serde_derive;
-use serde_json;
-
-use protocols::oci::State as OCIState;
-
-use std::collections::HashMap;
-use std::fmt;
-use std::path::PathBuf;
-use std::time::Duration;
-
-use nix::unistd;
-
-use self::device::{Device, ThrottleDevice, WeightDevice};
-use self::namespaces::Namespaces;
-use crate::specconv::CreateOpts;
-
-pub mod device;
-pub mod namespaces;
-pub mod validator;
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Rlimit {
-    #[serde(default)]
-    r#type: i32,
-    #[serde(default)]
-    hard: i32,
-    #[serde(default)]
-    soft: i32,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct IDMap {
-    #[serde(default)]
-    container_id: i32,
-    #[serde(default)]
-    host_id: i32,
-    #[serde(default)]
-    size: i32,
-}
-
-type Action = i32;
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Seccomp {
-    #[serde(default)]
-    default_action: Action,
-    #[serde(default)]
-    architectures: Vec<String>,
-    #[serde(default)]
-    syscalls: Vec<Syscall>,
-}
-
-type Operator = i32;
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Arg {
-    #[serde(default)]
-    index: u32,
-    #[serde(default)]
-    value: u64,
-    #[serde(default)]
-    value_two: u64,
-    #[serde(default)]
-    op: Operator,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Syscall {
-    #[serde(default, skip_serializing_if = "String::is_empty")]
-    name: String,
-    #[serde(default)]
-    action: Action,
-    #[serde(default, skip_serializing_if = "Vec::is_empty")]
-    args: Vec<Arg>,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Config<'a> {
-    #[serde(default)]
-    no_pivot_root: bool,
-    #[serde(default)]
-    parent_death_signal: i32,
-    #[serde(default)]
-    rootfs: String,
-    #[serde(default)]
-    readonlyfs: bool,
-    #[serde(default, rename = "rootPropagation")]
-    root_propagation: i32,
-    #[serde(default)]
-    mounts: Vec<Mount>,
-    #[serde(default)]
-    devices: Vec<Device>,
-    #[serde(default)]
-    mount_label: String,
-    #[serde(default)]
-    hostname: String,
-    #[serde(default)]
-    namespaces: Namespaces,
-    #[serde(default)]
-    capabilities: Option<Capabilities>,
-    #[serde(default)]
-    networks: Vec<Network>,
-    #[serde(default)]
-    routes: Vec<Route>,
-    #[serde(default)]
-    cgroups: Option<Cgroup<'a>>,
-    #[serde(default, skip_serializing_if = "String::is_empty")]
-    apparmor_profile: String,
-    #[serde(default, skip_serializing_if = "String::is_empty")]
-    process_label: String,
-    #[serde(default, skip_serializing_if = "Vec::is_empty")]
-    rlimits: Vec<Rlimit>,
-    #[serde(default)]
-    oom_score_adj: Option<i32>,
-    #[serde(default)]
-    uid_mappings: Vec<IDMap>,
-    #[serde(default)]
-    gid_mappings: Vec<IDMap>,
-    #[serde(default)]
-    mask_paths: Vec<String>,
-    #[serde(default)]
-    readonly_paths: Vec<String>,
-    #[serde(default)]
-    sysctl: HashMap<String, String>,
-    #[serde(default)]
-    seccomp: Option<Seccomp>,
-    #[serde(default)]
-    no_new_privileges: bool,
-    hooks: Option<Hooks>,
-    #[serde(default)]
-    version: String,
-    #[serde(default)]
-    labels: Vec<String>,
-    #[serde(default)]
-    no_new_keyring: bool,
-    #[serde(default)]
-    intel_rdt: Option<IntelRdt>,
-    #[serde(default)]
-    rootless_euid: bool,
-    #[serde(default)]
-    rootless_cgroups: bool,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Hooks {
-    prestart: Vec<Box<Hook>>,
-    poststart: Vec<Box<Hook>>,
-    poststop: Vec<Box<Hook>>,
-}
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Capabilities {
-    bounding: Vec<String>,
-    effective: Vec<String>,
-    inheritable: Vec<String>,
-    permitted: Vec<String>,
-    ambient: Vec<String>,
-}
-
-pub trait Hook {
-    fn run(&self, state: &OCIState) -> Result<()>;
-}
-
-pub struct FuncHook {
-    // run: fn(&OCIState) -> Result<()>,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Command {
-    #[serde(default)]
-    path: String,
-    #[serde(default)]
-    args: Vec<String>,
-    #[serde(default)]
-    env: Vec<String>,
-    #[serde(default)]
-    dir: String,
-    #[serde(default)]
-    timeout: Duration,
-}
-
-pub struct CommandHook {
-    command: Command,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Mount {
-    #[serde(default)]
-    source: String,
-    #[serde(default)]
-    destination: String,
-    #[serde(default)]
-    device: String,
-    #[serde(default)]
-    flags: i32,
-    #[serde(default)]
-    propagation_flags: Vec<i32>,
-    #[serde(default)]
-    data: String,
-    #[serde(default)]
-    relabel: String,
-    #[serde(default)]
-    extensions: i32,
-    #[serde(default)]
-    premount_cmds: Vec<Command>,
-    #[serde(default)]
-    postmount_cmds: Vec<Command>,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct HugepageLimit {
-    #[serde(default)]
-    page_size: String,
-    #[serde(default)]
-    limit: u64,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct IntelRdt {
-    #[serde(default, skip_serializing_if = "String::is_empty")]
-    l3_cache_schema: String,
-    #[serde(
-        default,
-        rename = "memBwSchema",
-        skip_serializing_if = "String::is_empty"
-    )]
-    mem_bw_schema: String,
-}
-
-pub type FreezerState = String;
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Cgroup<'a> {
-    #[serde(default, skip_serializing_if = "String::is_empty")]
-    name: String,
-    #[serde(default, skip_serializing_if = "String::is_empty")]
-    parent: String,
-    #[serde(default)]
-    path: String,
-    #[serde(default)]
-    scope_prefix: String,
-    paths: HashMap<String, String>,
-    resource: &'a Resources<'a>,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Resources<'a> {
-    #[serde(default)]
-    allow_all_devices: bool,
-    #[serde(default, skip_serializing_if = "Vec::is_empty")]
-    allowed_devices: Vec<&'a Device>,
-    #[serde(default, skip_serializing_if = "Vec::is_empty")]
-    denied_devices: Vec<&'a Device>,
-    #[serde(default)]
-    devices: Vec<&'a Device>,
-    #[serde(default)]
-    memory: i64,
-    #[serde(default)]
-    memory_reservation: i64,
-    #[serde(default)]
-    memory_swap: i64,
-    #[serde(default)]
-    kernel_memory: i64,
-    #[serde(default)]
-    kernel_memory_tcp: i64,
-    #[serde(default)]
-    cpu_shares: u64,
-    #[serde(default)]
-    cpu_quota: i64,
-    #[serde(default)]
-    cpu_period: u64,
-    #[serde(default)]
-    cpu_rt_quota: i64,
-    #[serde(default)]
-    cpu_rt_period: u64,
-    #[serde(default)]
-    cpuset_cpus: String,
-    #[serde(default)]
-    cpuset_mems: String,
-    #[serde(default)]
-    pids_limit: i64,
-    #[serde(default)]
-    blkio_weight: u64,
-    #[serde(default)]
-    blkio_leaf_weight: u64,
-    #[serde(default)]
-    blkio_weight_device: Vec<&'a WeightDevice>,
-    #[serde(default)]
-    blkio_throttle_read_bps_device: Vec<&'a ThrottleDevice>,
-    #[serde(default)]
-    blkio_throttle_write_bps_device: Vec<&'a ThrottleDevice>,
-    #[serde(default)]
-    blkio_throttle_read_iops_device: Vec<&'a ThrottleDevice>,
-    #[serde(default)]
-    blkio_throttle_write_iops_device: Vec<&'a ThrottleDevice>,
-    #[serde(default)]
-    freezer: FreezerState,
-    #[serde(default)]
-    hugetlb_limit: Vec<&'a HugepageLimit>,
-    #[serde(default)]
-    oom_kill_disable: bool,
-    #[serde(default)]
-    memory_swapiness: u64,
-    #[serde(default)]
-    net_prio_ifpriomap: Vec<&'a IfPrioMap>,
-    #[serde(default)]
-    net_cls_classid_u: u32,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Network {
-    #[serde(default)]
-    r#type: String,
-    #[serde(default)]
-    name: String,
-    #[serde(default)]
-    bridge: String,
-    #[serde(default)]
-    mac_address: String,
-    #[serde(default)]
-    address: String,
-    #[serde(default)]
-    gateway: String,
-    #[serde(default)]
-    ipv6_address: String,
-    #[serde(default)]
-    ipv6_gateway: String,
-    #[serde(default)]
-    mtu: i32,
-    #[serde(default)]
-    txqueuelen: i32,
-    #[serde(default)]
-    host_interface_name: String,
-    #[serde(default)]
-    hairpin_mode: bool,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Route {
-    #[serde(default)]
-    destination: String,
-    #[serde(default)]
-    source: String,
-    #[serde(default)]
-    gateway: String,
-    #[serde(default)]
-    interface_name: String,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct IfPrioMap {
-    #[serde(default)]
-    interface: String,
-    #[serde(default)]
-    priority: i32,
-}
-
-impl IfPrioMap {
-    fn cgroup_string(&self) -> String {
-        format!("{} {}", self.interface, self.priority)
-    }
-}
--- a/src/agent/rustjail/src/configs/namespaces.rs
+++ b/src/agent/rustjail/src/configs/namespaces.rs
@@ -1,46 +0,0 @@
-// Copyright (c) 2019 Ant Financial
-//
-// SPDX-License-Identifier: Apache-2.0
-//
-
-use serde;
-#[macro_use]
-use serde_derive;
-use serde_json;
-
-use std::collections::HashMap;
-#[macro_use]
-use lazy_static;
-
-pub type NamespaceType = String;
-pub type Namespaces = Vec<Namespace>;
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Namespace {
-    #[serde(default)]
-    r#type: NamespaceType,
-    #[serde(default)]
-    path: String,
-}
-
-pub const NEWNET: &'static str = "NEWNET";
-pub const NEWPID: &'static str = "NEWPID";
-pub const NEWNS: &'static str = "NEWNS";
-pub const NEWUTS: &'static str = "NEWUTS";
-pub const NEWUSER: &'static str = "NEWUSER";
-pub const NEWCGROUP: &'static str = "NEWCGROUP";
-pub const NEWIPC: &'static str = "NEWIPC";
-
-lazy_static! {
-    static ref TYPETONAME: HashMap<&'static str, &'static str> = {
-        let mut m = HashMap::new();
-        m.insert("pid", "pid");
-        m.insert("network", "net");
-        m.insert("mount", "mnt");
-        m.insert("user", "user");
-        m.insert("uts", "uts");
-        m.insert("ipc", "ipc");
-        m.insert("cgroup", "cgroup");
-        m
-    };
-}
--- a/src/agent/rustjail/src/configs/validator.rs
+++ b/src/agent/rustjail/src/configs/validator.rs
@@ -1,23 +0,0 @@
-// Copyright (c) 2019 Ant Financial
-//
-// SPDX-License-Identifier: Apache-2.0
-//
-
-use crate::configs::Config;
-use std::io::Result;
-
-pub trait Validator {
-    fn validate(&self, config: &Config) -> Result<()> {
-        Ok(())
-    }
-}
-
-pub struct ConfigValidator {}
-
-impl Validator for ConfigValidator {}
-
-impl ConfigValidator {
-    fn new() -> Self {
-        ConfigValidator {}
-    }
-}
--- a/src/agent/rustjail/src/container.rs
+++ b/src/agent/rustjail/src/container.rs
@@ -5,10 +5,10 @@

 use anyhow::{anyhow, Context, Result};
 use libc::pid_t;
-use oci::{ContainerState, LinuxDevice, LinuxIDMapping};
+use oci::{ContainerState, LinuxDevice, LinuxIdMapping};
 use oci::{Hook, Linux, LinuxNamespace, LinuxResources, Spec};
 use std::clone::Clone;
-use std::ffi::{CStr, CString};
+use std::ffi::CString;
 use std::fmt::Display;
 use std::fs;
 use std::os::unix::io::RawFd;
@@ -48,6 +48,7 @@ use oci::State as OCIState;
 use std::collections::HashMap;
 use std::os::unix::io::FromRawFd;
 use std::str::FromStr;
+use std::sync::Arc;

 use slog::{info, o, Logger};

@@ -57,6 +58,7 @@ use crate::sync_with_async::{read_async, write_async};
 use async_trait::async_trait;
 use rlimit::{setrlimit, Resource, Rlim};
 use tokio::io::AsyncBufReadExt;
+use tokio::sync::Mutex;

 use crate::utils;

@@ -83,8 +85,8 @@ pub struct ContainerStatus {
 impl ContainerStatus {
    fn new() -> Self {
        ContainerStatus {
-            pre_status: ContainerState::CREATED,
-            cur_status: ContainerState::CREATED,
+            pre_status: ContainerState::Created,
+            cur_status: ContainerState::Created,
        }
    }

@@ -106,6 +108,9 @@ pub type Config = CreateOpts;
 type NamespaceType = String;

 lazy_static! {
+    // This locker ensures the child exit signal will be received by the right receiver.
+    pub static ref WAIT_PID_LOCKER: Arc<Mutex<bool>> = Arc::new(Mutex::new(false));
+
    static ref NAMESPACES: HashMap<&'static str, CloneFlags> = {
        let mut m = HashMap::new();
        m.insert("user", CloneFlags::CLONE_NEWUSER);
@@ -132,62 +137,62 @@ lazy_static! {
    };

    pub static ref DEFAULT_DEVICES: Vec<LinuxDevice> = {
-        let mut v = Vec::new();
-        v.push(LinuxDevice {
-            path: "/dev/null".to_string(),
-            r#type: "c".to_string(),
-            major: 1,
-            minor: 3,
-            file_mode: Some(0o666),
-            uid: Some(0xffffffff),
-            gid: Some(0xffffffff),
-        });
-        v.push(LinuxDevice {
-            path: "/dev/zero".to_string(),
-            r#type: "c".to_string(),
-            major: 1,
-            minor: 5,
-            file_mode: Some(0o666),
-            uid: Some(0xffffffff),
-            gid: Some(0xffffffff),
-        });
-        v.push(LinuxDevice {
-            path: "/dev/full".to_string(),
-            r#type: String::from("c"),
-            major: 1,
-            minor: 7,
-            file_mode: Some(0o666),
-            uid: Some(0xffffffff),
-            gid: Some(0xffffffff),
-        });
-        v.push(LinuxDevice {
-            path: "/dev/tty".to_string(),
-            r#type: "c".to_string(),
-            major: 5,
-            minor: 0,
-            file_mode: Some(0o666),
-            uid: Some(0xffffffff),
-            gid: Some(0xffffffff),
-        });
-        v.push(LinuxDevice {
-            path: "/dev/urandom".to_string(),
-            r#type: "c".to_string(),
-            major: 1,
-            minor: 9,
-            file_mode: Some(0o666),
-            uid: Some(0xffffffff),
-            gid: Some(0xffffffff),
-        });
-        v.push(LinuxDevice {
-            path: "/dev/random".to_string(),
-            r#type: "c".to_string(),
-            major: 1,
-            minor: 8,
-            file_mode: Some(0o666),
-            uid: Some(0xffffffff),
-            gid: Some(0xffffffff),
-        });
-        v
+        vec![
+            LinuxDevice {
+                path: "/dev/null".to_string(),
+                r#type: "c".to_string(),
+                major: 1,
+                minor: 3,
+                file_mode: Some(0o666),
+                uid: Some(0xffffffff),
+                gid: Some(0xffffffff),
+            },
+            LinuxDevice {
+                path: "/dev/zero".to_string(),
+                r#type: "c".to_string(),
+                major: 1,
+                minor: 5,
+                file_mode: Some(0o666),
+                uid: Some(0xffffffff),
+                gid: Some(0xffffffff),
+            },
+            LinuxDevice {
+                path: "/dev/full".to_string(),
+                r#type: String::from("c"),
+                major: 1,
+                minor: 7,
+                file_mode: Some(0o666),
+                uid: Some(0xffffffff),
+                gid: Some(0xffffffff),
+            },
+            LinuxDevice {
+                path: "/dev/tty".to_string(),
+                r#type: "c".to_string(),
+                major: 5,
+                minor: 0,
+                file_mode: Some(0o666),
+                uid: Some(0xffffffff),
+                gid: Some(0xffffffff),
+            },
+            LinuxDevice {
+                path: "/dev/urandom".to_string(),
+                r#type: "c".to_string(),
+                major: 1,
+                minor: 9,
+                file_mode: Some(0o666),
+                uid: Some(0xffffffff),
+                gid: Some(0xffffffff),
+            },
+            LinuxDevice {
+                path: "/dev/random".to_string(),
+                r#type: "c".to_string(),
+                major: 1,
+                minor: 8,
+                file_mode: Some(0o666),
+                uid: Some(0xffffffff),
+                gid: Some(0xffffffff),
+            },
+        ]
    };
 }

@@ -255,7 +260,7 @@ pub struct State {
 }

 #[derive(Serialize, Deserialize, Debug, Clone)]
-pub struct SyncPC {
+pub struct SyncPc {
    #[serde(default)]
    pid: pid_t,
 }
@@ -268,7 +273,7 @@ pub trait Container: BaseContainer {
 impl Container for LinuxContainer {
    fn pause(&mut self) -> Result<()> {
        let status = self.status();
-        if status != ContainerState::RUNNING && status != ContainerState::CREATED {
+        if status != ContainerState::Running && status != ContainerState::Created {
            return Err(anyhow!(
                "failed to pause container: current status is: {:?}",
                status
@@ -281,7 +286,7 @@ impl Container for LinuxContainer {
                .unwrap()
                .freeze(FreezerState::Frozen)?;

-            self.status.transition(ContainerState::PAUSED);
+            self.status.transition(ContainerState::Paused);
            return Ok(());
        }
        Err(anyhow!("failed to get container's cgroup manager"))
@@ -289,7 +294,7 @@ impl Container for LinuxContainer {

    fn resume(&mut self) -> Result<()> {
        let status = self.status();
-        if status != ContainerState::PAUSED {
+        if status != ContainerState::Paused {
            return Err(anyhow!("container status is: {:?}, not paused", status));
        }

@@ -299,7 +304,7 @@ impl Container for LinuxContainer {
                .unwrap()
                .freeze(FreezerState::Thawed)?;

-            self.status.transition(ContainerState::RUNNING);
+            self.status.transition(ContainerState::Running);
            return Ok(());
        }
        Err(anyhow!("failed to get container's cgroup manager"))
@@ -341,7 +346,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
        Err(_e) => sched::unshare(CloneFlags::CLONE_NEWPID)?,
    }

-    match fork() {
+    match unsafe { fork() } {
        Ok(ForkResult::Parent { child, .. }) => {
            log_child!(
                cfd_log,
@@ -464,7 +469,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
    // Ref: https://github.com/opencontainers/runc/commit/50a19c6ff828c58e5dab13830bd3dacde268afe5
    //
    if !nses.is_empty() {
-        prctl::set_dumpable(false)
+        capctl::prctl::set_dumpable(false)
            .map_err(|e| anyhow!(e).context("set process non-dumpable failed"))?;
    }

@@ -540,7 +545,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
        // notify parent to run prestart hooks
        write_sync(cwfd, SYNC_SUCCESS, "")?;
        // wait parent run prestart hooks
-        let _ = read_sync(crfd)?;
+        read_sync(crfd)?;
    }

    if mount_fd != -1 {
@@ -597,7 +602,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {

    // NoNewPeiviledges, Drop capabilities
    if oci_process.no_new_privileges {
-        prctl::set_no_new_privileges(true).map_err(|_| anyhow!("cannot set no new privileges"))?;
+        capctl::prctl::set_no_new_privs().map_err(|_| anyhow!("cannot set no new privileges"))?;
    }

    if oci_process.capabilities.is_some() {
@@ -607,8 +612,6 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {

    if init {
        // notify parent to run poststart hooks
-        // cfd is closed when return from join_namespaces
-        // should retunr cfile instead of cfd?
        write_sync(cwfd, SYNC_SUCCESS, "")?;
    }

@@ -634,12 +637,12 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
        env::set_var(v[0], v[1]);
    }

-    // set the "HOME" env getting from "/etc/passwd"
+    // Set the "HOME" env from the user's entry in "/etc/passwd"; if
+    // the home dir lookup for the uid fails, fall back to "/" as the
+    // home env.
    if env::var_os(HOME_ENV_KEY).is_none() {
-        match utils::home_dir(guser.uid) {
-            Ok(home_dir) => env::set_var(HOME_ENV_KEY, home_dir),
-            Err(e) => log_child!(cfd_log, "failed to get home dir: {:?}", e),
-        }
+        let home_dir = utils::home_dir(guser.uid).unwrap_or_else(|_| String::from("/"));
+        env::set_var(HOME_ENV_KEY, home_dir);
    }

    let exec_file = Path::new(&args[0]);
@@ -734,7 +737,7 @@ impl BaseContainer for LinuxContainer {
        };

        let status = self.status();
-        let pid = if status != ContainerState::STOPPED {
+        let pid = if status != ContainerState::Stopped {
            self.init_process_pid
        } else {
            0
@@ -817,7 +820,7 @@ impl BaseContainer for LinuxContainer {
            if stat::stat(fifo_file.as_str()).is_ok() {
                return Err(anyhow!("exec fifo exists"));
            }
-            unistd::mkfifo(fifo_file.as_str(), Mode::from_bits(0o622).unwrap())?;
+            unistd::mkfifo(fifo_file.as_str(), Mode::from_bits(0o644).unwrap())?;

            fifofd = fcntl::open(
                fifo_file.as_str(),
@@ -908,7 +911,7 @@ impl BaseContainer for LinuxContainer {
            child = child.env(PIDNS_FD, format!("{}", pidns.unwrap()));
        }

-        let child = child.spawn()?;
+        child.spawn()?;

        unistd::close(crfd)?;
        unistd::close(cwfd)?;
@@ -964,19 +967,6 @@ impl BaseContainer for LinuxContainer {

        self.created = SystemTime::now();

-        // create the pipes for notify process exited
-        let (exit_pipe_r, exit_pipe_w) = unistd::pipe2(OFlag::O_CLOEXEC)
-            .context("failed to create pipe")
-            .map_err(|e| {
-                let _ = signal::kill(Pid::from_raw(child.id() as i32), Some(Signal::SIGKILL))
-                    .map_err(|e| warn!(logger, "signal::kill creating pipe {:?}", e));
-
-                e
-            })?;
-
-        p.exit_pipe_w = Some(exit_pipe_w);
-        p.exit_pipe_r = Some(exit_pipe_r);
-
        if p.init {
            let spec = self.config.spec.as_mut().unwrap();
            update_namespaces(&self.logger, spec, p.pid)?;
@@ -997,7 +987,7 @@ impl BaseContainer for LinuxContainer {

        if init {
            self.exec()?;
-            self.status.transition(ContainerState::RUNNING);
+            self.status.transition(ContainerState::Running);
        }

        Ok(())
@@ -1019,7 +1009,7 @@ impl BaseContainer for LinuxContainer {
            }
        }

-        self.status.transition(ContainerState::STOPPED);
+        self.status.transition(ContainerState::Stopped);
        mount::umount2(
            spec.root.as_ref().unwrap().path.as_str(),
            MntFlags::MNT_DETACH,
@@ -1055,7 +1045,7 @@ impl BaseContainer for LinuxContainer {
            .unwrap()
            .as_secs();

-        self.status.transition(ContainerState::RUNNING);
+        self.status.transition(ContainerState::Running);
        unistd::close(fd)?;

        Ok(())
@@ -1089,9 +1079,8 @@ fn do_exec(args: &[String]) -> ! {
        .iter()
        .map(|s| CString::new(s.to_string()).unwrap_or_default())
        .collect();
-    let a: Vec<&CStr> = sa.iter().map(|s| s.as_c_str()).collect();

-    let _ = unistd::execvp(p.as_c_str(), a.as_slice()).map_err(|e| match e {
+    let _ = unistd::execvp(p.as_c_str(), &sa).map_err(|e| match e {
        nix::Error::Sys(errno) => {
            std::process::exit(errno as i32);
        }
@@ -1264,7 +1253,7 @@ async fn join_namespaces(

    if p.init {
        info!(logger, "notify child parent ready to run prestart hook!");
-        let _ = read_async(pipe_r).await?;
+        read_async(pipe_r).await?;

        info!(logger, "get ready to run prestart hook!");

@@ -1302,7 +1291,7 @@ async fn join_namespaces(
    Ok(())
 }

-fn write_mappings(logger: &Logger, path: &str, maps: &[LinuxIDMapping]) -> Result<()> {
+fn write_mappings(logger: &Logger, path: &str, maps: &[LinuxIdMapping]) -> Result<()> {
    let data = maps
        .iter()
        .filter(|m| m.size != 0)
@@ -1324,7 +1313,7 @@ fn write_mappings(logger: &Logger, path: &str, maps: &[LinuxIDMapping]) -> Resul

 fn setid(uid: Uid, gid: Gid) -> Result<()> {
    // set uid/gid
-    prctl::set_keep_capabilities(true)
+    capctl::prctl::set_keepcaps(true)
        .map_err(|e| anyhow!(e).context("set keep capabilities returned"))?;

    {
@@ -1338,7 +1327,7 @@ fn setid(uid: Uid, gid: Gid) -> Result<()> {
        capabilities::reset_effective()?;
    }

-    prctl::set_keep_capabilities(false)
+    capctl::prctl::set_keepcaps(false)
        .map_err(|e| anyhow!(e).context("set keep capabilities returned"))?;

    Ok(())
@@ -1480,6 +1469,8 @@ async fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> {
        })
        .collect();

+    // Hold the lock so the hook child's exit signal is not reaped by the global reaper.
+    let _wait_locker = WAIT_PID_LOCKER.lock().await;
    let mut child = tokio::process::Command::new(path)
        .args(args.iter())
        .envs(env.iter())
@@ -1528,28 +1519,23 @@ async fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> {

        match child.wait().await {
            Ok(exit) => {
-                let code = match exit.code() {
-                    Some(c) => c,
-                    None => {
-                        return Err(anyhow!("hook exit status has no status code"));
-                    }
-                };
+                let code = exit
+                    .code()
+                    .ok_or_else(|| anyhow!("hook exit status has no status code"))?;

-                if code == 0 {
-                    debug!(logger, "hook {} exit status is 0", &path);
-                    return Ok(());
-                } else {
+                if code != 0 {
                    error!(logger, "hook {} exit status is {}", &path, code);
                    return Err(anyhow!(nix::Error::from_errno(Errno::UnknownErrno)));
                }
+
+                debug!(logger, "hook {} exit status is 0", &path);
+                Ok(())
            }
-            Err(e) => {
-                return Err(anyhow!(
-                    "wait child error: {} {}",
-                    e,
-                    e.raw_os_error().unwrap()
-                ));
-            }
+            Err(e) => Err(anyhow!(
+                "wait child error: {} {}",
+                e,
+                e.raw_os_error().unwrap()
+            )),
        }
    });

@@ -1588,7 +1574,7 @@ mod tests {
            &OCIState {
                version: "1.2.3".to_string(),
                id: "321".to_string(),
-                status: ContainerState::RUNNING,
+                status: ContainerState::Running,
                pid: 2,
                bundle: "".to_string(),
                annotations: Default::default(),
@@ -1611,7 +1597,7 @@ mod tests {
            &OCIState {
                version: "1.2.3".to_string(),
                id: "321".to_string(),
-                status: ContainerState::RUNNING,
+                status: ContainerState::Running,
                pid: 2,
                bundle: "".to_string(),
                annotations: Default::default(),
@@ -1630,10 +1616,10 @@ mod tests {
    fn test_status_transtition() {
        let mut status = ContainerStatus::new();
        let status_table: [ContainerState; 4] = [
-            ContainerState::CREATED,
-            ContainerState::RUNNING,
-            ContainerState::PAUSED,
-            ContainerState::STOPPED,
+            ContainerState::Created,
+            ContainerState::Running,
+            ContainerState::Paused,
+            ContainerState::Stopped,
        ];

        for s in status_table.iter() {
@@ -1770,7 +1756,7 @@ mod tests {
    fn test_linuxcontainer_pause_bad_status() {
        let ret = new_linux_container_and_then(|mut c: LinuxContainer| {
            // Change state to pause, c.pause() should fail
-            c.status.transition(ContainerState::PAUSED);
+            c.status.transition(ContainerState::Paused);
            c.pause().map_err(|e| anyhow!(e))
        });

@@ -1802,7 +1788,7 @@ mod tests {
    fn test_linuxcontainer_resume_bad_status() {
        let ret = new_linux_container_and_then(|mut c: LinuxContainer| {
            // Change state to created, c.resume() should fail
-            c.status.transition(ContainerState::CREATED);
+            c.status.transition(ContainerState::Created);
            c.resume().map_err(|e| anyhow!(e))
        });

@@ -1813,7 +1799,7 @@ mod tests {
    #[test]
    fn test_linuxcontainer_resume_cgroupmgr_is_none() {
        let ret = new_linux_container_and_then(|mut c: LinuxContainer| {
-            c.status.transition(ContainerState::PAUSED);
+            c.status.transition(ContainerState::Paused);
            c.cgroup_manager = None;
            c.resume().map_err(|e| anyhow!(e))
        });
@@ -1826,7 +1812,7 @@ mod tests {
        let ret = new_linux_container_and_then(|mut c: LinuxContainer| {
            c.cgroup_manager = FsManager::new("").ok();
            // Change status to paused, this way we can resume it
-            c.status.transition(ContainerState::PAUSED);
+            c.status.transition(ContainerState::Paused);
            c.resume().map_err(|e| anyhow!(e))
        });

--- a/src/agent/rustjail/src/lib.rs
+++ b/src/agent/rustjail/src/lib.rs
@@ -23,7 +23,7 @@ extern crate caps;
 extern crate protocols;
 #[macro_use]
 extern crate scopeguard;
-extern crate prctl;
+extern crate capctl;
 #[macro_use]
 extern crate lazy_static;
 extern crate libc;
@@ -47,35 +47,17 @@ pub mod sync;
 pub mod sync_with_async;
 pub mod utils;
 pub mod validator;
-// pub mod factory;
-//pub mod configs;
-// pub mod devices;
-// pub mod init;
-// pub mod rootfs;
-// pub mod capabilities;
-// pub mod console;
-// pub mod stats;
-// pub mod user;
-//pub mod intelrdt;

-// construtc ociSpec from grpcSpec, which is needed for hook
-// execution. since hooks read config.json
-
-use oci::{
-    Box as ociBox, Hooks as ociHooks, Linux as ociLinux, LinuxCapabilities as ociLinuxCapabilities,
-    Mount as ociMount, POSIXRlimit as ociPOSIXRlimit, Process as ociProcess, Root as ociRoot,
-    Spec as ociSpec, User as ociUser,
-};
-use protocols::oci::{
-    Hooks as grpcHooks, Linux as grpcLinux, Mount as grpcMount, Process as grpcProcess,
-    Root as grpcRoot, Spec as grpcSpec,
-};
 use std::collections::HashMap;

-pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess {
+use protocols::oci as grpc;
+
+// Construct the oci::Spec from grpc::Spec; it is needed for hook
+// execution, since hooks read config.json.
+pub fn process_grpc_to_oci(p: &grpc::Process) -> oci::Process {
    let console_size = if p.ConsoleSize.is_some() {
        let c = p.ConsoleSize.as_ref().unwrap();
-        Some(ociBox {
+        Some(oci::Box {
            height: c.Height,
            width: c.Width,
        })
@@ -85,14 +67,14 @@ pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess {

    let user = if p.User.is_some() {
        let u = p.User.as_ref().unwrap();
-        ociUser {
+        oci::User {
            uid: u.UID,
            gid: u.GID,
            additional_gids: u.AdditionalGids.clone(),
            username: u.Username.clone(),
        }
    } else {
-        ociUser {
+        oci::User {
            uid: 0,
            gid: 0,
            additional_gids: vec![],
@@ -103,7 +85,7 @@ pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess {
    let capabilities = if p.Capabilities.is_some() {
        let cap = p.Capabilities.as_ref().unwrap();

-        Some(ociLinuxCapabilities {
+        Some(oci::LinuxCapabilities {
            bounding: cap.Bounding.clone().into_vec(),
            effective: cap.Effective.clone().into_vec(),
            inheritable: cap.Inheritable.clone().into_vec(),
@@ -117,7 +99,7 @@ pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess {
    let rlimits = {
        let mut r = Vec::new();
        for lm in p.Rlimits.iter() {
-            r.push(ociPOSIXRlimit {
+            r.push(oci::PosixRlimit {
                r#type: lm.Type.clone(),
                hard: lm.Hard,
                soft: lm.Soft,
@@ -126,7 +108,7 @@ pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess {
        r
    };

-    ociProcess {
+    oci::Process {
        terminal: p.Terminal,
        console_size,
        user,
@@ -142,15 +124,15 @@ pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess {
    }
 }

-fn root_grpc_to_oci(root: &grpcRoot) -> ociRoot {
-    ociRoot {
+fn root_grpc_to_oci(root: &grpc::Root) -> oci::Root {
+    oci::Root {
        path: root.Path.clone(),
        readonly: root.Readonly,
    }
 }

-fn mount_grpc_to_oci(m: &grpcMount) -> ociMount {
-    ociMount {
+fn mount_grpc_to_oci(m: &grpc::Mount) -> oci::Mount {
+    oci::Mount {
        destination: m.destination.clone(),
        r#type: m.field_type.clone(),
        source: m.source.clone(),
@@ -158,13 +140,12 @@ fn mount_grpc_to_oci(m: &grpcMount) -> ociMount {
    }
 }

-use oci::Hook as ociHook;
 use protocols::oci::Hook as grpcHook;

-fn hook_grpc_to_oci(h: &[grpcHook]) -> Vec<ociHook> {
+fn hook_grpc_to_oci(h: &[grpcHook]) -> Vec<oci::Hook> {
    let mut r = Vec::new();
    for e in h.iter() {
-        r.push(ociHook {
+        r.push(oci::Hook {
            path: e.Path.clone(),
            args: e.Args.clone().into_vec(),
            env: e.Env.clone().into_vec(),
@@ -174,39 +155,29 @@ fn hook_grpc_to_oci(h: &[grpcHook]) -> Vec<ociHook> {
    r
 }

-fn hooks_grpc_to_oci(h: &grpcHooks) -> ociHooks {
+fn hooks_grpc_to_oci(h: &grpc::Hooks) -> oci::Hooks {
    let prestart = hook_grpc_to_oci(h.Prestart.as_ref());

    let poststart = hook_grpc_to_oci(h.Poststart.as_ref());

    let poststop = hook_grpc_to_oci(h.Poststop.as_ref());

-    ociHooks {
+    oci::Hooks {
        prestart,
        poststart,
        poststop,
    }
 }

-use oci::{
-    LinuxDevice as ociLinuxDevice, LinuxIDMapping as ociLinuxIDMapping,
-    LinuxIntelRdt as ociLinuxIntelRdt, LinuxNamespace as ociLinuxNamespace,
-    LinuxResources as ociLinuxResources, LinuxSeccomp as ociLinuxSeccomp,
-};
-use protocols::oci::{
-    LinuxIDMapping as grpcLinuxIDMapping, LinuxResources as grpcLinuxResources,
-    LinuxSeccomp as grpcLinuxSeccomp,
-};
-
-fn idmap_grpc_to_oci(im: &grpcLinuxIDMapping) -> ociLinuxIDMapping {
-    ociLinuxIDMapping {
+fn idmap_grpc_to_oci(im: &grpc::LinuxIDMapping) -> oci::LinuxIdMapping {
+    oci::LinuxIdMapping {
        container_id: im.ContainerID,
        host_id: im.HostID,
        size: im.Size,
    }
 }

-fn idmaps_grpc_to_oci(ims: &[grpcLinuxIDMapping]) -> Vec<ociLinuxIDMapping> {
+fn idmaps_grpc_to_oci(ims: &[grpc::LinuxIDMapping]) -> Vec<oci::LinuxIdMapping> {
    let mut r = Vec::new();
    for im in ims.iter() {
        r.push(idmap_grpc_to_oci(im));
@@ -214,24 +185,13 @@ fn idmaps_grpc_to_oci(ims: &[grpcLinuxIDMapping]) -> Vec<ociLinuxIDMapping> {
    r
 }

-use oci::{
-    LinuxBlockIO as ociLinuxBlockIO, LinuxBlockIODevice as ociLinuxBlockIODevice,
-    LinuxCPU as ociLinuxCPU, LinuxDeviceCgroup as ociLinuxDeviceCgroup,
-    LinuxHugepageLimit as ociLinuxHugepageLimit,
-    LinuxInterfacePriority as ociLinuxInterfacePriority, LinuxMemory as ociLinuxMemory,
-    LinuxNetwork as ociLinuxNetwork, LinuxPids as ociLinuxPids,
-    LinuxThrottleDevice as ociLinuxThrottleDevice, LinuxWeightDevice as ociLinuxWeightDevice,
-};
-use protocols::oci::{
-    LinuxBlockIO as grpcLinuxBlockIO, LinuxThrottleDevice as grpcLinuxThrottleDevice,
-    LinuxWeightDevice as grpcLinuxWeightDevice,
-};
-
-fn throttle_devices_grpc_to_oci(tds: &[grpcLinuxThrottleDevice]) -> Vec<ociLinuxThrottleDevice> {
+fn throttle_devices_grpc_to_oci(
+    tds: &[grpc::LinuxThrottleDevice],
+) -> Vec<oci::LinuxThrottleDevice> {
    let mut r = Vec::new();
    for td in tds.iter() {
-        r.push(ociLinuxThrottleDevice {
-            blk: ociLinuxBlockIODevice {
+        r.push(oci::LinuxThrottleDevice {
+            blk: oci::LinuxBlockIoDevice {
                major: td.Major,
                minor: td.Minor,
            },
@@ -241,11 +201,11 @@ fn throttle_devices_grpc_to_oci(tds: &[grpcLinuxThrottleDevice]) -> Vec<ociLinux
    r
 }

-fn weight_devices_grpc_to_oci(wds: &[grpcLinuxWeightDevice]) -> Vec<ociLinuxWeightDevice> {
+fn weight_devices_grpc_to_oci(wds: &[grpc::LinuxWeightDevice]) -> Vec<oci::LinuxWeightDevice> {
    let mut r = Vec::new();
    for wd in wds.iter() {
-        r.push(ociLinuxWeightDevice {
-            blk: ociLinuxBlockIODevice {
+        r.push(oci::LinuxWeightDevice {
+            blk: oci::LinuxBlockIoDevice {
                major: wd.Major,
                minor: wd.Minor,
            },
@@ -256,7 +216,7 @@ fn weight_devices_grpc_to_oci(wds: &[grpcLinuxWeightDevice]) -> Vec<ociLinuxWeig
    r
 }

-fn blockio_grpc_to_oci(blk: &grpcLinuxBlockIO) -> ociLinuxBlockIO {
+fn blockio_grpc_to_oci(blk: &grpc::LinuxBlockIO) -> oci::LinuxBlockIo {
    let weight_device = weight_devices_grpc_to_oci(blk.WeightDevice.as_ref());
    let throttle_read_bps_device = throttle_devices_grpc_to_oci(blk.ThrottleReadBpsDevice.as_ref());
    let throttle_write_bps_device =
@@ -266,7 +226,7 @@ fn blockio_grpc_to_oci(blk: &grpcLinuxBlockIO) -> ociLinuxBlockIO {
    let throttle_write_iops_device =
        throttle_devices_grpc_to_oci(blk.ThrottleWriteIOPSDevice.as_ref());

-    ociLinuxBlockIO {
+    oci::LinuxBlockIo {
        weight: Some(blk.Weight as u16),
        leaf_weight: Some(blk.LeafWeight as u16),
        weight_device,
@@ -277,7 +237,7 @@ fn blockio_grpc_to_oci(blk: &grpcLinuxBlockIO) -> ociLinuxBlockIO {
    }
 }

-pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {
+pub fn resources_grpc_to_oci(res: &grpc::LinuxResources) -> oci::LinuxResources {
    let devices = {
        let mut d = Vec::new();
        for dev in res.Devices.iter() {
@@ -292,7 +252,7 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {
            } else {
                Some(dev.Minor)
            };
-            d.push(ociLinuxDeviceCgroup {
+            d.push(oci::LinuxDeviceCgroup {
                allow: dev.Allow,
                r#type: dev.Type.clone(),
                major,
@@ -305,7 +265,7 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {

    let memory = if res.Memory.is_some() {
        let mem = res.Memory.as_ref().unwrap();
-        Some(ociLinuxMemory {
+        Some(oci::LinuxMemory {
            limit: Some(mem.Limit),
            reservation: Some(mem.Reservation),
            swap: Some(mem.Swap),
@@ -320,7 +280,7 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {

    let cpu = if res.CPU.is_some() {
        let c = res.CPU.as_ref().unwrap();
-        Some(ociLinuxCPU {
+        Some(oci::LinuxCpu {
            shares: Some(c.Shares),
            quota: Some(c.Quota),
            period: Some(c.Period),
@@ -335,7 +295,7 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {

    let pids = if res.Pids.is_some() {
        let p = res.Pids.as_ref().unwrap();
-        Some(ociLinuxPids { limit: p.Limit })
+        Some(oci::LinuxPids { limit: p.Limit })
    } else {
        None
    };
@@ -351,7 +311,7 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {
    let hugepage_limits = {
        let mut r = Vec::new();
        for hl in res.HugepageLimits.iter() {
-            r.push(ociLinuxHugepageLimit {
+            r.push(oci::LinuxHugepageLimit {
                page_size: hl.Pagesize.clone(),
                limit: hl.Limit,
            });
@@ -364,14 +324,14 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {
        let priorities = {
            let mut r = Vec::new();
            for pr in net.Priorities.iter() {
-                r.push(ociLinuxInterfacePriority {
+                r.push(oci::LinuxInterfacePriority {
                    name: pr.Name.clone(),
                    priority: pr.Priority,
                });
            }
            r
        };
-        Some(ociLinuxNetwork {
+        Some(oci::LinuxNetwork {
            class_id: Some(net.ClassID),
            priorities,
        })
@@ -379,7 +339,7 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {
        None
    };

-    ociLinuxResources {
+    oci::LinuxResources {
        devices,
        memory,
        cpu,
@@ -391,17 +351,22 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {
    }
 }

-use oci::{LinuxSeccompArg as ociLinuxSeccompArg, LinuxSyscall as ociLinuxSyscall};
-
-fn seccomp_grpc_to_oci(sec: &grpcLinuxSeccomp) -> ociLinuxSeccomp {
+fn seccomp_grpc_to_oci(sec: &grpc::LinuxSeccomp) -> oci::LinuxSeccomp {
    let syscalls = {
        let mut r = Vec::new();

        for sys in sec.Syscalls.iter() {
            let mut args = Vec::new();
+            let errno_ret: u32;
+
+            if sys.has_errnoret() {
+                errno_ret = sys.get_errnoret();
+            } else {
+                errno_ret = libc::EPERM as u32;
+            }

            for arg in sys.Args.iter() {
-                args.push(ociLinuxSeccompArg {
+                args.push(oci::LinuxSeccompArg {
                    index: arg.Index as u32,
                    value: arg.Value,
                    value_two: arg.ValueTwo,
@@ -409,23 +374,25 @@ fn seccomp_grpc_to_oci(sec: &grpcLinuxSeccomp) -> ociLinuxSeccomp {
                });
            }

-            r.push(ociLinuxSyscall {
+            r.push(oci::LinuxSyscall {
                names: sys.Names.clone().into_vec(),
                action: sys.Action.clone(),
+                errno_ret,
                args,
            });
        }
        r
    };

-    ociLinuxSeccomp {
+    oci::LinuxSeccomp {
        default_action: sec.DefaultAction.clone(),
        architectures: sec.Architectures.clone().into_vec(),
+        flags: sec.Flags.clone().into_vec(),
        syscalls,
    }
 }

-fn linux_grpc_to_oci(l: &grpcLinux) -> ociLinux {
+fn linux_grpc_to_oci(l: &grpc::Linux) -> oci::Linux {
    let uid_mappings = idmaps_grpc_to_oci(l.UIDMappings.as_ref());
    let gid_mappings = idmaps_grpc_to_oci(l.GIDMappings.as_ref());

@@ -445,7 +412,7 @@ fn linux_grpc_to_oci(l: &grpcLinux) -> ociLinux {
        let mut r = Vec::new();

        for ns in l.Namespaces.iter() {
-            r.push(ociLinuxNamespace {
+            r.push(oci::LinuxNamespace {
                r#type: ns.Type.clone(),
                path: ns.Path.clone(),
            });
@@ -457,7 +424,7 @@ fn linux_grpc_to_oci(l: &grpcLinux) -> ociLinux {
        let mut r = Vec::new();

        for d in l.Devices.iter() {
-            r.push(ociLinuxDevice {
+            r.push(oci::LinuxDevice {
                path: d.Path.clone(),
                r#type: d.Type.clone(),
                major: d.Major,
@@ -473,14 +440,14 @@ fn linux_grpc_to_oci(l: &grpcLinux) -> ociLinux {
    let intel_rdt = if l.IntelRdt.is_some() {
        let rdt = l.IntelRdt.as_ref().unwrap();

-        Some(ociLinuxIntelRdt {
+        Some(oci::LinuxIntelRdt {
            l3_cache_schema: rdt.L3CacheSchema.clone(),
        })
    } else {
        None
    };

-    ociLinux {
+    oci::Linux {
        uid_mappings,
        gid_mappings,
        sysctl: l.Sysctl.clone(),
@@ -497,11 +464,11 @@ fn linux_grpc_to_oci(l: &grpcLinux) -> ociLinux {
    }
 }

-fn linux_oci_to_grpc(_l: &ociLinux) -> grpcLinux {
-    grpcLinux::default()
+fn linux_oci_to_grpc(_l: &oci::Linux) -> grpc::Linux {
+    grpc::Linux::default()
 }

-pub fn grpc_to_oci(grpc: &grpcSpec) -> ociSpec {
+pub fn grpc_to_oci(grpc: &grpc::Spec) -> oci::Spec {
    // process
    let process = if grpc.Process.is_some() {
        Some(process_grpc_to_oci(grpc.Process.as_ref().unwrap()))
@@ -539,7 +506,7 @@ pub fn grpc_to_oci(grpc: &grpcSpec) -> ociSpec {
        None
    };

-    ociSpec {
+    oci::Spec {
        version: grpc.Version.clone(),
        process,
        root,
--- a/src/agent/rustjail/src/mount.rs
+++ b/src/agent/rustjail/src/mount.rs
@@ -52,10 +52,12 @@ const MOUNTINFOFORMAT: &str = "{d} {d} {d}:{d} {} {} {} {}";
 const PROC_PATH: &str = "/proc";

 // since libc didn't defined this const for musl, thus redefined it here.
-#[cfg(all(target_os = "linux", target_env = "gnu"))]
+#[cfg(all(target_os = "linux", target_env = "gnu", not(target_arch = "s390x")))]
 const PROC_SUPER_MAGIC: libc::c_long = 0x00009fa0;
 #[cfg(all(target_os = "linux", target_env = "musl"))]
 const PROC_SUPER_MAGIC: libc::c_ulong = 0x00009fa0;
+#[cfg(all(target_os = "linux", target_env = "gnu", target_arch = "s390x"))]
+const PROC_SUPER_MAGIC: libc::c_uint = 0x00009fa0;

 lazy_static! {
    static ref PROPAGATION: HashMap<&'static str, MsFlags> = {
@@ -66,6 +68,8 @@ lazy_static! {
        m.insert("rprivate", MsFlags::MS_PRIVATE | MsFlags::MS_REC);
        m.insert("slave", MsFlags::MS_SLAVE);
        m.insert("rslave", MsFlags::MS_SLAVE | MsFlags::MS_REC);
+        m.insert("unbindable", MsFlags::MS_UNBINDABLE);
+        m.insert("runbindable", MsFlags::MS_UNBINDABLE | MsFlags::MS_REC);
        m
    };
    static ref OPTIONS: HashMap<&'static str, (bool, MsFlags)> = {
@@ -91,17 +95,6 @@ lazy_static! {
        m.insert("nodiratime", (false, MsFlags::MS_NODIRATIME));
        m.insert("bind", (false, MsFlags::MS_BIND));
        m.insert("rbind", (false, MsFlags::MS_BIND | MsFlags::MS_REC));
-        m.insert("unbindable", (false, MsFlags::MS_UNBINDABLE));
-        m.insert(
-            "runbindable",
-            (false, MsFlags::MS_UNBINDABLE | MsFlags::MS_REC),
-        );
-        m.insert("private", (false, MsFlags::MS_PRIVATE));
-        m.insert("rprivate", (false, MsFlags::MS_PRIVATE | MsFlags::MS_REC));
-        m.insert("shared", (false, MsFlags::MS_SHARED));
-        m.insert("rshared", (false, MsFlags::MS_SHARED | MsFlags::MS_REC));
-        m.insert("slave", (false, MsFlags::MS_SLAVE));
-        m.insert("rslave", (false, MsFlags::MS_SLAVE | MsFlags::MS_REC));
        m.insert("relatime", (false, MsFlags::MS_RELATIME));
        m.insert("norelatime", (true, MsFlags::MS_RELATIME));
        m.insert("strictatime", (false, MsFlags::MS_STRICTATIME));
@@ -190,7 +183,7 @@ pub fn init_rootfs(

    let mut bind_mount_dev = false;
    for m in &spec.mounts {
-        let (mut flags, data) = parse_mount(&m);
+        let (mut flags, pgflags, data) = parse_mount(&m);
        if !m.destination.starts_with('/') || m.destination.contains("..") {
            return Err(anyhow!(
                "the mount destination {} is invalid",
@@ -232,13 +225,15 @@ pub fn init_rootfs(
            // effective.
            // first check that we have non-default options required before attempting a
            // remount
-            if m.r#type == "bind" {
-                for o in &m.options {
-                    if let Some(fl) = PROPAGATION.get(o.as_str()) {
-                        let dest = secure_join(rootfs, &m.destination);
-                        mount(None::<&str>, dest.as_str(), None::<&str>, *fl, None::<&str>)?;
-                    }
-                }
+            if m.r#type == "bind" && !pgflags.is_empty() {
+                let dest = secure_join(rootfs, &m.destination);
+                mount(
+                    None::<&str>,
+                    dest.as_str(),
+                    None::<&str>,
+                    pgflags,
+                    None::<&str>,
+                )?;
            }
        }
    }
@@ -655,26 +650,27 @@ pub fn ms_move_root(rootfs: &str) -> Result<bool> {
    Ok(true)
 }

-fn parse_mount(m: &Mount) -> (MsFlags, String) {
+fn parse_mount(m: &Mount) -> (MsFlags, MsFlags, String) { // -> (flags, propagation flags, data)
    let mut flags = MsFlags::empty();
+    let mut pgflags = MsFlags::empty(); // union of flags matched in PROPAGATION
    let mut data = Vec::new();

    for o in &m.options {
-        match OPTIONS.get(o.as_str()) {
-            Some(v) => {
-                let (clear, fl) = *v;
-                if clear {
-                    flags &= !fl;
-                } else {
-                    flags |= fl;
-                }
+        if let Some(v) = OPTIONS.get(o.as_str()) {
+            let (clear, fl) = *v; // clear == true: the option unsets fl
+            if clear {
+                flags &= !fl;
+            } else {
+                flags |= fl;
            }
-
-            None => data.push(o.clone()),
+        } else if let Some(fl) = PROPAGATION.get(o.as_str()) {
+            pgflags |= *fl;
+        } else {
+            data.push(o.clone()); // in neither table: keep the option text
        }
    }

-    (flags, data.join(","))
+    (flags, pgflags, data.join(",")) // leftover options are comma-joined
 }

 // This function constructs a canonicalized path by combining the `rootfs` and `unsafe_path` elements.
@@ -920,7 +916,7 @@ pub fn finish_rootfs(cfd_log: RawFd, spec: &Spec) -> Result<()> {

    for m in spec.mounts.iter() {
        if m.destination == "/dev" {
-            let (flags, _) = parse_mount(m);
+            let (flags, _, _) = parse_mount(m);
            if flags.contains(MsFlags::MS_RDONLY) {
                mount(
                    Some("/dev"),
@@ -1365,7 +1361,7 @@ mod tests {
            let msg = format!("{}, result: {:?}", msg, result);

            // Perform the checks
-            assert!(result == t.result, msg);
+            assert!(result == t.result, "{}", msg);
        }
    }
 }
--- a/src/agent/rustjail/src/pipestream.rs
+++ b/src/agent/rustjail/src/pipestream.rs
@@ -77,10 +77,6 @@ impl PipeStream {
        Ok(Self(AsyncFd::new(StreamFd(fd))?))
    }

-    pub fn shutdown(&mut self) -> io::Result<()> {
-        self.0.get_mut().close()
-    }
-
    pub fn from_fd(fd: RawFd) -> Self {
        unsafe { Self::from_raw_fd(fd) }
    }
@@ -164,7 +160,44 @@ impl AsyncWrite for PipeStream {
    }

    fn poll_shutdown(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<io::Result<()>> {
-        self.get_mut().shutdown()?;
+        // Deliberately a no-op: shutdown must not close the fd.
+        // The only correct way to shut down a pipe is to drop it;
+        // otherwise this PipeStream conflicts with its twin, because
+        // both have the same fd and both are registered.
        Poll::Ready(Ok(()))
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use nix::fcntl::OFlag;
+    use tokio::io::{AsyncReadExt, AsyncWriteExt};
+
+    #[tokio::test]
+    // Shutdown should never close the inner fd.
+    async fn test_pipestream_shutdown() {
+        let (_, wfd1) = unistd::pipe2(OFlag::O_CLOEXEC).unwrap();
+        let mut writer1 = PipeStream::new(wfd1).unwrap();
+
+        // If shutdown closed the fd, the fd number would be reused below
+        // and the test would fail.
+        let _ = writer1.shutdown().await.unwrap();
+
+        // let _ = unistd::close(wfd1);
+
+        let (rfd2, wfd2) = unistd::pipe2(OFlag::O_CLOEXEC).unwrap(); // reuse fd number, rfd2 == wfd1
+
+        let mut reader2 = PipeStream::new(rfd2).unwrap();
+        let mut writer2 = PipeStream::new(wfd2).unwrap();
+
+        // Dropping writer1 deregisters it; a reader2 sharing the same fd would then be deregistered from epoll too.
+        drop(writer1);
+
+        let _ = writer2.write(b"1").await;
+
+        let mut content = vec![0u8; 1];
+        // Will block here if shutdown closes the fd.
+        let _ = reader2.read(&mut content).await;
+    }
+}
--- a/src/agent/rustjail/src/process.rs
+++ b/src/agent/rustjail/src/process.rs
@@ -29,7 +29,6 @@ pub enum StreamType {
    Stdin,
    Stdout,
    Stderr,
-    ExitPipeR,
    TermMaster,
    ParentStdin,
    ParentStdout,
@@ -45,8 +44,8 @@ pub struct Process {
    pub stdin: Option<RawFd>,
    pub stdout: Option<RawFd>,
    pub stderr: Option<RawFd>,
-    pub exit_pipe_r: Option<RawFd>,
-    pub exit_pipe_w: Option<RawFd>,
+    pub exit_tx: Option<tokio::sync::watch::Sender<bool>>,
+    pub exit_rx: Option<tokio::sync::watch::Receiver<bool>>,
    pub extra_files: Vec<File>,
    pub term_master: Option<RawFd>,
    pub tty: bool,
@@ -97,14 +96,15 @@ impl Process {
        pipe_size: i32,
    ) -> Result<Self> {
        let logger = logger.new(o!("subsystem" => "process"));
+        let (exit_tx, exit_rx) = tokio::sync::watch::channel(false);

        let mut p = Process {
            exec_id: String::from(id),
            stdin: None,
            stdout: None,
            stderr: None,
-            exit_pipe_w: None,
-            exit_pipe_r: None,
+            exit_tx: Some(exit_tx),
+            exit_rx: Some(exit_rx),
            extra_files: Vec::new(),
            tty: ocip.terminal,
            term_master: None,
@@ -152,7 +152,6 @@ impl Process {
            StreamType::Stdin => self.stdin,
            StreamType::Stdout => self.stdout,
            StreamType::Stderr => self.stderr,
-            StreamType::ExitPipeR => self.exit_pipe_r,
            StreamType::TermMaster => self.term_master,
            StreamType::ParentStdin => self.parent_stdin,
            StreamType::ParentStdout => self.parent_stdout,
--- a/src/agent/rustjail/src/sync_with_async.rs
+++ b/src/agent/rustjail/src/sync_with_async.rs
@@ -117,28 +117,20 @@ pub async fn write_async(pipe_w: &mut PipeStream, msg_type: i32, data_str: &str)
    }

    match msg_type {
-        SYNC_FAILED => match write_count(pipe_w, data_str.as_bytes(), data_str.len()).await {
-            Ok(_) => pipe_w.shutdown()?,
-            Err(e) => {
-                pipe_w.shutdown()?;
+        SYNC_FAILED => {
+            if let Err(e) = write_count(pipe_w, data_str.as_bytes(), data_str.len()).await {
                return Err(anyhow!(e).context("error in send message to process"));
            }
-        },
+        }
        SYNC_DATA => {
            let length: i32 = data_str.len() as i32;
            write_count(pipe_w, &length.to_be_bytes(), MSG_SIZE)
                .await
-                .or_else(|e| {
-                    pipe_w.shutdown()?;
-                    Err(anyhow!(e).context("error in send message to process"))
-                })?;
+                .map_err(|e| anyhow!(e).context("error in send message to process"))?;

            write_count(pipe_w, data_str.as_bytes(), data_str.len())
                .await
-                .or_else(|e| {
-                    pipe_w.shutdown()?;
-                    Err(anyhow!(e).context("error in send message to process"))
-                })?;
+                .map_err(|e| anyhow!(e).context("error in send message to process"))?;
        }

        _ => (),
--- a/src/agent/rustjail/src/validator.rs
+++ b/src/agent/rustjail/src/validator.rs
@@ -6,7 +6,7 @@
 use crate::container::Config;
 use anyhow::{anyhow, Context, Error, Result};
 use nix::errno::Errno;
-use oci::{Linux, LinuxIDMapping, LinuxNamespace, Spec};
+use oci::{Linux, LinuxIdMapping, LinuxNamespace, Spec};
 use std::collections::HashMap;
 use std::path::{Component, PathBuf};

@@ -28,16 +28,6 @@ fn contain_namespace(nses: &[LinuxNamespace], key: &str) -> bool {
    false
 }

-fn get_namespace_path(nses: &[LinuxNamespace], key: &str) -> Result<String> {
-    for ns in nses {
-        if ns.r#type.as_str() == key {
-            return Ok(ns.path.clone());
-        }
-    }
-
-    Err(einval())
-}
-
 fn rootfs(root: &str) -> Result<()> {
    let path = PathBuf::from(root);
    // not absolute path or not exists
@@ -107,7 +97,7 @@ fn security(oci: &Spec) -> Result<()> {
    Ok(())
 }

-fn idmapping(maps: &[LinuxIDMapping]) -> Result<()> {
+fn idmapping(maps: &[LinuxIdMapping]) -> Result<()> {
    for map in maps {
        if map.size > 0 {
            return Ok(());
@@ -166,31 +156,6 @@ lazy_static! {
    };
 }

-fn check_host_ns(path: &str) -> Result<()> {
-    let cpath = PathBuf::from(path);
-    let hpath = PathBuf::from("/proc/self/ns/net");
-
-    let real_hpath = hpath
-        .read_link()
-        .context(format!("read link {:?}", hpath))?;
-    let meta = cpath
-        .symlink_metadata()
-        .context(format!("symlink metadata {:?}", cpath))?;
-    let file_type = meta.file_type();
-
-    if !file_type.is_symlink() {
-        return Ok(());
-    }
-    let real_cpath = cpath
-        .read_link()
-        .context(format!("read link {:?}", cpath))?;
-    if real_cpath == real_hpath {
-        return Err(einval());
-    }
-
-    Ok(())
-}
-
 fn sysctl(oci: &Spec) -> Result<()> {
    let linux = get_linux(oci)?;

@@ -238,7 +203,7 @@ fn rootless_euid_mapping(oci: &Spec) -> Result<()> {
    Ok(())
 }

-fn has_idmapping(maps: &[LinuxIDMapping], id: u32) -> bool {
+fn has_idmapping(maps: &[LinuxIdMapping], id: u32) -> bool {
    for map in maps {
        if id >= map.container_id && id < map.container_id + map.size {
            return true;
@@ -334,19 +299,6 @@ mod tests {
        assert_eq!(contain_namespace(&namespaces, ""), false);
        assert_eq!(contain_namespace(&namespaces, "Net"), false);
        assert_eq!(contain_namespace(&namespaces, "ipc"), false);
-
-        assert_eq!(
-            get_namespace_path(&namespaces, "net").unwrap(),
-            "/sys/cgroups/net"
-        );
-        assert_eq!(
-            get_namespace_path(&namespaces, "uts").unwrap(),
-            "/sys/cgroups/uts"
-        );
-
-        get_namespace_path(&namespaces, "").unwrap_err();
-        get_namespace_path(&namespaces, "Uts").unwrap_err();
-        get_namespace_path(&namespaces, "ipc").unwrap_err();
    }

    #[test]
@@ -441,7 +393,7 @@ mod tests {
        usernamespace(&spec).unwrap();

        let mut linux = Linux::default();
-        linux.uid_mappings = vec![LinuxIDMapping {
+        linux.uid_mappings = vec![LinuxIdMapping {
            container_id: 0,
            host_id: 1000,
            size: 0,
@@ -450,7 +402,7 @@ mod tests {
        usernamespace(&spec).unwrap_err();

        let mut linux = Linux::default();
-        linux.uid_mappings = vec![LinuxIDMapping {
+        linux.uid_mappings = vec![LinuxIdMapping {
            container_id: 0,
            host_id: 1000,
            size: 100,
@@ -497,12 +449,12 @@ mod tests {
                path: "/sys/cgroups/user".to_owned(),
            },
        ];
-        linux.uid_mappings = vec![LinuxIDMapping {
+        linux.uid_mappings = vec![LinuxIdMapping {
            container_id: 0,
            host_id: 1000,
            size: 1000,
        }];
-        linux.gid_mappings = vec![LinuxIDMapping {
+        linux.gid_mappings = vec![LinuxIdMapping {
            container_id: 0,
            host_id: 1000,
            size: 1000,
@@ -528,12 +480,6 @@ mod tests {
        rootless_euid(&spec).unwrap();
    }

-    #[test]
-    fn test_check_host_ns() {
-        check_host_ns("/proc/self/ns/net").unwrap_err();
-        check_host_ns("/proc/sys/net/ipv4/tcp_sack").unwrap();
-    }
-
    #[test]
    fn test_sysctl() {
        let mut spec = Spec::default();
--- a/src/agent/src/config.rs
+++ b/src/agent/src/config.rs
@@ -2,13 +2,16 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 //
+use crate::tracer;
 use anyhow::{anyhow, Result};
 use std::env;
 use std::fs;
 use std::time;
+use tracing::instrument;

 const DEBUG_CONSOLE_FLAG: &str = "agent.debug_console";
 const DEV_MODE_FLAG: &str = "agent.devmode";
+const TRACE_MODE_OPTION: &str = "agent.trace";
 const LOG_LEVEL_OPTION: &str = "agent.log";
 const SERVER_ADDR_OPTION: &str = "agent.server_addr";
 const HOTPLUG_TIMOUT_OPTION: &str = "agent.hotplug_timeout";
@@ -26,6 +29,7 @@ const VSOCK_PORT: u16 = 1024;
 // Environment variables used for development and testing
 const SERVER_ADDR_ENV_VAR: &str = "KATA_AGENT_SERVER_ADDR";
 const LOG_LEVEL_ENV_VAR: &str = "KATA_AGENT_LOG_LEVEL";
+const TRACE_TYPE_ENV_VAR: &str = "KATA_AGENT_TRACE_TYPE";

 const ERR_INVALID_LOG_LEVEL: &str = "invalid log level";
 const ERR_INVALID_LOG_LEVEL_PARAM: &str = "invalid log level parameter";
@@ -54,6 +58,7 @@ pub struct AgentConfig {
    pub container_pipe_size: i32,
    pub server_addr: String,
    pub unified_cgroup_hierarchy: bool,
+    pub tracing: tracer::TraceType,
 }

 // parse_cmdline_param parse commandline parameters.
@@ -98,9 +103,11 @@ impl AgentConfig {
            container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
            server_addr: format!("{}:{}", VSOCK_ADDR, VSOCK_PORT),
            unified_cgroup_hierarchy: false,
+            tracing: tracer::TraceType::Disabled,
        }
    }

+    #[instrument]
    pub fn parse_cmdline(&mut self, file: &str) -> Result<()> {
        let cmdline = fs::read_to_string(file)?;
        let params: Vec<&str> = cmdline.split_ascii_whitespace().collect();
@@ -109,6 +116,15 @@ impl AgentConfig {
            parse_cmdline_param!(param, DEBUG_CONSOLE_FLAG, self.debug_console);
            parse_cmdline_param!(param, DEV_MODE_FLAG, self.dev_mode);

+            // Support "bare" tracing option for backwards compatibility with
+            // Kata 1.x.
+            if param == &TRACE_MODE_OPTION {
+                self.tracing = tracer::TraceType::Isolated;
+                continue;
+            }
+
+            parse_cmdline_param!(param, TRACE_MODE_OPTION, self.tracing, get_trace_type);
+
            // parse cmdline options
            parse_cmdline_param!(param, LOG_LEVEL_OPTION, self.log_level, get_log_level);
            parse_cmdline_param!(
@@ -167,10 +183,17 @@ impl AgentConfig {
            }
        }

+        if let Ok(value) = env::var(TRACE_TYPE_ENV_VAR) {
+            if let Ok(result) = value.parse::<tracer::TraceType>() {
+                self.tracing = result;
+            }
+        }
+
        Ok(())
    }
 }

+#[instrument]
 fn get_vsock_port(p: &str) -> Result<i32> {
    let fields: Vec<&str> = p.split('=').collect();
    if fields.len() != 2 {
@@ -185,6 +208,7 @@ fn get_vsock_port(p: &str) -> Result<i32> {
 //
 // Note: Logrus names are used for compatability with the previous
 // golang-based agent.
+#[instrument]
 fn logrus_to_slog_level(logrus_level: &str) -> Result<slog::Level> {
    let level = match logrus_level {
        // Note: different semantics to logrus: log, but don't panic.
@@ -207,6 +231,7 @@ fn logrus_to_slog_level(logrus_level: &str) -> Result<slog::Level> {
    Ok(level)
 }

+#[instrument]
 fn get_log_level(param: &str) -> Result<slog::Level> {
    let fields: Vec<&str> = param.split('=').collect();

@@ -221,6 +246,28 @@ fn get_log_level(param: &str) -> Result<slog::Level> {
    }
 }

+#[instrument]
+fn get_trace_type(param: &str) -> Result<tracer::TraceType> { // parses "agent.trace[=<type>]"
+    if param.is_empty() {
+        return Err(anyhow!("invalid trace type parameter"));
+    }
+
+    let fields: Vec<&str> = param.split('=').collect(); // fields[0] is the key, fields[1] the value
+
+    if fields[0] != TRACE_MODE_OPTION {
+        return Err(anyhow!("invalid trace type key name"));
+    }
+
+    if fields.len() == 1 { // key only, no "=": use Isolated
+        return Ok(tracer::TraceType::Isolated);
+    }
+
+    let result = fields[1].parse::<tracer::TraceType>()?; // only fields[1] is parsed; errors propagate
+
+    Ok(result)
+}
+
+#[instrument]
 fn get_hotplug_timeout(param: &str) -> Result<time::Duration> {
    let fields: Vec<&str> = param.split('=').collect();

@@ -241,6 +288,7 @@ fn get_hotplug_timeout(param: &str) -> Result<time::Duration> {
    Ok(time::Duration::from_secs(value.unwrap()))
 }

+#[instrument]
 fn get_bool_value(param: &str) -> Result<bool> {
    let fields: Vec<&str> = param.split('=').collect();

@@ -265,6 +313,7 @@ fn get_bool_value(param: &str) -> Result<bool> {
 // - A value can contain any number of equal signs.
 // - We could/should maybe check if the name is pure whitespace
 //   since this is considered to be invalid.
+#[instrument]
 fn get_string_value(param: &str) -> Result<String> {
    let fields: Vec<&str> = param.split('=').collect();

@@ -273,18 +322,19 @@ fn get_string_value(param: &str) -> Result<String> {
    }

    // We need name (but the value can be blank)
-    if fields[0] == "" {
+    if fields[0].is_empty() {
        return Err(anyhow!(ERR_INVALID_GET_VALUE_NO_NAME));
    }

    let value = fields[1..].join("=");
-    if value == "" {
+    if value.is_empty() {
        return Err(anyhow!(ERR_INVALID_GET_VALUE_NO_VALUE));
    }

    Ok(value)
 }

+#[instrument]
 fn get_container_pipe_size(param: &str) -> Result<i32> {
    let fields: Vec<&str> = param.split('=').collect();

@@ -319,6 +369,10 @@ mod tests {
    use std::time;
    use tempfile::tempdir;

+    const ERR_INVALID_TRACE_TYPE_PARAM: &str = "invalid trace type parameter";
+    const ERR_INVALID_TRACE_TYPE: &str = "invalid trace type";
+    const ERR_INVALID_TRACE_TYPE_KEY: &str = "invalid trace type key name";
+
    // helper function to make errors less crazy-long
    fn make_err(desc: &str) -> Error {
        anyhow!(desc.to_string())
@@ -334,7 +388,7 @@ mod tests {
            if $expected_result.is_ok() {
                let expected_level = $expected_result.as_ref().unwrap();
                let actual_level = $actual_result.unwrap();
-                assert!(*expected_level == actual_level, $msg);
+                assert!(*expected_level == actual_level, "{}", $msg);
            } else {
                let expected_error = $expected_result.as_ref().unwrap_err();
                let expected_error_msg = format!("{:?}", expected_error);
@@ -342,9 +396,9 @@ mod tests {
                if let Err(actual_error) = $actual_result {
                    let actual_error_msg = format!("{:?}", actual_error);

-                    assert!(expected_error_msg == actual_error_msg, $msg);
+                    assert!(expected_error_msg == actual_error_msg, "{}", $msg);
                } else {
-                    assert!(expected_error_msg == "expected error, got OK", $msg);
+                    assert!(expected_error_msg == "expected error, got OK", "{}", $msg);
                }
            }
        };
@@ -374,6 +428,7 @@ mod tests {
            container_pipe_size: i32,
            server_addr: &'a str,
            unified_cgroup_hierarchy: bool,
+            tracing: tracer::TraceType,
        }

        let tests = &[
@@ -387,6 +442,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.debug_console agent.devmodex",
@@ -398,6 +454,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.logx=debug",
@@ -409,6 +466,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.log=debug",
@@ -420,6 +478,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.log=debug",
@@ -431,6 +490,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "",
@@ -442,6 +502,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "foo",
@@ -453,6 +514,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "foo bar",
@@ -464,6 +526,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "foo bar",
@@ -475,6 +538,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "foo agent bar",
@@ -486,6 +550,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "foo debug_console agent bar devmode",
@@ -497,6 +562,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.debug_console",
@@ -508,6 +574,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "   agent.debug_console ",
@@ -519,6 +586,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.debug_console foo",
@@ -530,6 +598,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: " agent.debug_console foo",
@@ -541,6 +610,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "foo agent.debug_console bar",
@@ -552,6 +622,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "foo agent.debug_console",
@@ -563,6 +634,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "foo agent.debug_console ",
@@ -574,6 +646,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.devmode",
@@ -585,6 +658,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "   agent.devmode ",
@@ -596,6 +670,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.devmode foo",
@@ -607,6 +682,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: " agent.devmode foo",
@@ -618,6 +694,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "foo agent.devmode bar",
@@ -629,6 +706,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "foo agent.devmode",
@@ -640,6 +718,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "foo agent.devmode ",
@@ -651,6 +730,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.devmode agent.debug_console",
@@ -662,6 +742,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.devmode agent.debug_console agent.hotplug_timeout=100 agent.unified_cgroup_hierarchy=a",
@@ -673,6 +754,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.devmode agent.debug_console agent.hotplug_timeout=0 agent.unified_cgroup_hierarchy=11",
@@ -684,6 +766,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: true,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.devmode agent.debug_console agent.container_pipe_size=2097152 agent.unified_cgroup_hierarchy=false",
@@ -695,6 +778,7 @@ mod tests {
                container_pipe_size: 2097152,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.devmode agent.debug_console agent.container_pipe_size=100 agent.unified_cgroup_hierarchy=true",
@@ -706,6 +790,7 @@ mod tests {
                container_pipe_size: 100,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: true,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.devmode agent.debug_console agent.container_pipe_size=0 agent.unified_cgroup_hierarchy=0",
@@ -717,6 +802,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.devmode agent.debug_console agent.container_pip_siz=100 agent.unified_cgroup_hierarchy=1",
@@ -728,6 +814,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: true,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "",
@@ -739,6 +826,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "",
@@ -750,6 +838,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: "foo",
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "",
@@ -761,6 +850,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: "=",
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "",
@@ -772,6 +862,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: "=foo",
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "",
@@ -783,6 +874,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: "foo=bar=baz=",
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "",
@@ -794,6 +886,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: "unix:///tmp/foo.socket",
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "",
@@ -805,6 +898,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: "unix://@/tmp/foo.socket",
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "",
@@ -816,6 +910,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "",
@@ -827,6 +922,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "",
@@ -838,6 +934,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "",
@@ -849,6 +946,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "server_addr=unix:///tmp/foo.socket",
@@ -860,6 +958,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.server_address=unix:///tmp/foo.socket",
@@ -871,6 +970,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.server_addr=unix:///tmp/foo.socket",
@@ -882,6 +982,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: "unix:///tmp/foo.socket",
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: " agent.server_addr=unix:///tmp/foo.socket",
@@ -893,6 +994,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: "unix:///tmp/foo.socket",
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: " agent.server_addr=unix:///tmp/foo.socket a",
@@ -904,6 +1006,115 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: "unix:///tmp/foo.socket",
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
+            },
+            TestData {
+                contents: "trace",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: TEST_SERVER_ADDR,
+                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
+            },
+            TestData {
+                contents: ".trace",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: TEST_SERVER_ADDR,
+                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
+            },
+            TestData {
+                contents: "agent.tracer",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: TEST_SERVER_ADDR,
+                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
+            },
+            TestData {
+                contents: "agent.trac",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: TEST_SERVER_ADDR,
+                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
+            },
+            TestData {
+                contents: "agent.trace",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: TEST_SERVER_ADDR,
+                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Isolated,
+            },
+            TestData {
+                contents: "agent.trace=isolated",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: TEST_SERVER_ADDR,
+                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Isolated,
+            },
+            TestData {
+                contents: "agent.trace=disabled",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: TEST_SERVER_ADDR,
+                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
+            },
+            TestData {
+                contents: "",
+                env_vars: vec!["KATA_AGENT_TRACE_TYPE=isolated"],
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: TEST_SERVER_ADDR,
+                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Isolated,
+            },
+            TestData {
+                contents: "",
+                env_vars: vec!["KATA_AGENT_TRACE_TYPE=disabled"],
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: TEST_SERVER_ADDR,
+                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
        ];

@@ -958,6 +1169,7 @@ mod tests {
            );
            assert_eq!(config.container_pipe_size, 0, "{}", msg);
            assert_eq!(config.server_addr, TEST_SERVER_ADDR, "{}", msg);
+            assert_eq!(config.tracing, tracer::TraceType::Disabled, "{}", msg);

            let result = config.parse_cmdline(filename);
            assert!(result.is_ok(), "{}", msg);
@@ -973,6 +1185,7 @@ mod tests {
            assert_eq!(d.hotplug_timeout, config.hotplug_timeout, "{}", msg);
            assert_eq!(d.container_pipe_size, config.container_pipe_size, "{}", msg);
            assert_eq!(d.server_addr, config.server_addr, "{}", msg);
+            assert_eq!(d.tracing, config.tracing, "{}", msg);

            for v in vars_to_unset {
                env::remove_var(v);
@@ -1369,4 +1582,62 @@ mod tests {
            assert_result!(d.result, result, msg);
        }
    }
+
+    // Table-driven test for get_trace_type(): every entry pairs one
+    // parameter string with the result expected for it.
+    #[test]
+    fn test_get_trace_type() {
+        #[derive(Debug)]
+        struct TestData<'a> {
+            param: &'a str,
+            result: Result<tracer::TraceType>,
+        }
+
+        let tests = &[
+            // Empty parameter.
+            TestData {
+                param: "",
+                result: Err(make_err(ERR_INVALID_TRACE_TYPE_PARAM)),
+            },
+            // Keys that are not exactly "agent.trace".
+            TestData {
+                param: "agent.tracer",
+                result: Err(make_err(ERR_INVALID_TRACE_TYPE_KEY)),
+            },
+            TestData {
+                param: "agent.trac",
+                result: Err(make_err(ERR_INVALID_TRACE_TYPE_KEY)),
+            },
+            // Correct key, but a missing or unknown value.
+            TestData {
+                param: "agent.trace=",
+                result: Err(make_err(ERR_INVALID_TRACE_TYPE)),
+            },
+            TestData {
+                param: "agent.trace==",
+                result: Err(make_err(ERR_INVALID_TRACE_TYPE)),
+            },
+            TestData {
+                param: "agent.trace=foo",
+                result: Err(make_err(ERR_INVALID_TRACE_TYPE)),
+            },
+            // Accepted forms.
+            TestData {
+                param: "agent.trace",
+                result: Ok(tracer::TraceType::Isolated),
+            },
+            TestData {
+                param: "agent.trace=isolated",
+                result: Ok(tracer::TraceType::Isolated),
+            },
+            TestData {
+                param: "agent.trace=disabled",
+                result: Ok(tracer::TraceType::Disabled),
+            },
+        ];
+
+        for (idx, case) in tests.iter().enumerate() {
+            let result = get_trace_type(case.param);
+
+            // Failure message: the test case itself plus what was returned.
+            let msg = format!("test[{}]: {:?}: result: {:?}", idx, case, result);
+
+            assert_result!(case.result, result, msg);
+        }
+    }
 }
--- a/src/agent/src/console.rs
+++ b/src/agent/src/console.rs
@@ -0,0 +1,295 @@
+// Copyright (c) 2021 Ant Group
+// Copyright (c) 2021 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use crate::util;
+use anyhow::{anyhow, Result};
+use nix::fcntl::{self, FcntlArg, FdFlag, OFlag};
+use nix::libc::{STDERR_FILENO, STDIN_FILENO, STDOUT_FILENO};
+use nix::pty::{openpty, OpenptyResult};
+use nix::sys::socket::{self, AddressFamily, SockAddr, SockFlag, SockType};
+use nix::sys::stat::Mode;
+use nix::sys::wait;
+use nix::unistd::{self, close, dup2, fork, setsid, ForkResult, Pid};
+use rustjail::pipestream::PipeStream;
+use slog::Logger;
+use std::ffi::CString;
+use std::os::unix::io::{FromRawFd, RawFd};
+use std::path::PathBuf;
+use std::process::Stdio;
+use std::sync::Arc;
+use std::sync::Mutex as SyncMutex;
+
+use futures::StreamExt;
+use tokio::io::{AsyncRead, AsyncWrite};
+use tokio::select;
+use tokio::sync::watch::Receiver;
+
+const CONSOLE_PATH: &str = "/dev/console";
+
+lazy_static! {
+    /// Shells the debug console may launch, in the order they are tried.
+    /// The list starts out empty in test builds so that unit tests fully
+    /// control its contents.
+    static ref SHELLS: Arc<SyncMutex<Vec<String>>> = {
+        let candidates: Vec<String> = if cfg!(test) {
+            Vec::new()
+        } else {
+            vec!["/bin/bash".to_string(), "/bin/sh".to_string()]
+        };
+
+        Arc::new(SyncMutex::new(candidates))
+    };
+}
+
+/// Force evaluation of the lazily created `SHELLS` list.
+pub fn initialize() {
+    lazy_static::initialize(&SHELLS);
+}
+
+/// Run the agent debug console until it finishes or a shutdown is requested.
+///
+/// The first entry of `SHELLS` that exists on disk is used as the shell; an
+/// error is returned when there is none. With a non-zero `port` the console
+/// listens on that vsock port and serves every accepted connection from its
+/// own task. With `port == 0` a single shell is attached to `/dev/console`.
+pub async fn debug_console_handler(
+    logger: Logger,
+    port: u32,
+    mut shutdown: Receiver<bool>,
+) -> Result<()> {
+    let logger = logger.new(o!("subsystem" => "debug-console"));
+
+    let candidates = SHELLS.lock().unwrap().to_vec();
+    let shell = candidates
+        .into_iter()
+        .find(|sh| PathBuf::from(sh).exists())
+        .ok_or_else(|| anyhow!("no shell found to launch debug console"))?;
+
+    if port == 0 {
+        // No vsock port configured: run the shell on the system console.
+        let flags = OFlag::O_RDWR | OFlag::O_CLOEXEC;
+        let fd = fcntl::open(CONSOLE_PATH, flags, Mode::empty())?;
+
+        select! {
+            _ = shutdown.changed() => {
+                info!(logger, "debug console got shutdown request");
+            }
+
+            result = run_debug_console_serial(shell.clone(), fd) => {
+                match result {
+                    Err(err) => {
+                        error!(logger, "run_debug_console_shell failed: {:?}", err);
+                    }
+                    Ok(_) => {
+                        info!(logger, "run_debug_console_shell session finished");
+                    }
+                }
+            }
+        }
+
+        return Ok(());
+    }
+
+    // vsock mode: listen on the requested port for any peer CID.
+    let listenfd = socket::socket(
+        AddressFamily::Vsock,
+        SockType::Stream,
+        SockFlag::SOCK_CLOEXEC,
+        None,
+    )?;
+    let addr = SockAddr::new_vsock(libc::VMADDR_CID_ANY, port);
+    socket::bind(listenfd, &addr)?;
+    socket::listen(listenfd, 1)?;
+
+    let mut incoming = util::get_vsock_incoming(listenfd);
+
+    loop {
+        select! {
+            _ = shutdown.changed() => {
+                info!(logger, "debug console got shutdown request");
+                break;
+            }
+
+            conn = incoming.next() => {
+                match conn {
+                    // A new connection was accepted.
+                    Some(Ok(stream)) => {
+                        let logger = logger.clone();
+                        let shell = shell.clone();
+                        // Serve the session from its own task: awaiting it
+                        // here would keep us from ever seeing the shutdown
+                        // signal.
+                        tokio::spawn(async move {
+                            let _ = run_debug_console_vsock(logger, shell, stream).await;
+                        });
+                    }
+                    Some(Err(e)) => {
+                        error!(logger, "{:?}", e);
+                    }
+                    // The stream of incoming connections has ended.
+                    None => break,
+                }
+            }
+        }
+    }
+
+    Ok(())
+}
+
+/// Turn the freshly forked child into the debug shell.
+///
+/// Makes the child a session leader, points stdin/stdout/stderr at the pty
+/// slave `slave_fd`, issues `TIOCSCTTY` on fd 0 and finally execs `shell`.
+/// If `execvp()` fails, the child exits with the errno value (or -2 for a
+/// non-errno failure), so the trailing `Ok(())` is not expected to be
+/// reached.
+///
+/// Errors from the setup steps (`setsid`, `dup2`, an invalid shell path) are
+/// returned to the caller, which at that point is still running inside the
+/// forked child.
+fn run_in_child(slave_fd: libc::c_int, shell: String) -> Result<()> {
+    // create new session with child as session leader
+    setsid()?;
+
+    // dup stdin, stdout, stderr to let child act as a terminal
+    dup2(slave_fd, STDIN_FILENO)?;
+    dup2(slave_fd, STDOUT_FILENO)?;
+    dup2(slave_fd, STDERR_FILENO)?;
+
+    // Ask for the pty (now fd 0) to become the controlling terminal. The
+    // result is deliberately not checked (best effort).
+    unsafe {
+        libc::ioctl(0, libc::TIOCSCTTY);
+    }
+
+    // A path with an interior NUL byte cannot be handed to exec: report it
+    // as an error rather than panicking in the forked child.
+    let cmd = CString::new(shell)?;
+
+    // By convention argv[0] names the program being executed. Multi-call
+    // binaries rely on it to select their behaviour, so never exec with an
+    // empty argument vector.
+    let args: Vec<CString> = vec![cmd.clone()];
+
+    // run shell
+    let _ = unistd::execvp(cmd.as_c_str(), &args).map_err(|e| match e {
+        nix::Error::Sys(errno) => {
+            std::process::exit(errno as i32);
+        }
+        _ => std::process::exit(-2),
+    });
+
+    Ok(())
+}
+
+/// Parent side of a debug console session.
+///
+/// Relays data between the client `stream` and the pty master in both
+/// directions until either copy finishes, then waits for the shell process
+/// `child_pid` and logs its wait status.
+async fn run_in_parent<T: AsyncRead + AsyncWrite>(
+    logger: Logger,
+    stream: T,
+    pseudo: OpenptyResult,
+    child_pid: Pid,
+) -> Result<()> {
+    info!(logger, "get debug shell pid {:?}", child_pid);
+
+    // Only the master end is used on this side; close our copy of the slave.
+    let pty_master = pseudo.master;
+    let _ = close(pseudo.slave);
+
+    let (mut from_client, mut to_client) = tokio::io::split(stream);
+    let (mut from_shell, mut to_shell) = tokio::io::split(PipeStream::from_fd(pty_master));
+
+    // Whichever direction completes first ends the session.
+    select! {
+        res = tokio::io::copy(&mut from_shell, &mut to_client) => {
+            debug!(logger, "master closed: {:?}", res);
+        }
+        res = tokio::io::copy(&mut from_client, &mut to_shell) => {
+            info!(logger, "socket closed: {:?}", res);
+        }
+    }
+
+    let status = wait::waitpid(child_pid, None);
+    info!(logger, "debug console process exit code: {:?}", status);
+
+    Ok(())
+}
+
+/// Serve one debug console connection.
+///
+/// Opens a new pty pair, marks both ends close-on-exec and forks: the child
+/// becomes the shell on the slave end (`run_in_child`), while the parent
+/// relays data between `stream` and the master end (`run_in_parent`).
+async fn run_debug_console_vsock<T: AsyncRead + AsyncWrite>(
+    logger: Logger,
+    shell: String,
+    stream: T,
+) -> Result<()> {
+    let logger = logger.new(o!("subsystem" => "debug-console-shell"));
+
+    let pseudo = openpty(None, None)?;
+    // Failures to set the flag are ignored.
+    for fd in &[pseudo.master, pseudo.slave] {
+        let _ = fcntl::fcntl(*fd, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC));
+    }
+
+    let forked = unsafe { fork() }.map_err(|err| anyhow!("fork error: {:?}", err))?;
+
+    match forked {
+        ForkResult::Child => run_in_child(pseudo.slave, shell),
+        ForkResult::Parent { child } => {
+            run_in_parent(logger.clone(), stream, pseudo, child).await
+        }
+    }
+}
+
+/// Run an interactive shell (`-i`) on the already opened console `fd` and
+/// wait for it to exit.
+///
+/// Ownership of `fd` passes to this function: it is closed once the shell
+/// has been spawned (or spawning has failed). The child is killed if the
+/// returned future is dropped before the shell exits (`kill_on_drop`).
+///
+/// Returns an error if a descriptor cannot be duplicated, if the shell
+/// cannot be spawned ("failed to spawn shell", with the I/O error attached
+/// as cause) or if waiting for it fails.
+async fn run_debug_console_serial(shell: String, fd: RawFd) -> Result<()> {
+    // Every `Stdio` owns its descriptor and closes it when dropped, so the
+    // three standard streams must not share a single raw fd: that would
+    // close the same descriptor number three times. stderr takes over `fd`
+    // itself; stdin and stdout get close-on-exec duplicates, allocated at
+    // fd 3 or above.
+    let stderr = unsafe { Stdio::from_raw_fd(fd) };
+    let stdin = unsafe { Stdio::from_raw_fd(fcntl::fcntl(fd, FcntlArg::F_DUPFD_CLOEXEC(3))?) };
+    let stdout = unsafe { Stdio::from_raw_fd(fcntl::fcntl(fd, FcntlArg::F_DUPFD_CLOEXEC(3))?) };
+
+    let mut child = tokio::process::Command::new(shell)
+        .arg("-i")
+        .kill_on_drop(true)
+        .stdin(stdin)
+        .stdout(stdout)
+        .stderr(stderr)
+        .spawn()
+        // Same top-level message as before; the I/O error is kept as cause.
+        .map_err(|e| anyhow!(e).context("failed to spawn shell"))?;
+
+    child.wait().await?;
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::tempdir;
+    use tokio::sync::watch;
+
+    // Run the handler with port 0 and check that it fails because no usable
+    // shell could be found.
+    async fn expect_no_shell_error() {
+        let logger = slog_scope::logger();
+
+        let (_, rx) = watch::channel(true);
+        let result = debug_console_handler(logger, 0, rx).await;
+
+        assert!(result.is_err());
+        assert_eq!(
+            result.unwrap_err().to_string(),
+            "no shell found to launch debug console"
+        );
+    }
+
+    #[tokio::test]
+    async fn test_setup_debug_console_no_shells() {
+        // Guarantee no shells have been added
+        // (required to avoid racing with
+        // test_setup_debug_console_invalid_shell()).
+        SHELLS.lock().unwrap().clear();
+
+        expect_no_shell_error().await;
+    }
+
+    #[tokio::test]
+    async fn test_setup_debug_console_invalid_shell() {
+        {
+            let mut shells = SHELLS.lock().unwrap();
+
+            let dir = tempdir().expect("failed to create tmpdir");
+
+            // Register a shell path that does not exist.
+            let missing_shell = dir.path().join("enoent");
+            let missing_shell = missing_shell
+                .to_str()
+                .expect("failed to construct shell path")
+                .to_string();
+
+            shells.push(missing_shell);
+        }
+
+        expect_no_shell_error().await;
+    }
+}
--- a/src/agent/src/device.rs
+++ b/src/agent/src/device.rs
@@ -5,6 +5,7 @@

 use libc::{c_uint, major, minor};
 use nix::sys::stat;
+use regex::Regex;
 use std::collections::HashMap;
 use std::fs;
 use std::os::unix::fs::MetadataExt;
@@ -17,10 +18,11 @@ use crate::linux_abi::*;
 use crate::mount::{DRIVER_BLK_TYPE, DRIVER_MMIO_BLK_TYPE, DRIVER_NVDIMM_TYPE, DRIVER_SCSI_TYPE};
 use crate::pci;
 use crate::sandbox::Sandbox;
-use crate::{AGENT_CONFIG, GLOBAL_DEVICE_WATCHER};
+use crate::uevent::{wait_for_uevent, Uevent, UeventMatcher};
 use anyhow::{anyhow, Result};
 use oci::{LinuxDeviceCgroup, LinuxResources, Spec};
 use protocols::agent::Device;
+use tracing::instrument;

 // Convenience macro to obtain the scope logger
 macro_rules! sl {
@@ -31,17 +33,21 @@ macro_rules! sl {

 const VM_ROOTFS: &str = "/";

+#[derive(Debug)]
 struct DevIndexEntry {
    idx: usize,
    residx: Vec<usize>,
 }

+#[derive(Debug)]
 struct DevIndex(HashMap<String, DevIndexEntry>);

+#[instrument]
 pub fn rescan_pci_bus() -> Result<()> {
    online_device(SYSFS_PCI_BUS_RESCAN_FILE)
 }

+#[instrument]
 pub fn online_device(path: &str) -> Result<()> {
    fs::write(path, "1")?;
    Ok(())
@@ -50,6 +56,7 @@ pub fn online_device(path: &str) -> Result<()> {
 // pcipath_to_sysfs fetches the sysfs path for a PCI path, relative to
 // the sysfs path for the PCI host bridge, based on the PCI path
 // provided.
+#[instrument]
 fn pcipath_to_sysfs(root_bus_sysfs: &str, pcipath: &pci::Path) -> Result<String> {
    let mut bus = "0000:00".to_string();
    let mut relpath = String::new();
@@ -87,78 +94,123 @@ fn pcipath_to_sysfs(root_bus_sysfs: &str, pcipath: &pci::Path) -> Result<String>
    Ok(relpath)
 }

-async fn get_device_name(sandbox: &Arc<Mutex<Sandbox>>, dev_addr: &str) -> Result<String> {
-    // Keep the same lock order as uevent::handle_block_add_event(), otherwise it may cause deadlock.
-    let mut w = GLOBAL_DEVICE_WATCHER.lock().await;
-    let sb = sandbox.lock().await;
-    for (key, value) in sb.pci_device_map.iter() {
-        if key.contains(dev_addr) {
-            info!(sl!(), "Device {} found in pci device map", dev_addr);
-            return Ok(format!("{}/{}", SYSTEM_DEV_PATH, value));
-        }
-    }
-    drop(sb);
-
-    // If device is not found in the device map, hotplug event has not
-    // been received yet, create and add channel to the watchers map.
-    // The key of the watchers map is the device we are interested in.
-    // Note this is done inside the lock, not to miss any events from the
-    // global udev listener.
-    let (tx, rx) = tokio::sync::oneshot::channel::<String>();
-    w.insert(dev_addr.to_string(), Some(tx));
-    drop(w);
-
-    info!(sl!(), "Waiting on channel for device notification\n");
-    let hotplug_timeout = AGENT_CONFIG.read().await.hotplug_timeout;
-
-    let dev_name = match tokio::time::timeout(hotplug_timeout, rx).await {
-        Ok(v) => v?,
-        Err(_) => {
-            let watcher = GLOBAL_DEVICE_WATCHER.clone();
-            let mut w = watcher.lock().await;
-            w.remove_entry(dev_addr);
-
-            return Err(anyhow!(
-                "Timeout reached after {:?} waiting for device {}",
-                hotplug_timeout,
-                dev_addr
-            ));
-        }
-    };
-
-    Ok(format!("{}/{}", SYSTEM_DEV_PATH, &dev_name))
+// FIXME: This matcher is only correct if the guest has at most one
+// SCSI host.
+#[derive(Debug)]
+struct ScsiBlockMatcher {
+    search: String,
 }

+impl ScsiBlockMatcher {
+    fn new(scsi_addr: &str) -> ScsiBlockMatcher {
+        let search = format!(r"/0:0:{}/block/", scsi_addr);
+
+        ScsiBlockMatcher { search }
+    }
+}
+
+impl UeventMatcher for ScsiBlockMatcher {
+    fn is_match(&self, uev: &Uevent) -> bool {
+        uev.subsystem == "block" && uev.devpath.contains(&self.search) && !uev.devname.is_empty()
+    }
+}
+
+#[instrument]
 pub async fn get_scsi_device_name(
    sandbox: &Arc<Mutex<Sandbox>>,
    scsi_addr: &str,
 ) -> Result<String> {
-    let dev_sub_path = format!("{}{}/{}", SCSI_HOST_CHANNEL, scsi_addr, SCSI_BLOCK_SUFFIX);
+    let matcher = ScsiBlockMatcher::new(scsi_addr);

    scan_scsi_bus(scsi_addr)?;
-    get_device_name(sandbox, &dev_sub_path).await
+    let uev = wait_for_uevent(sandbox, matcher).await?;
+    Ok(format!("{}/{}", SYSTEM_DEV_PATH, &uev.devname))
 }

-pub async fn get_pci_device_name(
+/// Uevent matcher for the block device of a virtio-blk PCI device.
+///
+/// An event matches when its subsystem is "block", it carries a non-empty
+/// device name and its devpath starts with
+/// `<pci root bus path><relpath>/virtio<N>/block/`.
+#[derive(Debug)]
+struct VirtioBlkPciMatcher {
+    rex: Regex,
+}
+
+impl VirtioBlkPciMatcher {
+    /// Build a matcher for the device whose path, relative to the PCI root
+    /// bus path returned by `create_pci_root_bus_path()`, is `relpath`.
+    fn new(relpath: &str) -> VirtioBlkPciMatcher {
+        let root_bus = create_pci_root_bus_path();
+        // Both path pieces are literal text, not patterns: escape them so
+        // that any regex metacharacter they contain (such as '.') only
+        // matches itself.
+        let re = format!(
+            r"^{}{}/virtio[0-9]+/block/",
+            regex::escape(&root_bus),
+            regex::escape(relpath)
+        );
+        VirtioBlkPciMatcher {
+            rex: Regex::new(&re).unwrap(),
+        }
+    }
+}
+
+impl UeventMatcher for VirtioBlkPciMatcher {
+    fn is_match(&self, uev: &Uevent) -> bool {
+        uev.subsystem == "block" && self.rex.is_match(&uev.devpath) && !uev.devname.is_empty()
+    }
+}
+
+#[instrument]
+pub async fn get_virtio_blk_pci_device_name(
    sandbox: &Arc<Mutex<Sandbox>>,
    pcipath: &pci::Path,
 ) -> Result<String> {
    let root_bus_sysfs = format!("{}{}", SYSFS_DIR, create_pci_root_bus_path());
    let sysfs_rel_path = pcipath_to_sysfs(&root_bus_sysfs, pcipath)?;
+    let matcher = VirtioBlkPciMatcher::new(&sysfs_rel_path);

    rescan_pci_bus()?;
-    get_device_name(sandbox, &sysfs_rel_path).await
+
+    let uev = wait_for_uevent(sandbox, matcher).await?;
+    Ok(format!("{}/{}", SYSTEM_DEV_PATH, &uev.devname))
 }

-pub async fn get_pmem_device_name(
-    sandbox: &Arc<Mutex<Sandbox>>,
-    pmem_devname: &str,
-) -> Result<String> {
-    let dev_sub_path = format!("/{}/{}", SCSI_BLOCK_SUFFIX, pmem_devname);
-    get_device_name(sandbox, &dev_sub_path).await
+/// Uevent matcher for a pmem block device.
+///
+/// An event matches when its subsystem is "block", it carries a non-empty
+/// device name, and its devpath starts with `ACPI_DEV_PATH` and ends with
+/// `/block/<devname>`.
+#[derive(Debug)]
+struct PmemBlockMatcher {
+    suffix: String,
+}
+
+impl PmemBlockMatcher {
+    /// Build a matcher for the device called `devname`.
+    fn new(devname: &str) -> PmemBlockMatcher {
+        PmemBlockMatcher {
+            suffix: format!(r"/block/{}", devname),
+        }
+    }
+}
+
+impl UeventMatcher for PmemBlockMatcher {
+    fn is_match(&self, uev: &Uevent) -> bool {
+        // Reject anything that is not a named block device first.
+        if uev.subsystem != "block" || uev.devname.is_empty() {
+            return false;
+        }
+
+        let path = &uev.devpath;
+        path.starts_with(ACPI_DEV_PATH) && path.ends_with(&self.suffix)
+    }
+}
+
+/// Wait for the uevent announcing the pmem block device `devpath`.
+///
+/// `devpath` must start with "/dev/"; the remainder is the device name to
+/// wait for. Fails if the prefix is missing, if waiting for the uevent
+/// fails, or if the matched event reports a different device name.
+#[instrument]
+pub async fn wait_for_pmem_device(sandbox: &Arc<Mutex<Sandbox>>, devpath: &str) -> Result<()> {
+    let devname = devpath
+        .strip_prefix("/dev/")
+        .ok_or_else(|| anyhow!("Storage source '{}' must start with /dev/", devpath))?;
+
+    let uev = wait_for_uevent(sandbox, PmemBlockMatcher::new(devname)).await?;
+
+    if uev.devname == devname {
+        Ok(())
+    } else {
+        Err(anyhow!(
+            "Unexpected device name {} for pmem device (expected {})",
+            uev.devname,
+            devname
+        ))
+    }
+}

 /// Scan SCSI bus for the given SCSI address(SCSI-Id and LUN)
+#[instrument]
 fn scan_scsi_bus(scsi_addr: &str) -> Result<()> {
    let tokens: Vec<&str> = scsi_addr.split(':').collect();
    if tokens.len() != 2 {
@@ -193,6 +245,7 @@ fn scan_scsi_bus(scsi_addr: &str) -> Result<()> {
 // the same device in the list of devices provided through the OCI spec.
 // This is needed to update information about minor/major numbers that cannot
 // be predicted from the caller.
+#[instrument]
 fn update_spec_device_list(device: &Device, spec: &mut Spec, devidx: &DevIndex) -> Result<()> {
    let major_id: c_uint;
    let minor_id: c_uint;
@@ -269,6 +322,7 @@ fn update_spec_device_list(device: &Device, spec: &mut Spec, devidx: &DevIndex)

 // device.Id should be the predicted device name (vda, vdb, ...)
 // device.VmPath already provides a way to send it in
+#[instrument]
 async fn virtiommio_blk_device_handler(
    device: &Device,
    spec: &mut Spec,
@@ -283,6 +337,7 @@ async fn virtiommio_blk_device_handler(
 }

 // device.Id should be a PCI path string
+#[instrument]
 async fn virtio_blk_device_handler(
    device: &Device,
    spec: &mut Spec,
@@ -290,19 +345,15 @@ async fn virtio_blk_device_handler(
    devidx: &DevIndex,
 ) -> Result<()> {
    let mut dev = device.clone();
+    let pcipath = pci::Path::from_str(&device.id)?;

-    // When "Id (PCI path)" is not set, we allow to use the predicted
-    // "VmPath" passed from kata-runtime Note this is a special code
-    // path for cloud-hypervisor when BDF information is not available
-    if !device.id.is_empty() {
-        let pcipath = pci::Path::from_str(&device.id)?;
-        dev.vm_path = get_pci_device_name(sandbox, &pcipath).await?;
-    }
+    dev.vm_path = get_virtio_blk_pci_device_name(sandbox, &pcipath).await?;

    update_spec_device_list(&dev, spec, devidx)
 }

 // device.Id should be the SCSI address of the disk in the format "scsiID:lunID"
+#[instrument]
 async fn virtio_scsi_device_handler(
    device: &Device,
    spec: &mut Spec,
@@ -314,6 +365,7 @@ async fn virtio_scsi_device_handler(
    update_spec_device_list(&dev, spec, devidx)
 }

+#[instrument]
 async fn virtio_nvdimm_device_handler(
    device: &Device,
    spec: &mut Spec,
@@ -352,6 +404,7 @@ impl DevIndex {
    }
 }

+#[instrument]
 pub async fn add_devices(
    devices: &[Device],
    spec: &mut Spec,
@@ -366,6 +419,7 @@ pub async fn add_devices(
    Ok(())
 }

+#[instrument]
 async fn add_device(
    device: &Device,
    spec: &mut Spec,
@@ -400,6 +454,7 @@ async fn add_device(
 // update_device_cgroup update the device cgroup for container
 // to not allow access to the guest root partition. This prevents
 // the container from being able to access the VM rootfs.
+#[instrument]
 pub fn update_device_cgroup(spec: &mut Spec) -> Result<()> {
    let meta = fs::metadata(VM_ROOTFS)?;
    let rdev = meta.dev();
@@ -430,6 +485,7 @@ pub fn update_device_cgroup(spec: &mut Spec) -> Result<()> {
 #[cfg(test)]
 mod tests {
    use super::*;
+    use crate::uevent::spawn_test_watcher;
    use oci::Linux;
    use tempfile::tempdir;

@@ -776,4 +832,107 @@ mod tests {
        let relpath = pcipath_to_sysfs(rootbuspath, &path234);
        assert_eq!(relpath.unwrap(), "/0000:00:02.0/0000:01:03.0/0000:02:04.0");
    }
+
+    // Test-only stand-in for the device specific name lookups, which have
+    // complications that make them troublesome to unit test: waits for a
+    // uevent accepted by a VirtioBlkPciMatcher and returns its devname.
+    async fn example_get_device_name(
+        sandbox: &Arc<Mutex<Sandbox>>,
+        relpath: &str,
+    ) -> Result<String> {
+        let matcher = VirtioBlkPciMatcher::new(relpath);
+
+        let uev = wait_for_uevent(sandbox, matcher).await?;
+
+        Ok(uev.devname)
+    }
+
+    #[tokio::test]
+    async fn test_get_device_name() {
+        let devname = "vda";
+        let root_bus = create_pci_root_bus_path();
+        let relpath = "/0000:00:0a.0/0000:03:0b.0";
+        let devpath = format!("{}{}/virtio4/block/{}", root_bus, relpath, devname);
+
+        let mut uev = crate::uevent::Uevent::default(); // "add" event for block device `vda` located under `relpath`
+        uev.action = crate::linux_abi::U_EVENT_ACTION_ADD.to_string();
+        uev.subsystem = "block".to_string();
+        uev.devpath = devpath.clone();
+        uev.devname = devname.to_string();
+
+        let logger = slog::Logger::root(slog::Discard, o!());
+        let sandbox = Arc::new(Mutex::new(Sandbox::new(&logger).unwrap()));
+
+        let mut sb = sandbox.lock().await;
+        sb.uevent_map.insert(devpath.clone(), uev); // case 1: the event is already recorded in the sandbox before the lookup starts
+        drop(sb); // unlock
+
+        let name = example_get_device_name(&sandbox, relpath).await;
+        assert!(name.is_ok(), "{}", name.unwrap_err());
+        assert_eq!(name.unwrap(), devname);
+
+        let mut sb = sandbox.lock().await;
+        let uev = sb.uevent_map.remove(&devpath).unwrap(); // case 2: take the event back out so the map no longer holds it
+        drop(sb); // unlock
+
+        spawn_test_watcher(sandbox.clone(), uev); // NOTE(review): presumably delivers `uev` to the waiter asynchronously — confirm in uevent.rs
+
+        let name = example_get_device_name(&sandbox, relpath).await;
+        assert!(name.is_ok(), "{}", name.unwrap_err());
+        assert_eq!(name.unwrap(), devname);
+    }
+
+    #[tokio::test]
+    async fn test_virtio_blk_matcher() {
+        let root_bus = create_pci_root_bus_path();
+        let devname = "vda";
+
+        let mut uev_a = crate::uevent::Uevent::default();
+        let relpath_a = "/0000:00:0a.0"; // single-component PCI path
+        uev_a.action = crate::linux_abi::U_EVENT_ACTION_ADD.to_string();
+        uev_a.subsystem = "block".to_string();
+        uev_a.devname = devname.to_string();
+        uev_a.devpath = format!("{}{}/virtio4/block/{}", root_bus, relpath_a, devname);
+        let matcher_a = VirtioBlkPciMatcher::new(&relpath_a);
+
+        let mut uev_b = uev_a.clone();
+        let relpath_b = "/0000:00:0a.0/0000:00:0b.0"; // two components; relpath_a is a string prefix of this path
+        uev_b.devpath = format!("{}{}/virtio0/block/{}", root_bus, relpath_b, devname);
+        let matcher_b = VirtioBlkPciMatcher::new(&relpath_b);
+
+        assert!(matcher_a.is_match(&uev_a)); // each matcher accepts the event built for its own path ...
+        assert!(matcher_b.is_match(&uev_b));
+        assert!(!matcher_b.is_match(&uev_a)); // ... and rejects the other one, despite the shared path prefix
+        assert!(!matcher_a.is_match(&uev_b));
+    }
+
+    #[tokio::test]
+    async fn test_scsi_block_matcher() {
+        let root_bus = create_pci_root_bus_path();
+        let devname = "sda";
+
+        let mut uev_a = crate::uevent::Uevent::default();
+        let addr_a = "0:0"; // SCSI address in the "scsiID:lunID" form, embedded after "0:0:" in the devpath below
+        uev_a.action = crate::linux_abi::U_EVENT_ACTION_ADD.to_string();
+        uev_a.subsystem = "block".to_string();
+        uev_a.devname = devname.to_string();
+        uev_a.devpath = format!(
+            "{}/0000:00:00.0/virtio0/host0/target0:0:0/0:0:{}/block/sda",
+            root_bus, addr_a
+        );
+        let matcher_a = ScsiBlockMatcher::new(&addr_a);
+
+        let mut uev_b = uev_a.clone(); // the clone keeps devname "sda"; only devpath is replaced below
+        let addr_b = "2:0";
+        uev_b.devpath = format!(
+            "{}/0000:00:00.0/virtio0/host0/target0:0:2/0:0:{}/block/sdb",
+            root_bus, addr_b
+        );
+        let matcher_b = ScsiBlockMatcher::new(&addr_b);
+
+        assert!(matcher_a.is_match(&uev_a)); // each matcher accepts the event built for its own address ...
+        assert!(matcher_b.is_match(&uev_b));
+        assert!(!matcher_b.is_match(&uev_a)); // ... and rejects the event built for the other address
+        assert!(!matcher_a.is_match(&uev_b));
+    }
 }
--- a/src/agent/src/linux_abi.rs
+++ b/src/agent/src/linux_abi.rs
@@ -78,11 +78,6 @@ pub const SYSFS_MEMORY_BLOCK_SIZE_PATH: &str = "/sys/devices/system/memory/block
 pub const SYSFS_MEMORY_HOTPLUG_PROBE_PATH: &str = "/sys/devices/system/memory/probe";
 pub const SYSFS_MEMORY_ONLINE_PATH: &str = "/sys/devices/system/memory";

-// Here in "0:0", the first number is the SCSI host number because
-// only one SCSI controller has been plugged, while the second number
-// is always 0.
-pub const SCSI_HOST_CHANNEL: &str = "0:0:";
-pub const SCSI_BLOCK_SUFFIX: &str = "block";
 pub const SYSFS_SCSI_HOST_PATH: &str = "/sys/class/scsi_host";

 pub const SYSFS_CGROUPPATH: &str = "/sys/fs/cgroup";
--- a/src/agent/src/main.rs
+++ b/src/agent/src/main.rs
@@ -5,8 +5,8 @@

 #[macro_use]
 extern crate lazy_static;
+extern crate capctl;
 extern crate oci;
-extern crate prctl;
 extern crate prometheus;
 extern crate protocols;
 extern crate regex;
@@ -20,27 +20,22 @@ extern crate scopeguard;
 extern crate slog;

 use anyhow::{anyhow, Context, Result};
-use nix::fcntl::{self, OFlag};
-use nix::fcntl::{FcntlArg, FdFlag};
-use nix::libc::{STDERR_FILENO, STDIN_FILENO, STDOUT_FILENO};
-use nix::pty;
-use nix::sys::select::{select, FdSet};
+use nix::fcntl::OFlag;
 use nix::sys::socket::{self, AddressFamily, SockAddr, SockFlag, SockType};
-use nix::sys::wait;
-use nix::unistd::{self, close, dup, dup2, fork, setsid, ForkResult};
-use std::collections::HashMap;
+use nix::unistd::{self, dup, Pid};
 use std::env;
-use std::ffi::{CStr, CString, OsStr};
+use std::ffi::OsStr;
 use std::fs::{self, File};
-use std::io::{Read, Write};
 use std::os::unix::ffi::OsStrExt;
 use std::os::unix::fs as unixfs;
 use std::os::unix::io::AsRawFd;
 use std::path::Path;
+use std::process::exit;
 use std::sync::Arc;
-use unistd::Pid;
+use tracing::{instrument, span};

 mod config;
+mod console;
 mod device;
 mod linux_abi;
 mod metrics;
@@ -61,40 +56,32 @@ mod version;
 use mount::{cgroups_mount, general_mount};
 use sandbox::Sandbox;
 use signal::setup_signal_handler;
-use slog::Logger;
+use slog::{error, info, o, warn, Logger};
 use uevent::watch_uevents;

-use std::sync::Mutex as SyncMutex;
-
 use futures::future::join_all;
-use futures::StreamExt as _;
 use rustjail::pipestream::PipeStream;
 use tokio::{
    io::AsyncWrite,
    sync::{
-        oneshot::Sender,
        watch::{channel, Receiver},
        Mutex, RwLock,
    },
    task::JoinHandle,
 };
-use tokio_vsock::{Incoming, VsockListener, VsockStream};

 mod rpc;
+mod tracer;

 const NAME: &str = "kata-agent";
 const KERNEL_CMDLINE_FILE: &str = "/proc/cmdline";
-const CONSOLE_PATH: &str = "/dev/console";
-
-const DEFAULT_BUF_SIZE: usize = 8 * 1024;

 lazy_static! {
-    static ref GLOBAL_DEVICE_WATCHER: Arc<Mutex<HashMap<String, Option<Sender<String>>>>> =
-        Arc::new(Mutex::new(HashMap::new()));
    static ref AGENT_CONFIG: Arc<RwLock<AgentConfig>> =
        Arc::new(RwLock::new(config::AgentConfig::new()));
 }

+#[instrument]
 fn announce(logger: &Logger, config: &AgentConfig) {
    info!(logger, "announce";
    "agent-commit" => version::VERSION_COMMIT,
@@ -108,27 +95,6 @@ fn announce(logger: &Logger, config: &AgentConfig) {
    );
 }

-fn set_fd_close_exec(fd: RawFd) -> Result<RawFd> {
-    if let Err(e) = fcntl::fcntl(fd, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC)) {
-        return Err(anyhow!("failed to set fd: {} as close-on-exec: {}", fd, e));
-    }
-    Ok(fd)
-}
-
-fn get_vsock_incoming(fd: RawFd) -> Incoming {
-    let incoming;
-    unsafe {
-        incoming = VsockListener::from_raw_fd(fd).incoming();
-    }
-    incoming
-}
-
-async fn get_vsock_stream(fd: RawFd) -> Result<VsockStream> {
-    let stream = get_vsock_incoming(fd).next().await.unwrap().unwrap();
-    set_fd_close_exec(stream.as_raw_fd())?;
-    Ok(stream)
-}
-
 // Create a thread to handle reading from the logger pipe. The thread will
 // output to the vsock port specified, or stdout.
 async fn create_logger_task(rfd: RawFd, vsock_port: u32, shutdown: Receiver<bool>) -> Result<()> {
@@ -147,7 +113,7 @@ async fn create_logger_task(rfd: RawFd, vsock_port: u32, shutdown: Receiver<bool
        socket::bind(listenfd, &addr).unwrap();
        socket::listen(listenfd, 1).unwrap();

-        writer = Box::new(get_vsock_stream(listenfd).await.unwrap());
+        writer = Box::new(util::get_vsock_stream(listenfd).await.unwrap());
    } else {
        writer = Box::new(tokio::io::stdout());
    }
@@ -163,7 +129,7 @@ async fn real_main() -> std::result::Result<(), Box<dyn std::error::Error>> {
    // List of tasks that need to be stopped for a clean shutdown
    let mut tasks: Vec<JoinHandle<Result<()>>> = vec![];

-    lazy_static::initialize(&SHELLS);
+    console::initialize();

    lazy_static::initialize(&AGENT_CONFIG);

@@ -236,6 +202,17 @@ async fn real_main() -> std::result::Result<(), Box<dyn std::error::Error>> {
        ttrpc_log_guard = Ok(slog_stdlog::init().map_err(|e| e)?);
    }

+    if config.tracing != tracer::TraceType::Disabled {
+        let _ = tracer::setup_tracing(NAME, &logger, &config)?;
+    }
+
+    let root = span!(tracing::Level::TRACE, "root-span", work_units = 2);
+
+    // XXX: Start the root trace transaction.
+    //
+    // XXX: Note that *ALL* spans need to start after this point!!
+    let _enter = root.enter();
+
    // Start the sandbox and wait for its ttRPC server to end
    start_sandbox(&logger, &config, init_mode, &mut tasks, shutdown_rx.clone()).await?;

@@ -264,6 +241,10 @@ async fn real_main() -> std::result::Result<(), Box<dyn std::error::Error>> {
        }
    }

+    if config.tracing != tracer::TraceType::Disabled {
+        tracer::end_tracing();
+    }
+
    eprintln!("{} shutdown complete", NAME);

    Ok(())
@@ -285,6 +266,7 @@ fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
    }

    if args.len() == 2 && args[1] == "init" {
+        reset_sigpipe();
        rustjail::container::init_child();
        exit(0);
    }
@@ -296,6 +278,7 @@ fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
    rt.block_on(real_main())
 }

+#[instrument]
 async fn start_sandbox(
    logger: &Logger,
    config: &AgentConfig,
@@ -303,26 +286,17 @@ async fn start_sandbox(
    tasks: &mut Vec<JoinHandle<Result<()>>>,
    shutdown: Receiver<bool>,
 ) -> Result<()> {
-    let shells = SHELLS.clone();
    let debug_console_vport = config.debug_console_vport as u32;

-    let shell_handle = if config.debug_console {
-        let thread_logger = logger.clone();
-        let shells = shells.lock().unwrap().to_vec();
+    if config.debug_console {
+        let debug_console_task = tokio::task::spawn(console::debug_console_handler(
+            logger.clone(),
+            debug_console_vport,
+            shutdown.clone(),
+        ));

-        let handle = tokio::task::spawn_blocking(move || {
-            let result = setup_debug_console(&thread_logger, shells, debug_console_vport);
-            if result.is_err() {
-                // Report error, but don't fail
-                warn!(thread_logger, "failed to setup debug console";
-                    "error" => format!("{}", result.unwrap_err()));
-            }
-        });
-
-        Some(handle)
-    } else {
-        None
-    };
+        tasks.push(debug_console_task);
+    }

    // Initialize unique sandbox structure.
    let s = Sandbox::new(&logger).context("Failed to create sandbox")?;
@@ -351,13 +325,9 @@ async fn start_sandbox(
    let mut server = rpc::start(sandbox.clone(), config.server_addr.as_str());
    server.start().await?;

-    let _ = rx.await?;
+    rx.await?;
    server.shutdown().await?;

-    if let Some(handle) = shell_handle {
-        handle.await.map_err(|e| anyhow!("{:?}", e))?;
-    }
-
    Ok(())
 }

@@ -395,6 +365,7 @@ fn init_agent_as_init(logger: &Logger, unified_cgroup_hierarchy: bool) -> Result
    Ok(())
 }

+#[instrument]
 fn sethostname(hostname: &OsStr) -> Result<()> {
    let size = hostname.len() as usize;

@@ -408,284 +379,16 @@ fn sethostname(hostname: &OsStr) -> Result<()> {
    }
 }

-lazy_static! {
-    static ref SHELLS: Arc<SyncMutex<Vec<String>>> = {
-        let mut v = Vec::new();
-
-        if !cfg!(test) {
-            v.push("/bin/bash".to_string());
-            v.push("/bin/sh".to_string());
-        }
-
-        Arc::new(SyncMutex::new(v))
-    };
+// The Rust standard library suppresses the default SIGPIPE behavior,
+// see https://github.com/rust-lang/rust/pull/13158.
+// Since the parent's signal handlers are inherited by its child process,
+// we re-enable the standard SIGPIPE behavior here as a workaround to
+// fix the issue of https://github.com/kata-containers/kata-containers/issues/1887.
+fn reset_sigpipe() {
+    unsafe {
+        libc::signal(libc::SIGPIPE, libc::SIG_DFL); // set SIGPIPE back to SIG_DFL; the value returned by signal() is discarded
+    }
 }

 use crate::config::AgentConfig;
-use nix::sys::stat::Mode;
 use std::os::unix::io::{FromRawFd, RawFd};
-use std::path::PathBuf;
-use std::process::exit;
-
-fn setup_debug_console(logger: &Logger, shells: Vec<String>, port: u32) -> Result<()> {
-    let shell = shells
-        .iter()
-        .find(|sh| PathBuf::from(sh).exists())
-        .ok_or_else(|| anyhow!("no shell found to launch debug console"))?;
-
-    if port > 0 {
-        let listenfd = socket::socket(
-            AddressFamily::Vsock,
-            SockType::Stream,
-            SockFlag::SOCK_CLOEXEC,
-            None,
-        )?;
-        let addr = SockAddr::new_vsock(libc::VMADDR_CID_ANY, port);
-        socket::bind(listenfd, &addr)?;
-        socket::listen(listenfd, 1)?;
-        loop {
-            let f: RawFd = socket::accept4(listenfd, SockFlag::SOCK_CLOEXEC)?;
-            match run_debug_console_shell(logger, shell, f) {
-                Ok(_) => {
-                    info!(logger, "run_debug_console_shell session finished");
-                }
-                Err(err) => {
-                    error!(logger, "run_debug_console_shell failed: {:?}", err);
-                }
-            }
-        }
-    } else {
-        let mut flags = OFlag::empty();
-        flags.insert(OFlag::O_RDWR);
-        flags.insert(OFlag::O_CLOEXEC);
-        loop {
-            let f: RawFd = fcntl::open(CONSOLE_PATH, flags, Mode::empty())?;
-            match run_debug_console_shell(logger, shell, f) {
-                Ok(_) => {
-                    info!(logger, "run_debug_console_shell session finished");
-                }
-                Err(err) => {
-                    error!(logger, "run_debug_console_shell failed: {:?}", err);
-                }
-            }
-        }
-    };
-}
-
-fn io_copy<R: ?Sized, W: ?Sized>(reader: &mut R, writer: &mut W) -> std::io::Result<u64>
-where
-    R: Read,
-    W: Write,
-{
-    let mut buf = [0; DEFAULT_BUF_SIZE];
-    let buf_len;
-
-    match reader.read(&mut buf) {
-        Ok(0) => return Ok(0),
-        Ok(len) => buf_len = len,
-        Err(err) => return Err(err),
-    };
-
-    // write and return
-    match writer.write_all(&buf[..buf_len]) {
-        Ok(_) => Ok(buf_len as u64),
-        Err(err) => Err(err),
-    }
-}
-
-fn run_debug_console_shell(logger: &Logger, shell: &str, socket_fd: RawFd) -> Result<()> {
-    let pseduo = pty::openpty(None, None)?;
-    let _ = fcntl::fcntl(pseduo.master, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC));
-    let _ = fcntl::fcntl(pseduo.slave, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC));
-
-    let slave_fd = pseduo.slave;
-
-    match fork() {
-        Ok(ForkResult::Child) => {
-            // create new session with child as session leader
-            setsid()?;
-
-            // dup stdin, stdout, stderr to let child act as a terminal
-            dup2(slave_fd, STDIN_FILENO)?;
-            dup2(slave_fd, STDOUT_FILENO)?;
-            dup2(slave_fd, STDERR_FILENO)?;
-
-            // set tty
-            unsafe {
-                libc::ioctl(0, libc::TIOCSCTTY);
-            }
-
-            let cmd = CString::new(shell).unwrap();
-            let args: Vec<&CStr> = vec![];
-
-            // run shell
-            let _ = unistd::execvp(cmd.as_c_str(), args.as_slice()).map_err(|e| match e {
-                nix::Error::Sys(errno) => {
-                    std::process::exit(errno as i32);
-                }
-                _ => std::process::exit(-2),
-            });
-        }
-
-        Ok(ForkResult::Parent { child: child_pid }) => {
-            info!(logger, "get debug shell pid {:?}", child_pid);
-
-            let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC)?;
-            let master_fd = pseduo.master;
-            let debug_shell_logger = logger.clone();
-
-            // channel that used to sync between thread and main process
-            let (tx, rx) = std::sync::mpsc::channel::<i32>();
-
-            // start a thread to do IO copy between socket and pseduo.master
-            std::thread::spawn(move || {
-                let mut master_reader = unsafe { File::from_raw_fd(master_fd) };
-                let mut master_writer = unsafe { File::from_raw_fd(master_fd) };
-                let mut socket_reader = unsafe { File::from_raw_fd(socket_fd) };
-                let mut socket_writer = unsafe { File::from_raw_fd(socket_fd) };
-
-                loop {
-                    let mut fd_set = FdSet::new();
-                    fd_set.insert(rfd);
-                    fd_set.insert(master_fd);
-                    fd_set.insert(socket_fd);
-
-                    match select(
-                        Some(fd_set.highest().unwrap() + 1),
-                        &mut fd_set,
-                        None,
-                        None,
-                        None,
-                    ) {
-                        Ok(_) => (),
-                        Err(e) => {
-                            if e == nix::Error::from(nix::errno::Errno::EINTR) {
-                                continue;
-                            } else {
-                                error!(debug_shell_logger, "select error {:?}", e);
-                                tx.send(1).unwrap();
-                                break;
-                            }
-                        }
-                    }
-
-                    if fd_set.contains(rfd) {
-                        info!(
-                            debug_shell_logger,
-                            "debug shell process {} exited", child_pid
-                        );
-                        tx.send(1).unwrap();
-                        break;
-                    }
-
-                    if fd_set.contains(master_fd) {
-                        match io_copy(&mut master_reader, &mut socket_writer) {
-                            Ok(0) => {
-                                debug!(debug_shell_logger, "master fd closed");
-                                tx.send(1).unwrap();
-                                break;
-                            }
-                            Ok(_) => {}
-                            Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
-                            Err(e) => {
-                                error!(debug_shell_logger, "read master fd error {:?}", e);
-                                tx.send(1).unwrap();
-                                break;
-                            }
-                        }
-                    }
-
-                    if fd_set.contains(socket_fd) {
-                        match io_copy(&mut socket_reader, &mut master_writer) {
-                            Ok(0) => {
-                                debug!(debug_shell_logger, "socket fd closed");
-                                tx.send(1).unwrap();
-                                break;
-                            }
-                            Ok(_) => {}
-                            Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
-                            Err(e) => {
-                                error!(debug_shell_logger, "read socket fd error {:?}", e);
-                                tx.send(1).unwrap();
-                                break;
-                            }
-                        }
-                    }
-                }
-            });
-
-            let wait_status = wait::waitpid(child_pid, None);
-            info!(logger, "debug console process exit code: {:?}", wait_status);
-
-            info!(logger, "notify debug monitor thread to exit");
-            // close pipe to exit select loop
-            let _ = close(wfd);
-
-            // wait for thread exit.
-            let _ = rx.recv().unwrap();
-            info!(logger, "debug monitor thread has exited");
-
-            // close files
-            let _ = close(rfd);
-            let _ = close(master_fd);
-            let _ = close(slave_fd);
-        }
-        Err(err) => {
-            return Err(anyhow!("fork error: {:?}", err));
-        }
-    }
-
-    Ok(())
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use tempfile::tempdir;
-
-    #[test]
-    fn test_setup_debug_console_no_shells() {
-        // Guarantee no shells have been added
-        // (required to avoid racing with
-        // test_setup_debug_console_invalid_shell()).
-        let shells_ref = SHELLS.clone();
-        let mut shells = shells_ref.lock().unwrap();
-        shells.clear();
-        let logger = slog_scope::logger();
-
-        let result = setup_debug_console(&logger, shells.to_vec(), 0);
-
-        assert!(result.is_err());
-        assert_eq!(
-            result.unwrap_err().to_string(),
-            "no shell found to launch debug console"
-        );
-    }
-
-    #[test]
-    fn test_setup_debug_console_invalid_shell() {
-        let shells_ref = SHELLS.clone();
-        let mut shells = shells_ref.lock().unwrap();
-
-        let dir = tempdir().expect("failed to create tmpdir");
-
-        // Add an invalid shell
-        let shell = dir
-            .path()
-            .join("enoent")
-            .to_str()
-            .expect("failed to construct shell path")
-            .to_string();
-
-        shells.push(shell);
-        let logger = slog_scope::logger();
-
-        let result = setup_debug_console(&logger, shells.to_vec(), 0);
-
-        assert!(result.is_err());
-        assert_eq!(
-            result.unwrap_err().to_string(),
-            "no shell found to launch debug console"
-        );
-    }
-}
--- a/src/agent/src/metrics.rs
+++ b/src/agent/src/metrics.rs
@@ -8,6 +8,7 @@ extern crate procfs;
 use prometheus::{Encoder, Gauge, GaugeVec, IntCounter, TextEncoder};

 use anyhow::Result;
+use tracing::instrument;

 const NAMESPACE_KATA_AGENT: &str = "kata_agent";
 const NAMESPACE_KATA_GUEST: &str = "kata_guest";
@@ -68,6 +69,7 @@ lazy_static! {
    prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"meminfo").as_ref() , "Statistics about memory usage in the system.", &["item"]).unwrap();
 }

+#[instrument]
 pub fn get_metrics(_: &protocols::agent::GetMetricsRequest) -> Result<String> {
    AGENT_SCRAPE_COUNT.inc();

@@ -87,6 +89,7 @@ pub fn get_metrics(_: &protocols::agent::GetMetricsRequest) -> Result<String> {
    Ok(String::from_utf8(buffer).unwrap())
 }

+#[instrument]
 fn update_agent_metrics() {
    let me = procfs::process::Process::myself();

@@ -136,6 +139,7 @@ fn update_agent_metrics() {
    }
 }

+#[instrument]
 fn update_guest_metrics() {
    // try get load and task info
    match procfs::LoadAverage::new() {
@@ -218,6 +222,7 @@ fn update_guest_metrics() {
    }
 }

+#[instrument]
 fn set_gauge_vec_meminfo(gv: &prometheus::GaugeVec, meminfo: &procfs::Meminfo) {
    gv.with_label_values(&["mem_total"])
        .set(meminfo.mem_total as f64);
@@ -332,6 +337,7 @@ fn set_gauge_vec_meminfo(gv: &prometheus::GaugeVec, meminfo: &procfs::Meminfo) {
        .set(meminfo.k_reclaimable.unwrap_or(0) as f64);
 }

+#[instrument]
 fn set_gauge_vec_cpu_time(gv: &prometheus::GaugeVec, cpu: &str, cpu_time: &procfs::CpuTime) {
    gv.with_label_values(&[cpu, "user"])
        .set(cpu_time.user as f64);
@@ -355,6 +361,7 @@ fn set_gauge_vec_cpu_time(gv: &prometheus::GaugeVec, cpu: &str, cpu_time: &procf
        .set(cpu_time.guest_nice.unwrap_or(0.0) as f64);
 }

+#[instrument]
 fn set_gauge_vec_diskstat(gv: &prometheus::GaugeVec, diskstat: &procfs::DiskStat) {
    gv.with_label_values(&[diskstat.name.as_str(), "reads"])
        .set(diskstat.reads as f64);
@@ -393,6 +400,7 @@ fn set_gauge_vec_diskstat(gv: &prometheus::GaugeVec, diskstat: &procfs::DiskStat
 }

 // set_gauge_vec_netdev set gauge for NetDevLine
+#[instrument]
 fn set_gauge_vec_netdev(gv: &prometheus::GaugeVec, status: &procfs::net::DeviceStatus) {
    gv.with_label_values(&[status.name.as_str(), "recv_bytes"])
        .set(status.recv_bytes as f64);
@@ -429,6 +437,7 @@ fn set_gauge_vec_netdev(gv: &prometheus::GaugeVec, status: &procfs::net::DeviceS
 }

 // set_gauge_vec_proc_status set gauge for ProcStatus
+#[instrument]
 fn set_gauge_vec_proc_status(gv: &prometheus::GaugeVec, status: &procfs::process::Status) {
    gv.with_label_values(&["vmpeak"])
        .set(status.vmpeak.unwrap_or(0) as f64);
@@ -469,6 +478,7 @@ fn set_gauge_vec_proc_status(gv: &prometheus::GaugeVec, status: &procfs::process
 }

 // set_gauge_vec_proc_io set gauge for ProcIO
+#[instrument]
 fn set_gauge_vec_proc_io(gv: &prometheus::GaugeVec, io_stat: &procfs::process::Io) {
    gv.with_label_values(&["rchar"]).set(io_stat.rchar as f64);
    gv.with_label_values(&["wchar"]).set(io_stat.wchar as f64);
@@ -483,6 +493,7 @@ fn set_gauge_vec_proc_io(gv: &prometheus::GaugeVec, io_stat: &procfs::process::I
 }

 // set_gauge_vec_proc_stat set gauge for ProcStat
+#[instrument]
 fn set_gauge_vec_proc_stat(gv: &prometheus::GaugeVec, stat: &procfs::process::Stat) {
    gv.with_label_values(&["utime"]).set(stat.utime as f64);
    gv.with_label_values(&["stime"]).set(stat.stime as f64);
--- a/src/agent/src/mount.rs
+++ b/src/agent/src/mount.rs
@@ -7,7 +7,7 @@ use std::collections::HashMap;
 use std::ffi::CString;
 use std::fs;
 use std::io;
-use std::os::unix::fs::PermissionsExt;
+use std::os::unix::fs::{MetadataExt, PermissionsExt};

 use std::path::Path;
 use std::ptr::null;
@@ -17,13 +17,14 @@ use tokio::sync::Mutex;

 use libc::{c_void, mount};
 use nix::mount::{self, MsFlags};
+use nix::unistd::Gid;

 use regex::Regex;
 use std::fs::File;
 use std::io::{BufRead, BufReader};

 use crate::device::{
-    get_pci_device_name, get_pmem_device_name, get_scsi_device_name, online_device,
+    get_scsi_device_name, get_virtio_blk_pci_device_name, online_device, wait_for_pmem_device,
 };
 use crate::linux_abi::*;
 use crate::pci;
@@ -31,6 +32,7 @@ use crate::protocols::agent::Storage;
 use crate::Sandbox;
 use anyhow::{anyhow, Context, Result};
 use slog::Logger;
+use tracing::instrument;

 pub const DRIVER_9P_TYPE: &str = "9p";
 pub const DRIVER_VIRTIOFS_TYPE: &str = "virtio-fs";
@@ -45,6 +47,9 @@ pub const TYPE_ROOTFS: &str = "rootfs";

 pub const MOUNT_GUEST_TAG: &str = "kataShared";

+// Allocating an FSGroup that owns the pod's volumes
+const FS_GID: &str = "fsgid";
+
 #[rustfmt::skip]
 lazy_static! {
    pub static ref FLAGS: HashMap<&'static str, (bool, MsFlags)> = {
@@ -152,6 +157,7 @@ pub struct BareMount<'a> {
 // * evaluate all symlinks
 // * ensure the source exists
 impl<'a> BareMount<'a> {
+    #[instrument]
    pub fn new(
        s: &'a str,
        d: &'a str,
@@ -170,6 +176,7 @@ impl<'a> BareMount<'a> {
        }
    }

+    #[instrument]
    pub fn mount(&self) -> Result<()> {
        let source;
        let dest;
@@ -228,6 +235,7 @@ impl<'a> BareMount<'a> {
    }
 }

+#[instrument]
 async fn ephemeral_storage_handler(
    logger: &Logger,
    storage: &Storage,
@@ -241,11 +249,40 @@ async fn ephemeral_storage_handler(
    }

    fs::create_dir_all(Path::new(&storage.mount_point))?;
-    common_storage_handler(logger, storage)?;
+
+    // For now we only support one option field: "fsGroup", which
+    // isn't a valid mount option, so we must remove it before
+    // mounting.
+    if storage.options.len() > 0 {
+        // ephemeral_storage doesn't support mount options except fsGroup.
+        let mut new_storage = storage.clone();
+        new_storage.options = protobuf::RepeatedField::default();
+        common_storage_handler(logger, &new_storage)?;
+
+        let opts_vec: Vec<String> = storage.options.to_vec();
+
+        let opts = parse_options(opts_vec);
+
+        if let Some(fsgid) = opts.get(FS_GID) {
+            let gid = fsgid.parse::<u32>()?;
+
+            nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?;
+
+            let meta = fs::metadata(&storage.mount_point)?;
+            let mut permission = meta.permissions();
+
+            let o_mode = meta.mode() | 0o2000;
+            permission.set_mode(o_mode);
+            fs::set_permissions(&storage.mount_point, permission)?;
+        }
+    } else {
+        common_storage_handler(logger, &storage)?;
+    }

    Ok("".to_string())
 }

+#[instrument]
 async fn local_storage_handler(
    _logger: &Logger,
    storage: &Storage,
@@ -266,11 +303,24 @@ async fn local_storage_handler(
    let opts_vec: Vec<String> = storage.options.to_vec();

    let opts = parse_options(opts_vec);
-    let mode = opts.get("mode");
-    if let Some(mode) = mode {
+
+    let mut need_set_fsgid = false;
+    if let Some(fsgid) = opts.get(FS_GID) {
+        let gid = fsgid.parse::<u32>()?;
+
+        nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?;
+        need_set_fsgid = true;
+    }
+
+    if let Some(mode) = opts.get("mode") {
        let mut permission = fs::metadata(&storage.mount_point)?.permissions();

-        let o_mode = u32::from_str_radix(mode, 8)?;
+        let mut o_mode = u32::from_str_radix(mode, 8)?;
+
+        if need_set_fsgid {
+            // set SetGid mode mask.
+            o_mode |= 0o2000;
+        }
        permission.set_mode(o_mode);

        fs::set_permissions(&storage.mount_point, permission)?;
@@ -279,6 +329,7 @@ async fn local_storage_handler(
    Ok("".to_string())
 }

+#[instrument]
 async fn virtio9p_storage_handler(
    logger: &Logger,
    storage: &Storage,
@@ -288,6 +339,7 @@ async fn virtio9p_storage_handler(
 }

 // virtiommio_blk_storage_handler handles the storage for mmio blk driver.
+#[instrument]
 async fn virtiommio_blk_storage_handler(
    logger: &Logger,
    storage: &Storage,
@@ -298,6 +350,7 @@ async fn virtiommio_blk_storage_handler(
 }

 // virtiofs_storage_handler handles the storage for virtio-fs.
+#[instrument]
 async fn virtiofs_storage_handler(
    logger: &Logger,
    storage: &Storage,
@@ -307,6 +360,7 @@ async fn virtiofs_storage_handler(
 }

 // virtio_blk_storage_handler handles the storage for blk driver.
+#[instrument]
 async fn virtio_blk_storage_handler(
    logger: &Logger,
    storage: &Storage,
@@ -325,14 +379,15 @@ async fn virtio_blk_storage_handler(
        }
    } else {
        let pcipath = pci::Path::from_str(&storage.source)?;
-        let dev_path = get_pci_device_name(&sandbox, &pcipath).await?;
+        let dev_path = get_virtio_blk_pci_device_name(&sandbox, &pcipath).await?;
        storage.source = dev_path;
    }

    common_storage_handler(logger, &storage)
 }

-// virtio_scsi_storage_handler handles the storage for scsi driver.
+// virtio_scsi_storage_handler handles the storage for scsi driver.
+#[instrument]
 async fn virtio_scsi_storage_handler(
    logger: &Logger,
    storage: &Storage,
@@ -347,6 +402,7 @@ async fn virtio_scsi_storage_handler(
    common_storage_handler(logger, &storage)
 }

+#[instrument]
 fn common_storage_handler(logger: &Logger, storage: &Storage) -> Result<String> {
    // Mount the storage device.
    let mount_point = storage.mount_point.to_string();
@@ -355,32 +411,22 @@ fn common_storage_handler(logger: &Logger, storage: &Storage) -> Result<String>
 }

 // nvdimm_storage_handler handles the storage for NVDIMM driver.
+#[instrument]
 async fn nvdimm_storage_handler(
    logger: &Logger,
    storage: &Storage,
    sandbox: Arc<Mutex<Sandbox>>,
 ) -> Result<String> {
-    let mut storage = storage.clone();
-    // If hot-plugged, get the device node path based on the PCI address else
-    // use the virt path provided in Storage Source
-    let pmem_devname = match storage.source.strip_prefix("/dev/") {
-        Some(dev) => dev,
-        None => {
-            return Err(anyhow!(
-                "Storage source '{}' must start with /dev/",
-                storage.source
-            ))
-        }
-    };
+    let storage = storage.clone();

    // Retrieve the device path from NVDIMM address.
-    let dev_path = get_pmem_device_name(&sandbox, pmem_devname).await?;
-    storage.source = dev_path;
+    wait_for_pmem_device(&sandbox, &storage.source).await?;

    common_storage_handler(logger, &storage)
 }

 // mount_storage performs the mount described by the storage structure.
+#[instrument]
 fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
    let logger = logger.new(o!("subsystem" => "mount"));

@@ -388,7 +434,10 @@ fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
    // If so, skip doing the mount. This facilitates mounting the sharedfs automatically
    // in the guest before the agent service starts.
    if storage.source == MOUNT_GUEST_TAG && is_mounted(&storage.mount_point)? {
-        warn!(logger, "kataShared already mounted, ignoring...");
+        warn!(
+            logger,
+            "{} already mounted on {}, ignoring...", MOUNT_GUEST_TAG, &storage.mount_point
+        );
        return Ok(());
    }

@@ -428,6 +477,7 @@ fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
 }

 /// Looks for `mount_point` entry in the /proc/mounts.
+#[instrument]
 fn is_mounted(mount_point: &str) -> Result<bool> {
    let mount_point = mount_point.trim_end_matches('/');
    let found = fs::metadata(mount_point).is_ok()
@@ -445,6 +495,7 @@ fn is_mounted(mount_point: &str) -> Result<bool> {
    Ok(found)
 }

+#[instrument]
 fn parse_mount_flags_and_options(options_vec: Vec<&str>) -> (MsFlags, String) {
    let mut flags = MsFlags::empty();
    let mut options: String = "".to_string();
@@ -473,6 +524,7 @@ fn parse_mount_flags_and_options(options_vec: Vec<&str>) -> (MsFlags, String) {
 // associated operations such as waiting for the device to show up, and mount
 // it to a specific location, according to the type of handler chosen, and for
 // each storage.
+#[instrument]
 pub async fn add_storages(
    logger: Logger,
    storages: Vec<Storage>,
@@ -522,6 +574,7 @@ pub async fn add_storages(
    Ok(mount_list)
 }

+#[instrument]
 fn mount_to_rootfs(logger: &Logger, m: &InitMount) -> Result<()> {
    let options_vec: Vec<&str> = m.options.clone();

@@ -547,6 +600,7 @@ fn mount_to_rootfs(logger: &Logger, m: &InitMount) -> Result<()> {
    Ok(())
 }

+#[instrument]
 pub fn general_mount(logger: &Logger) -> Result<()> {
    let logger = logger.new(o!("subsystem" => "mount"));

@@ -564,6 +618,7 @@ pub fn get_mount_fs_type(mount_point: &str) -> Result<String> {

 // get_mount_fs_type_from_file returns the FS type corresponding to the passed mount point and
 // any error ecountered.
+#[instrument]
 pub fn get_mount_fs_type_from_file(mount_file: &str, mount_point: &str) -> Result<String> {
    if mount_point.is_empty() {
        return Err(anyhow!("Invalid mount point {}", mount_point));
@@ -594,6 +649,7 @@ pub fn get_mount_fs_type_from_file(mount_file: &str, mount_point: &str) -> Resul
    ))
 }

+#[instrument]
 pub fn get_cgroup_mounts(
    logger: &Logger,
    cg_path: &str,
@@ -684,6 +740,7 @@ pub fn get_cgroup_mounts(
    Ok(cg_mounts)
 }

+#[instrument]
 pub fn cgroups_mount(logger: &Logger, unified_cgroup_hierarchy: bool) -> Result<()> {
    let logger = logger.new(o!("subsystem" => "mount"));

@@ -699,6 +756,7 @@ pub fn cgroups_mount(logger: &Logger, unified_cgroup_hierarchy: bool) -> Result<
    Ok(())
 }

+#[instrument]
 pub fn remove_mounts(mounts: &[String]) -> Result<()> {
    for m in mounts.iter() {
        mount::umount(m.as_str()).context(format!("failed to umount {:?}", m))?;
@@ -708,6 +766,7 @@ pub fn remove_mounts(mounts: &[String]) -> Result<()> {

 // ensure_destination_exists will recursively create a given mountpoint. If directories
 // are created, their permissions are initialized to mountPerm(0755)
+#[instrument]
 fn ensure_destination_exists(destination: &str, fs_type: &str) -> Result<()> {
    let d = Path::new(destination);
    if !d.exists() {
@@ -728,6 +787,7 @@ fn ensure_destination_exists(destination: &str, fs_type: &str) -> Result<()> {
    Ok(())
 }

+#[instrument]
 fn parse_options(option_list: Vec<String>) -> HashMap<String, String> {
    let mut options = HashMap::new();
    for opt in option_list.iter() {
@@ -900,7 +960,7 @@ mod tests {
            let msg = format!("{}: result: {:?}", msg, result);

            if d.error_contains.is_empty() {
-                assert!(result.is_ok(), msg);
+                assert!(result.is_ok(), "{}", msg);

                // Cleanup
                unsafe {
@@ -912,7 +972,7 @@ mod tests {

                    let msg = format!("{}: umount result: {:?}", msg, result);

-                    assert!(ret == 0, msg);
+                    assert!(ret == 0, "{}", msg);
                };

                continue;
@@ -920,7 +980,7 @@ mod tests {

            let err = result.unwrap_err();
            let error_msg = format!("{}", err);
-            assert!(error_msg.contains(d.error_contains), msg);
+            assert!(error_msg.contains(d.error_contains), "{}", msg);
        }
    }

@@ -1026,13 +1086,13 @@ mod tests {
            let msg = format!("{}: result: {:?}", msg, result);

            if d.error_contains.is_empty() {
-                assert!(result.is_ok(), msg);
+                assert!(result.is_ok(), "{}", msg);
                continue;
            }

            let error_msg = format!("{:#}", result.unwrap_err());

-            assert!(error_msg.contains(d.error_contains), msg);
+            assert!(error_msg.contains(d.error_contains), "{}", msg);
        }
    }

@@ -1108,6 +1168,7 @@ mod tests {

            assert!(
                format!("{}", err).contains("No such file or directory"),
+                "{}",
                msg
            );
        }
@@ -1136,13 +1197,13 @@ mod tests {
            if d.error_contains.is_empty() {
                let fs_type = result.unwrap();

-                assert!(d.fs_type == fs_type, msg);
+                assert!(d.fs_type == fs_type, "{}", msg);

                continue;
            }

            let error_msg = format!("{}", result.unwrap_err());
-            assert!(error_msg.contains(d.error_contains), msg);
+            assert!(error_msg.contains(d.error_contains), "{}", msg);
        }
    }

@@ -1291,34 +1352,34 @@ mod tests {
            let msg = format!("{}: result: {:?}", msg, result);

            if !d.error_contains.is_empty() {
-                assert!(result.is_err(), msg);
+                assert!(result.is_err(), "{}", msg);

                let error_msg = format!("{}", result.unwrap_err());
-                assert!(error_msg.contains(d.error_contains), msg);
+                assert!(error_msg.contains(d.error_contains), "{}", msg);
                continue;
            }

-            assert!(result.is_ok(), msg);
+            assert!(result.is_ok(), "{}", msg);

            let mounts = result.unwrap();
            let count = mounts.len();

            if !d.devices_cgroup {
-                assert!(count == 0, msg);
+                assert!(count == 0, "{}", msg);
                continue;
            }

            // get_cgroup_mounts() adds the device cgroup plus two other mounts.
-            assert!(count == (1 + 2), msg);
+            assert!(count == (1 + 2), "{}", msg);

            // First mount
-            assert!(mounts[0].eq(&first_mount), msg);
+            assert!(mounts[0].eq(&first_mount), "{}", msg);

            // Last mount
-            assert!(mounts[2].eq(&last_mount), msg);
+            assert!(mounts[2].eq(&last_mount), "{}", msg);

            // Devices cgroup
-            assert!(mounts[1].eq(&cg_devices_mount), msg);
+            assert!(mounts[1].eq(&cg_devices_mount), "{}", msg);
        }
    }
 }
--- a/src/agent/src/namespace.rs
+++ b/src/agent/src/namespace.rs
@@ -11,6 +11,7 @@ use std::fmt;
 use std::fs;
 use std::fs::File;
 use std::path::{Path, PathBuf};
+use tracing::instrument;

 use crate::mount::{BareMount, FLAGS};
 use slog::Logger;
@@ -20,6 +21,7 @@ pub const NSTYPEIPC: &str = "ipc";
 pub const NSTYPEUTS: &str = "uts";
 pub const NSTYPEPID: &str = "pid";

+#[instrument]
 pub fn get_current_thread_ns_path(ns_type: &str) -> String {
    format!(
        "/proc/{}/task/{}/ns/{}",
@@ -40,31 +42,35 @@ pub struct Namespace {
 }

 impl Namespace {
+    #[instrument]
    pub fn new(logger: &Logger) -> Self {
        Namespace {
            logger: logger.clone(),
            path: String::from(""),
            persistent_ns_dir: String::from(PERSISTENT_NS_DIR),
-            ns_type: NamespaceType::IPC,
+            ns_type: NamespaceType::Ipc,
            hostname: None,
        }
    }

+    #[instrument]
    pub fn get_ipc(mut self) -> Self {
-        self.ns_type = NamespaceType::IPC;
+        self.ns_type = NamespaceType::Ipc;
        self
    }

+    #[instrument]
    pub fn get_uts(mut self, hostname: &str) -> Self {
-        self.ns_type = NamespaceType::UTS;
+        self.ns_type = NamespaceType::Uts;
        if !hostname.is_empty() {
            self.hostname = Some(String::from(hostname));
        }
        self
    }

+    #[instrument]
    pub fn get_pid(mut self) -> Self {
-        self.ns_type = NamespaceType::PID;
+        self.ns_type = NamespaceType::Pid;
        self
    }

@@ -76,12 +82,13 @@ impl Namespace {

    // setup creates persistent namespace without switching to it.
    // Note, pid namespaces cannot be persisted.
+    #[instrument]
    pub async fn setup(mut self) -> Result<Self> {
        fs::create_dir_all(&self.persistent_ns_dir)?;

        let ns_path = PathBuf::from(&self.persistent_ns_dir);
        let ns_type = self.ns_type;
-        if ns_type == NamespaceType::PID {
+        if ns_type == NamespaceType::Pid {
            return Err(anyhow!("Cannot persist namespace of PID type"));
        }
        let logger = self.logger.clone();
@@ -104,7 +111,7 @@ impl Namespace {

                unshare(cf)?;

-                if ns_type == NamespaceType::UTS && hostname.is_some() {
+                if ns_type == NamespaceType::Uts && hostname.is_some() {
                    nix::unistd::sethostname(hostname.unwrap())?;
                }
                // Bind mount the new namespace from the current thread onto the mount point to persist it.
@@ -147,27 +154,27 @@ impl Namespace {
 /// Represents the Namespace type.
 #[derive(Clone, Copy, PartialEq)]
 enum NamespaceType {
-    IPC,
-    UTS,
-    PID,
+    Ipc,
+    Uts,
+    Pid,
 }

 impl NamespaceType {
    /// Get the string representation of the namespace type.
    pub fn get(&self) -> &str {
        match *self {
-            Self::IPC => "ipc",
-            Self::UTS => "uts",
-            Self::PID => "pid",
+            Self::Ipc => "ipc",
+            Self::Uts => "uts",
+            Self::Pid => "pid",
        }
    }

    /// Get the associate flags with the namespace type.
    pub fn get_flags(&self) -> CloneFlags {
        match *self {
-            Self::IPC => CloneFlags::CLONE_NEWIPC,
-            Self::UTS => CloneFlags::CLONE_NEWUTS,
-            Self::PID => CloneFlags::CLONE_NEWPID,
+            Self::Ipc => CloneFlags::CLONE_NEWIPC,
+            Self::Uts => CloneFlags::CLONE_NEWUTS,
+            Self::Pid => CloneFlags::CLONE_NEWPID,
        }
    }
 }
@@ -178,12 +185,6 @@ impl fmt::Debug for NamespaceType {
    }
 }

-impl Default for NamespaceType {
-    fn default() -> Self {
-        NamespaceType::IPC
-    }
-}
-
 #[cfg(test)]
 mod tests {
    use super::{Namespace, NamespaceType};
@@ -234,15 +235,15 @@ mod tests {

    #[test]
    fn test_namespace_type() {
-        let ipc = NamespaceType::IPC;
+        let ipc = NamespaceType::Ipc;
        assert_eq!("ipc", ipc.get());
        assert_eq!(CloneFlags::CLONE_NEWIPC, ipc.get_flags());

-        let uts = NamespaceType::UTS;
+        let uts = NamespaceType::Uts;
        assert_eq!("uts", uts.get());
        assert_eq!(CloneFlags::CLONE_NEWUTS, uts.get_flags());

-        let pid = NamespaceType::PID;
+        let pid = NamespaceType::Pid;
        assert_eq!("pid", pid.get());
        assert_eq!(CloneFlags::CLONE_NEWPID, pid.get_flags());
    }
--- a/src/agent/src/netlink.rs
+++ b/src/agent/src/netlink.rs
@@ -542,12 +542,10 @@ impl Handle {
                ntype: NDA_UNSPEC as u8,
            },
            nlas: {
-                let mut nlas = vec![];
-
-                nlas.push(Nla::Destination(match ip {
+                let mut nlas = vec![Nla::Destination(match ip {
                    IpAddr::V4(v4) => v4.octets().to_vec(),
                    IpAddr::V6(v6) => v6.octets().to_vec(),
-                }));
+                })];

                if !neigh.lladdr.is_empty() {
                    nlas.push(Nla::LinkLocalAddress(
--- a/src/agent/src/random.rs
+++ b/src/agent/src/random.rs
@@ -9,6 +9,7 @@ use nix::fcntl::{self, OFlag};
 use nix::sys::stat::Mode;
 use std::fs;
 use std::os::unix::io::{AsRawFd, FromRawFd};
+use tracing::instrument;

 pub const RNGDEV: &str = "/dev/random";
 pub const RNDADDTOENTCNT: libc::c_int = 0x40045201;
@@ -20,6 +21,7 @@ type IoctlRequestType = libc::c_int;
 #[cfg(target_env = "gnu")]
 type IoctlRequestType = libc::c_ulong;

+#[instrument]
 pub fn reseed_rng(data: &[u8]) -> Result<()> {
    let len = data.len() as libc::c_long;
    fs::write(RNGDEV, data)?;
@@ -37,10 +39,10 @@ pub fn reseed_rng(data: &[u8]) -> Result<()> {
            &len as *const libc::c_long,
        )
    };
-    let _ = Errno::result(ret).map(drop)?;
+    Errno::result(ret).map(drop)?;

    let ret = unsafe { libc::ioctl(f.as_raw_fd(), RNDRESEEDRNG as IoctlRequestType, 0) };
-    let _ = Errno::result(ret).map(drop)?;
+    Errno::result(ret).map(drop)?;

    Ok(())
 }
--- a/src/agent/src/rpc.rs
+++ b/src/agent/src/rpc.rs
@@ -20,9 +20,8 @@ use anyhow::{anyhow, Context, Result};
 use oci::{LinuxNamespace, Root, Spec};
 use protobuf::{RepeatedField, SingularPtrField};
 use protocols::agent::{
-    AgentDetails, CopyFileRequest, GuestDetailsResponse, Interfaces, ListProcessesResponse,
-    Metrics, OOMEvent, ReadStreamResponse, Routes, StatsContainerResponse, WaitProcessResponse,
-    WriteStreamResponse,
+    AgentDetails, CopyFileRequest, GuestDetailsResponse, Interfaces, Metrics, OOMEvent,
+    ReadStreamResponse, Routes, StatsContainerResponse, WaitProcessResponse, WriteStreamResponse,
 };
 use protocols::empty::Empty;
 use protocols::health::{
@@ -104,7 +103,7 @@ impl AgentService {
    ) -> Result<()> {
        let cid = req.container_id.clone();

-        let _ = verify_cid(&cid)?;
+        verify_cid(&cid)?;

        let mut oci_spec = req.OCI.clone();
        let use_sandbox_pidns = req.get_sandbox_pidns();
@@ -370,7 +369,6 @@ impl AgentService {
        let s = self.sandbox.clone();
        let mut resp = WaitProcessResponse::new();
        let pid: pid_t;
-        let stream;

        let (exit_send, mut exit_recv) = tokio::sync::mpsc::channel(100);

@@ -381,22 +379,20 @@ impl AgentService {
            "exec-id" => eid.clone()
        );

-        {
+        let exit_rx = {
            let mut sandbox = s.lock().await;
            let p = find_process(&mut sandbox, cid.as_str(), eid.as_str(), false)?;

-            stream = p.get_reader(StreamType::ExitPipeR);
-
            p.exit_watchers.push(exit_send);
            pid = p.pid;
-        }

-        if stream.is_some() {
-            info!(sl!(), "reading exit pipe");
+            p.exit_rx.clone()
+        };

-            let reader = stream.unwrap();
-            let mut content: Vec<u8> = vec![0, 1];
-            let _ = reader.lock().await.read(&mut content).await;
+        if let Some(mut exit_rx) = exit_rx {
+            info!(sl!(), "cid {} eid {} waiting for exit signal", &cid, &eid);
+            while exit_rx.changed().await.is_ok() {}
+            info!(sl!(), "cid {} eid {} received exit signal", &cid, &eid);
        }

        let mut sandbox = s.lock().await;
@@ -576,91 +572,6 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
            .map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))
    }

-    async fn list_processes(
-        &self,
-        _ctx: &TtrpcContext,
-        req: protocols::agent::ListProcessesRequest,
-    ) -> ttrpc::Result<ListProcessesResponse> {
-        let cid = req.container_id.clone();
-        let format = req.format.clone();
-        let mut args = req.args.into_vec();
-        let mut resp = ListProcessesResponse::new();
-
-        let s = Arc::clone(&self.sandbox);
-        let mut sandbox = s.lock().await;
-
-        let ctr = sandbox.get_container(&cid).ok_or_else(|| {
-            ttrpc_error(
-                ttrpc::Code::INVALID_ARGUMENT,
-                "invalid container id".to_string(),
-            )
-        })?;
-
-        let pids = ctr.processes().unwrap();
-
-        match format.as_str() {
-            "table" => {}
-            "json" => {
-                resp.process_list = serde_json::to_vec(&pids).unwrap();
-                return Ok(resp);
-            }
-            _ => {
-                return Err(ttrpc_error(
-                    ttrpc::Code::INVALID_ARGUMENT,
-                    "invalid format!".to_string(),
-                ));
-            }
-        }
-
-        // format "table"
-        if args.is_empty() {
-            // default argument
-            args = vec!["-ef".to_string()];
-        }
-
-        let output = tokio::process::Command::new("ps")
-            .args(args.as_slice())
-            .stdout(Stdio::piped())
-            .output()
-            .await
-            .expect("ps failed");
-
-        let out: String = String::from_utf8(output.stdout).unwrap();
-        let mut lines: Vec<String> = out.split('\n').map(|v| v.to_string()).collect();
-
-        let pid_index = lines[0]
-            .split_whitespace()
-            .position(|v| v == "PID")
-            .unwrap();
-
-        let mut result = String::new();
-        result.push_str(lines[0].as_str());
-
-        lines.remove(0);
-        for line in &lines {
-            if line.trim().is_empty() {
-                continue;
-            }
-
-            let fields: Vec<String> = line.split_whitespace().map(|v| v.to_string()).collect();
-
-            if fields.len() < pid_index + 1 {
-                warn!(sl!(), "corrupted output?");
-                continue;
-            }
-            let pid = fields[pid_index].trim().parse::<i32>().unwrap();
-
-            for p in &pids {
-                if pid == *p {
-                    result.push_str(line.as_str());
-                }
-            }
-        }
-
-        resp.process_list = Vec::from(result);
-        Ok(resp)
-    }
-
    async fn update_container(
        &self,
        _ctx: &TtrpcContext,
@@ -1015,7 +926,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
            }

            for m in req.kernel_modules.iter() {
-                let _ = load_kernel_module(m)
+                load_kernel_module(m)
                    .map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
            }

@@ -1618,27 +1529,22 @@ fn setup_bundle(cid: &str, spec: &mut Spec) -> Result<PathBuf> {
 fn cleanup_process(p: &mut Process) -> Result<()> {
    if p.parent_stdin.is_some() {
        p.close_stream(StreamType::ParentStdin);
-        let _ = unistd::close(p.parent_stdin.unwrap())?;
+        unistd::close(p.parent_stdin.unwrap())?;
    }

    if p.parent_stdout.is_some() {
        p.close_stream(StreamType::ParentStdout);
-        let _ = unistd::close(p.parent_stdout.unwrap())?;
+        unistd::close(p.parent_stdout.unwrap())?;
    }

    if p.parent_stderr.is_some() {
        p.close_stream(StreamType::ParentStderr);
-        let _ = unistd::close(p.parent_stderr.unwrap())?;
+        unistd::close(p.parent_stderr.unwrap())?;
    }

    if p.term_master.is_some() {
        p.close_stream(StreamType::TermMaster);
-        let _ = unistd::close(p.term_master.unwrap())?;
-    }
-
-    if p.exit_pipe_r.is_some() {
-        p.close_stream(StreamType::ExitPipeR);
-        let _ = unistd::close(p.exit_pipe_r.unwrap())?;
+        unistd::close(p.term_master.unwrap())?;
    }

    p.notify_term_close();
@@ -2018,9 +1924,9 @@ mod tests {
            let msg = format!("{}, result: {:?}", msg, result);

            if result.is_ok() {
-                assert!(!d.expect_error, msg);
+                assert!(!d.expect_error, "{}", msg);
            } else {
-                assert!(d.expect_error, msg);
+                assert!(d.expect_error, "{}", msg);
            }
        }
    }
--- a/src/agent/src/sandbox.rs
+++ b/src/agent/src/sandbox.rs
@@ -8,6 +8,7 @@ use crate::mount::{get_mount_fs_type, remove_mounts, TYPE_ROOTFS};
 use crate::namespace::Namespace;
 use crate::netlink::Handle;
 use crate::network::Network;
+use crate::uevent::{Uevent, UeventMatcher};
 use anyhow::{anyhow, Context, Result};
 use libc::pid_t;
 use oci::{Hook, Hooks};
@@ -25,7 +26,11 @@ use std::path::Path;
 use std::sync::Arc;
 use std::{thread, time};
 use tokio::sync::mpsc::{channel, Receiver, Sender};
+use tokio::sync::oneshot;
 use tokio::sync::Mutex;
+use tracing::instrument;
+
+type UeventWatcher = (Box<dyn UeventMatcher>, oneshot::Sender<Uevent>);

 #[derive(Debug)]
 pub struct Sandbox {
@@ -36,7 +41,8 @@ pub struct Sandbox {
    pub network: Network,
    pub mounts: Vec<String>,
    pub container_mounts: HashMap<String, Vec<String>>,
-    pub pci_device_map: HashMap<String, String>,
+    pub uevent_map: HashMap<String, Uevent>,
+    pub uevent_watchers: Vec<Option<UeventWatcher>>,
    pub shared_utsns: Namespace,
    pub shared_ipcns: Namespace,
    pub sandbox_pidns: Option<Namespace>,
@@ -51,6 +57,7 @@ pub struct Sandbox {
 }

 impl Sandbox {
+    #[instrument]
    pub fn new(logger: &Logger) -> Result<Self> {
        let fs_type = get_mount_fs_type("/")?;
        let logger = logger.new(o!("subsystem" => "sandbox"));
@@ -65,7 +72,8 @@ impl Sandbox {
            containers: HashMap::new(),
            mounts: Vec::new(),
            container_mounts: HashMap::new(),
-            pci_device_map: HashMap::new(),
+            uevent_map: HashMap::new(),
+            uevent_watchers: Vec::new(),
            shared_utsns: Namespace::new(&logger),
            shared_ipcns: Namespace::new(&logger),
            sandbox_pidns: None,
@@ -88,6 +96,7 @@ impl Sandbox {
    //
    // It's assumed that caller is calling this method after
    // acquiring a lock on sandbox.
+    #[instrument]
    pub fn set_sandbox_storage(&mut self, path: &str) -> bool {
        match self.storages.get_mut(path) {
            None => {
@@ -110,6 +119,7 @@ impl Sandbox {
    //
    // It's assumed that caller is calling this method after
    // acquiring a lock on sandbox.
+    #[instrument]
    pub fn unset_sandbox_storage(&mut self, path: &str) -> Result<bool> {
        match self.storages.get_mut(path) {
            None => Err(anyhow!("Sandbox storage with path {} not found", path)),
@@ -129,6 +139,7 @@ impl Sandbox {
    //
    // It's assumed that caller is calling this method after
    // acquiring a lock on sandbox.
+    #[instrument]
    pub fn remove_sandbox_storage(&self, path: &str) -> Result<()> {
        let mounts = vec![path.to_string()];
        remove_mounts(&mounts)?;
@@ -142,6 +153,7 @@ impl Sandbox {
    //
    // It's assumed that caller is calling this method after
    // acquiring a lock on sandbox.
+    #[instrument]
    pub fn unset_and_remove_sandbox_storage(&mut self, path: &str) -> Result<()> {
        if self.unset_sandbox_storage(path)? {
            return self.remove_sandbox_storage(path);
@@ -150,6 +162,7 @@ impl Sandbox {
        Ok(())
    }

+    #[instrument]
    pub async fn setup_shared_namespaces(&mut self) -> Result<bool> {
        // Set up shared IPC namespace
        self.shared_ipcns = Namespace::new(&self.logger)
@@ -168,10 +181,12 @@ impl Sandbox {
        Ok(true)
    }

+    #[instrument]
    pub fn add_container(&mut self, c: LinuxContainer) {
        self.containers.insert(c.id.clone(), c);
    }

+    #[instrument]
    pub fn update_shared_pidns(&mut self, c: &LinuxContainer) -> Result<()> {
        // Populate the shared pid path only if this is an infra container and
        // sandbox_pidns has not been passed in the create_sandbox request.
@@ -195,10 +210,12 @@ impl Sandbox {
        Ok(())
    }

+    #[instrument]
    pub fn get_container(&mut self, id: &str) -> Option<&mut LinuxContainer> {
        self.containers.get_mut(id)
    }

+    #[instrument]
    pub fn find_process(&mut self, pid: pid_t) -> Option<&mut Process> {
        for (_, c) in self.containers.iter_mut() {
            if c.processes.get(&pid).is_some() {
@@ -209,6 +226,7 @@ impl Sandbox {
        None
    }

+    #[instrument]
    pub async fn destroy(&mut self) -> Result<()> {
        for ctr in self.containers.values_mut() {
            ctr.destroy().await?;
@@ -216,6 +234,7 @@ impl Sandbox {
        Ok(())
    }

+    #[instrument]
    pub fn online_cpu_memory(&self, req: &OnlineCPUMemRequest) -> Result<()> {
        if req.nb_cpus > 0 {
            // online cpus
@@ -259,6 +278,7 @@ impl Sandbox {
        Ok(())
    }

+    #[instrument]
    pub fn add_hooks(&mut self, dir: &str) -> Result<()> {
        let mut hooks = Hooks::default();
        if let Ok(hook) = self.find_hooks(dir, "prestart") {
@@ -274,6 +294,7 @@ impl Sandbox {
        Ok(())
    }

+    #[instrument]
    fn find_hooks(&self, hook_path: &str, hook_type: &str) -> Result<Vec<Hook>> {
        let mut hooks = Vec::new();
        for entry in fs::read_dir(Path::new(hook_path).join(hook_type))? {
@@ -310,6 +331,7 @@ impl Sandbox {
        Ok(hooks)
    }

+    #[instrument]
    pub async fn run_oom_event_monitor(&self, mut rx: Receiver<String>, container_id: String) {
        let logger = self.logger.clone();

@@ -342,6 +364,7 @@ impl Sandbox {
    }
 }

+#[instrument]
 fn online_resources(logger: &Logger, path: &str, pattern: &str, num: i32) -> Result<i32> {
    let mut count = 0;
    let re = Regex::new(pattern)?;
@@ -387,6 +410,7 @@ fn online_resources(logger: &Logger, path: &str, pattern: &str, num: i32) -> Res
 const ONLINE_CPUMEM_WATI_MILLIS: u64 = 50;
 const ONLINE_CPUMEM_MAX_RETRIES: u32 = 100;

+#[instrument]
 fn online_cpus(logger: &Logger, num: i32) -> Result<i32> {
    let mut onlined_count: i32 = 0;

@@ -416,6 +440,7 @@ fn online_cpus(logger: &Logger, num: i32) -> Result<i32> {
    ))
 }

+#[instrument]
 fn online_memory(logger: &Logger) -> Result<()> {
    online_resources(logger, SYSFS_MEMORY_ONLINE_PATH, r"memory[0-9]+", -1)?;
    Ok(())
--- a/src/agent/src/signal.rs
+++ b/src/agent/src/signal.rs
@@ -6,10 +6,10 @@

 use crate::sandbox::Sandbox;
 use anyhow::{anyhow, Result};
+use capctl::prctl::set_subreaper;
 use nix::sys::wait::WaitPidFlag;
 use nix::sys::wait::{self, WaitStatus};
 use nix::unistd;
-use prctl::set_child_subreaper;
 use slog::{error, info, o, Logger};
 use std::sync::Arc;
 use tokio::select;
@@ -22,6 +22,9 @@ async fn handle_sigchild(logger: Logger, sandbox: Arc<Mutex<Sandbox>>) -> Result
    info!(logger, "handling signal"; "signal" => "SIGCHLD");

    loop {
+        // Avoid reaping an undesirable child's exit status, e.g. execute_hook's.
+        // The lock guard is not bound to a variable, so it is released immediately.
+        rustjail::container::WAIT_PID_LOCKER.lock().await;
        let result = wait::waitpid(
            Some(Pid::from_raw(-1)),
            Some(WaitPidFlag::WNOHANG | WaitPidFlag::__WALL),
@@ -55,13 +58,6 @@ async fn handle_sigchild(logger: Logger, sandbox: Arc<Mutex<Sandbox>>) -> Result
            }

            let mut p = process.unwrap();
-
-            if p.exit_pipe_w.is_none() {
-                info!(logger, "process exit pipe not set");
-                continue;
-            }
-
-            let pipe_write = p.exit_pipe_w.unwrap();
            let ret: i32;

            match wait_status {
@@ -75,7 +71,7 @@ async fn handle_sigchild(logger: Logger, sandbox: Arc<Mutex<Sandbox>>) -> Result
            }

            p.exit_code = ret;
-            let _ = unistd::close(pipe_write);
+            let _ = p.exit_tx.take();

            info!(logger, "notify term to close");
            // close the socket file to notify readStdio to close terminal specifically
@@ -92,7 +88,7 @@ pub async fn setup_signal_handler(
 ) -> Result<()> {
    let logger = logger.new(o!("subsystem" => "signals"));

-    set_child_subreaper(true)
+    set_subreaper(true)
        .map_err(|err| anyhow!(err).context("failed to setup agent as a child subreaper"))?;

    let mut sigchild_stream = signal(SignalKind::child())?;
--- a/src/agent/src/tracer.rs
+++ b/src/agent/src/tracer.rs
@@ -0,0 +1,91 @@
+// Copyright (c) 2020-2021 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use crate::config::AgentConfig;
+use anyhow::Result;
+use opentelemetry::{global, sdk::trace::Config, trace::TracerProvider};
+use slog::{info, o, Logger};
+use std::error::Error;
+use std::fmt;
+use std::str::FromStr;
+use tracing_opentelemetry::OpenTelemetryLayer;
+use tracing_subscriber::layer::SubscriberExt;
+use tracing_subscriber::Registry;
+
+#[derive(Debug, PartialEq)]
+pub enum TraceType {
+    Disabled,
+    Isolated,
+}
+
+#[derive(Debug)]
+pub struct TraceTypeError {
+    details: String,
+}
+
+impl TraceTypeError {
+    fn new(msg: &str) -> TraceTypeError {
+        TraceTypeError {
+            details: msg.into(),
+        }
+    }
+}
+
+impl Error for TraceTypeError {}
+
+impl fmt::Display for TraceTypeError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}", self.details)
+    }
+}
+
+impl FromStr for TraceType {
+    type Err = TraceTypeError;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "isolated" => Ok(TraceType::Isolated),
+            "disabled" => Ok(TraceType::Disabled),
+            _ => Err(TraceTypeError::new("invalid trace type")),
+        }
+    }
+}
+
+pub fn setup_tracing(name: &'static str, logger: &Logger, _agent_cfg: &AgentConfig) -> Result<()> {
+    let logger = logger.new(o!("subsystem" => "vsock-tracer"));
+
+    let exporter = vsock_exporter::Exporter::builder()
+        .with_logger(&logger)
+        .init();
+
+    let config = Config::default();
+
+    let builder = opentelemetry::sdk::trace::TracerProvider::builder()
+        .with_simple_exporter(exporter)
+        .with_config(config);
+
+    let provider = builder.build();
+
+    // We don't need a versioned tracer.
+    let version = None;
+
+    let tracer = provider.get_tracer(name, version);
+
+    let _global_provider = global::set_tracer_provider(provider);
+
+    let layer = OpenTelemetryLayer::new(tracer);
+
+    let subscriber = Registry::default().with(layer);
+
+    tracing::subscriber::set_global_default(subscriber)?;
+
+    info!(logger, "tracing setup");
+
+    Ok(())
+}
+
+pub fn end_tracing() {
+    global::shutdown_tracer_provider();
+}
--- a/src/agent/src/uevent.rs
+++ b/src/agent/src/uevent.rs
@@ -6,26 +6,39 @@
 use crate::device::online_device;
 use crate::linux_abi::*;
 use crate::sandbox::Sandbox;
-use crate::GLOBAL_DEVICE_WATCHER;
+use crate::AGENT_CONFIG;
 use slog::Logger;

-use anyhow::Result;
+use anyhow::{anyhow, Result};
 use netlink_sys::{protocols, SocketAddr, TokioSocket};
 use nix::errno::Errno;
+use std::fmt::Debug;
 use std::os::unix::io::FromRawFd;
 use std::sync::Arc;
 use tokio::select;
 use tokio::sync::watch::Receiver;
 use tokio::sync::Mutex;
+use tracing::instrument;

-#[derive(Debug, Default)]
-struct Uevent {
-    action: String,
-    devpath: String,
-    devname: String,
-    subsystem: String,
+// Convenience macro to obtain the scope logger
+macro_rules! sl {
+    () => {
+        slog_scope::logger().new(o!("subsystem" => "uevent"))
+    };
+}
+
+#[derive(Debug, Default, Clone, PartialEq, Eq)]
+pub struct Uevent {
+    pub action: String,
+    pub devpath: String,
+    pub devname: String,
+    pub subsystem: String,
    seqnum: String,
-    interface: String,
+    pub interface: String,
+}
+
+pub trait UeventMatcher: Sync + Send + Debug + 'static {
+    fn is_match(&self, uev: &Uevent) -> bool;
 }

 impl Uevent {
@@ -52,89 +65,91 @@ impl Uevent {
        event
    }

-    // Check whether this is a block device hot-add event.
-    fn is_block_add_event(&self) -> bool {
-        let pci_root_bus_path = create_pci_root_bus_path();
-        self.action == U_EVENT_ACTION_ADD
-            && self.subsystem == "block"
-            && {
-                self.devpath.starts_with(pci_root_bus_path.as_str())
-                    || self.devpath.starts_with(ACPI_DEV_PATH) // NVDIMM/PMEM devices
-            }
-            && !self.devname.is_empty()
-    }
+    #[instrument]
+    async fn process_add(&self, logger: &Logger, sandbox: &Arc<Mutex<Sandbox>>) {
+        // Special case for memory hot-adds first
+        let online_path = format!("{}/{}/online", SYSFS_DIR, &self.devpath);
+        if online_path.starts_with(SYSFS_MEMORY_ONLINE_PATH) {
+            let _ = online_device(online_path.as_ref()).map_err(|e| {
+                error!(
+                    *logger,
+                    "failed to online device";
+                    "device" => &self.devpath,
+                    "error" => format!("{}", e),
+                )
+            });
+            return;
+        }

-    async fn handle_block_add_event(&self, sandbox: &Arc<Mutex<Sandbox>>) {
-        let pci_root_bus_path = create_pci_root_bus_path();
-
-        // Keep the same lock order as device::get_device_name(), otherwise it may cause deadlock.
-        let watcher = GLOBAL_DEVICE_WATCHER.clone();
-        let mut w = watcher.lock().await;
        let mut sb = sandbox.lock().await;

-        // Add the device node name to the pci device map.
-        sb.pci_device_map
-            .insert(self.devpath.clone(), self.devname.clone());
+        // Record the event by sysfs path
+        sb.uevent_map.insert(self.devpath.clone(), self.clone());

        // Notify watchers that are interested in the udev event.
-        // Close the channel after watcher has been notified.
-        let devpath = self.devpath.clone();
-        let empties: Vec<_> = w
-            .iter_mut()
-            .filter(|(dev_addr, _)| {
-                let pci_p = format!("{}/{}", pci_root_bus_path, *dev_addr);
-
-                // blk block device
-                devpath.starts_with(pci_p.as_str()) ||
-                // scsi block device
-                {
-                    (*dev_addr).ends_with(SCSI_BLOCK_SUFFIX) &&
-                        devpath.contains(*dev_addr)
-                } ||
-                // nvdimm/pmem device
-                {
-                    let pmem_suffix = format!("/{}/{}", SCSI_BLOCK_SUFFIX, self.devname);
-                    devpath.starts_with(ACPI_DEV_PATH) &&
-                        devpath.ends_with(pmem_suffix.as_str()) &&
-                        dev_addr.ends_with(pmem_suffix.as_str())
+        for watch in &mut sb.uevent_watchers {
+            if let Some((matcher, _)) = watch {
+                if matcher.is_match(&self) {
+                    let (_, sender) = watch.take().unwrap();
+                    let _ = sender.send(self.clone());
                }
-            })
-            .map(|(k, sender)| {
-                let devname = self.devname.clone();
-                let sender = sender.take().unwrap();
-                let _ = sender.send(devname);
-                k.clone()
-            })
-            .collect();
-
-        // Remove notified nodes from the watcher map.
-        for empty in empties {
-            w.remove(&empty);
+            }
        }
    }

+    #[instrument]
    async fn process(&self, logger: &Logger, sandbox: &Arc<Mutex<Sandbox>>) {
-        if self.is_block_add_event() {
-            return self.handle_block_add_event(sandbox).await;
-        } else if self.action == U_EVENT_ACTION_ADD {
-            let online_path = format!("{}/{}/online", SYSFS_DIR, &self.devpath);
-            // It's a memory hot-add event.
-            if online_path.starts_with(SYSFS_MEMORY_ONLINE_PATH) {
-                let _ = online_device(online_path.as_ref()).map_err(|e| {
-                    error!(
-                        *logger,
-                        "failed to online device";
-                        "device" => &self.devpath,
-                        "error" => format!("{}", e),
-                    )
-                });
-                return;
-            }
+        if self.action == U_EVENT_ACTION_ADD {
+            return self.process_add(logger, sandbox).await;
        }
        debug!(*logger, "ignoring event"; "uevent" => format!("{:?}", self));
    }
 }

+/// Wait for a uevent accepted by `matcher`.
+///
+/// Returns a recorded uevent if one already matches; otherwise waits up to the
+/// configured `hotplug_timeout` for a matching event and errors on timeout.
+#[instrument]
+pub async fn wait_for_uevent(
+    sandbox: &Arc<Mutex<Sandbox>>,
+    matcher: impl UeventMatcher,
+) -> Result<Uevent> {
+    let mut sb = sandbox.lock().await;
+    for uev in sb.uevent_map.values() {
+        if matcher.is_match(uev) {
+            info!(sl!(), "Device {:?} found in uevent map", uev);
+            return Ok(uev.clone());
+        }
+    }
+
+    // No recorded uevent matches, so the hotplug event has not been received
+    // yet: append a watcher (matcher + oneshot sender) and remember its slot.
+    // Note this is done inside the lock, not to miss any events from the
+    // global udev listener.
+    let (tx, mut rx) = tokio::sync::oneshot::channel::<Uevent>();
+    let idx = sb.uevent_watchers.len();
+    sb.uevent_watchers.push(Some((Box::new(matcher), tx)));
+    drop(sb); // unlock
+
+    info!(sl!(), "Waiting on channel for uevent notification\n");
+    let hotplug_timeout = AGENT_CONFIG.read().await.hotplug_timeout;
+
+    match tokio::time::timeout(hotplug_timeout, &mut rx).await {
+        Ok(v) => Ok(v?),
+        Err(_) => match sandbox.lock().await.uevent_watchers[idx].take() {
+            // Slot already emptied: the event was sent while we re-took the lock.
+            None => Ok(rx.try_recv()?),
+            Some((matcher, _)) => Err(anyhow!(
+                "Timeout after {:?} waiting for uevent {:?}",
+                hotplug_timeout,
+                &matcher
+            )),
+        },
+    }
+}
+
+#[instrument]
 pub async fn watch_uevents(
    sandbox: Arc<Mutex<Sandbox>>,
    mut shutdown: Receiver<bool>,
@@ -199,3 +214,71 @@ pub async fn watch_uevents(

    Ok(())
 }
+
+// Used in the device module unit tests
+#[cfg(test)]
+pub(crate) fn spawn_test_watcher(sandbox: Arc<Mutex<Sandbox>>, uev: Uevent) {
+    tokio::spawn(async move {
+        loop {
+            let mut sb = sandbox.lock().await;
+            for w in &mut sb.uevent_watchers {
+                if let Some((matcher, _)) = w {
+                    if matcher.is_match(&uev) {
+                        let (_, sender) = w.take().unwrap();
+                        let _ = sender.send(uev);
+                        return;
+                    }
+                }
+            }
+            drop(sb); // unlock
+        }
+    });
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[derive(Debug, Clone, Copy)]
+    struct AlwaysMatch();
+
+    impl UeventMatcher for AlwaysMatch {
+        fn is_match(&self, _: &Uevent) -> bool {
+            true
+        }
+    }
+
+    #[tokio::test]
+    async fn test_wait_for_uevent() {
+        let uev = Uevent {
+            action: crate::linux_abi::U_EVENT_ACTION_ADD.to_string(),
+            subsystem: "test".to_string(),
+            devpath: "/test/sysfs/path".to_string(),
+            devname: "testdevname".to_string(),
+            ..Default::default()
+        };
+
+        let matcher = AlwaysMatch();
+
+        let logger = slog::Logger::root(slog::Discard, o!());
+        let sandbox = Arc::new(Mutex::new(Sandbox::new(&logger).unwrap()));
+
+        let mut sb = sandbox.lock().await;
+        sb.uevent_map.insert(uev.devpath.clone(), uev.clone());
+        drop(sb); // unlock
+
+        let uev2 = wait_for_uevent(&sandbox, matcher).await;
+        assert!(uev2.is_ok());
+        assert_eq!(uev2.unwrap(), uev);
+
+        let mut sb = sandbox.lock().await;
+        sb.uevent_map.remove(&uev.devpath).unwrap();
+        drop(sb); // unlock
+
+        spawn_test_watcher(sandbox.clone(), uev.clone());
+
+        let uev2 = wait_for_uevent(&sandbox, matcher).await;
+        assert!(uev2.is_ok());
+        assert_eq!(uev2.unwrap(), uev);
+    }
+}
--- a/src/agent/src/util.rs
+++ b/src/agent/src/util.rs
@@ -3,10 +3,15 @@
 // SPDX-License-Identifier: Apache-2.0
 //

+use anyhow::Result;
+use futures::StreamExt;
 use std::io;
 use std::io::ErrorKind;
+use std::os::unix::io::{FromRawFd, RawFd};
 use tokio::io::{AsyncReadExt, AsyncWriteExt};
 use tokio::sync::watch::Receiver;
+use tokio_vsock::{Incoming, VsockListener, VsockStream};
+use tracing::instrument;

 // Size of I/O read buffer
 const BUF_SIZE: usize = 8192;
@@ -52,6 +57,17 @@ where
    Ok(total_bytes)
 }

+#[instrument]
+pub fn get_vsock_incoming(fd: RawFd) -> Incoming {
+    unsafe { VsockListener::from_raw_fd(fd).incoming() }
+}
+
+#[instrument]
+pub async fn get_vsock_stream(fd: RawFd) -> Result<VsockStream> {
+    let stream = get_vsock_incoming(fd).next().await.unwrap()?;
+    Ok(stream)
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/src/agent/vsock-exporter/Cargo.toml
+++ b/src/agent/vsock-exporter/Cargo.toml
@@ -0,0 +1,19 @@
+[package]
+name = "vsock-exporter"
+version = "0.1.0"
+authors = ["James O. D. Hunt <james.o.hunt@intel.com>"]
+edition = "2018"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+nix = "0.20.0"
+libc = "0.2.94"
+thiserror = "1.0.24"
+opentelemetry = { version = "0.14.0", features=["serialize"] }
+serde = { version = "1.0.126", features = ["derive"] }
+vsock = "0.2.3"
+bincode = "1.3.3"
+byteorder = "1.4.3"
+slog = { version = "2.5.2", features = ["dynamic-keys", "max_level_trace", "release_max_level_info"] }
+async-trait = "0.1.50"
--- a/src/agent/vsock-exporter/src/lib.rs
+++ b/src/agent/vsock-exporter/src/lib.rs
@@ -0,0 +1,196 @@
+// Copyright (c) 2020-2021 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// The VSOCK Exporter sends trace spans "out" to the forwarder running on the
+// host (which then forwards them on to a trace collector). The data is sent
+// via a VSOCK socket that the forwarder process is listening on. To allow the
+// forwarder to know how much data to read for each trace span the simplest
+// protocol is employed which uses a header packet and the payload (trace
+// span) data. The header packet is a simple count of the number of bytes in the
+// payload, which allows the forwarder to know how many bytes it must read to
+// consume the trace span. The payload is a serialised version of the trace span.
+
+use async_trait::async_trait;
+use byteorder::{ByteOrder, NetworkEndian};
+use opentelemetry::sdk::export::trace::{ExportResult, SpanData, SpanExporter};
+use opentelemetry::sdk::export::ExportError;
+use slog::{error, o, Logger};
+use std::io::{ErrorKind, Write};
+use std::net::Shutdown;
+use std::sync::Mutex;
+use vsock::{SockAddr, VsockStream};
+
+const ANY_CID: &str = "any";
+
+// Must match the value of the variable of the same name in the trace forwarder.
+const HEADER_SIZE_BYTES: u64 = std::mem::size_of::<u64>() as u64;
+
+// By default, the VSOCK exporter should talk "out" to the host where the
+// forwarder is running.
+const DEFAULT_CID: u32 = libc::VMADDR_CID_HOST;
+
+// The VSOCK port the forwarder listens on by default
+const DEFAULT_PORT: u32 = 10240;
+
+#[derive(Debug)]
+pub struct Exporter {
+    port: u32,
+    cid: u32,
+    conn: Mutex<VsockStream>,
+    logger: Logger,
+}
+
+impl Exporter {
+    /// Create a new exporter builder.
+    pub fn builder() -> Builder {
+        Builder::default()
+    }
+}
+
+#[derive(thiserror::Error, Debug)]
+pub enum Error {
+    #[error("connection error: {0}")]
+    ConnectionError(String),
+    #[error("serialisation error: {0}")]
+    SerialisationError(#[from] bincode::Error),
+    #[error("I/O error: {0}")]
+    IOError(#[from] std::io::Error),
+}
+
+impl ExportError for Error {
+    fn exporter_name(&self) -> &'static str {
+        "vsock-exporter"
+    }
+}
+
+fn make_io_error(desc: String) -> std::io::Error {
+    std::io::Error::new(ErrorKind::Other, desc)
+}
+
+// Send a trace span to the forwarder running on the host.
+fn write_span(writer: &mut dyn Write, span: &SpanData) -> Result<(), std::io::Error> {
+    let encoded_payload: Vec<u8> =
+        bincode::serialize(&span).map_err(|e| make_io_error(e.to_string()))?;
+
+    let payload_len: u64 = encoded_payload.len() as u64;
+
+    let mut payload_len_as_bytes: [u8; HEADER_SIZE_BYTES as usize] =
+        [0; HEADER_SIZE_BYTES as usize];
+
+    // Encode the header
+    NetworkEndian::write_u64(&mut payload_len_as_bytes, payload_len);
+
+    // Send the header
+    writer
+        .write_all(&payload_len_as_bytes)
+        .map_err(|e| make_io_error(format!("failed to write trace header: {:?}", e)))?;
+
+    writer
+        .write_all(&encoded_payload)
+        .map_err(|e| make_io_error(format!("failed to write trace payload: {:?}", e)))
+}
+
+fn handle_batch(writer: &mut dyn Write, batch: Vec<SpanData>) -> ExportResult {
+    for span_data in batch {
+        write_span(writer, &span_data).map_err(Error::IOError)?;
+    }
+
+    Ok(())
+}
+
+#[async_trait]
+impl SpanExporter for Exporter {
+    /// Send every span in `batch` over the VSOCK connection.
+    async fn export(&mut self, batch: Vec<SpanData>) -> ExportResult {
+        match self.conn.lock() {
+            Ok(mut c) => handle_batch(&mut *c, batch),
+            Err(e) => {
+                error!(self.logger, "failed to obtain connection";
+                        "error" => format!("{}", e));
+                Err(Error::ConnectionError(e.to_string()).into())
+            }
+        }
+    }
+
+    /// Shut down the write half of the connection; failures are only logged.
+    fn shutdown(&mut self) {
+        let conn = match self.conn.lock() {
+            Ok(conn) => conn,
+            Err(e) => {
+                error!(self.logger, "failed to obtain connection";
+                        "error" => format!("{}", e));
+                return;
+            }
+        };
+        if let Err(e) = conn.shutdown(Shutdown::Write) {
+            error!(self.logger, "failed to shutdown VSOCK connection";
+                    "error" => format!("{}", e));
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct Builder {
+    port: u32,
+    cid: u32,
+    logger: Logger,
+}
+
+impl Default for Builder {
+    fn default() -> Self {
+        let logger = Logger::root(slog::Discard, o!());
+
+        Builder {
+            cid: DEFAULT_CID,
+            port: DEFAULT_PORT,
+            logger,
+        }
+    }
+}
+
+impl Builder {
+    /// Set the VSOCK context ID to connect to.
+    pub fn with_cid(self, cid: u32) -> Self {
+        Builder { cid, ..self }
+    }
+
+    /// Set the VSOCK port to connect to.
+    pub fn with_port(self, port: u32) -> Self {
+        Builder { port, ..self }
+    }
+
+    /// Use a child of `logger` for the exporter's messages.
+    pub fn with_logger(self, logger: &Logger) -> Self {
+        Builder {
+            logger: logger.new(o!()),
+            ..self
+        }
+    }
+
+    /// Connect to the VSOCK server and return the exporter; panics on failure.
+    pub fn init(self) -> Exporter {
+        let Builder { port, cid, logger } = self;
+        let sock_addr = SockAddr::new_vsock(cid, port);
+
+        let cid_str = if cid == libc::VMADDR_CID_ANY {
+            ANY_CID.to_string()
+        } else {
+            cid.to_string()
+        };
+
+        let msg = format!(
+            "failed to connect to VSOCK server (port: {}, cid: {}) - {}",
+            port, cid_str, "ensure trace forwarder is running on host"
+        );
+        let conn = VsockStream::connect(&sock_addr).expect(&msg);
+
+        Exporter {
+            port,
+            cid,
+            conn: Mutex::new(conn),
+            logger: logger.new(o!("cid" => cid_str, "port" => port)),
+        }
+    }
+}
--- a/src/runtime/Makefile
+++ b/src/runtime/Makefile
@@ -166,6 +166,7 @@ DEFAULTEXPFEATURES := []

 #Default entropy source
 DEFENTROPYSOURCE := /dev/urandom
+DEFVALIDENTROPYSOURCES := [\"/dev/urandom\",\"/dev/random\",\"\"]

 DEFDISABLEBLOCK := false
 DEFSHAREDFS_QEMU_VIRTIOFS := virtio-fs
@@ -454,6 +455,7 @@ USER_VARS += DEFFILEMEMBACKEND
 USER_VARS += DEFVALIDFILEMEMBACKENDS
 USER_VARS += DEFMSIZE9P
 USER_VARS += DEFENTROPYSOURCE
+USER_VARS += DEFVALIDENTROPYSOURCES
 USER_VARS += DEFSANDBOXCGROUPONLY
 USER_VARS += DEFBINDMOUNTS
 USER_VARS += FEATURE_SELINUX
--- a/src/runtime/VERSION
+++ b/src/runtime/VERSION
@@ -1 +0,0 @@
-2.0.0
--- a/src/runtime/VERSION
+++ b/src/runtime/VERSION
@@ -0,0 +1 @@
+../../VERSION
--- a/src/runtime/arch/s390x-options.mk
+++ b/src/runtime/arch/s390x-options.mk
@@ -11,3 +11,10 @@ MACHINEACCELERATORS :=
 CPUFEATURES :=

 QEMUCMD := qemu-system-s390x
+
+# See https://github.com/kata-containers/osbuilder/issues/217
+FEDORA_LIKE = $(shell grep -E "\<fedora\>" /etc/os-release 2> /dev/null)
+ifneq (,$(FEDORA_LIKE))
+	CC := gcc
+	export CC
+endif
--- a/src/runtime/cli/config/configuration-acrn.toml.in
+++ b/src/runtime/cli/config/configuration-acrn.toml.in
@@ -150,6 +150,10 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_ACRN@"

 #debug_console_enabled = true

+# Agent connection dialing timeout value in seconds
+# (default: 30)
+#dial_timeout = 30
+
 [netmon]
 # If enabled, the network monitoring process gets started when the
 # sandbox is created. This allows for the detection of some additional
--- a/src/runtime/cli/config/configuration-clh.toml.in
+++ b/src/runtime/cli/config/configuration-clh.toml.in
@@ -165,6 +165,10 @@ block_device_driver = "virtio-blk"

 #debug_console_enabled = true

+# Agent connection dialing timeout value in seconds
+# (default: 30)
+#dial_timeout = 30
+
 [netmon]
 # If enabled, the network monitoring process gets started when the
 # sandbox is created. This allows for the detection of some additional
--- a/src/runtime/cli/config/configuration-fc.toml.in
+++ b/src/runtime/cli/config/configuration-fc.toml.in
@@ -161,23 +161,23 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_FC@"

 # This option changes the default hypervisor and kernel parameters
 # to enable debug output where available.
-# 
+#
 # Default false
 #enable_debug = true

 # Disable the customizations done in the runtime when it detects
 # that it is running on top a VMM. This will result in the runtime
 # behaving as it would when running on bare metal.
-# 
+#
 #disable_nesting_checks = true

-# This is the msize used for 9p shares. It is the number of bytes 
+# This is the msize used for 9p shares. It is the number of bytes
 # used for 9p packet payload.
 #msize_9p = @DEFMSIZE9P@

-# VFIO devices are hotplugged on a bridge by default. 
+# VFIO devices are hotplugged on a bridge by default.
 # Enable hotplugging on root bus. This may be required for devices with
-# a large PCI bar, as this is a current limitation with hotplugging on 
+# a large PCI bar, as this is a current limitation with hotplugging on
 # a bridge. This value is valid for "pc" machine type.
 # Default false
 #hotplug_vfio_on_root_bus = true
@@ -194,6 +194,11 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_FC@"
 # all practical purposes.
 #entropy_source= "@DEFENTROPYSOURCE@"

+# List of valid annotations values for entropy_source
+# The default if not set is empty (all annotations rejected.)
+# Your distribution recommends: @DEFVALIDENTROPYSOURCES@
+valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
+
 # Path to OCI hook binaries in the *guest rootfs*.
 # This does not affect host-side hooks which must instead be added to
 # the OCI spec passed to the runtime.
@@ -282,6 +287,10 @@ kernel_modules=[]

 #debug_console_enabled = true

+# Agent connection dialing timeout value in seconds
+# (default: 30)
+#dial_timeout = 30
+
 [netmon]
 # If enabled, the network monitoring process gets started when the
 # sandbox is created. This allows for the detection of some additional
--- a/src/runtime/cli/config/configuration-qemu.toml.in
+++ b/src/runtime/cli/config/configuration-qemu.toml.in
@@ -16,6 +16,14 @@ kernel = "@KERNELPATH@"
 image = "@IMAGEPATH@"
 machine_type = "@MACHINETYPE@"

+# Enable confidential guest support.
+# Toggling that setting may trigger different hardware features, ranging
+# from memory encryption to both memory and CPU-state encryption and integrity.
+# The Kata Containers runtime dynamically detects the available feature set and
+# aims at enabling the largest possible one.
+# Default false
+# confidential_guest = true
+
 # List of valid annotation names for the hypervisor
 # Each member of the list is a regular expression, which is the base name
 # of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path"
@@ -296,6 +304,11 @@ pflashes = []
 # all practical purposes.
 #entropy_source= "@DEFENTROPYSOURCE@"

+# List of valid annotations values for entropy_source
+# The default if not set is empty (all annotations rejected.)
+# Your distribution recommends: @DEFVALIDENTROPYSOURCES@
+valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
+
 # Path to OCI hook binaries in the *guest rootfs*.
 # This does not affect host-side hooks which must instead be added to
 # the OCI spec passed to the runtime.
@@ -432,6 +445,10 @@ kernel_modules=[]

 #debug_console_enabled = true

+# Agent connection dialing timeout value in seconds
+# (default: 30)
+#dial_timeout = 30
+
 [netmon]
 # If enabled, the network monitoring process gets started when the
 # sandbox is created. This allows for the detection of some additional
@@ -527,3 +544,30 @@ experimental=@DEFAULTEXPFEATURES@
 # If enabled, user can run pprof tools with shim v2 process through kata-monitor.
 # (default: false)
 # enable_pprof = true
+
+# WARNING: All the options in the following section have not been implemented yet.
+# This section was added as a placeholder. DO NOT USE IT!
+[image]
+# Container image service.
+#
+# Offload the CRI image management service to the Kata agent.
+# (default: false)
+#service_offload = true
+
+# Container image decryption keys provisioning.
+# Applies only if service_offload is true.
+# Keys can be provisioned locally (e.g. through a special command or
+# a local file) or remotely (usually after the guest is remotely attested).
+# The provision setting is a complete URL that lets the Kata agent decide
+# which method to use in order to fetch the keys.
+#
+# Keys can be stored in a local file, in a measured and attested initrd:
+#provision=data:///local/key/file
+#
+# Keys could be fetched through a special command or binary from the
+# initrd (guest) image, e.g. a firmware call:
+#provision=file:///path/to/bin/fetcher/in/guest
+#
+# Keys can be remotely provisioned. The Kata agent fetches them from e.g.
+# a HTTPS URL:
+#provision=https://my-key-broker.foo/tenant/<tenant-id>
--- a/src/runtime/cli/console.go
+++ b/src/runtime/cli/console.go
@@ -1,134 +0,0 @@
-// Copyright (c) 2014,2015,2016 Docker, Inc.
-// Copyright (c) 2017 Intel Corporation
-//
-// SPDX-License-Identifier: Apache-2.0
-//
-
-package main
-
-import (
-	"fmt"
-	"io"
-	"os"
-	"syscall"
-	"unsafe"
-
-	"golang.org/x/sys/unix"
-)
-
-var ptmxPath = "/dev/ptmx"
-
-// Console represents a pseudo TTY.
-type Console struct {
-	io.ReadWriteCloser
-
-	master    *os.File
-	slavePath string
-}
-
-// isTerminal returns true if fd is a terminal, else false
-func isTerminal(fd uintptr) bool {
-	var termios syscall.Termios
-	_, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, syscall.TCGETS, uintptr(unsafe.Pointer(&termios)))
-	return err == 0
-}
-
-// ConsoleFromFile creates a console from a file
-func ConsoleFromFile(f *os.File) *Console {
-	return &Console{
-		master: f,
-	}
-}
-
-// NewConsole returns an initialized console that can be used within a container by copying bytes
-// from the master side to the slave that is attached as the tty for the container's init process.
-func newConsole() (*Console, error) {
-	master, err := os.OpenFile(ptmxPath, unix.O_RDWR|unix.O_NOCTTY|unix.O_CLOEXEC, 0)
-	if err != nil {
-		return nil, err
-	}
-	if err := saneTerminal(master); err != nil {
-		return nil, err
-	}
-	console, err := ptsname(master)
-	if err != nil {
-		return nil, err
-	}
-	if err := unlockpt(master); err != nil {
-		return nil, err
-	}
-	return &Console{
-		slavePath: console,
-		master:    master,
-	}, nil
-}
-
-// File returns master
-func (c *Console) File() *os.File {
-	return c.master
-}
-
-// Path to slave
-func (c *Console) Path() string {
-	return c.slavePath
-}
-
-// Read from master
-func (c *Console) Read(b []byte) (int, error) {
-	return c.master.Read(b)
-}
-
-// Write to master
-func (c *Console) Write(b []byte) (int, error) {
-	return c.master.Write(b)
-}
-
-// Close master
-func (c *Console) Close() error {
-	if m := c.master; m != nil {
-		return m.Close()
-	}
-	return nil
-}
-
-// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f.
-// unlockpt should be called before opening the slave side of a pty.
-func unlockpt(f *os.File) error {
-	var u int32
-	if _, _, err := unix.Syscall(unix.SYS_IOCTL, f.Fd(), unix.TIOCSPTLCK, uintptr(unsafe.Pointer(&u))); err != 0 {
-		return err
-	}
-	return nil
-}
-
-// ptsname retrieves the name of the first available pts for the given master.
-func ptsname(f *os.File) (string, error) {
-	var u uint32
-	if _, _, err := unix.Syscall(unix.SYS_IOCTL, f.Fd(), unix.TIOCGPTN, uintptr(unsafe.Pointer(&u))); err != 0 {
-		return "", err
-	}
-	return fmt.Sprintf("/dev/pts/%d", u), nil
-}
-
-// saneTerminal sets the necessary tty_ioctl(4)s to ensure that a pty pair
-// created by us acts normally. In particular, a not-very-well-known default of
-// Linux unix98 ptys is that they have +onlcr by default. While this isn't a
-// problem for terminal emulators, because we relay data from the terminal we
-// also relay that funky line discipline.
-func saneTerminal(terminal *os.File) error {
-	// Go doesn't have a wrapper for any of the termios ioctls.
-	var termios unix.Termios
-
-	if _, _, err := unix.Syscall(unix.SYS_IOCTL, terminal.Fd(), unix.TCGETS, uintptr(unsafe.Pointer(&termios))); err != 0 {
-		return fmt.Errorf("ioctl(tty, tcgets): %s", err.Error())
-	}
-
-	// Set -onlcr so we don't have to deal with \r.
-	termios.Oflag &^= unix.ONLCR
-
-	if _, _, err := unix.Syscall(unix.SYS_IOCTL, terminal.Fd(), unix.TCSETS, uintptr(unsafe.Pointer(&termios))); err != 0 {
-		return fmt.Errorf("ioctl(tty, tcsets): %s", err.Error())
-	}
-
-	return nil
-}
--- a/src/runtime/cli/console_test.go
+++ b/src/runtime/cli/console_test.go
@@ -1,129 +0,0 @@
-// Copyright (c) 2017 Intel Corporation
-//
-// SPDX-License-Identifier: Apache-2.0
-//
-
-package main
-
-import (
-	"io/ioutil"
-	"os"
-	"testing"
-
-	ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils"
-	"github.com/stretchr/testify/assert"
-)
-
-func TestConsoleFromFile(t *testing.T) {
-	assert := assert.New(t)
-
-	console := ConsoleFromFile(os.Stdout)
-
-	assert.NotNil(console.File(), "console file is nil")
-}
-
-func TestNewConsole(t *testing.T) {
-	if tc.NotValid(ktu.NeedRoot()) {
-		t.Skip(testDisabledAsNonRoot)
-	}
-	assert := assert.New(t)
-
-	console, err := newConsole()
-	assert.NoError(err, "failed to create a new console: %s", err)
-	defer console.Close()
-
-	assert.NotEmpty(console.Path(), "console path is empty")
-
-	assert.NotNil(console.File(), "console file is nil")
-}
-
-func TestIsTerminal(t *testing.T) {
-	if tc.NotValid(ktu.NeedRoot()) {
-		t.Skip(testDisabledAsNonRoot)
-	}
-	assert := assert.New(t)
-
-	var fd uintptr = 4
-	assert.False(isTerminal(fd), "Fd %d is not a terminal", fd)
-
-	console, err := newConsole()
-	assert.NoError(err, "failed to create a new console: %s", err)
-	defer console.Close()
-
-	fd = console.File().Fd()
-	assert.True(isTerminal(fd), "Fd %d is a terminal", fd)
-}
-
-func TestReadWrite(t *testing.T) {
-	assert := assert.New(t)
-
-	// write operation
-	f, err := ioutil.TempFile(os.TempDir(), ".tty")
-	assert.NoError(err, "failed to create a temporal file")
-	defer os.Remove(f.Name())
-
-	console := ConsoleFromFile(f)
-	assert.NotNil(console)
-	defer console.Close()
-
-	msgWrite := "hello"
-	l, err := console.Write([]byte(msgWrite))
-	assert.NoError(err, "failed to write message: %s", msgWrite)
-	assert.Equal(len(msgWrite), l)
-
-	console.master.Sync()
-	console.master.Seek(0, 0)
-
-	// Read operation
-	msgRead := make([]byte, len(msgWrite))
-	l, err = console.Read(msgRead)
-	assert.NoError(err, "failed to read message: %s", msgWrite)
-	assert.Equal(len(msgWrite), l)
-	assert.Equal(msgWrite, string(msgRead))
-}
-
-func TestNewConsoleFail(t *testing.T) {
-	assert := assert.New(t)
-
-	orgPtmxPath := ptmxPath
-	defer func() { ptmxPath = orgPtmxPath }()
-
-	// OpenFile failure
-	ptmxPath = "/this/file/does/not/exist"
-	c, err := newConsole()
-	assert.Error(err)
-	assert.Nil(c)
-
-	// saneTerminal failure
-	f, err := ioutil.TempFile("", "")
-	assert.NoError(err)
-	assert.NoError(f.Close())
-	defer os.Remove(f.Name())
-	ptmxPath = f.Name()
-	c, err = newConsole()
-	assert.Error(err)
-	assert.Nil(c)
-}
-
-func TestConsoleClose(t *testing.T) {
-	assert := assert.New(t)
-
-	// nil master
-	c := &Console{}
-	assert.NoError(c.Close())
-
-	f, err := ioutil.TempFile("", "")
-	assert.NoError(err)
-	defer os.Remove(f.Name())
-
-	c.master = f
-	assert.NoError(c.Close())
-}
-
-func TestConsolePtsnameFail(t *testing.T) {
-	assert := assert.New(t)
-
-	pts, err := ptsname(nil)
-	assert.Error(err)
-	assert.Empty(pts)
-}
--- a/src/runtime/cli/kata-check.go
+++ b/src/runtime/cli/kata-check.go
@@ -25,6 +25,7 @@ import (
 	"strings"
 	"syscall"

+	"github.com/containerd/cgroups"
 	"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
 	vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/oci"
@@ -389,13 +390,6 @@ EXAMPLES:
 		if verbose {
 			kataLog.Logger.SetLevel(logrus.InfoLevel)
 		}
-		ctx, err := cliContextToContext(context)
-		if err != nil {
-			return err
-		}
-
-		span, _ := katautils.Trace(ctx, "check")
-		defer span.End()

 		if !context.Bool("no-network-checks") && os.Getenv(noNetworkEnvVar) == "" {
 			cmd := RelCmdCheck
@@ -407,8 +401,7 @@ EXAMPLES:
 			if os.Geteuid() == 0 {
 				kataLog.Warn("Not running network checks as super user")
 			} else {
-
-				err = HandleReleaseVersions(cmd, version, context.Bool("include-all-releases"))
+				err := HandleReleaseVersions(cmd, version, context.Bool("include-all-releases"))
 				if err != nil {
 					return err
 				}
@@ -424,7 +417,12 @@ EXAMPLES:
 			return errors.New("check: cannot determine runtime config")
 		}

-		err = setCPUtype(runtimeConfig.HypervisorType)
+		// Check whether cgroups can work, using the same logic as when creating containers.
+		if _, err := vc.V1Constraints(); err != nil && err == cgroups.ErrMountPointNotExist && !runtimeConfig.SandboxCgroupOnly {
+			return fmt.Errorf("Cgroup v2 requires the following configuration: `sandbox_cgroup_only=true`.")
+		}
+
+		err := setCPUtype(runtimeConfig.HypervisorType)
 		if err != nil {
 			return err
 		}
@@ -437,7 +435,6 @@ EXAMPLES:
 		}

 		err = hostIsVMContainerCapable(details)
-
 		if err != nil {
 			return err
 		}
--- a/src/runtime/cli/kata-env.go
+++ b/src/runtime/cli/kata-env.go
@@ -13,7 +13,6 @@ import (
 	"strings"

 	"github.com/BurntSushi/toml"
-	"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
 	"github.com/kata-containers/kata-containers/src/runtime/pkg/utils"
 	vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
 	exp "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/experimental"
@@ -448,14 +447,6 @@ var kataEnvCLICommand = cli.Command{
 		},
 	},
 	Action: func(context *cli.Context) error {
-		ctx, err := cliContextToContext(context)
-		if err != nil {
-			return err
-		}
-
-		span, _ := katautils.Trace(ctx, "kata-env")
-		defer span.End()
-
 		return handleSettings(defaultOutputFile, context)
 	},
 }
--- a/src/runtime/cli/kata-env_s390x_test.go
+++ b/src/runtime/cli/kata-env_s390x_test.go
@@ -1,91 +1,13 @@
-// Copyright (c) 2018 IBM
+// Copyright (c) 2021 IBM
 //
 // SPDX-License-Identifier: Apache-2.0
 //

 package main

-import (
-	"fmt"
-	vcUtils "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
-	"path/filepath"
-	goruntime "runtime"
-)
-
 func getExpectedHostDetails(tmpdir string) (HostInfo, error) {
-	type filesToCreate struct {
-		file     string
-		contents string
-	}
-
-	const expectedKernelVersion = "99.1"
-	const expectedArch = goruntime.GOARCH
-
-	expectedDistro := DistroInfo{
-		Name:    "Foo",
-		Version: "42",
-	}
-
-	expectedCPU := CPUInfo{
-		Vendor: "moi",
-		Model:  "awesome XI",
-	}
-
-	expectedHostDetails := HostInfo{
-		Kernel:             expectedKernelVersion,
-		Architecture:       expectedArch,
-		Distro:             expectedDistro,
-		CPU:                expectedCPU,
-		VMContainerCapable: true,
-		SupportVSocks:      vcUtils.SupportsVsocks(),
-	}
-
-	testProcCPUInfo := filepath.Join(tmpdir, "cpuinfo")
-	testOSRelease := filepath.Join(tmpdir, "os-release")
-
-	// XXX: This file is *NOT* created by this function on purpose
-	// (to ensure the only file checked by the tests is
-	// testOSRelease). osReleaseClr handling is tested in
-	// utils_test.go.
-	testOSReleaseClr := filepath.Join(tmpdir, "os-release-clr")
-
-	testProcVersion := filepath.Join(tmpdir, "proc-version")
-
-	// override
-	procVersion = testProcVersion
-	osRelease = testOSRelease
-	osReleaseClr = testOSReleaseClr
-	procCPUInfo = testProcCPUInfo
-
-	procVersionContents := fmt.Sprintf("Linux version %s a b c",
-		expectedKernelVersion)
-
-	osReleaseContents := fmt.Sprintf(`
-NAME="%s"
-VERSION_ID="%s"
-`, expectedDistro.Name, expectedDistro.Version)
-
-	procCPUInfoContents := fmt.Sprintf(`
-%s	: %s
-processor 0: version = 00,  identification = 3929E7,  %s = %s
-`,
-		archCPUVendorField,
-		expectedCPU.Vendor,
-		archCPUModelField,
-		expectedCPU.Model)
-
-	data := []filesToCreate{
-		{procVersion, procVersionContents},
-		{osRelease, osReleaseContents},
-		{procCPUInfo, procCPUInfoContents},
-	}
-
-	for _, d := range data {
-		err := createFile(d.file, d.contents)
-		if err != nil {
-			return HostInfo{}, err
-		}
-	}
-
-	return expectedHostDetails, nil
+	expectedVendor := "moi"
+	expectedModel := "awesome XI"
+	expectedVMContainerCapable := true
+	return genericGetExpectedHostDetails(tmpdir, expectedVendor, expectedModel, expectedVMContainerCapable)
 }
--- a/src/runtime/cli/kata-env_test.go
+++ b/src/runtime/cli/kata-env_test.go
@@ -161,7 +161,7 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC
 		return "", oci.RuntimeConfig{}, err
 	}

-	_, config, err = katautils.LoadConfiguration(configFile, true, false)
+	_, config, err = katautils.LoadConfiguration(configFile, true)
 	if err != nil {
 		return "", oci.RuntimeConfig{}, err
 	}
--- a/src/runtime/cli/kata-exec.go
+++ b/src/runtime/cli/kata-exec.go
@@ -14,7 +14,6 @@ import (
 	"net/http"
 	"net/url"
 	"os"
-	"path/filepath"
 	"strings"

 	"sync"
@@ -26,7 +25,6 @@ import (
 	clientUtils "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/client"
 	"github.com/pkg/errors"
 	"github.com/urfave/cli"
-	"go.opentelemetry.io/otel/label"
 )

 const (
@@ -38,10 +36,8 @@ const (

 	subCommandName = "exec"
 	// command-line parameters name
-	paramRuntimeNamespace                    = "runtime-namespace"
 	paramDebugConsolePort                    = "kata-debug-port"
 	defaultKernelParamDebugConsoleVPortValue = 1026
-	defaultRuntimeNamespace                  = "k8s.io"
 )

 var (
@@ -57,34 +53,16 @@ var kataExecCLICommand = cli.Command{
 	Name:  subCommandName,
 	Usage: "Enter into guest by debug console",
 	Flags: []cli.Flag{
-		cli.StringFlag{
-			Name:  paramRuntimeNamespace,
-			Usage: "Namespace that containerd or CRI-O are using for containers. (Default: k8s.io, only works for containerd)",
-		},
 		cli.Uint64Flag{
 			Name:  paramDebugConsolePort,
 			Usage: "Port that debug console is listening on. (Default: 1026)",
 		},
 	},
 	Action: func(context *cli.Context) error {
-		ctx, err := cliContextToContext(context)
-		if err != nil {
-			return err
-		}
-		span, _ := katautils.Trace(ctx, subCommandName)
-		defer span.End()
-
-		namespace := context.String(paramRuntimeNamespace)
-		if namespace == "" {
-			namespace = defaultRuntimeNamespace
-		}
-		span.SetAttributes(label.Key("namespace").String(namespace))
-
 		port := context.Uint64(paramDebugConsolePort)
 		if port == 0 {
 			port = defaultKernelParamDebugConsoleVPortValue
 		}
-		span.SetAttributes(label.Key("port").Uint64(port))

 		sandboxID := context.Args().Get(0)

@@ -92,9 +70,8 @@ var kataExecCLICommand = cli.Command{
 			return err
 		}

-		span.SetAttributes(label.Key("sandbox").String(sandboxID))
+		conn, err := getConn(sandboxID, port)

-		conn, err := getConn(namespace, sandboxID, port)
 		if err != nil {
 			return err
 		}
@@ -177,9 +154,8 @@ func (s *iostream) Read(data []byte) (n int, err error) {
 	return s.conn.Read(data)
 }

-func getConn(namespace, sandboxID string, port uint64) (net.Conn, error) {
-	socketAddr := filepath.Join(string(filepath.Separator), "containerd-shim", namespace, sandboxID, "shim-monitor.sock")
-	client, err := kataMonitor.BuildUnixSocketClient(socketAddr, defaultTimeout)
+func getConn(sandboxID string, port uint64) (net.Conn, error) {
+	client, err := kataMonitor.BuildShimClient(sandboxID, defaultTimeout)
 	if err != nil {
 		return nil, err
 	}
@@ -190,7 +166,7 @@ func getConn(namespace, sandboxID string, port uint64) (net.Conn, error) {
 	}

 	if resp.StatusCode != http.StatusOK {
-		return nil, fmt.Errorf("Failed to get %s: %d", socketAddr, resp.StatusCode)
+		return nil, fmt.Errorf("Failure from %s shim-monitor: %d", sandboxID, resp.StatusCode)
 	}

 	defer resp.Body.Close()
--- a/src/runtime/cli/kata-metrics.go
+++ b/src/runtime/cli/kata-metrics.go
@@ -0,0 +1,38 @@
+// Copyright (c) 2021 Apple Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+package main
+
+import (
+	"fmt"
+
+	kataMonitor "github.com/kata-containers/kata-containers/src/runtime/pkg/kata-monitor"
+	"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
+	"github.com/urfave/cli"
+)
+
+var kataMetricsCLICommand = cli.Command{
+	Name:      "metrics",
+	Usage:     "gather metrics associated with infrastructure used to run a sandbox",
+	UsageText: "metrics <sandbox id>",
+	Action: func(context *cli.Context) error {
+
+		sandboxID := context.Args().Get(0)
+
+		if err := katautils.VerifyContainerID(sandboxID); err != nil {
+			return err
+		}
+
+		// Get the metrics!
+		metrics, err := kataMonitor.GetSandboxMetrics(sandboxID)
+		if err != nil {
+			return err
+		}
+
+		fmt.Printf("%s\n", metrics)
+
+		return nil
+	},
+}
--- a/src/runtime/cli/main.go
+++ b/src/runtime/cli/main.go
@@ -22,14 +22,12 @@ import (
 	vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
 	exp "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/experimental"
 	vf "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/factory"
+	tl "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/factory/template"
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/oci"
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless"
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/sirupsen/logrus"
 	"github.com/urfave/cli"
-	"go.opentelemetry.io/otel"
-	"go.opentelemetry.io/otel/label"
-	otelTrace "go.opentelemetry.io/otel/trace"
 )

 // specConfig is the name of the file holding the containers configuration
@@ -125,6 +123,7 @@ var runtimeCommands = []cli.Command{
 	kataCheckCLICommand,
 	kataEnvCLICommand,
 	kataExecCLICommand,
+	kataMetricsCLICommand,
 	factoryCLICommand,
 }

@@ -132,10 +131,6 @@ var runtimeCommands = []cli.Command{
 // parsing occurs.
 var runtimeBeforeSubcommands = beforeSubcommands

-// runtimeAfterSubcommands is the function to run after the command-line
-// has been parsed.
-var runtimeAfterSubcommands = afterSubcommands
-
 // runtimeCommandNotFound is the function to handle an invalid sub-command.
 var runtimeCommandNotFound = commandNotFound

@@ -168,10 +163,6 @@ func init() {

 // setupSignalHandler sets up signal handling, starting a go routine to deal
 // with signals as they arrive.
-//
-// Note that the specified context is NOT used to create a trace span (since the
-// first (root) span must be created in beforeSubcommands()): it is simply
-// used to pass to the crash handling functions to finalise tracing.
 func setupSignalHandler(ctx context.Context) {
 	signals.SetLogger(kataLog)

@@ -181,10 +172,6 @@ func setupSignalHandler(ctx context.Context) {
 		signal.Notify(sigCh, sig)
 	}

-	dieCb := func() {
-		katautils.StopTracing(ctx)
-	}
-
 	go func() {
 		for {
 			sig := <-sigCh
@@ -198,7 +185,6 @@ func setupSignalHandler(ctx context.Context) {

 			if signals.FatalSignal(nativeSignal) {
 				kataLog.WithField("signal", sig).Error("received fatal signal")
-				signals.Die(dieCb)
 			} else if debug && signals.NonFatalSignal(nativeSignal) {
 				kataLog.WithField("signal", sig).Debug("handling signal")
 				signals.Backtrace()
@@ -210,24 +196,15 @@ func setupSignalHandler(ctx context.Context) {
 // setExternalLoggers registers the specified logger with the external
 // packages which accept a logger to handle their own logging.
 func setExternalLoggers(ctx context.Context, logger *logrus.Entry) {
-	var span otelTrace.Span
-
-	// Only create a new span if a root span already exists. This is
-	// required to ensure that this function will not disrupt the root
-	// span logic by creating a span before the proper root span has been
-	// created.
-
-	if otelTrace.SpanFromContext(ctx) != nil {
-		span, ctx = katautils.Trace(ctx, "setExternalLoggers")
-		defer span.End()
-	}
-
 	// Set virtcontainers logger.
 	vci.SetLogger(ctx, logger)

 	// Set vm factory logger.
 	vf.SetLogger(ctx, logger)

+	// Set vm factory template logger.
+	tl.SetLogger(ctx, logger)
+
 	// Set the OCI package logger.
 	oci.SetLogger(ctx, logger)

@@ -244,7 +221,6 @@ func beforeSubcommands(c *cli.Context) error {
 	var configFile string
 	var runtimeConfig oci.RuntimeConfig
 	var err error
-	var traceFlushFunc func()

 	katautils.SetConfigOptions(name, defaultRuntimeConfiguration, defaultSysConfRuntimeConfiguration)

@@ -270,7 +246,6 @@ func beforeSubcommands(c *cli.Context) error {
 	// Issue: https://github.com/kata-containers/runtime/issues/2428

 	ignoreConfigLogs := false
-	var traceRootSpan string

 	subCmdIsCheckCmd := (c.NArg() >= 1 && ((c.Args()[0] == "kata-check") || (c.Args()[0] == "check")))
 	if subCmdIsCheckCmd {
@@ -302,16 +277,13 @@ func beforeSubcommands(c *cli.Context) error {
 		cmdName := c.Args().First()
 		if c.App.Command(cmdName) != nil {
 			kataLog = kataLog.WithField("command", cmdName)
-
-			// Name for the root span (used for tracing) now the
-			// sub-command name is known.
-			traceRootSpan = name + " " + cmdName
 		}

-		// Since a context is required, pass a new (throw-away) one - we
-		// cannot use the main context as tracing hasn't been enabled yet
-		// (meaning any spans created at this point will be silently ignored).
-		setExternalLoggers(context.Background(), kataLog)
+		ctx, err := cliContextToContext(c)
+		if err != nil {
+			return err
+		}
+		setExternalLoggers(ctx, kataLog)

 		if c.NArg() == 1 && (c.Args()[0] == "kata-env" || c.Args()[0] == "env") {
 			// simply report the logging setup
@@ -319,26 +291,12 @@ func beforeSubcommands(c *cli.Context) error {
 		}
 	}

-	configFile, runtimeConfig, err = katautils.LoadConfiguration(c.GlobalString("kata-config"), ignoreConfigLogs, false)
+	configFile, runtimeConfig, err = katautils.LoadConfiguration(c.GlobalString("kata-config"), ignoreConfigLogs)
 	if err != nil {
 		fatal(err)
 	}
 	if !subCmdIsCheckCmd {
 		debug = runtimeConfig.Debug
-
-		if traceRootSpan != "" {
-			// Create the tracer.
-			//
-			// Note: no spans are created until the command-line has been parsed.
-			// This delays collection of trace data slightly but benefits the user by
-			// ensuring the first span is the name of the sub-command being
-			// invoked from the command-line.
-			traceFlushFunc, err = setupTracing(c, traceRootSpan, &runtimeConfig)
-			if err != nil {
-				return err
-			}
-			defer traceFlushFunc()
-		}
 	}

 	args := strings.Join(c.Args(), " ")
@@ -377,36 +335,6 @@ func handleShowConfig(context *cli.Context) {
 	}
 }

-func setupTracing(context *cli.Context, rootSpanName string, config *oci.RuntimeConfig) (func(), error) {
-	flush, err := katautils.CreateTracer(name, config)
-	if err != nil {
-		return nil, err
-	}
-
-	ctx, err := cliContextToContext(context)
-	if err != nil {
-		return nil, err
-	}
-
-	// Create the root span now that the sub-command name is
-	// known.
-	//
-	// Note that this "Before" function is called (and returns)
-	// before the subcommand handler is called. As such, we cannot
-	// "Finish()" the span here - that is handled in the .After
-	// function.
-	tracer := otel.Tracer("kata")
-	newCtx, span := tracer.Start(ctx, rootSpanName)
-
-	span.SetAttributes(label.Key("subsystem").String("runtime"))
-
-	// Add tracer to metadata and update the context
-	context.App.Metadata["tracer"] = tracer
-	context.App.Metadata["context"] = newCtx
-
-	return flush, nil
-}
-
 // add supported experimental features in context
 func addExpFeatures(clictx *cli.Context, runtimeConfig oci.RuntimeConfig) error {
 	ctx, err := cliContextToContext(clictx)
@@ -420,22 +348,11 @@ func addExpFeatures(clictx *cli.Context, runtimeConfig oci.RuntimeConfig) error
 	}

 	ctx = exp.ContextWithExp(ctx, exps)
-	// Add tracer to metadata and update the context
+	// Add experimental features to metadata and update the context
 	clictx.App.Metadata["context"] = ctx
 	return nil
 }

-func afterSubcommands(c *cli.Context) error {
-	ctx, err := cliContextToContext(c)
-	if err != nil {
-		return err
-	}
-
-	katautils.StopTracing(ctx)
-
-	return nil
-}
-
 // function called when an invalid command is specified which causes the
 // runtime to error.
 func commandNotFound(c *cli.Context, command string) {
@@ -502,7 +419,6 @@ func createRuntimeApp(ctx context.Context, args []string) error {
 	app.Flags = runtimeFlags
 	app.Commands = runtimeCommands
 	app.Before = runtimeBeforeSubcommands
-	app.After = runtimeAfterSubcommands
 	app.EnableBashCompletion = true

 	// allow sub-commands to access context
@@ -578,12 +494,5 @@ func cliContextToContext(c *cli.Context) (context.Context, error) {
 func main() {
 	// create a new empty context
 	ctx := context.Background()
-
-	dieCb := func() {
-		katautils.StopTracing(ctx)
-	}
-
-	defer signals.HandlePanic(dieCb)
-
 	createRuntime(ctx)
 }
--- a/src/runtime/cli/main_test.go
+++ b/src/runtime/cli/main_test.go
@@ -20,7 +20,6 @@ import (
 	"strings"
 	"testing"

-	"github.com/dlespiau/covertool/pkg/cover"
 	ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils"
 	"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
 	"github.com/kata-containers/kata-containers/src/runtime/pkg/utils"
@@ -152,16 +151,6 @@ func runUnitTests(m *testing.M) {
 // TestMain is the common main function used by ALL the test functions
 // for this package.
 func TestMain(m *testing.M) {
-	// Parse the command line using the stdlib flag package so the flags defined
-	// in the testing package get populated.
-	cover.ParseAndStripTestFlags()
-
-	// Make sure we have the opportunity to flush the coverage report to disk when
-	// terminating the process.
-	defer func() {
-		cover.FlushProfiles()
-	}()
-
 	// If the test binary name is kata-runtime.coverage, we've are being asked to
 	// run the coverage-instrumented kata-runtime.
 	if path.Base(os.Args[0]) == name+".coverage" ||
@@ -869,7 +858,7 @@ func TestMainCreateRuntime(t *testing.T) {
 	assert := assert.New(t)

 	const cmd = "foo"
-	const msg = "moo FAILURE"
+	const msg = "moo message"

 	resetCLIGlobals()

@@ -942,7 +931,7 @@ func TestMainFatalWriter(t *testing.T) {
 	assert := assert.New(t)

 	const cmd = "foo"
-	const msg = "moo FAILURE"
+	const msg = "moo message"

 	// create buffer to save logger output
 	buf := &bytes.Buffer{}
--- a/src/runtime/cli/version.go
+++ b/src/runtime/cli/version.go
@@ -6,7 +6,6 @@
 package main

 import (
-	"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
 	"github.com/urfave/cli"
 )

@@ -14,14 +13,6 @@ var versionCLICommand = cli.Command{
 	Name:  "version",
 	Usage: "display version details",
 	Action: func(context *cli.Context) error {
-		ctx, err := cliContextToContext(context)
-		if err != nil {
-			return err
-		}
-
-		span, _ := katautils.Trace(ctx, "version")
-		defer span.End()
-
 		cli.VersionPrinter(context)
 		return nil
 	},
--- a/src/runtime/containerd-shim-v2/create.go
+++ b/src/runtime/containerd-shim-v2/create.go
@@ -23,6 +23,7 @@ import (

 	// only register the proto type
 	_ "github.com/containerd/containerd/runtime/linux/runctypes"
+	_ "github.com/containerd/containerd/runtime/v2/runc/options"
 	crioption "github.com/containerd/cri-containerd/pkg/api/runtimeoptions/v1"

 	"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
@@ -191,7 +192,7 @@ func loadRuntimeConfig(s *service, r *taskAPI.CreateTaskRequest, anno map[string
 		configPath = os.Getenv("KATA_CONF_FILE")
 	}

-	_, runtimeConfig, err := katautils.LoadConfiguration(configPath, false, true)
+	_, runtimeConfig, err := katautils.LoadConfiguration(configPath, false)
 	if err != nil {
 		return nil, err
 	}
--- a/src/runtime/containerd-shim-v2/service.go
+++ b/src/runtime/containerd-shim-v2/service.go
@@ -294,8 +294,7 @@ func trace(ctx context.Context, name string) (otelTrace.Span, context.Context) {
 		ctx = context.Background()
 	}
 	tracer := otel.Tracer("kata")
-	ctx, span := tracer.Start(ctx, name)
-	span.SetAttributes([]label.KeyValue{label.Key("source").String("runtime"), label.Key("package").String("containerdshim")}...)
+	ctx, span := tracer.Start(ctx, name, otelTrace.WithAttributes(label.String("source", "runtime"), label.String("package", "containerdshim")))

 	return span, ctx
 }
--- a/src/runtime/containerd-shim-v2/shim_management.go
+++ b/src/runtime/containerd-shim-v2/shim_management.go
@@ -16,7 +16,6 @@ import (
 	"strconv"
 	"strings"

-	"github.com/containerd/containerd/namespaces"
 	cdshim "github.com/containerd/containerd/runtime/v2/shim"
 	vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
 	vcAnnotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations"
@@ -129,11 +128,7 @@ func decodeAgentMetrics(body string) []*dto.MetricFamily {

 func (s *service) startManagementServer(ctx context.Context, ociSpec *specs.Spec) {
 	// metrics socket will under sandbox's bundle path
-	metricsAddress, err := socketAddress(ctx, s.id)
-	if err != nil {
-		shimMgtLog.WithError(err).Error("failed to create socket address")
-		return
-	}
+	metricsAddress := SocketAddress(s.id)

 	listener, err := cdshim.NewSocket(metricsAddress)
 	if err != nil {
@@ -166,7 +161,7 @@ func (s *service) startManagementServer(ctx context.Context, ociSpec *specs.Spec
 	svr.Serve(listener)
 }

-// mountServeDebug provides a debug endpoint
+// mountPprofHandle provides a debug endpoint
 func (s *service) mountPprofHandle(m *http.ServeMux, ociSpec *specs.Spec) {

 	// return if not enabled
@@ -188,10 +183,8 @@ func (s *service) mountPprofHandle(m *http.ServeMux, ociSpec *specs.Spec) {
 	m.Handle("/debug/pprof/trace", http.HandlerFunc(pprof.Trace))
 }

-func socketAddress(ctx context.Context, id string) (string, error) {
-	ns, err := namespaces.NamespaceRequired(ctx)
-	if err != nil {
-		return "", err
-	}
-	return filepath.Join(string(filepath.Separator), "containerd-shim", ns, id, "shim-monitor.sock"), nil
+// SocketAddress returns the address of the abstract Unix domain socket used to communicate
+// with the shim management endpoint.
+func SocketAddress(id string) string {
+	return filepath.Join(string(filepath.Separator), "run", "vc", id, "shim-monitor")
 }
--- a/src/runtime/containerd-shim-v2/start.go
+++ b/src/runtime/containerd-shim-v2/start.go
@@ -14,7 +14,7 @@ import (
 )

 func startContainer(ctx context.Context, s *service, c *container) error {
-	//start a container
+	// start a container
 	if c.cType == "" {
 		err := fmt.Errorf("Bug, the container %s type is empty", c.id)
 		return err
@@ -37,8 +37,8 @@ func startContainer(ctx context.Context, s *service, c *container) error {
 		}
 		go watchSandbox(ctx, s)

-		// We don't rely on the context passed to startContainer as it can be cancelled after
-		// this rpc call.
+		// We use s.ctx (`ctx` is derived from `s.ctx`) to check for cancellation of the
+		// shim context, and the context passed to startContainer for tracing.
 		go watchOOMEvents(ctx, s)
 	} else {
 		_, err := s.sandbox.StartContainer(ctx, c.id)
@@ -74,10 +74,10 @@ func startContainer(ctx context.Context, s *service, c *container) error {
 		c.ttyio = tty
 		go ioCopy(c.exitIOch, c.stdinCloser, tty, stdin, stdout, stderr)
 	} else {
-		//close the io exit channel, since there is no io for this container,
-		//otherwise the following wait goroutine will hang on this channel.
+		// close the io exit channel, since there is no io for this container,
+		// otherwise the following wait goroutine will hang on this channel.
 		close(c.exitIOch)
-		//close the stdin closer channel to notify that it's safe to close process's
+		// close the stdin closer channel to notify that it's safe to close process's
 		// io.
 		close(c.stdinCloser)
 	}
@@ -88,7 +88,7 @@ func startContainer(ctx context.Context, s *service, c *container) error {
 }

 func startExec(ctx context.Context, s *service, containerID, execID string) (*exec, error) {
-	//start an exec
+	// start an exec
 	c, err := s.getContainer(containerID)
 	if err != nil {
 		return nil, err
--- a/src/runtime/containerd-shim-v2/stream.go
+++ b/src/runtime/containerd-shim-v2/stream.go
@@ -87,7 +87,6 @@ func newTtyIO(ctx context.Context, stdin, stdout, stderr string, console bool) (

 func ioCopy(exitch, stdinCloser chan struct{}, tty *ttyIO, stdinPipe io.WriteCloser, stdoutPipe, stderrPipe io.Reader) {
 	var wg sync.WaitGroup
-	var closeOnce sync.Once

 	if tty.Stdin != nil {
 		wg.Add(1)
@@ -109,7 +108,11 @@ func ioCopy(exitch, stdinCloser chan struct{}, tty *ttyIO, stdinPipe io.WriteClo
 			defer bufPool.Put(p)
 			io.CopyBuffer(tty.Stdout, stdoutPipe, *p)
 			wg.Done()
-			closeOnce.Do(tty.close)
+			if tty.Stdin != nil {
+				// close stdin to make the other routine stop
+				tty.Stdin.Close()
+				tty.Stdin = nil
+			}
 		}()
 	}

@@ -124,6 +127,6 @@ func ioCopy(exitch, stdinCloser chan struct{}, tty *ttyIO, stdinPipe io.WriteClo
 	}

 	wg.Wait()
-	closeOnce.Do(tty.close)
+	tty.close()
 	close(exitch)
 }
--- a/src/runtime/containerd-shim-v2/wait.go
+++ b/src/runtime/containerd-shim-v2/wait.go
@@ -142,7 +142,7 @@ func watchOOMEvents(ctx context.Context, s *service) {

 	for {
 		select {
-		case <-ctx.Done():
+		case <-s.ctx.Done():
 			return
 		default:
 			containerID, err := s.sandbox.GetOOMEvent(ctx)
--- a/src/runtime/go.mod
+++ b/src/runtime/go.mod
@@ -18,7 +18,6 @@ require (
 	github.com/containerd/typeurl v1.0.1-0.20190228175220-2a93cfde8c20
 	github.com/containernetworking/plugins v0.8.2
 	github.com/cri-o/cri-o v1.0.0-rc2.0.20170928185954-3394b3b2d6af
-	github.com/dlespiau/covertool v0.0.0-20180314162135-b0c4c6d0583a
 	github.com/docker/distribution v2.7.1+incompatible // indirect
 	github.com/docker/docker v1.13.1 // indirect
 	github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c // indirect
@@ -31,7 +30,7 @@ require (
 	github.com/gogo/googleapis v1.4.0 // indirect
 	github.com/gogo/protobuf v1.3.1
 	github.com/hashicorp/go-multierror v1.0.0
-	github.com/kata-containers/govmm v0.0.0-20210112013750-7d320e8f5dca
+	github.com/kata-containers/govmm v0.0.0-20210520142420-eb57f004d89f
 	github.com/mdlayher/vsock v0.0.0-20191108225356-d9c65923cb8f
 	github.com/opencontainers/image-spec v1.0.1 // indirect
 	github.com/opencontainers/runc v1.0.0-rc9.0.20200102164712-2b52db75279c
--- a/src/runtime/go.sum
+++ b/src/runtime/go.sum
@@ -108,8 +108,6 @@ github.com/d2g/hardwareaddr v0.0.0-20190221164911-e7d9fbe030e4/go.mod h1:bMl4RjI
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/dlespiau/covertool v0.0.0-20180314162135-b0c4c6d0583a h1:+cYgqwB++gEE09SluRYGqJyDhWmLmdWZ2cXlOXSGV8w=
-github.com/dlespiau/covertool v0.0.0-20180314162135-b0c4c6d0583a/go.mod h1:/eQMcW3eA1bzKx23ZYI2H3tXPdJB5JWYTHzoUPBvQY4=
 github.com/docker/distribution v2.7.1+incompatible h1:a5mlkVzth6W5A4fOsS3D2EO5BUmsJpcB+cRlLU7cSug=
 github.com/docker/distribution v2.7.1+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w=
 github.com/docker/docker v1.13.1 h1:IkZjBSIc8hBjLpqeAbeE5mca5mNgeatLHBy3GO78BWo=
@@ -200,7 +198,6 @@ github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt
 github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4=
 github.com/golang/protobuf v0.0.0-20161109072736-4bd1920723d7/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
 github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
-github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg=
 github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
 github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
 github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
@@ -218,9 +215,7 @@ github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Z
 github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
 github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
 github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
-github.com/google/go-cmp v0.3.1 h1:Xye71clBPdm5HgqGwUkwhbynsUJZhDbS20FvLhQ2izg=
 github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
-github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4=
 github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
@@ -242,7 +237,6 @@ github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hf
 github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
 github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
 github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
-github.com/google/uuid v1.1.0 h1:Jf4mxPC/ziBnoPIdpQdPJ9OeiomAUHLvxmPRSPH9m4s=
 github.com/google/uuid v1.1.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/google/uuid v1.1.1 h1:Gkbcsh/GbpXz7lPftLA3P6TYMwjCLYm83jiFQZF/3gY=
 github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
@@ -274,8 +268,10 @@ github.com/juju/errors v0.0.0-20180806074554-22422dad46e1/go.mod h1:W54LbzXuIE0b
 github.com/juju/loggo v0.0.0-20190526231331-6e530bcce5d8/go.mod h1:vgyd7OREkbtVEN/8IXZe5Ooef3LQePvuBm9UWj6ZL8U=
 github.com/juju/testing v0.0.0-20190613124551-e81189438503/go.mod h1:63prj8cnj0tU0S9OHjGJn+b1h0ZghCndfnbQolrYTwA=
 github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
-github.com/kata-containers/govmm v0.0.0-20210112013750-7d320e8f5dca h1:UdXFthwasAPnmv37gLJUEFsW9FaabYA+mM6FoSi8kzU=
-github.com/kata-containers/govmm v0.0.0-20210112013750-7d320e8f5dca/go.mod h1:VmAHbsL5lLfzHW/MNL96NVLF840DNEV5i683kISgFKk=
+github.com/kata-containers/govmm v0.0.0-20210428163604-f0e9a35308ee h1:M4N7AdSHgWz/ubV5AZQdeqmK+9Ztpea6oqeXgk8GCHk=
+github.com/kata-containers/govmm v0.0.0-20210428163604-f0e9a35308ee/go.mod h1:VmAHbsL5lLfzHW/MNL96NVLF840DNEV5i683kISgFKk=
+github.com/kata-containers/govmm v0.0.0-20210520142420-eb57f004d89f h1:jXMZY7GIz5kSv3/Rdiesg1WMvgXJKNOk3KxwxgNWAVk=
+github.com/kata-containers/govmm v0.0.0-20210520142420-eb57f004d89f/go.mod h1:VmAHbsL5lLfzHW/MNL96NVLF840DNEV5i683kISgFKk=
 github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
 github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
 github.com/konsorten/go-windows-terminal-sequences v1.0.1 h1:mweAR1A6xJ3oS2pRaGiHgQ4OO8tzTaLawm8vnODuwDk=
@@ -303,15 +299,12 @@ github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3Rllmb
 github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
 github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
 github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw=
-github.com/onsi/ginkgo v0.0.0-20151202141238-7f8ab55aaf3b h1:Ey6yH0acn50T/v6CB75bGP4EMJqnv9WvnjN7oZaj+xE=
 github.com/onsi/ginkgo v0.0.0-20151202141238-7f8ab55aaf3b/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
 github.com/onsi/ginkgo v0.0.0-20170829012221-11459a886d9c/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
 github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
-github.com/onsi/ginkgo v1.10.1 h1:q/mM8GF/n0shIN8SaAZ0V+jnLPzen6WIVZdiwrRlMlo=
 github.com/onsi/ginkgo v1.10.1/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
 github.com/onsi/ginkgo v1.11.0 h1:JAKSXpt1YjtLA7YpPiqO9ss6sNXEsPfSGdwN0UHqzrw=
 github.com/onsi/ginkgo v1.11.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
-github.com/onsi/gomega v0.0.0-20151007035656-2152b45fa28a h1:KfNOeFvoAssuZLT7IntKZElKwi/5LRuxY71k+t6rfaM=
 github.com/onsi/gomega v0.0.0-20151007035656-2152b45fa28a/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA=
 github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA=
 github.com/onsi/gomega v1.7.0 h1:XPnZz8VVBHjVsy1vzJmRwIcSwiUO+JFfrv/xGiigmME=
@@ -331,7 +324,6 @@ github.com/pborman/uuid v1.2.0 h1:J7Q5mO4ysT1dv8hyrUGHb9+ooztCXu1D8MY8DZYsu3g=
 github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k=
 github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pkg/errors v0.8.1-0.20171018195549-f15c970de5b7/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
-github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
 github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
 github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
@@ -375,7 +367,6 @@ github.com/stretchr/objx v0.1.1 h1:2vfRuCMp5sSVIDSqO8oNnWJq7mPa6KVP3iPIwFBuy8A=
 github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
-github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
 github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
 github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
 github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
@@ -427,7 +418,6 @@ golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTk
 golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
 golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
 golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
-golang.org/x/lint v0.0.0-20190930215403-16217165b5de h1:5hukYrvBGR8/eNkX5mdUezrA6JiaEZDtJb9Ei+1LlBs=
 golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
 golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs=
 golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
@@ -459,7 +449,6 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL
 golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20191004110552-13f9640d40b9/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20191108221443-4ba9e2ef068c h1:SRpq/kuj/xNci/RdvEs+RSvpfxqvLAzTKuKGlzoGdZQ=
 golang.org/x/net v0.0.0-20191108221443-4ba9e2ef068c/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
@@ -473,7 +462,6 @@ golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/
 golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
 golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
 golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
-golang.org/x/net v0.0.0-20200707034311-ab3426394381 h1:VXak5I6aEWmAXeQjA+QSZzlgNrpq9mjcfDemuexIKsU=
 golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
 golang.org/x/net v0.0.0-20200822124328-c89045814202 h1:VvcQYSHwXgi7W+TpUR6A9g6Up98WAHf3f/ulnJ62IyA=
 golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
@@ -489,7 +477,6 @@ golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJ
 golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e h1:vcxGaoTs7kV8m5Np9uUNQin4BrLOthgV7252N8V+FwY=
 golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208 h1:qwRHBd0NqMbJxfbotnDhm2ByMI1Shq4Y6oRJo21SGJA=
@@ -531,17 +518,14 @@ golang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7w
 golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1 h1:ogLJMz+qpzav7lGMh10LMvAkM/fAoGlaiiHYiFYdm80=
 golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f h1:Fqb3ao1hUmOR3GkUOg/Y+BadLwykBIzs5q8Ez2SbHyc=
 golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/text v0.0.0-20160726164857-2910a502d2bf/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
-golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
-golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
 golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
 golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
@@ -593,7 +577,6 @@ golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc
 golang.org/x/tools v0.0.0-20200904185747-39188db58858/go.mod h1:Cj7w3i3Rnn0Xh82ur9kSqwfTHTeVxaDqrfMjpcNT6bE=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
-golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
 golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@@ -616,7 +599,6 @@ google.golang.org/api v0.30.0/go.mod h1:QGmEvQ87FHZNiUVJkT14jQNYJ4ZJjdRF23ZXz513
 google.golang.org/api v0.32.0 h1:Le77IccnTqEa8ryp9wIpX5W3zYm7Gf9LhOp9PHcwFts=
 google.golang.org/api v0.32.0/go.mod h1:/XrVsuzM0rZmrsbjJutiuftIzeuTQcEeaYcSk/mQ1dg=
 google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
-google.golang.org/appengine v1.4.0 h1:/wp5JvzpHIxhs/dumFmF7BXTf3Z+dd4uXta4kVyO508=
 google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
 google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
 google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0=
@@ -633,14 +615,12 @@ google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQ
 google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
 google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
 google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
-google.golang.org/protobuf v1.23.0 h1:4MY060fB1DLGMB/7MBTLnwQUY6+F09GEiz6SsrNqyzM=
 google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
 google.golang.org/protobuf v1.25.0 h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4c=
 google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
 gopkg.in/airbrake/gobrake.v2 v2.0.9/go.mod h1:/h5ZAUhDkGaJfjzjKLSjv6zCL6O0LLBxU4K+aSYdM/U=
 gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
 gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
 gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
@@ -654,10 +634,8 @@ gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce/go.mod h1:yeKp02qBN3iKW1OzL3M
 gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
 gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
 gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
-gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
 gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
-gopkg.in/yaml.v2 v2.2.5 h1:ymVxjfMaHvXD8RqPRmzHHsB3VvucivSkIAvJFDI5O3c=
 gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
 gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
--- a/src/runtime/pkg/kata-monitor/metrics.go
+++ b/src/runtime/pkg/kata-monitor/metrics.go
@@ -176,7 +176,7 @@ func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error {
 	for sandboxID, namespace := range sandboxes {
 		wg.Add(1)
 		go func(sandboxID, namespace string, results chan<- []*dto.MetricFamily) {
-			sandboxMetrics, err := km.getSandboxMetrics(sandboxID, namespace)
+			sandboxMetrics, err := getParsedMetrics(sandboxID)
 			if err != nil {
 				monitorLog.WithError(err).WithField("sandbox_id", sandboxID).Errorf("failed to get metrics for sandbox")
 			}
@@ -229,13 +229,12 @@ func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error {
 			return err
 		}
 	}
-
 	return nil
+
 }

-// getSandboxMetrics will get sandbox's metrics from shim
-func (km *KataMonitor) getSandboxMetrics(sandboxID, namespace string) ([]*dto.MetricFamily, error) {
-	body, err := km.doGet(sandboxID, namespace, defaultTimeout, "metrics")
+func getParsedMetrics(sandboxID string) ([]*dto.MetricFamily, error) {
+	body, err := doGet(sandboxID, defaultTimeout, "metrics")
 	if err != nil {
 		return nil, err
 	}
@@ -243,6 +242,16 @@ func (km *KataMonitor) getSandboxMetrics(sandboxID, namespace string) ([]*dto.Me
 	return parsePrometheusMetrics(sandboxID, body)
 }

+// GetSandboxMetrics fetches the sandbox's metrics from the shim and returns the response body as a string.
+func GetSandboxMetrics(sandboxID string) (string, error) {
+	body, err := doGet(sandboxID, defaultTimeout, "metrics")
+	if err != nil {
+		return "", err
+	}
+
+	return string(body), nil
+}
+
 // parsePrometheusMetrics will decode metrics from Prometheus text format
 // and return array of *dto.MetricFamily with an ASC order
 func parsePrometheusMetrics(sandboxID string, body []byte) ([]*dto.MetricFamily, error) {
--- a/src/runtime/pkg/kata-monitor/monitor.go
+++ b/src/runtime/pkg/kata-monitor/monitor.go
@@ -87,13 +87,8 @@ func (km *KataMonitor) GetAgentURL(w http.ResponseWriter, r *http.Request) {
 		commonServeError(w, http.StatusBadRequest, err)
 		return
 	}
-	namespace, err := km.getSandboxNamespace(sandboxID)
-	if err != nil {
-		commonServeError(w, http.StatusBadRequest, err)
-		return
-	}

-	data, err := km.doGet(sandboxID, namespace, defaultTimeout, "agent-url")
+	data, err := doGet(sandboxID, defaultTimeout, "agent-url")
 	if err != nil {
 		commonServeError(w, http.StatusBadRequest, err)
 		return
--- a/src/runtime/pkg/kata-monitor/shim_client.go
+++ b/src/runtime/pkg/kata-monitor/shim_client.go
@@ -11,6 +11,8 @@ import (
 	"net"
 	"net/http"
 	"time"
+
+	shim "github.com/kata-containers/kata-containers/src/runtime/containerd-shim-v2"
 )

 const (
@@ -33,16 +35,13 @@ func getSandboxIDFromReq(r *http.Request) (string, error) {
 	return "", fmt.Errorf("sandbox not found in %+v", r.URL.Query())
 }

-func (km *KataMonitor) buildShimClient(sandboxID, namespace string, timeout time.Duration) (*http.Client, error) {
-	socketAddr, err := km.getMonitorAddress(sandboxID, namespace)
-	if err != nil {
-		return nil, err
-	}
-	return BuildUnixSocketClient(socketAddr, timeout)
+// BuildShimClient builds and returns an HTTP client for communicating with the provided sandbox.
+func BuildShimClient(sandboxID string, timeout time.Duration) (*http.Client, error) {
+	return buildUnixSocketClient(shim.SocketAddress(sandboxID), timeout)
 }

-// BuildUnixSocketClient build http client for Unix socket
-func BuildUnixSocketClient(socketAddr string, timeout time.Duration) (*http.Client, error) {
+// buildUnixSocketClient builds an HTTP client for the given Unix socket address.
+func buildUnixSocketClient(socketAddr string, timeout time.Duration) (*http.Client, error) {
 	transport := &http.Transport{
 		DisableKeepAlives: true,
 		Dial: func(proto, addr string) (conn net.Conn, err error) {
@@ -61,8 +60,8 @@ func BuildUnixSocketClient(socketAddr string, timeout time.Duration) (*http.Clie
 	return client, nil
 }

-func (km *KataMonitor) doGet(sandboxID, namespace string, timeoutInSeconds time.Duration, urlPath string) ([]byte, error) {
-	client, err := km.buildShimClient(sandboxID, namespace, timeoutInSeconds)
+func doGet(sandboxID string, timeoutInSeconds time.Duration, urlPath string) ([]byte, error) {
+	client, err := BuildShimClient(sandboxID, timeoutInSeconds)
 	if err != nil {
 		return nil, err
 	}
--- a/src/runtime/pkg/katautils/config-settings.go.in
+++ b/src/runtime/pkg/katautils/config-settings.go.in
@@ -54,6 +54,7 @@ const defaultDisableImageNvdimm = false
 const defaultVhostUserStorePath string = "/var/run/kata-containers/vhost-user/"
 const defaultRxRateLimiterMaxRate = uint64(0)
 const defaultTxRateLimiterMaxRate = uint64(0)
+const defaultConfidentialGuest = false

 var defaultSGXEPCSize = int64(0)

--- a/src/runtime/pkg/katautils/config.go
+++ b/src/runtime/pkg/katautils/config.go
@@ -1,4 +1,4 @@
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2021 Intel Corporation
 // Copyright (c) 2018 HyperHQ Inc.
 //
 // SPDX-License-Identifier: Apache-2.0
@@ -61,6 +61,12 @@ type tomlConfig struct {
 	Runtime    runtime
 	Factory    factory
 	Netmon     netmon
+	Image      image
+}
+
+type image struct {
+	ServiceOffload bool   `toml:"service_offload"`
+	Provision      string `toml:"provision"`
 }

 type factory struct {
@@ -99,6 +105,7 @@ type hypervisor struct {
 	PFlashList              []string `toml:"pflashes"`
 	VhostUserStorePathList  []string `toml:"valid_vhost_user_store_paths"`
 	FileBackedMemRootList   []string `toml:"valid_file_mem_backends"`
+	EntropySourceList       []string `toml:"valid_entropy_sources"`
 	EnableAnnotations       []string `toml:"enable_annotations"`
 	RxRateLimiterMaxRate    uint64   `toml:"rx_rate_limiter_max_rate"`
 	TxRateLimiterMaxRate    uint64   `toml:"tx_rate_limiter_max_rate"`
@@ -129,6 +136,7 @@ type hypervisor struct {
 	HotplugVFIOOnRootBus    bool     `toml:"hotplug_vfio_on_root_bus"`
 	DisableVhostNet         bool     `toml:"disable_vhost_net"`
 	GuestMemoryDumpPaging   bool     `toml:"guest_memory_dump_paging"`
+	ConfidentialGuest       bool     `toml:"confidential_guest"`
 }

 type runtime struct {
@@ -153,6 +161,7 @@ type agent struct {
 	Debug               bool     `toml:"enable_debug"`
 	Tracing             bool     `toml:"enable_tracing"`
 	DebugConsoleEnabled bool     `toml:"debug_console_enabled"`
+	DialTimeout         uint32   `toml:"dial_timeout"`
 }

 type netmon struct {
@@ -470,6 +479,10 @@ func (a agent) debugConsoleEnabled() bool {
 	return a.DebugConsoleEnabled
 }

+func (a agent) dialTimout() uint32 {
+	return a.DialTimeout
+}
+
 func (a agent) debug() bool {
 	return a.Debug
 }
@@ -557,6 +570,7 @@ func newFirecrackerHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
 		MemorySize:            h.defaultMemSz(),
 		MemSlots:              h.defaultMemSlots(),
 		EntropySource:         h.GetEntropySource(),
+		EntropySourceList:     h.EntropySourceList,
 		DefaultBridges:        h.defaultBridges(),
 		DisableBlockDeviceUse: h.DisableBlockDeviceUse,
 		HugePages:             h.HugePages,
@@ -663,6 +677,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
 		MemOffset:               h.defaultMemOffset(),
 		VirtioMem:               h.VirtioMem,
 		EntropySource:           h.GetEntropySource(),
+		EntropySourceList:       h.EntropySourceList,
 		DefaultBridges:          h.defaultBridges(),
 		DisableBlockDeviceUse:   h.DisableBlockDeviceUse,
 		SharedFS:                sharedFS,
@@ -699,6 +714,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
 		EnableAnnotations:       h.EnableAnnotations,
 		GuestMemoryDumpPath:     h.GuestMemoryDumpPath,
 		GuestMemoryDumpPaging:   h.GuestMemoryDumpPaging,
+		ConfidentialGuest:       h.ConfidentialGuest,
 	}, nil
 }

@@ -754,6 +770,7 @@ func newAcrnHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
 		MemorySize:            h.defaultMemSz(),
 		MemSlots:              h.defaultMemSlots(),
 		EntropySource:         h.GetEntropySource(),
+		EntropySourceList:     h.EntropySourceList,
 		DefaultBridges:        h.defaultBridges(),
 		HugePages:             h.HugePages,
 		Mlock:                 !h.Swap,
@@ -830,6 +847,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
 		MemOffset:               h.defaultMemOffset(),
 		VirtioMem:               h.VirtioMem,
 		EntropySource:           h.GetEntropySource(),
+		EntropySourceList:       h.EntropySourceList,
 		DefaultBridges:          h.defaultBridges(),
 		DisableBlockDeviceUse:   h.DisableBlockDeviceUse,
 		SharedFS:                sharedFS,
@@ -905,7 +923,7 @@ func updateRuntimeConfigHypervisor(configPath string, tomlConf tomlConfig, confi
 	return nil
 }

-func updateRuntimeConfigAgent(configPath string, tomlConf tomlConfig, config *oci.RuntimeConfig, builtIn bool) error {
+func updateRuntimeConfigAgent(configPath string, tomlConf tomlConfig, config *oci.RuntimeConfig) error {
 	for _, agent := range tomlConf.Agent {
 		config.AgentConfig = vc.KataAgentConfig{
 			LongLiveConn:       true,
@@ -915,6 +933,7 @@ func updateRuntimeConfigAgent(configPath string, tomlConf tomlConfig, config *oc
 			TraceType:          agent.traceType(),
 			KernelModules:      agent.kernelModules(),
 			EnableDebugConsole: agent.debugConsoleEnabled(),
+			DialTimeout:        agent.dialTimout(),
 		}
 	}

@@ -980,12 +999,12 @@ func SetKernelParams(runtimeConfig *oci.RuntimeConfig) error {
 	return nil
 }

-func updateRuntimeConfig(configPath string, tomlConf tomlConfig, config *oci.RuntimeConfig, builtIn bool) error {
+func updateRuntimeConfig(configPath string, tomlConf tomlConfig, config *oci.RuntimeConfig) error {
 	if err := updateRuntimeConfigHypervisor(configPath, tomlConf, config); err != nil {
 		return err
 	}

-	if err := updateRuntimeConfigAgent(configPath, tomlConf, config, builtIn); err != nil {
+	if err := updateRuntimeConfigAgent(configPath, tomlConf, config); err != nil {
 		return err
 	}

@@ -1050,6 +1069,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
 		RxRateLimiterMaxRate:    defaultRxRateLimiterMaxRate,
 		TxRateLimiterMaxRate:    defaultTxRateLimiterMaxRate,
 		SGXEPCSize:              defaultSGXEPCSize,
+		ConfidentialGuest:       defaultConfidentialGuest,
 	}
 }

@@ -1076,7 +1096,7 @@ func initConfig() (config oci.RuntimeConfig, err error) {
 //
 // All paths are resolved fully meaning if this function does not return an
 // error, all paths are valid at the time of the call.
-func LoadConfiguration(configPath string, ignoreLogging, builtIn bool) (resolvedConfigPath string, config oci.RuntimeConfig, err error) {
+func LoadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPath string, config oci.RuntimeConfig, err error) {

 	config, err = initConfig()
 	if err != nil {
@@ -1118,7 +1138,7 @@ func LoadConfiguration(configPath string, ignoreLogging, builtIn bool) (resolved
 			}).Info("loaded configuration")
 	}

-	if err := updateRuntimeConfig(resolved, tomlConf, &config, builtIn); err != nil {
+	if err := updateRuntimeConfig(resolved, tomlConf, &config); err != nil {
 		return "", config, err
 	}

--- a/src/runtime/pkg/katautils/config_test.go
+++ b/src/runtime/pkg/katautils/config_test.go
@@ -260,7 +260,7 @@ func testLoadConfiguration(t *testing.T, dir string,
 					assert.NoError(t, err)
 				}

-				resolvedConfigPath, config, err := LoadConfiguration(file, ignoreLogging, false)
+				resolvedConfigPath, config, err := LoadConfiguration(file, ignoreLogging)
 				if expectFail {
 					assert.Error(t, err)

@@ -566,7 +566,7 @@ func TestMinimalRuntimeConfig(t *testing.T) {
 		t.Error(err)
 	}

-	_, config, err := LoadConfiguration(configPath, false, false)
+	_, config, err := LoadConfiguration(configPath, false)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -1398,7 +1398,7 @@ func TestUpdateRuntimeConfigurationVMConfig(t *testing.T) {
 		},
 	}

-	err := updateRuntimeConfig("", tomlConf, &config, false)
+	err := updateRuntimeConfig("", tomlConf, &config)
 	assert.NoError(err)

 	assert.Equal(expectedVMConfig, config.HypervisorConfig.MemorySize)
@@ -1416,7 +1416,7 @@ func TestUpdateRuntimeConfigurationFactoryConfig(t *testing.T) {

 	tomlConf := tomlConfig{Factory: factory{Template: true}}

-	err := updateRuntimeConfig("", tomlConf, &config, false)
+	err := updateRuntimeConfig("", tomlConf, &config)
 	assert.NoError(err)

 	assert.Equal(expectedFactoryConfig, config.FactoryConfig)
@@ -1443,7 +1443,7 @@ func TestUpdateRuntimeConfigurationInvalidKernelParams(t *testing.T) {
 		}
 	}

-	err := updateRuntimeConfig("", tomlConf, &config, false)
+	err := updateRuntimeConfig("", tomlConf, &config)
 	assert.EqualError(err, "Empty kernel parameter")
 }

--- a/src/runtime/pkg/katautils/create.go
+++ b/src/runtime/pkg/katautils/create.go
@@ -104,7 +104,7 @@ func SetEphemeralStorageType(ociSpec specs.Spec) specs.Spec {
 // CreateSandbox create a sandbox container
 func CreateSandbox(ctx context.Context, vci vc.VC, ociSpec specs.Spec, runtimeConfig oci.RuntimeConfig, rootFs vc.RootFs,
 	containerID, bundlePath, console string, disableOutput, systemdCgroup bool) (_ vc.VCSandbox, _ vc.Process, err error) {
-	span, ctx := Trace(ctx, "createSandbox")
+	span, ctx := Trace(ctx, "CreateSandbox", []label.KeyValue{label.Key("source").String("runtime"), label.Key("package").String("katautils"), label.Key("subsystem").String("sandbox"), label.Key("container_id").String(containerID)}...)
 	defer span.End()

 	sandboxConfig, err := oci.SandboxConfig(ociSpec, runtimeConfig, bundlePath, containerID, console, disableOutput, systemdCgroup)
@@ -159,7 +159,7 @@ func CreateSandbox(ctx context.Context, vci vc.VC, ociSpec specs.Spec, runtimeCo

 	sid := sandbox.ID()
 	kataUtilsLogger = kataUtilsLogger.WithField("sandbox", sid)
-	span.SetAttributes(label.Key("sandbox").String(sid))
+	span.SetAttributes(label.Key("sandbox_id").String(sid))

 	containers := sandbox.GetAllContainers()
 	if len(containers) != 1 {
@@ -202,7 +202,7 @@ func checkForFIPS(sandboxConfig *vc.SandboxConfig) error {
 func CreateContainer(ctx context.Context, sandbox vc.VCSandbox, ociSpec specs.Spec, rootFs vc.RootFs, containerID, bundlePath, console string, disableOutput bool) (vc.Process, error) {
 	var c vc.VCContainer

-	span, ctx := Trace(ctx, "createContainer")
+	span, ctx := Trace(ctx, "CreateContainer", []label.KeyValue{label.Key("source").String("runtime"), label.Key("package").String("katautils"), label.Key("subsystem").String("sandbox"), label.Key("container_id").String(containerID)}...)
 	defer span.End()

 	ociSpec = SetEphemeralStorageType(ociSpec)
@@ -228,7 +228,7 @@ func CreateContainer(ctx context.Context, sandbox vc.VCSandbox, ociSpec specs.Sp
 		return vc.Process{}, err
 	}

-	span.SetAttributes(label.Key("sandbox").String(sandboxID))
+	span.SetAttributes(label.Key("sandbox_id").String(sandboxID))

 	c, err = sandbox.CreateContainer(ctx, contConfig)
 	if err != nil {
--- a/src/runtime/pkg/katautils/hook.go
+++ b/src/runtime/pkg/katautils/hook.go
@@ -26,11 +26,9 @@ func hookLogger() *logrus.Entry {
 }

 func runHook(ctx context.Context, hook specs.Hook, cid, bundlePath string) error {
-	span, _ := Trace(ctx, "hook")
+	span, _ := Trace(ctx, "runHook", []label.KeyValue{label.Key("source").String("runtime"), label.Key("package").String("katautils"), label.Key("subsystem").String("hook")}...)
 	defer span.End()

-	span.SetAttributes(label.Key("subsystem").String("runHook"))
-
 	// FIXME
 	// span.LogFields(
 	// 	log.String("hook-name", hook.Path),
@@ -90,11 +88,9 @@ func runHook(ctx context.Context, hook specs.Hook, cid, bundlePath string) error
 }

 func runHooks(ctx context.Context, hooks []specs.Hook, cid, bundlePath, hookType string) error {
-	span, _ := Trace(ctx, "hooks")
+	span, _ := Trace(ctx, "runHooks", []label.KeyValue{label.Key("source").String("runtime"), label.Key("package").String("katautils"), label.Key("subsystem").String("hook"), label.Key("type").String(hookType)}...)
 	defer span.End()

-	span.SetAttributes(label.Key("subsystem").String(hookType))
-
 	for _, hook := range hooks {
 		if err := runHook(ctx, hook, cid, bundlePath); err != nil {
 			hookLogger().WithFields(logrus.Fields{
--- a/src/runtime/pkg/katautils/tracing.go
+++ b/src/runtime/pkg/katautils/tracing.go
@@ -30,7 +30,7 @@ var _ export.SpanExporter = (*kataSpanExporter)(nil)
 // ExportSpans exports SpanData to Jaeger.
 func (e *kataSpanExporter) ExportSpans(ctx context.Context, spans []*export.SpanData) error {
 	for _, span := range spans {
-		kataUtilsLogger.Infof("Reporting span %+v", span)
+		kataUtilsLogger.Tracef("Reporting span %+v", span)
 	}
 	return nil
 }
@@ -110,12 +110,11 @@ func StopTracing(ctx context.Context) {
 }

 // Trace creates a new tracing span based on the specified name and parent
-// context.
-func Trace(parent context.Context, name string) (otelTrace.Span, context.Context) {
+// context and an opentelemetry label.KeyValue slice for span attributes.
+func Trace(parent context.Context, name string, tags ...label.KeyValue) (otelTrace.Span, context.Context) {

 	tracer := otel.Tracer("kata")
-	ctx, span := tracer.Start(parent, name)
-	span.SetAttributes(label.Key("source").String("runtime"))
+	ctx, span := tracer.Start(parent, name, otelTrace.WithAttributes(tags...))

 	// This is slightly confusing: when tracing is disabled, trace spans
 	// are still created - but the tracer used is a NOP. Therefore, only
--- a/Show More
+++ b/Show More