Merge pull request #2015 from fidencio/2.2.0-alpha0-branch-bump

# Kata Containers 2.2.0-alpha0
Merge pull request #1995 from GabyCT/topic/removetravisreference
2026-03-15 01:02:32 +00:00 · 2021-06-11 18:51:08 +02:00 · 2021-06-11 09:23:47 -05:00 · 2021-06-11 16:10:01 +02:00 · 2021-06-11 00:31:51 +02:00 · 2021-06-11 00:31:34 +02:00
960 changed files with 76333 additions and 39927 deletions
--- a/.github/workflows/kata-deploy-test.yaml
+++ b/.github/workflows/kata-deploy-test.yaml
@@ -1,7 +1,12 @@
-on: issue_comment
+on:
+  issue_comment:
+    types: [created, edited]
+
 name: test-kata-deploy
+
 jobs:
  check_comments:
+    if: ${{ github.event.issue.pull_request }}
    runs-on: ubuntu-latest
    steps:
      - name: Check for Command
@@ -9,7 +14,7 @@ jobs:
        uses: kata-containers/slash-command-action@v1
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
-          command: "test-kata-deploy"
+          command: "test_kata_deploy"
          reaction: "true"
          reaction-type: "eyes"
          allow-edits: "false"
@@ -17,6 +22,7 @@ jobs:
      - name: verify command arg is kata-deploy
        run: |
           echo "The command was '${{ steps.command.outputs.command-name }}' with arguments '${{ steps.command.outputs.command-arguments }}'"
+
  create-and-test-container:
    needs: check_comments
    runs-on: ubuntu-latest
@@ -27,22 +33,26 @@ jobs:
            ref=$(cat $GITHUB_EVENT_PATH | jq -r '.issue.pull_request.url' | sed  's#^.*\/pulls#refs\/pull#' | sed 's#$#\/merge#')
            echo "reference for PR: " ${ref}
            echo "##[set-output name=pr-ref;]${ref}"
-      - uses: actions/checkout@v2-beta
+
+      - name: check out
+        uses: actions/checkout@v2
        with:
-          ref: ${{ steps.get-PR-ref.outputs.pr-ref }}
+           ref: ${{ steps.get-PR-ref.outputs.pr-ref }}
+
      - name: build-container-image
        id: build-container-image
        run: |
            PR_SHA=$(git log --format=format:%H -n1)
-            VERSION=$(curl https://raw.githubusercontent.com/kata-containers/kata-containers/2.0-dev/VERSION)
+            VERSION="2.0.0"
            ARTIFACT_URL="https://github.com/kata-containers/kata-containers/releases/download/${VERSION}/kata-static-${VERSION}-x86_64.tar.xz"
-            wget "${ARTIFACT_URL}" -O ./kata-deploy/kata-static.tar.xz
-            docker build --build-arg KATA_ARTIFACTS=kata-static.tar.xz -t katadocker/kata-deploy-ci:${PR_SHA} ./kata-deploy
+            wget "${ARTIFACT_URL}" -O tools/packaging/kata-deploy/kata-static.tar.xz
+            docker build --build-arg KATA_ARTIFACTS=kata-static.tar.xz -t katadocker/kata-deploy-ci:${PR_SHA} ./tools/packaging/kata-deploy
            docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }}
            docker push katadocker/kata-deploy-ci:$PR_SHA
            echo "##[set-output name=pr-sha;]${PR_SHA}"
+
      - name: test-kata-deploy-ci-in-aks
-        uses: ./kata-deploy/action
+        uses: ./tools/packaging/kata-deploy/action
        with:
          packaging-sha: ${{ steps.build-container-image.outputs.pr-sha }}
        env:
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -103,59 +103,6 @@ jobs:
          name: kata-artifacts
          path: kata-static-qemu.tar.gz

-  build-nemu:
-    runs-on: ubuntu-16.04
-    needs: get-artifact-list
-    env:
-      buildstr: "install_nemu"
-    steps:
-      - uses: actions/checkout@v1
-      - name: get-artifact-list
-        uses: actions/download-artifact@master
-        with:
-          name: artifact-list
-      - name: build-nemu
-        run: |
-         if grep -q $buildstr ./artifact-list/artifact-list.txt; then
-           $GITHUB_WORKSPACE/.github/workflows/generate-artifact-tarball.sh $buildstr
-           echo "artifact-built=true" >> $GITHUB_ENV
-         else
-           echo "artifact-built=false" >> $GITHUB_ENV
-         fi
-      - name: store-artifacts
-        if: ${{ env.artifact-built }} == 'true'
-        uses: actions/upload-artifact@master
-        with:
-          name: kata-artifacts
-          path: kata-static-nemu.tar.gz
-
-  # Job for building the QEMU binaries with virtiofs support
-  build-qemu-virtiofsd:
-    runs-on: ubuntu-16.04
-    needs: get-artifact-list
-    env:
-      buildstr: "install_qemu_virtiofsd"
-    steps:
-      - uses: actions/checkout@v1
-      - name: get-artifact-list
-        uses: actions/download-artifact@master
-        with:
-          name: artifact-list
-      - name: build-qemu-virtiofsd
-        run: |
-         if grep -q $buildstr ./artifact-list/artifact-list.txt; then
-           $GITHUB_WORKSPACE/.github/workflows/generate-artifact-tarball.sh $buildstr
-           echo "artifact-built=true" >> $GITHUB_ENV
-         else
-           echo "artifact-built=false" >> $GITHUB_ENV
-         fi
-      - name: store-artifacts
-        if: ${{ env.artifact-built }} == 'true'
-        uses: actions/upload-artifact@master
-        with:
-          name: kata-artifacts
-          path: kata-static-qemu-virtiofsd.tar.gz
-
  # Job for building the image
  build-image:
    runs-on: ubuntu-16.04
@@ -266,7 +213,7 @@ jobs:

  gather-artifacts:
    runs-on: ubuntu-16.04
-    needs: [build-experimental-kernel, build-kernel, build-qemu, build-qemu-virtiofsd, build-image, build-firecracker, build-kata-components, build-nemu, build-clh]
+    needs: [build-experimental-kernel, build-kernel, build-qemu, build-image, build-firecracker, build-kata-components, build-clh]
    steps:
      - uses: actions/checkout@v1
      - name: get-artifacts
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -104,32 +104,6 @@ jobs:
          name: kata-artifacts
          path: kata-static-qemu.tar.gz

-  build-qemu-virtiofsd:
-    runs-on: ubuntu-16.04
-    needs: get-artifact-list
-    env:
-      buildstr: "install_qemu_virtiofsd"
-    steps:
-      - uses: actions/checkout@v2
-      - name: get-artifact-list
-        uses: actions/download-artifact@v2
-        with:
-          name: artifact-list
-      - name: build-qemu-virtiofsd
-        run: |
-         if grep -q $buildstr artifact-list.txt; then
-           $GITHUB_WORKSPACE/.github/workflows/generate-local-artifact-tarball.sh $buildstr
-           echo "artifact-built=true" >> $GITHUB_ENV
-         else
-           echo "artifact-built=false" >> $GITHUB_ENV
-         fi
-      - name: store-artifacts
-        if: ${{ env.artifact-built }} == 'true'
-        uses: actions/upload-artifact@v2
-        with:
-          name: kata-artifacts
-          path: kata-static-qemu-virtiofsd.tar.gz
-
  build-image:
    runs-on: ubuntu-16.04
    needs: get-artifact-list
@@ -237,7 +211,7 @@ jobs:

  gather-artifacts:
    runs-on: ubuntu-16.04
-    needs: [build-experimental-kernel, build-kernel, build-qemu, build-qemu-virtiofsd, build-image, build-firecracker, build-kata-components, build-clh]
+    needs: [build-experimental-kernel, build-kernel, build-qemu, build-image, build-firecracker, build-kata-components, build-clh]
    steps:
      - uses: actions/checkout@v2
      - name: get-artifacts
--- a/.github/workflows/require-pr-porting-labels.yaml
+++ b/.github/workflows/require-pr-porting-labels.yaml
@@ -12,6 +12,9 @@ on:
      - reopened
      - labeled
      - unlabeled
+   pull_request:
+     branches:
+      - main

 jobs:
  check-pr-porting-labels:
--- a/.github/workflows/snap-release.yaml
+++ b/.github/workflows/snap-release.yaml
@@ -21,8 +21,8 @@ jobs:
          kata_url="https://github.com/kata-containers/kata-containers"
          latest_version=$(git ls-remote --tags ${kata_url}  | egrep -o "refs.*" | egrep -v "\-alpha|\-rc|{}" | egrep -o "[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+" | sort -V -r | head -1)
          current_version="$(echo ${GITHUB_REF} | cut -d/ -f3)"
-          # Check if the current tag is the latest tag
-          if echo -e "$latest_version\n$current_version" | sort -C -V; then
+          # Check semantic versioning format (x.y.z) and if the current tag is the latest tag
+          if echo "${current_version}" | grep -q "^[[:digit:]]\+\.[[:digit:]]\+\.[[:digit:]]\+$" && echo -e "$latest_version\n$current_version" | sort -C -V; then
            # Current version is the latest version, build it
            snapcraft -d snap --destructive-mode
          fi
@@ -33,5 +33,5 @@ jobs:
          snap_file="kata-containers_${snap_version}_amd64.snap"
          # Upload the snap if it exists
          if [ -f ${snap_file} ]; then
-            snapcraft upload --release=candidate ${snap_file}
+            snapcraft upload --release=stable ${snap_file}
          fi
--- a/.github/workflows/snap.yaml
+++ b/.github/workflows/snap.yaml
@@ -1,15 +1,5 @@
 name: snap CI
-on:
-  pull_request:
-    paths:
-      - "**/Makefile"
-      - "**/*.go"
-      - "**/*.mk"
-      - "**/*.rs"
-      - "**/*.sh"
-      - "**/*.toml"
-      - "**/*.yaml"
-      - "**/*.yml"
+on: ["pull_request"]
 jobs:
  test:
    runs-on: ubuntu-20.04
--- a/.github/workflows/static-checks.yaml
+++ b/.github/workflows/static-checks.yaml
@@ -5,16 +5,14 @@ jobs:
    strategy:
      matrix:
        go-version: [1.13.x, 1.14.x, 1.15.x]
-        os: [ubuntu-18.04]
+        os: [ubuntu-20.04]
    runs-on: ${{ matrix.os }}
    env:
-      GO111MODULE: off
      TRAVIS: "true"
      TRAVIS_BRANCH: ${{ github.base_ref }}
      TRAVIS_PULL_REQUEST_BRANCH: ${{ github.head_ref }}
      TRAVIS_PULL_REQUEST_SHA : ${{ github.event.pull_request.head.sha }}
      RUST_BACKTRACE: "1"
-      RUST_AGENT: "yes"
      target_branch: ${TRAVIS_BRANCH}
    steps:
    - name: Install Go
@@ -25,9 +23,6 @@ jobs:
        GOPATH: ${{ runner.workspace }}/kata-containers
    - name: Setup GOPATH
      run: |
-        gopath_org=$(go env GOPATH)/src/github.com/kata-containers/
-        mkdir -p ${gopath_org}
-        ln -s ${PWD} ${gopath_org}
        echo "TRAVIS_BRANCH: ${TRAVIS_BRANCH}"
        echo "TRAVIS_PULL_REQUEST_BRANCH: ${TRAVIS_PULL_REQUEST_BRANCH}"
        echo "TRAVIS_PULL_REQUEST_SHA: ${TRAVIS_PULL_REQUEST_SHA}"
@@ -43,26 +38,29 @@ jobs:
        path: ./src/github.com/${{ github.repository }}
    - name: Setup travis references
      run: |
-        echo "TRAVIS_BRANCH=${TRAVIS_BRANCH:-$(echo $GITHUB_REF | awk 'BEGIN { FS = \"/\" } ; { print $3 }')}" 
+        echo "TRAVIS_BRANCH=${TRAVIS_BRANCH:-$(echo $GITHUB_REF | awk 'BEGIN { FS = \"/\" } ; { print $3 }')}"
        target_branch=${TRAVIS_BRANCH}
    - name: Setup
      run: |
-        cd ${GOPATH}/src/github.com/kata-containers/kata-containers && ./ci/setup.sh
+        cd ${GOPATH}/src/github.com/${{ github.repository }} && ./ci/setup.sh
      env:
        GOPATH: ${{ runner.workspace }}/kata-containers
    - name: Building rust
      run: |
-        cd ${GOPATH}/src/github.com/kata-containers/kata-containers && ./ci/install_rust.sh
+        cd ${GOPATH}/src/github.com/${{ github.repository }} && ./ci/install_rust.sh
        PATH=$PATH:"$HOME/.cargo/bin"
-    - name: Make clippy
+        rustup target add x86_64-unknown-linux-musl
+        rustup component add rustfmt clippy
+    # Must build before static checks as we depend on some generated code in runtime and agent
+    - name: Build
      run: |
-        cd ${GOPATH}/src/github.com/kata-containers/kata-containers/src/agent && rustup target add x86_64-unknown-linux-musl && rustup component add rustfmt && rustup component add clippy && make clippy
-    - name: Static checks
+        cd ${GOPATH}/src/github.com/${{ github.repository }} && make
+    - name: Static Checks
      run: |
-        cd ${GOPATH}/src/github.com/kata-containers/kata-containers && ./ci/static-checks.sh
-    - name: Build agent
+        cd ${GOPATH}/src/github.com/${{ github.repository }} && ./ci/static-checks.sh
+    - name: Run Compiler Checks
      run: |
-        cd ${GOPATH}/src/github.com/kata-containers/kata-containers/src/agent && make
-    - name: Run agent unit tests
+        cd ${GOPATH}/src/github.com/${{ github.repository }} && make check
+    - name: Run Unit Tests
      run: |
-        cd ${GOPATH}/src/github.com/kata-containers/kata-containers/src/agent && make check
+        cd ${GOPATH}/src/github.com/${{ github.repository }} && make test
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 **/*.bk
+**/*~
 **/*.orig
 **/*.rej
 **/target
--- a/README.md
+++ b/README.md
@@ -126,9 +126,9 @@ The following repositories are used by both the current and first generation Kat

 | Component | Description | Current | First generation | Notes |
 |-|-|-|-|-|
-| CI | Continuous Integration configuration files and scripts. | [Kata 2.x](https://github.com/kata-containers/ci/tree/2.0-dev) | [Kata 1.x](https://github.com/kata-containers/ci/tree/master) | |
+| CI | Continuous Integration configuration files and scripts. | [Kata 2.x](https://github.com/kata-containers/ci/tree/main) | [Kata 1.x](https://github.com/kata-containers/ci/tree/master) | |
 | kernel | The Linux kernel used by the hypervisor to boot the guest image. | [Kata 2.x][kernel] | [Kata 1.x][kernel] | Patches are stored in the packaging component. |
-| tests | Test code. | [Kata 2.x](https://github.com/kata-containers/tests/tree/2.0-dev) | [Kata 1.x](https://github.com/kata-containers/tests/tree/master) | Excludes unit tests which live with the main code. |
+| tests | Test code. | [Kata 2.x](https://github.com/kata-containers/tests/tree/main) | [Kata 1.x](https://github.com/kata-containers/tests/tree/master) | Excludes unit tests which live with the main code. |
 | www.katacontainers.io | Contains the source for the [main web site](https://www.katacontainers.io). | [Kata 2.x][github-katacontainers.io] | [Kata 1.x][github-katacontainers.io] | | |

 ### Packaging and releases
--- a/2
+++ b/2
@@ -1 +1 @@
-2.1-alpha0
+2.2.0-alpha0
--- a/ci/install_musl.sh
+++ b/ci/install_musl.sh
@@ -12,10 +12,11 @@ install_aarch64_musl() {
 		local musl_tar="${arch}-linux-musl-native.tgz"
 		local musl_dir="${arch}-linux-musl-native"
 		pushd /tmp
-		curl -sLO https://musl.cc/${musl_tar}
-		tar -zxf ${musl_tar}
-		mkdir -p /usr/local/musl/
-		cp -r ${musl_dir}/* /usr/local/musl/
+		if curl -sLO --fail https://musl.cc/${musl_tar}; then
+			tar -zxf ${musl_tar}
+			mkdir -p /usr/local/musl/
+			cp -r ${musl_dir}/* /usr/local/musl/
+		fi
 		popd
 	fi
 }
--- a/ci/install_yq.sh
+++ b/ci/install_yq.sh
@@ -18,7 +18,9 @@ function install_yq() {
 	GOPATH=${GOPATH:-${HOME}/go}
 	local yq_path="${GOPATH}/bin/yq"
 	local yq_pkg="github.com/mikefarah/yq"
-	[ -x  "${GOPATH}/bin/yq" ] && return
+	local yq_version=3.4.1
+
+	[ -x  "${GOPATH}/bin/yq" ] && [ "`${GOPATH}/bin/yq --version`"X == "yq version ${yq_version}"X ] && return

 	read -r -a sysInfo <<< "$(uname -sm)"

@@ -56,8 +58,6 @@ function install_yq() {
 		die "Please install curl"
 	fi

-	local yq_version=3.1.0
-
 	## NOTE: ${var,,} => gives lowercase value of var
 	local yq_url="https://${yq_pkg}/releases/download/${yq_version}/yq_${goos,,}_${goarch}"
 	curl -o "${yq_path}" -LSsf "${yq_url}"
--- a/ci/lib.sh
+++ b/ci/lib.sh
@@ -5,18 +5,27 @@

 export tests_repo="${tests_repo:-github.com/kata-containers/tests}"
 export tests_repo_dir="$GOPATH/src/$tests_repo"
-export branch="${branch:-2.0-dev}"
+export branch="${branch:-main}"

+# Clones the tests repository and checks out the branch given by
+# the global $branch variable.
+# If the clone exists and `CI` is exported then it does nothing. Otherwise
+# it will clone the repository or `git pull` the latest code.
+#
 clone_tests_repo()
 {
-	if [ -d "$tests_repo_dir" -a -n "$CI" ]
-	then
-		return
+	if [ -d "$tests_repo_dir" ]; then
+		[ -n "$CI" ] && return
+		pushd "${tests_repo_dir}"
+		git checkout "${branch}"
+		git pull
+		popd
+	else
+		git clone -q "https://${tests_repo}" "$tests_repo_dir"
+		pushd "${tests_repo_dir}"
+		git checkout "${branch}"
+		popd
 	fi
-
-	go get -d -u "$tests_repo" || true
-
-	pushd "${tests_repo_dir}" && git checkout "${branch}" && popd
 }

 run_static_checks()
--- a/ci/openshift-ci/images/Dockerfile.buildroot
+++ b/ci/openshift-ci/images/Dockerfile.buildroot
@@ -0,0 +1,9 @@
+# Copyright (c) 2021 Red Hat, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# This is the build root image for Kata Containers on OpenShift CI.
+#
+FROM centos:8
+
+RUN yum -y update && yum -y install git sudo wget
--- a/docs/Developer-Guide.md
+++ b/docs/Developer-Guide.md
@@ -1,55 +1,55 @@
-* [Warning](#warning)
-* [Assumptions](#assumptions)
-* [Initial setup](#initial-setup)
-* [Requirements to build individual components](#requirements-to-build-individual-components)
-* [Build and install the Kata Containers runtime](#build-and-install-the-kata-containers-runtime)
-* [Check hardware requirements](#check-hardware-requirements)
-    * [Configure to use initrd or rootfs image](#configure-to-use-initrd-or-rootfs-image)
-    * [Enable full debug](#enable-full-debug)
-        * [debug logs and shimv2](#debug-logs-and-shimv2)
-            * [Enabling full `containerd` debug](#enabling-full-containerd-debug)
-            * [Enabling just `containerd shim` debug](#enabling-just-containerd-shim-debug)
-            * [Enabling `CRI-O` and `shimv2` debug](#enabling-cri-o-and-shimv2-debug)
-        * [journald rate limiting](#journald-rate-limiting)
-            * [`systemd-journald` suppressing messages](#systemd-journald-suppressing-messages)
-            * [Disabling `systemd-journald` rate limiting](#disabling-systemd-journald-rate-limiting)
-* [Create and install rootfs and initrd image](#create-and-install-rootfs-and-initrd-image)
-    * [Build a custom Kata agent - OPTIONAL](#build-a-custom-kata-agent---optional)
-    * [Get the osbuilder](#get-the-osbuilder)
-    * [Create a rootfs image](#create-a-rootfs-image)
-        * [Create a local rootfs](#create-a-local-rootfs)
-        * [Add a custom agent to the image - OPTIONAL](#add-a-custom-agent-to-the-image---optional)
-        * [Build a rootfs image](#build-a-rootfs-image)
-        * [Install the rootfs image](#install-the-rootfs-image)
-    * [Create an initrd image - OPTIONAL](#create-an-initrd-image---optional)
-        * [Create a local rootfs for initrd image](#create-a-local-rootfs-for-initrd-image)
-        * [Build an initrd image](#build-an-initrd-image)
-        * [Install the initrd image](#install-the-initrd-image)
-* [Install guest kernel images](#install-guest-kernel-images)
-* [Install a hypervisor](#install-a-hypervisor)
-    * [Build a custom QEMU](#build-a-custom-qemu)
-        * [Build a custom QEMU for aarch64/arm64 - REQUIRED](#build-a-custom-qemu-for-aarch64arm64---required)
-* [Run Kata Containers with Containerd](#run-kata-containers-with-containerd)
-* [Run Kata Containers with Kubernetes](#run-kata-containers-with-kubernetes)
-* [Troubleshoot Kata Containers](#troubleshoot-kata-containers)
-* [Appendices](#appendices)
-    * [Checking Docker default runtime](#checking-docker-default-runtime)
-    * [Set up a debug console](#set-up-a-debug-console)
-      * [Simple debug console setup](#simple-debug-console-setup)
-          * [Enable agent debug console](#enable-agent-debug-console)
-          * [Start `kata-monitor`](#start-kata-monitor)
-          * [Connect to debug console](#connect-to-debug-console)
-      * [Traditional debug console setup](#traditional-debug-console-setup)
-          * [Create a custom image containing a shell](#create-a-custom-image-containing-a-shell)
-          * [Build the debug image](#build-the-debug-image)
-          * [Configure runtime for custom debug image](#configure-runtime-for-custom-debug-image)
-          * [Connect to the virtual machine using the debug console](#connect-to-the-virtual-machine-using-the-debug-console)
-              * [Enabling debug console for QEMU](#enabling-debug-console-for-qemu)
-              * [Enabling debug console for cloud-hypervisor / firecracker](#enabling-debug-console-for-cloud-hypervisor--firecracker)
-          * [Create a container](#create-a-container)
-          * [Connect to the virtual machine using the debug console](#connect-to-the-virtual-machine-using-the-debug-console)
-    * [Obtain details of the image](#obtain-details-of-the-image)
-    * [Capturing kernel boot logs](#capturing-kernel-boot-logs)
+- [Warning](#warning)
+- [Assumptions](#assumptions)
+- [Initial setup](#initial-setup)
+- [Requirements to build individual components](#requirements-to-build-individual-components)
+- [Build and install the Kata Containers runtime](#build-and-install-the-kata-containers-runtime)
+- [Check hardware requirements](#check-hardware-requirements)
+  - [Configure to use initrd or rootfs image](#configure-to-use-initrd-or-rootfs-image)
+  - [Enable full debug](#enable-full-debug)
+    - [debug logs and shimv2](#debug-logs-and-shimv2)
+      - [Enabling full `containerd` debug](#enabling-full-containerd-debug)
+      - [Enabling just `containerd shim` debug](#enabling-just-containerd-shim-debug)
+      - [Enabling `CRI-O` and `shimv2` debug](#enabling-cri-o-and-shimv2-debug)
+    - [journald rate limiting](#journald-rate-limiting)
+      - [`systemd-journald` suppressing messages](#systemd-journald-suppressing-messages)
+      - [Disabling `systemd-journald` rate limiting](#disabling-systemd-journald-rate-limiting)
+- [Create and install rootfs and initrd image](#create-and-install-rootfs-and-initrd-image)
+  - [Build a custom Kata agent - OPTIONAL](#build-a-custom-kata-agent---optional)
+  - [Get the osbuilder](#get-the-osbuilder)
+  - [Create a rootfs image](#create-a-rootfs-image)
+    - [Create a local rootfs](#create-a-local-rootfs)
+    - [Add a custom agent to the image - OPTIONAL](#add-a-custom-agent-to-the-image---optional)
+    - [Build a rootfs image](#build-a-rootfs-image)
+    - [Install the rootfs image](#install-the-rootfs-image)
+  - [Create an initrd image - OPTIONAL](#create-an-initrd-image---optional)
+    - [Create a local rootfs for initrd image](#create-a-local-rootfs-for-initrd-image)
+    - [Build an initrd image](#build-an-initrd-image)
+    - [Install the initrd image](#install-the-initrd-image)
+- [Install guest kernel images](#install-guest-kernel-images)
+- [Install a hypervisor](#install-a-hypervisor)
+  - [Build a custom QEMU](#build-a-custom-qemu)
+    - [Build a custom QEMU for aarch64/arm64 - REQUIRED](#build-a-custom-qemu-for-aarch64arm64---required)
+- [Run Kata Containers with Containerd](#run-kata-containers-with-containerd)
+- [Run Kata Containers with Kubernetes](#run-kata-containers-with-kubernetes)
+- [Troubleshoot Kata Containers](#troubleshoot-kata-containers)
+- [Appendices](#appendices)
+  - [Checking Docker default runtime](#checking-docker-default-runtime)
+  - [Set up a debug console](#set-up-a-debug-console)
+    - [Simple debug console setup](#simple-debug-console-setup)
+      - [Enable agent debug console](#enable-agent-debug-console)
+      - [Start `kata-monitor` - ONLY NEEDED FOR 2.0.x](#start-kata-monitor---only-needed-for-20x)
+      - [Connect to debug console](#connect-to-debug-console)
+    - [Traditional debug console setup](#traditional-debug-console-setup)
+      - [Create a custom image containing a shell](#create-a-custom-image-containing-a-shell)
+      - [Build the debug image](#build-the-debug-image)
+      - [Configure runtime for custom debug image](#configure-runtime-for-custom-debug-image)
+      - [Create a container](#create-a-container)
+      - [Connect to the virtual machine using the debug console](#connect-to-the-virtual-machine-using-the-debug-console)
+        - [Enabling debug console for QEMU](#enabling-debug-console-for-qemu)
+        - [Enabling debug console for cloud-hypervisor / firecracker](#enabling-debug-console-for-cloud-hypervisor--firecracker)
+        - [Connecting to the debug console](#connecting-to-the-debug-console)
+  - [Obtain details of the image](#obtain-details-of-the-image)
+  - [Capturing kernel boot logs](#capturing-kernel-boot-logs)

 # Warning

@@ -305,7 +305,7 @@ You MUST choose one of `alpine`, `centos`, `clearlinux`, `debian`, `euleros`, `f
 > - You should only do this step if you are testing with the latest version of the agent.

 ```
-$ sudo install -o root -g root -m 0550 -t ${ROOTFS_DIR}/bin ../../../src/agent/target/x86_64-unknown-linux-musl/release/kata-agent
+$ sudo install -o root -g root -m 0550 -t ${ROOTFS_DIR}/usr/bin ../../../src/agent/target/x86_64-unknown-linux-musl/release/kata-agent
 $ sudo install -o root -g root -m 0440 ../../../src/agent/kata-agent.service ${ROOTFS_DIR}/usr/lib/systemd/system/
 $ sudo install -o root -g root -m 0440 ../../../src/agent/kata-containers.target ${ROOTFS_DIR}/usr/lib/systemd/system/
 ```
@@ -385,31 +385,56 @@ You can build and install the guest kernel image as shown [here](../tools/packag

 # Install a hypervisor

-When setting up Kata using a [packaged installation method](install/README.md#installing-on-a-linux-system), the `qemu-lite` hypervisor is installed automatically. For other installation methods, you will need to manually install a suitable hypervisor.
+When setting up Kata using a [packaged installation method](install/README.md#installing-on-a-linux-system), the
+`QEMU` VMM is installed automatically. Cloud-Hypervisor and Firecracker VMMs are available from the [release tarballs](https://github.com/kata-containers/kata-containers/releases), as well as through [`kata-deploy`](../tools/packaging/kata-deploy/README.md).
+You may choose to manually build your VMM/hypervisor.

 ## Build a custom QEMU

-Your QEMU directory need to be prepared with source code. Alternatively, you can use the [Kata containers QEMU](https://github.com/kata-containers/qemu/tree/master) and checkout the recommended branch:
+Kata Containers makes use of the upstream QEMU branch. The exact version
+and repository utilized can be found by looking at the [versions file](../versions.yaml).

+Find the correct version of QEMU from the versions file:
 ```
-$ go get -d github.com/kata-containers/qemu
-$ qemu_branch=$(grep qemu-lite- ${GOPATH}/src/github.com/kata-containers/kata-containers/versions.yaml | cut -d '"' -f2)
-$ cd ${GOPATH}/src/github.com/kata-containers/qemu
-$ git checkout -b $qemu_branch remotes/origin/$qemu_branch
-$ your_qemu_directory=${GOPATH}/src/github.com/kata-containers/qemu
+$ source ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/scripts/lib.sh
+$ qemu_version=$(get_from_kata_deps "assets.hypervisor.qemu.version")
+$ echo ${qemu_version}
+```
+Get source from the matching branch of QEMU:
+```
+$ go get -d github.com/qemu/qemu
+$ cd ${GOPATH}/src/github.com/qemu/qemu
+$ git checkout ${qemu_version}
+$ your_qemu_directory=${GOPATH}/src/github.com/qemu/qemu
 ```

-To build a version of QEMU using the same options as the default `qemu-lite` version , you could use the `configure-hypervisor.sh` script:
-
+There are scripts to manage the build and packaging of QEMU. For the examples below, set your
+environment as:
+```
+$ go get -d github.com/kata-containers/kata-containers
+$ packaging_dir="${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging"
+```
+
+Kata often utilizes patches for not-yet-upstream and/or backported fixes for components,
+including QEMU. These can be found in the [packaging/QEMU directory](../tools/packaging/qemu/patches),
+and it's *recommended* that you apply them. For example, if you are going to build QEMU
+version 5.2.0, run:
 ```
-$ go get -d github.com/kata-containers/kata-containers/tools/packaging
 $ cd $your_qemu_directory
-$ ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/scripts/configure-hypervisor.sh qemu > kata.cfg
+$ $packaging_dir/scripts/apply_patches.sh $packaging_dir/qemu/patches/5.2.x/
+```
+
+To build utilizing the same options as Kata, you should make use of the `configure-hypervisor.sh` script. For example:
+```
+$ cd $your_qemu_directory
+$ $packaging_dir/scripts/configure-hypervisor.sh kata-qemu > kata.cfg
 $ eval ./configure "$(cat kata.cfg)"
 $ make -j $(nproc)
 $ sudo -E make install
 ```

+See the [static-build script for QEMU](../tools/packaging/static-build/qemu/build-static-qemu.sh) for a reference on how to get, setup, configure and build QEMU for Kata.
+
 ### Build a custom QEMU for aarch64/arm64 - REQUIRED
 > **Note:**
 >
@@ -477,9 +502,9 @@ debug_console_enabled = true

 This will pass `agent.debug_console agent.debug_console_vport=1026` to agent as kernel parameters, and sandboxes created using this parameters will start a shell in guest if new connection is accept from VSOCK.

-#### Start `kata-monitor`
+#### Start `kata-monitor` - ONLY NEEDED FOR 2.0.x

-The `kata-runtime exec` command needs `kata-monitor` to get the sandbox's `vsock` address to connect to, first start `kata-monitor`.
+For Kata Containers `2.0.x` releases, the `kata-runtime exec` command depends on `kata-monitor` running in order to get the sandbox's `vsock` address to connect to. Thus, first start the `kata-monitor` process.

 ```
 $ sudo kata-monitor
@@ -487,7 +512,6 @@ $ sudo kata-monitor

 `kata-monitor` will serve at `localhost:8090` by default.

-
 #### Connect to debug console

 Command `kata-runtime exec` is used to connect to the debug console.
@@ -502,6 +526,10 @@ bash-4.2# exit
 exit
 ```

+`kata-runtime exec` has a command-line option `runtime-namespace`, which is used to specify under which [runtime namespace](https://github.com/containerd/containerd/blob/master/docs/namespaces.md) the particular pod was created. By default, it is set to `k8s.io` and works for containerd when configured
+with Kubernetes. For CRI-O, the namespace should be set to `default` explicitly. This should not be confused with [Kubernetes namespaces](https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/).
+For other CRI-runtimes and configurations, you may need to set the namespace utilizing the `runtime-namespace` option.
+
 If you want to access guest OS through a traditional way, see [Traditional debug console setup)](#traditional-debug-console-setup).

 ### Traditional debug console setup
@@ -621,8 +649,11 @@ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.debug_cons
 > **Note** Ports 1024 and 1025 are reserved for communication with the agent
 > and gathering of agent logs respectively. 

-Next, connect to the debug console. The VSOCKS paths vary slightly between
-cloud-hypervisor and firecracker.
+##### Connecting to the debug console
+
+Next, connect to the debug console. The VSOCKS paths vary slightly between each
+VMM solution.
+
 In case of cloud-hypervisor, connect to the `vsock` as shown:
 ```
 $ sudo su -c 'cd /var/run/vc/vm/{sandbox_id}/root/ && socat stdin unix-connect:clh.sock'
@@ -639,6 +670,12 @@ CONNECT 1026

 **Note**: You need to press the `RETURN` key to see the shell prompt.

+
+For QEMU, connect to the `vsock` as shown:
+```
+$ sudo su -c 'cd /var/run/vc/vm/{sandbox_id} && socat "stdin,raw,echo=0,escape=0x11" "unix-connect:console.sock"'
+```
+
 To disconnect from the virtual machine, type `CONTROL+q` (hold down the
 `CONTROL` key and press `q`).

--- a/docs/Documentation-Requirements.md
+++ b/docs/Documentation-Requirements.md
@@ -25,7 +25,7 @@ All documents must:
 - Have a `.md` file extension.
 - Include a TOC (table of contents) at the top of the document with links to
  all heading sections. We recommend using the
-  [`check-markdown`](https://github.com/kata-containers/tests/tree/master/cmd/check-markdown)
+  [`kata-check-markdown`](https://github.com/kata-containers/tests/tree/master/cmd/check-markdown)
  tool to generate the TOC.
 - Be linked to from another document in the same repository.

--- a/docs/Licensing-strategy.md
+++ b/docs/Licensing-strategy.md
@@ -22,4 +22,4 @@ licensing and allows automated tooling to check the license of individual
 files.

 This SPDX licence identifier requirement is enforced by the
-[CI (Continuous Integration) system](https://github.com/kata-containers/tests/blob/master/.ci/static-checks.sh).
+[CI (Continuous Integration) system](https://github.com/kata-containers/tests/blob/main/.ci/static-checks.sh).
--- a/docs/Limitations.md
+++ b/docs/Limitations.md
@@ -19,6 +19,8 @@
        * [Support for joining an existing VM network](#support-for-joining-an-existing-vm-network)
        * [docker --net=host](#docker---nethost)
        * [docker run --link](#docker-run---link)
+    * [Storage limitations](#storage-limitations)
+        * [Kubernetes `volumeMounts.subPaths`](#kubernetes-volumemountssubpaths)
    * [Host resource sharing](#host-resource-sharing)
        * [docker run --privileged](#docker-run---privileged)
 * [Miscellaneous](#miscellaneous)
@@ -26,7 +28,7 @@
 * [Appendices](#appendices)
    * [The constraints challenge](#the-constraints-challenge)

---
+***

 # Overview

@@ -92,7 +94,9 @@ This section lists items that might be possible to fix.
 ### checkpoint and restore

 The runtime does not provide `checkpoint` and `restore` commands. There
-are discussions about using VM save and restore to give [`criu`](https://github.com/checkpoint-restore/criu)-like functionality, which might provide a solution.
+are discussions about using VM save and restore to give us
+[`criu`](https://github.com/checkpoint-restore/criu)-like functionality,
+which might provide a solution.

 Note that the OCI standard does not specify `checkpoint` and `restore`
 commands.
@@ -216,6 +220,17 @@ Equivalent functionality can be achieved with the newer docker networking comman
 See more documentation at
 [docs.docker.com](https://docs.docker.com/engine/userguide/networking/default_network/dockerlinks/).

+## Storage limitations
+
+### Kubernetes `volumeMounts.subPaths`
+
+Kubernetes `volumeMounts.subPath` is not supported by Kata Containers at the
+moment.
+
+See [this issue](https://github.com/kata-containers/runtime/issues/2812) for more details.
+[Another issue](https://github.com/kata-containers/kata-containers/issues/1728) focuses on the case of `emptyDir`.
+
+
 ## Host resource sharing

 ### docker run --privileged
@@ -224,7 +239,7 @@ Privileged support in Kata is essentially different from `runc` containers.
 Kata does support `docker run --privileged` command, but in this case full access
 to the guest VM is provided in addition to some host access.

-The container runs with elevated capabilities within the guest and is granted 
+The container runs with elevated capabilities within the guest and is granted
 access to guest devices instead of the host devices.
 This is also true with using `securityContext privileged=true` with Kubernetes.

--- a/docs/README.md
+++ b/docs/README.md
@@ -49,6 +49,7 @@ Documents that help to understand and contribute to Kata Containers.
 ### Design and Implementations

 * [Kata Containers Architecture](design/architecture.md): Architectural overview of Kata Containers
+* [Kata Containers E2E Flow](design/end-to-end-flow.md): The entire end-to-end flow of Kata Containers
 * [Kata Containers design](./design/README.md): More Kata Containers design documents

 ### How to Contribute
--- a/docs/Release-Process.md
+++ b/docs/Release-Process.md
@@ -18,8 +18,7 @@
 ## Requirements

 - [hub](https://github.com/github/hub)
-
- OBS account with permissions on [`/home:katacontainers`](https://build.opensuse.org/project/subprojects/home:katacontainers)
+  * Using an [application token](https://github.com/settings/tokens) is required for hub.

 - GitHub permissions to push tags and create releases in Kata repositories.

@@ -30,16 +29,12 @@

 ## Release Process

+
 ### Bump all Kata repositories

-  - We have set up a Jenkins job to bump the version in the `VERSION` file in all Kata repositories. Go to the [Jenkins bump-job page](http://jenkins.katacontainers.io/job/release/build) to trigger a new job.
-  - Start a new job with variables for the job passed as:
-     - `BRANCH=<the-branch-you-want-to-bump>`
-     - `NEW_VERSION=<the-new-kata-version>`
-
-     For example, in the case where you want to make a patch release `1.10.2`, the variable `NEW_VERSION` should be `1.10.2` and `BRANCH` should point to  `stable-1.10`. In case of an alpha or release candidate release, `BRANCH` should point to `master` branch.
-
-  Alternatively, you can also bump the repositories using a script in the Kata packaging repo
+  Bump the repositories using a script in the Kata packaging repo, where:
+  - `BRANCH=<the-branch-you-want-to-bump>`
+  - `NEW_VERSION=<the-new-kata-version>`
  ```
  $ cd ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/release
  $ export NEW_VERSION=<the-new-kata-version>
@@ -47,6 +42,23 @@
  $ ./update-repository-version.sh -p "$NEW_VERSION" "$BRANCH"
  ```

+### Point tests repository to stable branch
+
+  If you create a new stable branch, i.e. if your release changes a major or minor version number (not a patch release), then
+  you should modify the `tests` repository to point to that newly created stable branch and not the `main` branch.
+  The objective is that changes in the CI on the main branch will not impact the stable branch.
+
+  In the `tests` repository, change references to the `main` branch in:
+  * `README.md`
+  * `versions.yaml`
+  * `cmd/github-labels/labels.yaml.in`
+  * `cmd/pmemctl/pmemctl.sh`
+  * `.ci/lib.sh`
+  * `.ci/static-checks.sh`
+
+  See the commits in [the corresponding PR for stable-2.1](https://github.com/kata-containers/tests/pull/3504) for an example of the changes.
+
+
 ### Merge all bump version Pull requests

  - The above step will create a GitHub pull request in the Kata projects. Trigger the CI using `/test` command on each bump Pull request.
@@ -56,7 +68,7 @@
 ### Tag all Kata repositories

  Once all the pull requests to bump versions in all Kata repositories are merged,
-  tag all the repositories as shown below.  
+  tag all the repositories as shown below.
  ```
  $ cd ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/release
  $ git checkout  <kata-branch-to-release>
@@ -66,7 +78,7 @@

 ### Check Git-hub Actions

-  We make use of [GitHub actions](https://github.com/features/actions) in this [file](https://github.com/kata-containers/kata-containers/blob/master/.github/workflows/main.yaml) in the `kata-containers/kata-containers` repository to build and upload release artifacts. This action is auto triggered with the above step when a new tag is pushed to the `kata-containers/kata-conatiners` repository.
+  We make use of [GitHub actions](https://github.com/features/actions) in this [file](https://github.com/kata-containers/kata-containers/blob/main/.github/workflows/main.yaml) in the `kata-containers/kata-containers` repository to build and upload release artifacts. This action is auto triggered with the above step when a new tag is pushed to the `kata-containers/kata-containers` repository.

  Check the [actions status page](https://github.com/kata-containers/kata-containers/actions) to verify all steps in the actions workflow have completed successfully. On success, a static tarball containing Kata release artifacts will be uploaded to the [Release page](https://github.com/kata-containers/kata-containers/releases).

@@ -79,9 +91,9 @@
  ```
  $ cd ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/release
  # Note: OLD_VERSION is where the script should start to get changes.
-  $ ./runtime-release-notes.sh ${OLD_VERSION} ${NEW_VERSION} > notes.md
+  $ ./release-notes.sh ${OLD_VERSION} ${NEW_VERSION} > notes.md
  # Edit the `notes.md` file to review and make any changes to the release notes.
-  # Add the release notes in GitHub runtime.
+  # Add the release notes in the project's GitHub.
  $ hub release edit -F notes.md "${NEW_VERSION}"
  ```

--- a/docs/Stable-Branch-Strategy.md
+++ b/docs/Stable-Branch-Strategy.md
@@ -32,16 +32,16 @@ provides additional information regarding release `99.123.77` in the previous ex
  changing the existing behavior*.

 - When `MAJOR` increases, the new release adds **new features, bug fixes, or
-  both** and which *changes the behavior from the previous release* (incompatible with previous releases).
+  both** and which **changes the behavior from the previous release** (incompatible with previous releases).

  A major release will also likely require a change of the container manager version used, 
-for example Docker\*. Please refer to the release notes for further details.
+for example Containerd or CRI-O. Please refer to the release notes for further details.

 ## Release Strategy

 Any new features added since the last release will be available in the next minor
 release. These will include bug fixes as well. To facilitate a stable user environment, 
-Kata provides stable branch-based releases and a master branch release.
+Kata provides stable branch-based releases and a main branch release.

 ## Stable branch patch criteria

@@ -49,9 +49,10 @@ No new features should be introduced to stable branches.  This is intended to li
 providing only bug and security fixes.

 ## Branch Management
-Kata Containers will maintain two stable release branches in addition to the master branch.
-Once a new MAJOR or MINOR release is created from master, a new stable branch is created for
-the prior MAJOR or MINOR release and the older stable branch is no longer maintained. End of
+Kata Containers will maintain **one** stable release branch, in addition to the main branch, for
+each active major release.
+Once a new MAJOR or MINOR release is created from main, a new stable branch is created for
+the prior MAJOR or MINOR release and the previous stable branch is no longer maintained. End of
 maintenance for a branch is announced on the Kata Containers mailing list.  Users can determine
 the version currently installed by running `kata-runtime kata-env`. It is recommended to use the
 latest stable branch available.
@@ -61,59 +62,59 @@ A couple of examples follow to help clarify this process.
 ### New bug fix introduced

 A bug fix is submitted against the runtime which does not introduce new inter-component dependencies.
-This fix is applied to both the master and stable branches, and there is no need to create a new
+This fix is applied to both the main and stable branches, and there is no need to create a new
 stable branch.

 | Branch | Original version | New version |
 |--|--|--|
-| `master` | `1.3.0-rc0` | `1.3.0-rc1` |
-| `stable-1.2` | `1.2.0` | `1.2.1` |
-| `stable-1.1` | `1.1.2` | `1.1.3` |
+| `main` | `2.3.0-rc0` | `2.3.0-rc1` |
+| `stable-2.2` | `2.2.0` | `2.2.1` |
+| `stable-2.1` | (unmaintained) | (unmaintained) |


 ### New release made feature or change adding new inter-component dependency

 A new feature is introduced, which adds a new inter-component dependency. In this case a new stable
-branch is created (stable-1.3) starting from master and the older stable branch (stable-1.1)
+branch is created (stable-2.3) starting from main and the previous stable branch (stable-2.2)
 is dropped from maintenance.


 | Branch | Original version | New version |
 |--|--|--|
-| `master` | `1.3.0-rc1` | `1.3.0` |
-| `stable-1.3` | N/A| `1.3.0` |
-| `stable-1.2` | `1.2.1` | `1.2.2` |
-| `stable-1.1` | `1.1.3` | (unmaintained) |
+| `main` | `2.3.0-rc1` | `2.3.0` |
+| `stable-2.3` | N/A| `2.3.0` |
+| `stable-2.2` | `2.2.1` | (unmaintained) |
+| `stable-2.1` | (unmaintained) | (unmaintained) |

-Note, the stable-1.1 branch will still exist with tag 1.1.3, but under current plans it is
-not maintained further. The next tag applied to master will be 1.4.0-alpha0. We would then
+Note, the stable-2.2 branch will still exist with tag 2.2.1, but under current plans it is
+not maintained further. The next tag applied to main will be 2.4.0-alpha0. We would then
 create a couple of alpha releases gathering features targeted for that particular release (in
-this case 1.4.0), followed by a release candidate. The release candidate marks a feature freeze.
+this case 2.4.0), followed by a release candidate. The release candidate marks a feature freeze.
 A new stable branch is created for the release candidate. Only bug fixes and any security issues
-are added to the branch going forward until release 1.4.0 is made.
+are added to the branch going forward until release 2.4.0 is made.
   
 ## Backporting Process 

-Development that occurs against the master branch and applicable code commits should also be submitted
+Development that occurs against the main branch and applicable code commits should also be submitted
 against the stable branches. Some guidelines for this process follow::
  1. Only bug and security fixes which do not introduce inter-component dependencies are
 candidates for stable branches. These PRs should be marked with "bug" in GitHub.
-  2. Once a PR is created against master which meets requirement of (1), a comparable one
+  2. Once a PR is created against main which meets requirement of (1), a comparable one
 should also be submitted against the stable branches. It is the responsibility of the submitter
 to apply their pull request against stable, and it is the responsibility of the
 reviewers to help identify stable-candidate pull requests.
 
 ## Continuous Integration Testing

-The test repository is forked to create stable branches from master. Full CI
-runs on each stable and master PR using its respective tests repository branch.
+The test repository is forked to create stable branches from main. Full CI
+runs on each stable and main PR using its respective tests repository branch.

 ### An alternative method for CI testing:

-Ideally, the continuous integration infrastructure will run the same test suite on both master
+Ideally, the continuous integration infrastructure will run the same test suite on both main
 and the stable branches.  When tests are modified or new feature tests are introduced, explicit
 logic should exist within the testing CI to make sure only applicable tests are executed against
-stable and master. While this is not in place currently, it should be considered in the long term.
+stable and main. While this is not in place currently, it should be considered in the long term.

 ## Release Management

@@ -121,7 +122,7 @@ stable and master. While this is not in place currently, it should be considered

 Releases are made every three weeks, which include a GitHub release as
 well as binary packages. These patch releases are made for both stable branches, and a "release candidate"
-for the next `MAJOR` or `MINOR` is created from master. If there are no changes across all the repositories, no
+for the next `MAJOR` or `MINOR` is created from main. If there are no changes across all the repositories, no
 release is created and an announcement is made on the developer mailing list to highlight this.
 If a release is being made, each repository is tagged for this release, regardless
 of whether changes are introduced. The release schedule can be seen on the
@@ -142,10 +143,10 @@ maturity, we have increased the cadence from six weeks to twelve weeks. The rele
 ### Compatibility
 Kata guarantees compatibility between components that are within one minor release of each other. 
 
-This is critical for dependencies which cross between host (runtime, shim, proxy) and
+This is critical for dependencies which cross between host (shimv2 runtime) and
 the guest (hypervisor, rootfs and agent).  For example, consider a cluster with a long-running
-deployment, workload-never-dies, all on Kata version 1.1.3 components. If the operator updates
-the Kata components to the next new minor release (i.e. 1.2.0), we need to guarantee that the 1.2.0
-runtime still communicates with 1.1.3 agent within workload-never-dies.
+deployment, workload-never-dies, all on Kata version 2.1.3 components. If the operator updates
+the Kata components to the next new minor release (i.e. 2.2.0), we need to guarantee that the 2.2.0
+shimv2 runtime still communicates with 2.1.3 agent within workload-never-dies.

 Handling live-update is out of the scope of this document. See this [`kata-runtime` issue](https://github.com/kata-containers/runtime/issues/492) for details.
--- a/docs/design/arch-images/katacontainers-e2e-with-bg.jpg
+++ b/docs/design/arch-images/katacontainers-e2e-with-bg.jpg
--- a/docs/design/arch-images/katacontainers-e2e.svg
+++ b/docs/design/arch-images/katacontainers-e2e.svg
--- a/docs/design/architecture.md
+++ b/docs/design/architecture.md
@@ -137,7 +137,7 @@ The runtime uses a TOML format configuration file called `configuration.toml`. B

 The actual configuration file paths can be determined by running:
 ```
-$ kata-runtime --kata-show-default-config-paths
+$ kata-runtime --show-default-config-paths
 ```
 Most users will not need to modify the configuration file.

--- a/docs/design/end-to-end-flow.md
+++ b/docs/design/end-to-end-flow.md
@@ -0,0 +1,4 @@
+# Kata Containers E2E Flow
+
+
+![Kata containers e2e flow](arch-images/katacontainers-e2e-with-bg.jpg)
--- a/docs/how-to/README.md
+++ b/docs/how-to/README.md
@@ -37,3 +37,4 @@
 - [How to use Kata Containers with `virtio-mem`](how-to-use-virtio-mem-with-kata.md)
 - [How to set sandbox Kata Containers configurations with pod annotations](how-to-set-sandbox-config-kata.md)
 - [How to monitor Kata Containers in K8s](how-to-set-prometheus-in-k8s.md)
+- [How to use hotplug memory on arm64 in Kata Containers](how-to-hotplug-memory-arm64.md)
--- a/docs/how-to/how-to-hotplug-memory-arm64.md
+++ b/docs/how-to/how-to-hotplug-memory-arm64.md
@@ -0,0 +1,32 @@
+# How to use memory hotplug feature in Kata Containers on arm64
+
+- [Introduction](#introduction)
+- [Install UEFI ROM](#install-uefi-rom)
+- [Run for test](#run-for-test)
+
+## Introduction
+
+Memory hotplug is a key feature for containers to allocate memory dynamically in deployment.
+Because Kata Containers is based on VMs, this feature needs support from both the VMM and the guest kernel. Luckily, it is fully supported by the current default versions of QEMU and the guest kernel used by Kata on arm64. For other VMMs, e.g. Cloud Hypervisor, the enablement work is in progress. Apart from the VMM and guest kernel, memory hotplug also depends on ACPI, which in turn depends on firmware. On x86, you can boot a VM using QEMU with ACPI enabled directly, because it boots up with firmware implicitly. For arm64, however, you need to specify the firmware explicitly. That is to say, if you are already able to run a normal Kata Container on arm64, the only extra step is to install the UEFI ROM before using the memory hotplug feature.
+
+## Install UEFI ROM
+
+We provide a helper script to install the UEFI ROM. If you have installed Kata normally on your host, you just need to run the script as follows:
+
+```bash
+$ pushd $GOPATH/src/github.com/kata-containers/tests
+$ sudo .ci/aarch64/install_rom_aarch64.sh
+$ popd
+```
+
+## Run for test
+
+Let's test whether memory hotplug is ready for Kata after installing the UEFI ROM. Make sure containerd is ready to run Kata before testing.
+
+```bash
+$ sudo ctr image pull docker.io/library/ubuntu:latest
+$ sudo ctr run --runtime io.containerd.run.kata.v2 -t --rm docker.io/library/ubuntu:latest hello sh -c "free -h"
+$ sudo ctr run --runtime io.containerd.run.kata.v2 -t --memory-limit 536870912 --rm docker.io/library/ubuntu:latest hello sh -c "free -h"
+```
+
+Compare the results of the two tests. If the latter reports 0.5G more memory than the former, memory hotplug is working. Congratulations!
--- a/docs/how-to/how-to-import-kata-logs-with-fluentd.md
+++ b/docs/how-to/how-to-import-kata-logs-with-fluentd.md
@@ -185,7 +185,7 @@ in Kibana:
 ![Kata tags in EFK](./images/efk_syslog_entry_detail.png).

 We can however further sub-parse the Kata entries using the
-[Fluentd plugins](https://docs.fluentbit.io/manual/parser/logfmt) that will parse
+[Fluentd plugins](https://docs.fluentbit.io/manual/pipeline/parsers/logfmt) that will parse
 `logfmt` formatted data. We can utilise these to parse the sub-fields using a Fluentd filter
 section. At the same time, we will prefix the new fields with `kata_` to make it clear where
 they have come from:
@@ -222,7 +222,7 @@ test to check the parsing works. The resulting output from Fluentd is:
  "_COMM":"kata-runtime",
  "_EXE":"/opt/kata/bin/kata-runtime",
  "SYSLOG_TIMESTAMP":"Feb 21 10:31:27 ",
-  "_CMDLINE":"/opt/kata/bin/kata-runtime --kata-config /opt/kata/share/defaults/kata-containers/configuration-qemu.toml --root /run/runc state 7cdd31660d8705facdadeb8598d2c0bd008e8142c54e3b3069abd392c8d58997",
+  "_CMDLINE":"/opt/kata/bin/kata-runtime --config /opt/kata/share/defaults/kata-containers/configuration-qemu.toml --root /run/runc state 7cdd31660d8705facdadeb8598d2c0bd008e8142c54e3b3069abd392c8d58997",
  "SYSLOG_PID":"14314",
  "_PID":"14314",
  "MESSAGE":"time=\"2020-02-21T10:31:27.810781647Z\" level=info msg=\"release sandbox\" arch=amd64 command=state container=7cdd31660d8705facdadeb8598d2c0bd008e8142c54e3b3069abd392c8d58997 name=kata-runtime pid=14314 sandbox=1c3e77cad66aa2b6d8cc846f818370f79cb0104c0b840f67d0f502fd6562b68c source=virtcontainers subsystem=sandbox",
@@ -281,7 +281,7 @@ own file (rather than into the system journal).

 ```bash
 #!/bin/bash
-/opt/kata/bin/kata-runtime --kata-config "/opt/kata/share/defaults/kata-containers/configuration-qemu.toml" --log-format=json --log=/var/log/kata-runtime.log $@
+/opt/kata/bin/kata-runtime --config "/opt/kata/share/defaults/kata-containers/configuration-qemu.toml" --log-format=json --log=/var/log/kata-runtime.log $@
 ```

 And then we'll add the Fluentd config section to parse that file. Note, we inform the parser that Kata is
--- a/docs/how-to/how-to-set-prometheus-in-k8s.md
+++ b/docs/how-to/how-to-set-prometheus-in-k8s.md
@@ -34,7 +34,7 @@ Also you should ensure that `kubectl` working correctly.
 Start Prometheus by utilizing our sample manifest:

 ```
-$ kubectl apply -f https://raw.githubusercontent.com/kata-containers/kata-containers/2.0-dev/docs/how-to/data/prometheus.yml
+$ kubectl apply -f https://raw.githubusercontent.com/kata-containers/kata-containers/main/docs/how-to/data/prometheus.yml
 ```

 This will create a new namespace, `prometheus`, and create the following resources:
@@ -60,7 +60,7 @@ go_gc_duration_seconds{quantile="0.75"} 0.000229911
 `kata-monitor` can be started on the cluster as follows:

 ```
-$ kubectl apply -f https://raw.githubusercontent.com/kata-containers/kata-containers/2.0-dev/docs/how-to/data/kata-monitor-daemonset.yml
+$ kubectl apply -f https://raw.githubusercontent.com/kata-containers/kata-containers/main/docs/how-to/data/kata-monitor-daemonset.yml
 ```

 This will create a new namespace `kata-system` and a `daemonset` in it.
@@ -73,7 +73,7 @@ Once the `daemonset` is running, Prometheus should discover `kata-monitor` as a
 Run this command to run Grafana in Kubernetes:

 ```
-$ kubectl apply -f https://raw.githubusercontent.com/kata-containers/kata-containers/2.0-dev/docs/how-to/data/grafana.yml
+$ kubectl apply -f https://raw.githubusercontent.com/kata-containers/kata-containers/main/docs/how-to/data/grafana.yml
 ```

 This will create deployment and service for Grafana under namespace `prometheus`.
@@ -99,7 +99,7 @@ You can import this dashboard using Grafana UI, or using `curl` command in conso
 $ curl -XPOST -i localhost:3000/api/dashboards/import \
    -u admin:admin \
    -H "Content-Type: application/json" \
-	-d "{\"dashboard\":$(curl -sL https://raw.githubusercontent.com/kata-containers/kata-containers/2.0-dev/docs/how-to/data/dashboard.json )}"
+	-d "{\"dashboard\":$(curl -sL https://raw.githubusercontent.com/kata-containers/kata-containers/main/docs/how-to/data/dashboard.json )}"
 ```

 ## References
--- a/docs/how-to/how-to-set-sandbox-config-kata.md
+++ b/docs/how-to/how-to-set-sandbox-config-kata.md
@@ -26,6 +26,7 @@ There are several kinds of Kata configurations and they are listed below.
 | `io.katacontainers.config.runtime.disable_new_netns` | `boolean` | determines if a new netns is created for the hypervisor process |
 | `io.katacontainers.config.runtime.internetworking_model` | string| determines how the VM should be connected to the container network interface. Valid values are `macvtap`, `tcfilter` and `none` |
 | `io.katacontainers.config.runtime.sandbox_cgroup_only`| `boolean` | determines if Kata processes are managed only in sandbox cgroup |
+| `io.katacontainers.config.runtime.enable_pprof` | `boolean` | enables Golang `pprof` for `containerd-shim-kata-v2` process |

 ## Agent Options
 | Key | Value Type | Comments |
@@ -60,7 +61,7 @@ There are several kinds of Kata configurations and they are listed below.
 | `io.katacontainers.config.hypervisor.enable_swap` | `boolean` | enable swap of VM memory |
 | `io.katacontainers.config.hypervisor.enable_vhost_user_store` | `boolean` | enable vhost-user storage device (QEMU) |
 | `io.katacontainers.config.hypervisor.enable_virtio_mem` | `boolean` | enable virtio-mem (QEMU) |
-| `io.katacontainers.config.hypervisor.entropy_source` | string| the path to a host source of entropy (`/dev/random`, `/dev/urandom` or real hardware RNG device) |
+| `io.katacontainers.config.hypervisor.entropy_source` (R) | string| the path to a host source of entropy (`/dev/random`, `/dev/urandom` or real hardware RNG device) |
 | `io.katacontainers.config.hypervisor.file_mem_backend` (R) | string | file based memory backend root directory |
 | `io.katacontainers.config.hypervisor.firmware_hash` | string | container firmware SHA-512 hash value |
 | `io.katacontainers.config.hypervisor.firmware` | string | the guest firmware that will run the container VM |
@@ -95,6 +96,8 @@ There are several kinds of Kata configurations and they are listed below.

 In case of CRI-O, all annotations specified in the pod spec are passed down to Kata.

+# containerd Configuration
+
 For containerd, annotations specified in the pod spec are passed down to Kata
 starting with version `1.3.0` of containerd. Additionally, extra configuration is
 needed for containerd, by providing a `pod_annotations` field in the containerd config
@@ -107,11 +110,9 @@ for passing annotations to Kata from containerd:
 $ cat /etc/containerd/config
 ....

-[plugins.cri.containerd.runtimes.kata]
-           runtime_type = "io.containerd.runc.v1"
+         [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.kata]
+           runtime_type = "io.containerd.kata.v2"
           pod_annotations = ["io.katacontainers.*"]
-           [plugins.cri.containerd.runtimes.kata.options]
-             BinaryName = "/usr/bin/kata-runtime"
 ....

 ```
@@ -197,6 +198,7 @@ the configuration entry:
 | Key | Config file entry | Comments |
 |-------| ----- | ----- |
 | `ctlpath`  | `valid_ctlpaths` | Valid paths for `acrnctl` binary |
+| `entropy_source` | `valid_entropy_sources` | Valid entropy sources, e.g. `/dev/random` |
 | `file_mem_backend`  | `valid_file_mem_backends` | Valid locations for the file-based memory backend root directory |
 | `jailer_path`  | `valid_jailer_paths`| Valid paths for the jailer constraining the container VM (Firecracker) |
 | `path`  | `valid_hypervisor_paths` | Valid hypervisors to run the container VM |
--- a/docs/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md
+++ b/docs/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md
@@ -7,9 +7,10 @@
    * [Configure Kubelet to use containerd](#configure-kubelet-to-use-containerd)
    * [Configure HTTP proxy - OPTIONAL](#configure-http-proxy---optional)
 * [Start Kubernetes](#start-kubernetes)
-* [Install a Pod Network](#install-a-pod-network)
+* [Configure Pod Network](#configure-pod-network)
 * [Allow pods to run in the master node](#allow-pods-to-run-in-the-master-node)
-* [Create an untrusted pod using Kata Containers](#create-an-untrusted-pod-using-kata-containers)
+* [Create runtime class for Kata Containers](#create-runtime-class-for-kata-containers)
+* [Run pod in Kata Containers](#run-pod-in-kata-containers)
 * [Delete created pod](#delete-created-pod)

 This document describes how to set up a single-machine Kubernetes (k8s) cluster.
@@ -18,9 +19,6 @@ The Kubernetes cluster will use the
 [CRI containerd plugin](https://github.com/containerd/cri) and
 [Kata Containers](https://katacontainers.io) to launch untrusted workloads.

-For Kata Containers 1.5.0-rc2 and above, we will use `containerd-shim-kata-v2` (short as `shimv2` in this documentation)
-to launch Kata Containers. For the previous version of Kata Containers, the Pods are launched with `kata-runtime`.
-
 ## Requirements

 - Kubernetes, Kubelet, `kubeadm`
@@ -125,43 +123,33 @@ $ sudo systemctl daemon-reload
  $ sudo -E kubectl get pods
  ```

-## Install a Pod Network
+## Configure Pod Network

 A pod network plugin is needed to allow pods to communicate with each other.
+You can find more about CNI plugins from the [Creating a cluster with `kubeadm`](https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/#instructions) guide.

- Install the `flannel` plugin by following the
-  [Using `kubeadm` to Create a Cluster](https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/#instructions)
-  guide, starting from the **Installing a pod network** section.
-
- Create a pod network using flannel
-
-  > **Note:** There is no known way to determine programmatically the best version (commit) to use.
-  > See https://github.com/coreos/flannel/issues/995.
+By default, the CNI plugin binaries are installed under `/opt/cni/bin` (in package `kubernetes-cni`), so you only need to create a configuration file for the CNI plugin.

  ```bash
-  $ sudo -E kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
-  ```
+  $ sudo -E mkdir -p /etc/cni/net.d

- Wait for the pod network to become available
-
-  ```bash
-  # number of seconds to wait for pod network to become available
-  $ timeout_dns=420
-
-  $ while [ "$timeout_dns" -gt 0 ]; do
-      if sudo -E kubectl get pods --all-namespaces | grep dns | grep Running; then
-          break
-      fi
-
-      sleep 1s
-      ((timeout_dns--))
-   done
-  ```
-
- Check the pod network is running
-
-  ```bash
-  $ sudo -E kubectl get pods --all-namespaces | grep dns | grep Running && echo "OK" || ( echo "FAIL" && false )
+  $ sudo -E cat > /etc/cni/net.d/10-mynet.conf <<EOF
+  {
+    "cniVersion": "0.2.0",
+    "name": "mynet",
+    "type": "bridge",
+    "bridge": "cni0",
+    "isGateway": true,
+    "ipMasq": true,
+    "ipam": {
+      "type": "host-local",
+      "subnet": "172.19.0.0/24",
+      "routes": [
+        { "dst": "0.0.0.0/0" }
+      ]
+    }
+  }
+  EOF
  ```

 ## Allow pods to run in the master node
@@ -172,24 +160,38 @@ By default, the cluster will not schedule pods in the master node. To enable mas
 $ sudo -E kubectl taint nodes --all node-role.kubernetes.io/master-
 ```

-## Create an untrusted pod using Kata Containers
+## Create runtime class for Kata Containers

 By default, all pods are created with the default runtime configured in CRI containerd plugin.
+From Kubernetes v1.12, users can use [`RuntimeClass`](https://kubernetes.io/docs/concepts/containers/runtime-class/#runtime-class) to specify a different runtime for Pods.

-If a pod has the `io.kubernetes.cri.untrusted-workload` annotation set to `"true"`, the CRI plugin runs the pod with the
+```bash
+$ cat > runtime.yaml <<EOF
+apiVersion: node.k8s.io/v1beta1
+kind: RuntimeClass
+metadata:
+  name: kata
+handler: kata
+EOF
+
+$ sudo -E kubectl apply -f runtime.yaml
+```
+
+## Run pod in Kata Containers
+
+If a pod has the `runtimeClassName` set to `kata`, the CRI plugin runs the pod with the
 [Kata Containers runtime](../../src/runtime/README.md).

- Create an untrusted pod configuration
+- Create a pod configuration that uses the Kata Containers runtime

  ```bash
-  $ cat << EOT | tee nginx-untrusted.yaml
+  $ cat << EOT | tee nginx-kata.yaml
  apiVersion: v1
  kind: Pod
  metadata:
-    name: nginx-untrusted
-    annotations:
-      io.kubernetes.cri.untrusted-workload: "true"
+    name: nginx-kata
  spec:
+    runtimeClassName: kata
    containers:
    - name: nginx
      image: nginx
@@ -197,9 +199,9 @@ If a pod has the `io.kubernetes.cri.untrusted-workload` annotation set to `"true
  EOT
  ```

- Create an untrusted pod
+- Create the pod
  ```bash
-  $ sudo -E kubectl apply -f nginx-untrusted.yaml
+  $ sudo -E kubectl apply -f nginx-kata.yaml
  ```

 - Check pod is running
@@ -216,5 +218,5 @@ If a pod has the `io.kubernetes.cri.untrusted-workload` annotation set to `"true
 ## Delete created pod

 ```bash
-$ sudo -E kubectl delete -f nginx-untrusted.yaml
+$ sudo -E kubectl delete -f nginx-kata.yaml
 ```
--- a/docs/how-to/how-to-use-kata-containers-with-acrn.md
+++ b/docs/how-to/how-to-use-kata-containers-with-acrn.md
@@ -91,7 +91,7 @@ To configure Kata Containers with ACRN, copy the generated `configuration-acrn.t
 The following command shows full paths to the `configuration.toml` files that the runtime loads. It will use the first path that exists. (Please make sure the kernel and image paths are set correctly in the `configuration.toml` file)

 ```bash
-$ sudo kata-runtime --kata-show-default-config-paths
+$ sudo kata-runtime --show-default-config-paths
 ```

 >**Warning:** Please offline CPUs using [this](offline_cpu.sh) script, else VM launches will fail.
--- a/docs/how-to/how-to-use-virtio-fs-with-kata.md
+++ b/docs/how-to/how-to-use-virtio-fs-with-kata.md
@@ -1,61 +1,12 @@
 # Kata Containers with virtio-fs

- [Introduction](#introduction)
- [Pre-requisites](#pre-requisites)
- [Install Kata Containers with virtio-fs support](#install-kata-containers-with-virtio-fs-support)
- [Run a Kata Container utilizing virtio-fs](#run-a-kata-container-utilizing-virtio-fs)
+- [Kata Containers with virtio-fs](#kata-containers-with-virtio-fs)
+  - [Introduction](#introduction)

 ## Introduction

 Container deployments utilize explicit or implicit file sharing between host filesystem and containers. From a trust perspective, avoiding a shared file-system between the trusted host and untrusted container is recommended. This is not always feasible. In Kata Containers, block-based volumes are preferred as they allow usage of either device pass through or `virtio-blk` for access within the virtual machine.

-As of the 1.7 release of Kata Containers, [9pfs](https://www.kernel.org/doc/Documentation/filesystems/9p.txt) is the default filesystem sharing mechanism. While this does allow for workload compatibility, it does so with degraded performance and potential for POSIX compliance limitations.
+As of the 2.0 release of Kata Containers, [virtio-fs](https://virtio-fs.gitlab.io/) is the default filesystem sharing mechanism.

-To help address these limitations, [virtio-fs](https://virtio-fs.gitlab.io/) has been developed. virtio-fs is a shared file system that lets virtual machines access a directory tree on the host. In Kata Containers, virtio-fs can be used to share container volumes, secrets, config-maps, configuration files (hostname, hosts, `resolv.conf`) and the container rootfs on the host with the guest.  virtio-fs provides significant performance and POSIX compliance improvements compared to 9pfs.
-
-Enabling of virtio-fs requires changes in the guest kernel as well as the VMM. For Kata Containers, experimental virtio-fs support is enabled through `qemu` and `cloud-hypervisor` VMMs.
-
-**Note: virtio-fs support is experimental in the 1.7 release of Kata Containers. Work is underway to improve stability, performance and upstream integration. This is available for early preview - use at your own risk**
-
-This document describes how to get Kata Containers to work with virtio-fs.
-
-## Pre-requisites
-
-Before Kata 1.8 this feature required the host to have hugepages support enabled. Enable this with the `sysctl vm.nr_hugepages=1024` command on the host.In later versions of Kata, virtio-fs leverages `/dev/shm` as the shared memory backend. The default size of `/dev/shm` on a system is typically half of the total system memory. This can pose a physical limit to the maximum number of pods that can be launched with virtio-fs. This can be overcome by increasing the size of `/dev/shm` as shown below:
-
-```bash
-$ mount -o remount,size=${desired_shm_size} /dev/shm
-```
- 
-## Install Kata Containers with virtio-fs support
-
-The Kata Containers `qemu` configuration with virtio-fs and the `virtiofs` daemon are available in the [Kata Container release](https://github.com/kata-containers/runtime/releases) artifacts starting with the 1.9 release. Installation is available through [distribution packages](https://github.com/kata-containers/documentation/blob/master/install/README.md#supported-distributions) as well through [`kata-deploy`](https://github.com/kata-containers/packaging/tree/master/kata-deploy).
-
-**Note: Support for virtio-fs was first introduced in `NEMU` hypervisor in Kata 1.8 release. This hypervisor has been deprecated.**
-
-Install the latest release of Kata with `kata-deploy` as follows:
-```
-docker run --runtime=runc -v /opt/kata:/opt/kata -v /var/run/dbus:/var/run/dbus -v /run/systemd:/run/systemd -v /etc/docker:/etc/docker -it katadocker/kata-deploy kata-deploy-docker install
-```
-
-This will place the Kata release artifacts in `/opt/kata`, and update Docker's configuration to include a runtime target, `kata-qemu-virtiofs`. Learn more about `kata-deploy` and how to use `kata-deploy` in Kubernetes [here](https://github.com/kata-containers/packaging/tree/master/kata-deploy#kubernetes-quick-start).
-
-## Run a Kata Container utilizing virtio-fs
-
-Once installed, start a new container, utilizing `qemu` + `virtiofs`:
-```bash
-$ docker run --runtime=kata-qemu-virtiofs -it busybox
-```
-
-Verify the new container is running with the `qemu` hypervisor as well as using `virtiofsd`. To do this look for the hypervisor path and the `virtiofs` daemon process on the host:
-```bash
-$ ps -aux | grep virtiofs
-root ... /home/foo/build-x86_64_virt/x86_64_virt-softmmu/qemu-system-x86_64_virt
-...  -machine virt,accel=kvm,kernel_irqchip,nvdimm ...
-root ... /home/foo/build-x86_64_virt/virtiofsd-x86_64 ...
-```
-
-You can also try out virtio-fs using `cloud-hypervisor` VMM:
-```bash
-$ docker run --runtime=kata-clh -it busybox
-```
+virtio-fs support works out of the box for `cloud-hypervisor` and `qemu`, when Kata Containers is deployed using `kata-deploy`. Learn more about `kata-deploy` and how to use `kata-deploy` in Kubernetes [here](https://github.com/kata-containers/packaging/tree/master/kata-deploy#kubernetes-quick-start).
--- a/docs/how-to/how-to-use-virtio-mem-with-kata.md
+++ b/docs/how-to/how-to-use-virtio-mem-with-kata.md
@@ -13,26 +13,23 @@ Kata Containers with `virtio-mem` supports memory resize.

 ## Requisites

-Kata Containers with `virtio-mem` requires Linux and the QEMU that support `virtio-mem`.
-The Linux kernel and QEMU upstream version still not support `virtio-mem`.  @davidhildenbrand is working on them.
-Please use following unofficial version of the Linux kernel and QEMU that support `virtio-mem` with Kata Containers.
+Kata Containers only supports `virtio-mem` with QEMU.
+Install and setup Kata Containers as shown [here](../install/README.md).

-The Linux kernel is at https://github.com/davidhildenbrand/linux/tree/virtio-mem-rfc-v4.
-The Linux kernel config that can work with Kata Containers is at https://gist.github.com/teawater/016194ee84748c768745a163d08b0fb9.
-
-The QEMU is at https://github.com/teawater/qemu/tree/kata-virtio-mem. (The original source is at https://github.com/davidhildenbrand/qemu/tree/virtio-mem.  Its base version of QEMU cannot work with Kata Containers.  So merge the commit of `virtio-mem` to upstream QEMU.)
-
-Set Linux and the QEMU that support `virtio-mem` with following line in the Kata Containers QEMU configuration `configuration-qemu.toml`:
-```toml
-[hypervisor.qemu]
-path = "qemu-dir"
-kernel = "vmlinux-dir"
+### With x86_64
+The `virtio-mem` config option is enabled in the x86_64 Kata Linux kernel.
+Enable `virtio-mem` as follows:
+```
+$ sudo sed -i -e 's/^#enable_virtio_mem.*$/enable_virtio_mem = true/g' /etc/kata-containers/configuration.toml
 ```

-Enable `virtio-mem` with following line in the Kata Containers configuration:
-```toml
-enable_virtio_mem = true
+### With other architectures
+The `virtio-mem` config option is not enabled in the Kata Linux kernel for other architectures.
+You can enable the `virtio-mem` config option as follows:
 ```
+CONFIG_VIRTIO_MEM=y
+```
+Then you can build and install the guest kernel image as shown [here](../../tools/packaging/kernel/README.md#build-kata-containers-kernel).

 ## Run a Kata Container utilizing `virtio-mem`

@@ -41,13 +38,35 @@ Use following command to enable memory overcommitment of a Linux kernel.  Becaus
 $ echo 1 | sudo tee /proc/sys/vm/overcommit_memory
 ```

-Use following command start a Kata Container.
+Use the following command to start a Kata Container.
 ```
-$ docker run --rm -it --runtime=kata --name test busybox
+$ pod_yaml=pod.yaml
+$ container_yaml=container.yaml
+$ image="quay.io/prometheus/busybox:latest"
+$ cat << EOF > "${pod_yaml}"
+metadata:
+  name: busybox-sandbox1
+EOF
+$ cat << EOF > "${container_yaml}"
+metadata:
+  name: busybox-killed-vmm
+image:
+  image: "$image"
+command:
+- top
+EOF
+$ sudo crictl pull $image
+$ podid=$(sudo crictl runp $pod_yaml)
+$ cid=$(sudo crictl create $podid $container_yaml $pod_yaml)
+$ sudo crictl start $cid
 ```

-Use following command set the memory size of test to default_memory + 512m.
+Use the following command to set the container memory limit to 2g and the memory size of the VM to its default_memory + 2g.
 ```
-$ docker update -m 512m --memory-swap -1 test
+$ sudo crictl update --memory $((2*1024*1024*1024)) $cid
 ```

+Use the following command to set the container memory limit to 1g and the memory size of the VM to its default_memory + 1g.
+```
+$ sudo crictl update --memory $((1*1024*1024*1024)) $cid
+```
--- a/docs/how-to/run-kata-with-k8s.md
+++ b/docs/how-to/run-kata-with-k8s.md
@@ -171,10 +171,10 @@ $ sudo systemctl daemon-reload
 $ sudo systemctl restart kubelet

 # If using CRI-O
-$ sudo kubeadm init --skip-preflight-checks --cri-socket /var/run/crio/crio.sock --pod-network-cidr=10.244.0.0/16
+$ sudo kubeadm init --ignore-preflight-errors=all --cri-socket /var/run/crio/crio.sock --pod-network-cidr=10.244.0.0/16

 # If using CRI-containerd
-$ sudo kubeadm init --skip-preflight-checks --cri-socket /run/containerd/containerd.sock --pod-network-cidr=10.244.0.0/16
+$ sudo kubeadm init --ignore-preflight-errors=all --cri-socket /run/containerd/containerd.sock --pod-network-cidr=10.244.0.0/16

 $ export KUBECONFIG=/etc/kubernetes/admin.conf
 ```
--- a/docs/install/README.md
+++ b/docs/install/README.md
@@ -50,9 +50,7 @@ Kata packages are provided by official distribution repositories for:
 | Distribution (link to installation guide)                | Minimum versions                                                               |
 |----------------------------------------------------------|--------------------------------------------------------------------------------|
 | [CentOS](centos-installation-guide.md)                   | 8                                                                              |
-| [Fedora](fedora-installation-guide.md)                   | 32, Rawhide                                                                    |
-| [openSUSE](opensuse-installation-guide.md)               | [Leap 15.1](opensuse-leap-15.1-installation-guide.md)<br>Leap 15.2, Tumbleweed |
-| [SUSE Linux Enterprise (SLE)](sle-installation-guide.md) | SLE 15 SP1, 15 SP2                                                             |
+| [Fedora](fedora-installation-guide.md)                   | 34                                                                             |

 > **Note::**
 >
--- a/docs/install/centos-installation-guide.md
+++ b/docs/install/centos-installation-guide.md
@@ -3,15 +3,9 @@
 1. Install the Kata Containers components with the following commands:

   ```bash
+   $ sudo -E dnf install -y centos-release-advanced-virtualization
+   $ sudo -E dnf module disable -y virt:rhel
   $ source /etc/os-release
-   $ cat <<EOF | sudo -E tee /etc/yum.repos.d/advanced-virt.repo
-     [advanced-virt]
-     name=Advanced Virtualization
-     baseurl=http://mirror.centos.org/\$contentdir/\$releasever/virt/\$basearch/advanced-virtualization
-     enabled=1
-     gpgcheck=1
-     skip_if_unavailable=1
-     EOF
   $ cat <<EOF | sudo -E tee /etc/yum.repos.d/kata-containers.repo
     [kata-containers]
     name=Kata Containers
@@ -20,8 +14,7 @@
     gpgcheck=1
     skip_if_unavailable=1
     EOF
-   $ sudo -E dnf module disable -y virt:rhel
-   $ sudo -E dnf install -y kata-runtime
+   $ sudo -E dnf install -y kata-containers
   ```

 2. Decide which container manager to use and select the corresponding link that follows:
--- a/docs/install/fedora-installation-guide.md
+++ b/docs/install/fedora-installation-guide.md
@@ -3,7 +3,7 @@
 1. Install the Kata Containers components with the following commands:

   ```bash
-   $ sudo -E dnf -y install kata-runtime
+   $ sudo -E dnf -y install kata-containers
   ```

 2. Decide which container manager to use and select the corresponding link that follows:
--- a/docs/install/opensuse-installation-guide.md
+++ b/docs/install/opensuse-installation-guide.md
@@ -1,10 +0,0 @@
-# Install Kata Containers on openSUSE
-
-1. Install the Kata Containers components with the following commands:
-
-   ```bash
-   $ sudo -E zypper -n install katacontainers
-   ```
-
-2. Decide which container manager to use and select the corresponding link that follows:
-   - [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)
--- a/docs/install/opensuse-leap-15.1-installation-guide.md
+++ b/docs/install/opensuse-leap-15.1-installation-guide.md
@@ -1,11 +0,0 @@
-# Install Kata Containers on openSUSE Leap 15.1
-
-1. Install the Kata Containers components with the following commands:
-
-   ```bash
-   $ sudo -E zypper addrepo --refresh "https://download.opensuse.org/repositories/devel:/kubic/openSUSE_Leap_15.1/devel:kubic.repo"
-   $ sudo -E zypper -n --gpg-auto-import-keys install katacontainers
-   ```
-
-2. Decide which container manager to use and select the corresponding link that follows:
-   - [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)
--- a/docs/install/sle-installation-guide.md
+++ b/docs/install/sle-installation-guide.md
@@ -1,13 +0,0 @@
-# Install Kata Containers on SLE
-
-1. Install the Kata Containers components with the following commands:
-
-   ```bash
-   $ source /etc/os-release
-   $ DISTRO_VERSION=$(sed "s/-/_/g" <<< "$VERSION")
-   $ sudo -E zypper addrepo --refresh "https://download.opensuse.org/repositories/devel:/kubic/SLE_${DISTRO_VERSION}_Backports/devel:kubic.repo"
-   $ sudo -E zypper -n --gpg-auto-import-keys install katacontainers
-   ```
-
-2. Decide which container manager to use and select the corresponding link that follows:
-   - [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)
--- a/docs/install/snap-installation-guide.md
+++ b/docs/install/snap-installation-guide.md
@@ -2,9 +2,6 @@

 * [Install Kata Containers](#install-kata-containers)
 * [Configure Kata Containers](#configure-kata-containers)
-* [Integration with non-compatible shim v2 Container Engines](#integration-with-non-compatible-shim-v2-container-engines)
-    * [Integration with Docker](#integration-with-docker)
-    * [Integration with Podman](#integration-with-podman)
 * [Integration with shim v2 Container Engines](#integration-with-shim-v2-container-engines)
 * [Remove Kata Containers snap package](#remove-kata-containers-snap-package)

@@ -14,23 +11,10 @@
 Kata Containers can be installed in any Linux distribution that supports
 [snapd](https://docs.snapcraft.io/installing-snapd).

-> NOTE: From Kata Containers 2.x, only the [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2)
-> is supported, note that some container engines (`docker`, `podman`, etc) may not
-> be able to run Kata Containers 2.x.
-
-Kata Containers 1.x is released through the *stable* channel while Kata Containers
-2.x is available in the *candidate* channel.
-
-Run the following command to install **Kata Containers 1.x**:
+Run the following command to install **Kata Containers**:

 ```sh
-$ sudo snap install kata-containers --classic
-```
-
-Run the following command to install **Kata Containers 2.x**:
-
-```sh
-$ sudo snap install kata-containers --candidate --classic
+$ sudo snap install kata-containers --stable --classic
 ```

 ## Configure Kata Containers
@@ -46,55 +30,6 @@ $ sudo cp /snap/kata-containers/current/usr/share/defaults/kata-containers/confi
 $ $EDITOR /etc/kata-containers/configuration.toml
 ```

-## Integration with non-compatible shim v2 Container Engines
-
-At the time of writing this document, `docker` and `podman` **do not support Kata
-Containers 2.x, therefore Kata Containers 1.x must be used instead.**
-
-The path to the runtime provided by the Kata Containers 1.x snap package is
-`/snap/bin/kata-containers.runtime`, it should be used to run Kata Containers 1.x.
-
-### Integration with Docker
-
-`/etc/docker/daemon.json` is the configuration file for `docker`, use the
-following configuration to add a new runtime (`kata`) to `docker`.
-
-```json
-{
-  "runtimes": {
-    "kata": {
-      "path": "/snap/bin/kata-containers.runtime"
-    }
-  }
-}
-```
-
-Once the above configuration has been applied, use the
-following commands to restart `docker` and run Kata Containers 1.x.
-
-```sh
-$ sudo systemctl restart docker
-$ docker run -ti --runtime kata busybox sh
-```
-
-### Integration with Podman
-
-`/usr/share/containers/containers.conf` is the configuration file for `podman`,
-add the following configuration in the `[engine.runtimes]` section.
-
-```toml
-kata = [
-   "/snap/bin/kata-containers.runtime"
-]
-```
-
-Once the above configuration has been applied, use the following command to run
-Kata Containers 1.x with `podman`
-
-```sh
-$ sudo podman run -ti --runtime kata docker.io/library/busybox sh
-```
-
 ## Integration with shim v2 Container Engines

 The Container engine daemon (`cri-o`, `containerd`, etc) needs to be able to find the
@@ -110,8 +45,8 @@ can be used as runtime.

 Read the following documents to know how to run Kata Containers 2.x with `containerd`.

-* [How to use Kata Containers and Containerd](https://github.com/kata-containers/kata-containers/blob/2.0-dev/docs/how-to/containerd-kata.md)
-* [Install Kata Containers with containerd](https://github.com/kata-containers/kata-containers/blob/2.0-dev/docs/install/container-manager/containerd/containerd-install.md)
+* [How to use Kata Containers and Containerd](https://github.com/kata-containers/kata-containers/blob/main/docs/how-to/containerd-kata.md)
+* [Install Kata Containers with containerd](https://github.com/kata-containers/kata-containers/blob/main/docs/install/container-manager/containerd/containerd-install.md)


 ## Remove Kata Containers snap package
--- a/docs/install/ubuntu-installation-guide.md
+++ b/docs/install/ubuntu-installation-guide.md
@@ -1,15 +0,0 @@
-# Install Kata Containers on Ubuntu
-
-1. Install the Kata Containers components with the following commands:
-
-   ```bash
-   $ ARCH=$(arch)
-   $ BRANCH="${BRANCH:-master}"
-   $ sudo sh -c "echo 'deb http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/xUbuntu_$(lsb_release -rs)/ /' > /etc/apt/sources.list.d/kata-containers.list"
-   $ curl -sL  http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/xUbuntu_$(lsb_release -rs)/Release.key | sudo apt-key add -
-   $ sudo -E apt-get update
-   $ sudo -E apt-get -y install kata-runtime kata-proxy kata-shim
-   ```
-
-2. Decide which container manager to use and select the corresponding link that follows:
-   - [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)
--- a/docs/use-cases/using-Intel-QAT-and-kata.md
+++ b/docs/use-cases/using-Intel-QAT-and-kata.md
@@ -1,56 +1,62 @@
 # Table of Contents

-* [Table of Contents](#table-of-contents)
-* [Introduction](#introduction)
-    * [Helpful Links before starting](#helpful-links-before-starting)
-    * [Steps to enable Intel QAT in Kata Containers](#steps-to-enable-intel-qat-in-kata-containers)
-    * [Script variables](#script-variables)
-        * [Set environment variables (Every Reboot)](#set-environment-variables-every-reboot)
-    * [Prepare the Clear Linux Host](#prepare-the-clear-linux-host)
-        * [Identify which PCI Bus the Intel QAT card is on](#identify-which-pci-bus-the-intel-qat-card-is-on)
-        * [Install necessary bundles for Clear Linux](#install-necessary-bundles-for-clear-linux)
-        * [Download Intel QAT drivers](#download-intel-qat-drivers)
-        * [Copy Intel QAT configuration files and enable Virtual Functions](#copy-intel-qat-configuration-files-and-enable-virtual-functions)
-        * [Expose and Bind Intel QAT virtual functions to VFIO-PCI (Every reboot)](#expose-and-bind-intel-qat-virtual-functions-to-vfio-pci-every-reboot)
-        * [Check Intel QAT virtual functions are enabled](#check-intel-qat-virtual-functions-are-enabled)
-    * [Prepare Kata Containers](#prepare-kata-containers)
-        * [Download Kata kernel Source](#download-kata-kernel-source)
-        * [Build Kata kernel](#build-kata-kernel)
-        * [Copy Kata kernel](#copy-kata-kernel)
-        * [Prepare Kata root filesystem](#prepare-kata-root-filesystem)
-        * [Compile Intel QAT drivers for Kata Containers kernel and add to Kata Containers rootfs](#compile-intel-qat-drivers-for-kata-containers-kernel-and-add-to-kata-containers-rootfs)
-        * [Copy Kata rootfs](#copy-kata-rootfs)
-        * [Update Kata configuration to point to custom kernel and rootfs](#update-kata-configuration-to-point-to-custom-kernel-and-rootfs)
-    * [Verify Intel QAT works in a Docker Kata Containers container](#verify-intel-qat-works-in-a-docker-kata-containers-container)
-    * [Build OpenSSL Intel QAT engine container](#build-openssl-intel-qat-engine-container)
-        * [Test Intel QAT in Docker](#test-intel-qat-in-docker)
-        * [Troubleshooting](#troubleshooting)
-    * [Optional Scripts](#optional-scripts)
-        * [Verify Intel QAT card counters are incremented](#verify-intel-qat-card-counters-are-incremented)
+- [Table of Contents](#table-of-contents)
+- [Introduction](#introduction)
+  - [Helpful Links before starting](#helpful-links-before-starting)
+  - [Steps to enable Intel® QAT in Kata Containers](#steps-to-enable-intel-qat-in-kata-containers)
+  - [Script variables](#script-variables)
+    - [Set environment variables (Every Reboot)](#set-environment-variables-every-reboot)
+  - [Prepare the Ubuntu Host](#prepare-the-ubuntu-host)
+    - [Identify which PCI Bus the Intel® QAT card is on](#identify-which-pci-bus-the-intel-qat-card-is-on)
+    - [Install necessary packages for Ubuntu](#install-necessary-packages-for-ubuntu)
+    - [Download Intel® QAT drivers](#download-intel-qat-drivers)
+    - [Copy Intel® QAT configuration files and enable virtual functions](#copy-intel-qat-configuration-files-and-enable-virtual-functions)
+    - [Expose and Bind Intel® QAT virtual functions to VFIO-PCI (Every reboot)](#expose-and-bind-intel-qat-virtual-functions-to-vfio-pci-every-reboot)
+    - [Check Intel® QAT virtual functions are enabled](#check-intel-qat-virtual-functions-are-enabled)
+  - [Prepare Kata Containers](#prepare-kata-containers)
+    - [Download Kata kernel Source](#download-kata-kernel-source)
+    - [Build Kata kernel](#build-kata-kernel)
+    - [Copy Kata kernel](#copy-kata-kernel)
+    - [Prepare Kata root filesystem](#prepare-kata-root-filesystem)
+    - [Compile Intel® QAT drivers for Kata Containers kernel and add to Kata Containers rootfs](#compile-intel-qat-drivers-for-kata-containers-kernel-and-add-to-kata-containers-rootfs)
+    - [Copy Kata rootfs](#copy-kata-rootfs)
+  - [Verify Intel® QAT works in a container](#verify-intel-qat-works-in-a-container)
+    - [Build OpenSSL Intel® QAT engine container](#build-openssl-intel-qat-engine-container)
+    - [Test Intel® QAT with the ctr tool](#test-intel-qat-with-the-ctr-tool)
+    - [Test Intel® QAT in Kubernetes](#test-intel-qat-in-kubernetes)
+    - [Troubleshooting](#troubleshooting)
+  - [Optional Scripts](#optional-scripts)
+    - [Verify Intel® QAT card counters are incremented](#verify-intel-qat-card-counters-are-incremented)

 # Introduction

-Intel QuickAssist Technology (Intel QAT) provides hardware acceleration 
+Intel® QuickAssist Technology (QAT) provides hardware acceleration 
 for security (cryptography) and compression. These instructions cover the 
-steps for [Clear Linux](https://clearlinux.org) but can be adapted to any 
-Linux distribution. Your distribution may already have the Intel QAT 
-drivers, but it is likely they do not contain the necessary user space 
-components. These instructions guide the user on how to download the kernel 
-sources, compile kernel driver modules against those sources, and load them 
-onto the host as well as preparing a specially built Kata Containers kernel 
-and custom Kata Containers rootfs.  
+steps for the latest [Ubuntu LTS release](https://ubuntu.com/download/desktop) 
+which already includes the QAT host driver. These instructions can be adapted to 
+any Linux distribution. These instructions guide the user on how to download 
+the kernel sources, compile kernel driver modules against those sources, and 
+load them onto the host as well as preparing a specially built Kata Containers 
+kernel and custom Kata Containers rootfs.
+
+* Download kernel sources
+* Compile Kata kernel
+* Compile kernel driver modules against those sources
+* Download rootfs
+* Add driver modules to rootfs
+* Build rootfs image 

 ## Helpful Links before starting

-[Intel QAT Engine](https://github.com/intel/QAT_Engine)
+[Intel® QuickAssist Technology at `01.org`](https://01.org/intel-quickassist-technology)

-[Intel QuickAssist Technology at `01.org`](https://01.org/intel-quickassist-technology)
+[Intel® QuickAssist Technology Engine for OpenSSL](https://github.com/intel/QAT_Engine)

 [Intel Device Plugin for Kubernetes](https://github.com/intel/intel-device-plugins-for-kubernetes)

-[Intel QuickAssist Crypto Poll Mode Driver](https://dpdk-docs.readthedocs.io/en/latest/cryptodevs/qat.html)
+[Intel® QuickAssist Technology for Crypto Poll Mode Driver](https://dpdk-docs.readthedocs.io/en/latest/cryptodevs/qat.html)

-## Steps to enable Intel QAT in Kata Containers
+## Steps to enable Intel® QAT in Kata Containers

 There are some steps to complete only once, some steps to complete with every
 reboot, and some steps to complete when the host kernel changes.
@@ -67,91 +73,95 @@ needed to point to updated drivers or different install locations.
 Make sure to check [`01.org`](https://01.org/intel-quickassist-technology) for 
 the latest driver.

-```sh
-$ export QAT_DRIVER_VER=qat1.7.l.4.8.0-00005.tar.gz 
-$ export QAT_DRIVER_URL=https://01.org/sites/default/files/downloads/${QAT_DRIVER_VER}
+```bash
+$ export QAT_DRIVER_VER=qat1.7.l.4.12.0-00011.tar.gz
+$ export QAT_DRIVER_URL=https://downloadmirror.intel.com/30178/eng/${QAT_DRIVER_VER}
 $ export QAT_CONF_LOCATION=~/QAT_conf
 $ export QAT_DOCKERFILE=https://raw.githubusercontent.com/intel/intel-device-plugins-for-kubernetes/master/demo/openssl-qat-engine/Dockerfile
 $ export QAT_SRC=~/src/QAT
 $ export GOPATH=~/src/go
-$ export OSBUILDER=~/src/osbuilder
 $ export KATA_KERNEL_LOCATION=~/kata
 $ export KATA_ROOTFS_LOCATION=~/kata
 ```

-## Prepare the Clear Linux Host
+## Prepare the Ubuntu Host

 The host could be a bare metal instance or a virtual machine. If using a 
 virtual machine, make sure that KVM nesting is enabled. The following 
-instructions reference an Intel QAT. Some of the instructions must be 
-modified if using a different Intel QAT device. You can identify the Intel QAT
-chipset by executing the following.
+instructions reference an Intel® C62X chipset. Some of the instructions must be 
+modified if using a different Intel® QAT device. The Intel® QAT chipset can be
+identified by executing the following.

-### Identify which PCI Bus the Intel QAT card is on
+### Identify which PCI Bus the Intel® QAT card is on

-```sh
+```bash
 $ for i in 0434 0435 37c8 1f18 1f19; do lspci -d 8086:$i; done
 ```

-### Install necessary bundles for Clear Linux
+### Install necessary packages for Ubuntu

-Clear Linux version 30780 (Released August 13, 2019) includes a 
-`linux-firmware-qat` bundle that has the necessary QAT firmware along with a
-functional QAT host driver that works with Kata Containers. 
+These packages are necessary to compile the Kata kernel, Intel® QAT driver, and to
+prepare the rootfs for Kata. [Docker](https://docs.docker.com/engine/install/ubuntu/)
+also needs to be installed to be able to build the rootfs. To test that 
+everything works a Kubernetes pod is started requesting Intel® QAT resources. For the
+pass through of the virtual functions the kernel boot parameter needs to have
+`intel_iommu=on`.

-```sh
-$ sudo swupd bundle-add network-basic linux-firmware-qat make c-basic go-basic containers-virt dev-utils devpkg-elfutils devpkg-systemd devpkg-ssl
-$ sudo clr-boot-manager update
-$ sudo systemctl enable --now docker
+```bash
+$ sudo apt update
+$ sudo apt install -y golang-go build-essential python pkg-config zlib1g-dev libudev-dev bison libelf-dev flex libtool automake autotools-dev autoconf bc libpixman-1-dev coreutils libssl-dev
+$ sudo sed -i 's/GRUB_CMDLINE_LINUX_DEFAULT=""/GRUB_CMDLINE_LINUX_DEFAULT="intel_iommu=on"/' /etc/default/grub
+$ sudo update-grub
 $ sudo reboot
 ```

-### Download Intel QAT drivers
+### Download Intel® QAT drivers

-This will download the Intel QAT drivers from [`01.org`](https://01.org/intel-quickassist-technology). 
+This will download the [Intel® QAT drivers](https://01.org/intel-quickassist-technology). 
 Make sure to check the website for the latest version.

-```sh
+```bash
 $ mkdir -p $QAT_SRC
 $ cd $QAT_SRC
 $ curl -L $QAT_DRIVER_URL | tar zx
 ```

-### Copy Intel QAT configuration files and enable Virtual Functions
+### Copy Intel® QAT configuration files and enable virtual functions

-Modify the instructions below as necessary if using a different QAT hardware 
+Modify the instructions below as necessary if using a different Intel® QAT hardware 
 platform. You can learn more about customizing configuration files at the 
-[Intel QAT Engine repository](https://github.com/intel/QAT_Engine/#copy-the-correct-intel-quickassist-technology-driver-config-files)
+[Intel® QAT Engine repository](https://github.com/intel/QAT_Engine/#copy-the-correct-intel-quickassist-technology-driver-config-files)
 This section starts from a base config file and changes the `SSL` section to 
 `SHIM` to support the OpenSSL engine. There are more tweaks that you can make
-depending on the use case and how many Intel QAT engines should be run. You
+depending on the use case and how many Intel® QAT engines should be run. You
 can find more information about how to customize in the 
 [Intel® QuickAssist Technology Software for Linux* - Programmer's Guide.](https://01.org/sites/default/files/downloads/336210qatswprogrammersguiderev006.pdf) 

-> **Note: This section assumes that a QAT `c6xx` platform is used.**
+> **Note: This section assumes that an Intel® QAT `c6xx` platform is used.**

-```sh
+```bash
 $ mkdir -p $QAT_CONF_LOCATION
 $ cp $QAT_SRC/quickassist/utilities/adf_ctl/conf_files/c6xxvf_dev0.conf.vm $QAT_CONF_LOCATION/c6xxvf_dev0.conf
 $ sed -i 's/\[SSL\]/\[SHIM\]/g' $QAT_CONF_LOCATION/c6xxvf_dev0.conf
 ```

-### Expose and Bind Intel QAT virtual functions to VFIO-PCI (Every reboot)
+### Expose and Bind Intel® QAT virtual functions to VFIO-PCI (Every reboot)

 To enable virtual functions, the host OS should have IOMMU groups enabled. In 
-the UEFI Firmware Intel Virtualization Technology for Directed I/O 
-(Intel VT-d) must be enabled. Also, the kernel boot parameter should be 
-`intel_iommu=on` or `intel_iommu=ifgx_off`. The default in Clear Linux currently 
-is `intel_iommu=igfx_off` which should work with the Intel QAT device. The 
-following commands assume you installed an Intel QAT card, IOMMU is on, and
+the UEFI Firmware Intel® Virtualization Technology for Directed I/O 
+(Intel® VT-d) must be enabled. Also, the kernel boot parameter should be 
+`intel_iommu=on` or `intel_iommu=igfx_off`. This should have been set from
+the instructions above. Check the output of `/proc/cmdline` to confirm. The 
+following commands assume you installed an Intel® QAT card, IOMMU is on, and
 VT-d is enabled. The vendor and device ID add to the `VFIO-PCI` driver so that
 each exposed virtual function can be bound to the `VFIO-PCI` driver. Once
 complete, each virtual function passes into a Kata Containers container using
-the PCIe device passthrough feature. For Kubernetes, the Intel device plugin
-for Kubernetes handles the binding of the driver but the VF’s still must be
+the PCIe device passthrough feature. For Kubernetes, the 
+[Intel device plugin](https://github.com/intel/intel-device-plugins-for-kubernetes)
+for Kubernetes handles the binding of the driver, but the VF’s still must be
 enabled.

-```sh
+```bash
 $ sudo modprobe vfio-pci
 $ QAT_PCI_BUS_PF_NUMBERS=$((lspci -d :435 && lspci -d :37c8 && lspci -d :19e2 && lspci -d :6f54) | cut -d ' ' -f 1)
 $ QAT_PCI_BUS_PF_1=$(echo $QAT_PCI_BUS_PF_NUMBERS | cut -d ' ' -f 1)
@@ -160,8 +170,10 @@ $ QAT_PCI_ID_VF=$(cat /sys/bus/pci/devices/0000:${QAT_PCI_BUS_PF_1}/virtfn0/ueve
 $ QAT_VENDOR_AND_ID_VF=$(echo ${QAT_PCI_ID_VF/PCI_ID=} | sed 's/:/ /')
 $ echo $QAT_VENDOR_AND_ID_VF | sudo tee --append /sys/bus/pci/drivers/vfio-pci/new_id
 ```
+
 Loop through all the virtual functions and bind to the VFIO driver
-```sh
+
+```bash
 $ for f in /sys/bus/pci/devices/0000:$QAT_PCI_BUS_PF_1/virtfn*
  do QAT_PCI_BUS_VF=$(basename $(readlink $f))
   echo $QAT_PCI_BUS_VF | sudo tee --append /sys/bus/pci/drivers/c6xxvf/unbind
@@ -169,22 +181,23 @@ $ for f in /sys/bus/pci/devices/0000:$QAT_PCI_BUS_PF_1/virtfn*
  done
 ```

-### Check Intel QAT virtual functions are enabled
+### Check Intel® QAT virtual functions are enabled

 If the following command returns empty, then the virtual functions are not 
 properly enabled. This command checks the enumerated device IDs for just the 
-virtual functions. Using the Intel QAT as an example, the physical device ID 
+virtual functions. Using the Intel® QAT as an example, the physical device ID 
 is `37c8` and virtual function device ID is `37c9`. The following command checks 
-if VF's are enabled for any of the currently known Intel QAT device ID's. The
+if VF's are enabled for any of the currently known Intel® QAT device ID's. The
 following `ls` command should show the 16 VF's bound to `VFIO-PCI`.

-```sh
+```bash
 $ for i in 0442 0443 37c9 19e3; do lspci -d 8086:$i; done
 ```

 Another way to check is to see what PCI devices that `VFIO-PCI` is mapped to.
 It should match the device ID's of the VF's.
-```sh
+
+```bash
 $ ls -la /sys/bus/pci/drivers/vfio-pci
 ```

@@ -201,16 +214,16 @@ There are some patches that must be installed as well, which the
 `build-kernel.sh` script should automatically apply. If you are using a
 different kernel version, then you might need to manually apply them. Since
 the Kata Containers kernel has a minimal set of kernel flags set, you must
-create a QAT kernel fragment with the necessary `CONFIG_CRYPTO_*` options set.
+create an Intel® QAT kernel fragment with the necessary `CONFIG_CRYPTO_*` options set.
 Update the config to set some of the `CRYPTO` flags to enabled. This might
-change with different kernel versions. We tested the following instructions
-with kernel `v4.19.28-41`.
+change with different kernel versions. The following instructions were tested
+with kernel `v5.4.0-64-generic`.

-```sh
+```bash
 $ mkdir -p $GOPATH
 $ cd $GOPATH
-$ go get -v github.com/kata-containers/packaging
-$ cat << EOF > $GOPATH/src/github.com/kata-containers/packaging/kernel/configs/fragments/common/qat.conf
+$ go get -v github.com/kata-containers/kata-containers
+$ cat << EOF > $GOPATH/src/github.com/kata-containers/kata-containers/tools/packaging/kernel/configs/fragments/common/qat.conf
 CONFIG_PCIEAER=y
 CONFIG_UIO=y
 CONFIG_CRYPTO_HW=y
@@ -221,61 +234,70 @@ CONFIG_MODULE_SIG=y
 CONFIG_CRYPTO_AUTHENC=y
 CONFIG_CRYPTO_DH=y
 EOF
-$ $GOPATH/src/github.com/kata-containers/packaging/kernel/build-kernel.sh setup
+$ $GOPATH/src/github.com/kata-containers/kata-containers/tools/packaging/kernel/build-kernel.sh setup
 ```

 ### Build Kata kernel

-```sh
-$ export LINUX_VER=$(ls -d kata*)
+```bash
+$ cd $GOPATH
+$ export LINUX_VER=$(ls -d kata-linux-*)
 $ sed -i 's/EXTRAVERSION =/EXTRAVERSION = .qat.container/' $LINUX_VER/Makefile
-$ $GOPATH/src/github.com/kata-containers/packaging/kernel/build-kernel.sh build
+$ $GOPATH/src/github.com/kata-containers/kata-containers/tools/packaging/kernel/build-kernel.sh build
 ```

-
 ### Copy Kata kernel

-```sh
+```bash
+$ export KATA_KERNEL_NAME=vmlinux-${LINUX_VER}_qat
 $ mkdir -p $KATA_KERNEL_LOCATION
-$ cp $LINUX_VER/arch/x86/boot/bzImage $KATA_KERNEL_LOCATION/vmlinuz-${LINUX_VER}_qat
+$ cp ${GOPATH}/${LINUX_VER}/vmlinux ${KATA_KERNEL_LOCATION}/${KATA_KERNEL_NAME}
 ```

 ### Prepare Kata root filesystem

 These instructions build upon the OS builder instructions located in the 
-[Developer Guide](../Developer-Guide.md). The following instructions use Clear
-Linux (Kata Containers default) as the root filesystem with systemd as the 
-init and will add in the `kmod` binary, which is not a standard binary in a 
-Kata rootfs image. The `kmod` binary is necessary to load the QAT kernel 
-modules when the virtual machine rootfs boots. You should install Docker on
-your system before running the following commands. If you need to use a custom 
-`kata-agent`, then refer to the previous link on how to add it in.
+[Developer Guide](../Developer-Guide.md). At this point it is recommended that
+[Docker](https://docs.docker.com/engine/install/ubuntu/) is installed first, and
+then [Kata-deploy](https://github.com/kata-containers/kata-containers/tree/main/tools/packaging/kata-deploy)
+is used to install Kata. This will make sure that the correct `agent` version 
+is installed into the rootfs in the steps below.

-```sh
-$ mkdir -p $OSBUILDER
-$ cd $OSBUILDER
-$ git clone https://github.com/kata-containers/osbuilder.git
-$ export ROOTFS_DIR=${OSBUILDER}/osbuilder/rootfs-builder/rootfs
+The following instructions use Debian as the root filesystem with systemd as 
+the init and will add in the `kmod` binary, which is not a standard binary in 
+a Kata rootfs image. The `kmod` binary is necessary to load the Intel® QAT 
+kernel modules when the virtual machine rootfs boots. 
+
+```bash
+$ export OSBUILDER=$GOPATH/src/github.com/kata-containers/kata-containers/tools/osbuilder
+$ export ROOTFS_DIR=${OSBUILDER}/rootfs-builder/rootfs
 $ export EXTRA_PKGS='kmod'
 ```
+
 Make sure that the `kata-agent` version matches the installed `kata-runtime`
-version.
-```sh
+version. Also make sure the `kata-runtime` install location is in your `PATH` 
+variable. The following `AGENT_VERSION` can be set manually to match
+the `kata-runtime` version if the following commands don't work.
+
+```bash
+$ export PATH=$PATH:/opt/kata/bin
+$ cd $GOPATH
 $ export AGENT_VERSION=$(kata-runtime version | head -n 1 | grep -o "[0-9.]\+")
-$ cd ${OSBUILDER}/osbuilder/rootfs-builder
+$ cd ${OSBUILDER}/rootfs-builder
 $ sudo rm -rf ${ROOTFS_DIR}
-$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true SECCOMP=no ./rootfs.sh clearlinux'
+$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true SECCOMP=no ./rootfs.sh debian'
 ```

-### Compile Intel QAT drivers for Kata Containers kernel and add to Kata Containers rootfs
+### Compile Intel® QAT drivers for Kata Containers kernel and add to Kata Containers rootfs

 After the Kata Containers kernel builds with the proper configuration flags, 
-you must build the Intel QAT drivers against that Kata Containers kernel
+you must build the Intel® QAT drivers against that Kata Containers kernel
 version in a similar way they were previously built for the host OS. You must 
 set the `KERNEL_SOURCE_ROOT` variable to the Kata Containers kernel source 
-directory and build the Intel QAT drivers again.
+directory and build the Intel® QAT drivers again. The `make` command will
+install the Intel® QAT modules into the Kata rootfs.

-```sh
+```bash
 $ cd $GOPATH
 $ export LINUX_VER=$(ls -d kata*)
 $ export KERNEL_MAJOR_VERSION=$(awk '/^VERSION =/{print $NF}' $GOPATH/$LINUX_VER/Makefile)
@@ -284,16 +306,18 @@ $ export KERNEL_SUBLEVEL=$(awk '/^SUBLEVEL =/{print $NF}' $GOPATH/$LINUX_VER/Mak
 $ export KERNEL_EXTRAVERSION=$(awk '/^EXTRAVERSION =/{print $NF}' $GOPATH/$LINUX_VER/Makefile)
 $ export KERNEL_ROOTFS_DIR=${KERNEL_MAJOR_VERSION}.${KERNEL_PATHLEVEL}.${KERNEL_SUBLEVEL}${KERNEL_EXTRAVERSION}
 $ cd $QAT_SRC
-$ KERNEL_SOURCE_ROOT=$GOPATH/$LINUX_VER ./configure --disable-qat-lkcf --enable-icp-sriov=guest
+$ KERNEL_SOURCE_ROOT=$GOPATH/$LINUX_VER ./configure --enable-icp-sriov=guest
 $ sudo -E make all -j$(nproc)
 $ sudo -E make INSTALL_MOD_PATH=$ROOTFS_DIR qat-driver-install -j$(nproc)
 ```
+
 The `usdm_drv` module also needs to be copied into the rootfs modules path and
 `depmod` should be run. 
-```sh
-$ sudo cp $QAT_SRC/build/usdm_drv.ko $ROOTFS_DIR/usr/lib/modules/${KERNEL_ROOTFS_DIR}/updates/drivers  
+
+```bash
+$ sudo cp $QAT_SRC/build/usdm_drv.ko $ROOTFS_DIR/lib/modules/${KERNEL_ROOTFS_DIR}/updates/drivers  
 $ sudo depmod -a -b ${ROOTFS_DIR} ${KERNEL_ROOTFS_DIR}
-$ cd ${OSBUILDER}/osbuilder/image-builder
+$ cd ${OSBUILDER}/image-builder
 $ script -fec 'sudo -E USE_DOCKER=true ./image_builder.sh ${ROOTFS_DIR}'
 ```

@@ -302,84 +326,225 @@ $ script -fec 'sudo -E USE_DOCKER=true ./image_builder.sh ${ROOTFS_DIR}'

 ### Copy Kata rootfs

-```sh
+```bash
 $ mkdir -p $KATA_ROOTFS_LOCATION
-$ cp ${OSBUILDER}/osbuilder/image-builder/kata-containers.img $KATA_ROOTFS_LOCATION
+$ cp ${OSBUILDER}/image-builder/kata-containers.img $KATA_ROOTFS_LOCATION
 ```

-### Update Kata configuration to point to custom kernel and rootfs
+## Verify Intel® QAT works in a container

-You must update the `configuration.toml` for Kata Containers to point to the 
-custom kernel, custom rootfs, and to specify which modules to load when the 
-virtual machine is booted when a container is run. The following example
-assumes you installed an Intel QAT, and you need to load those modules.
-
-```sh
-$ sudo mkdir -p /etc/kata-containers
-$ sudo cp /usr/share/defaults/kata-containers/configuration-qemu.toml /etc/kata-containers/configuration.toml
-$ sudo sed -i "s|kernel_params = \"\"|kernel_params = \"modules-load=usdm_drv,qat_c62xvf\"|g" /etc/kata-containers/configuration.toml
-$ sudo sed -i "s|\/usr\/share\/kata-containers\/kata-containers.img|${KATA_KERNEL_LOCATION}\/kata-containers.img|g" /etc/kata-containers/configuration.toml
-$ sudo sed -i "s|\/usr\/share\/kata-containers\/vmlinuz.container|${KATA_ROOTFS_LOCATION}\/vmlinuz-${LINUX_VER}_qat|g" /etc/kata-containers/configuration.toml
-```
-
-## Verify Intel QAT works in a Docker Kata Containers container
-
-The following instructions leverage an OpenSSL Dockerfile that builds the 
-Intel QAT engine to allow OpenSSL to offload crypto functions. It is a 
-convenient way to test that VFIO device passthrough for the Intel QAT VF’s are
+The following instructions use an OpenSSL Dockerfile that builds the 
+Intel® QAT engine to allow OpenSSL to offload crypto functions. It is a 
+convenient way to test that VFIO device passthrough for the Intel® QAT VF’s are
 working properly with the Kata Containers VM.

-## Build OpenSSL Intel QAT engine container
+### Build OpenSSL Intel® QAT engine container

-Use the OpenSSL Intel QAT [Dockerfile](https://github.com/intel/intel-device-plugins-for-kubernetes/tree/master/demo/openssl-qat-engine) 
+Use the OpenSSL Intel® QAT [Dockerfile](https://github.com/intel/intel-device-plugins-for-kubernetes/tree/master/demo/openssl-qat-engine) 
 to build a container image with an optimized OpenSSL engine for 
-Intel QAT. Using `docker build` with the Kata Containers runtime can sometimes
-have issues. Therefore, we recommended you change the default runtime to
-`runc` before doing a build. Instructions for this are below.
+Intel® QAT. Using `docker build` with the Kata Containers runtime can sometimes
+have issues. Therefore, make sure that `runc` is the default Docker container 
+runtime.

-```sh
+```bash
 $ cd $QAT_SRC
 $ curl -O $QAT_DOCKERFILE
-$ sudo sed -i 's/kata-runtime/runc/g' /etc/systemd/system/docker.service.d/50-runtime.conf
-$ sudo systemctl daemon-reload && sudo systemctl restart docker
 $ sudo docker build -t openssl-qat-engine .
 ```

-> **Note: The Intel QAT driver version in this container might not match the 
-> Intel QAT driver compiled and loaded on the host when compiling.**
+> **Note: The Intel® QAT driver version in this container might not match the 
+> Intel® QAT driver compiled and loaded on the host when compiling.**

-### Test Intel QAT in Docker
+### Test Intel® QAT with the ctr tool

-The host should already be setup with 16 virtual functions of the Intel QAT 
-card bound to `VFIO-PCI`. Verify this by looking in `/dev/vfio` for a listing
-of devices. Replace the number 90 with one of the VF’s exposed in `/dev/vfio`.
-It might require you to add an `IPC_LOCK` capability to your Docker runtime
-depending on which rootfs you use.
+The `ctr` tool can be used to interact with the containerd daemon. It may be 
+more convenient to use this tool to verify the kernel and image instead of
+setting up a Kubernetes cluster. The correct Kata runtimes need to be added
+to the containerd `config.toml`. Below is a sample snippet that can be added
+to allow QEMU and Cloud Hypervisor (CLH) to work with `ctr`.

-```sh
-$ sudo docker run -it --runtime=kata-runtime --cap-add=IPC_LOCK --cap-add=SYS_ADMIN --device=/dev/vfio/90 -v /dev:/dev -v ${QAT_CONF_LOCATION}:/etc openssl-qat-engine bash
+```
+[plugins.cri.containerd.runtimes.kata-qemu]
+  runtime_type = "io.containerd.kata-qemu.v2"
+  privileged_without_host_devices = true
+  pod_annotations = ["io.katacontainers.*"]
+  [plugins.cri.containerd.runtimes.kata-qemu.options]
+    ConfigPath = "/opt/kata/share/defaults/kata-containers/configuration-qemu.toml"
+[plugins.cri.containerd.runtimes.kata-clh]
+  runtime_type = "io.containerd.kata-clh.v2"
+  privileged_without_host_devices = true
+  pod_annotations = ["io.katacontainers.*"]
+  [plugins.cri.containerd.runtimes.kata-clh.options]
+    ConfigPath = "/opt/kata/share/defaults/kata-containers/configuration-clh.toml"
 ```

-Below are some commands to run in the container image to verify Intel QAT is 
+In addition, containerd expects the shim binary to be in `/usr/local/bin`, so add 
+these small wrapper scripts, which redirect to the Kata shim so that either QEMU or
+Cloud Hypervisor can be used with Kata.
+
+```bash
+$ echo '#!/bin/bash' | sudo tee /usr/local/bin/containerd-shim-kata-qemu-v2
+$ echo 'KATA_CONF_FILE=/opt/kata/share/defaults/kata-containers/configuration-qemu.toml /opt/kata/bin/containerd-shim-kata-v2 $@' | sudo tee -a /usr/local/bin/containerd-shim-kata-qemu-v2
+$ sudo chmod +x /usr/local/bin/containerd-shim-kata-qemu-v2
+$ echo '#!/bin/bash' | sudo tee /usr/local/bin/containerd-shim-kata-clh-v2
+$ echo 'KATA_CONF_FILE=/opt/kata/share/defaults/kata-containers/configuration-clh.toml /opt/kata/bin/containerd-shim-kata-v2 $@' | sudo tee -a /usr/local/bin/containerd-shim-kata-clh-v2
+$ sudo chmod +x /usr/local/bin/containerd-shim-kata-clh-v2
+```
+
+After the OpenSSL image is built and imported into containerd, an Intel® QAT 
+virtual function exposed in the step above can be added to the `ctr` command. 
+Make sure to change the `/dev/vfio` number to one that actually exists on the 
+host system. When using the `ctr` tool, the `configuration.toml` for Kata needs 
+to point to the custom Kata kernel and rootfs built above and the Intel® QAT 
+modules in the Kata rootfs need to load at boot. The following steps assume that 
+`kata-deploy` was used to install Kata and QEMU is being tested. If using a 
+different hypervisor, different install method for Kata, or a different 
+Intel® QAT chipset then the command will need to be modified. 
+
+> **Note: The following was tested with 
+[containerd v1.3.9](https://github.com/containerd/containerd/releases/tag/v1.3.9).**
+
+```bash
+$ config_file="/opt/kata/share/defaults/kata-containers/configuration-qemu.toml"
+$ sudo sed -i "/kernel =/c kernel = "\"${KATA_KERNEL_LOCATION}/${KATA_KERNEL_NAME}\""" $config_file
+$ sudo sed -i "/image =/c image = "\"${KATA_ROOTFS_LOCATION}/kata-containers.img\""" $config_file
+$ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 modules-load=usdm_drv,qat_c62xvf"/g' $config_file 
+$ sudo docker save -o openssl-qat-engine.tar openssl-qat-engine:latest
+$ sudo ctr images import openssl-qat-engine.tar
+$ sudo ctr run --runtime io.containerd.run.kata-qemu.v2 --privileged -t --rm --device=/dev/vfio/180 --mount type=bind,src=/dev,dst=/dev,options=rbind:rw --mount type=bind,src=${QAT_CONF_LOCATION}/c6xxvf_dev0.conf,dst=/etc/c6xxvf_dev0.conf,options=rbind:rw  docker.io/library/openssl-qat-engine:latest bash
+```
+
+Below are some commands to run in the container image to verify Intel® QAT is 
 working

 ```sh
-bash-5.0# cat /proc/modules
-bash-5.0# adf_ctl restart
-bash-5.0# adf_ctl status
-bash-5.0# openssl engine -c -t qat
+root@67561dc2757a/ # cat /proc/modules
+qat_c62xvf 16384 - - Live 0xffffffffc00d9000 (OE)
+usdm_drv 86016 - - Live 0xffffffffc00e8000 (OE)
+intel_qat 249856 - - Live 0xffffffffc009b000 (OE)
+
+root@67561dc2757a/ # adf_ctl restart
+Restarting all devices.
+Processing /etc/c6xxvf_dev0.conf
+
+root@67561dc2757a/ # adf_ctl status
+Checking status of all devices.
+There is 1 QAT acceleration device(s) in the system:
+ qat_dev0 - type: c6xxvf,  inst_id: 0,  node_id: 0,  bsf: 0000:01:01.0,  #accel: 1 #engines: 1 state: up
+
+root@67561dc2757a/ # openssl engine -c -t qat-hw
+(qat-hw) Reference implementation of QAT crypto engine v0.6.1
+ [RSA, DSA, DH, AES-128-CBC-HMAC-SHA1, AES-128-CBC-HMAC-SHA256, AES-256-CBC-HMAC-SHA1, AES-256-CBC-HMAC-SHA256, TLS1-PRF, HKDF, X25519, X448]
+     [ available ]
 ```

-Test with Intel QAT card acceleration
+### Test Intel® QAT in Kubernetes

-```sh
-bash-5.0# openssl speed -engine qat -elapsed -async_jobs 72 rsa2048 
+Start a Kubernetes cluster with containerd as the CRI. The host should 
+already be set up with 16 virtual functions of the Intel® QAT card bound to 
+`VFIO-PCI`. Verify this by looking in `/dev/vfio` for a listing of devices. 
+You might need to disable Docker before initializing Kubernetes. Be aware 
+that the OpenSSL container image built above will need to be exported from
+Docker and imported into containerd.
+
+If Kata is installed through [`kata-deploy`](https://github.com/kata-containers/kata-containers/blob/stable-2.0/tools/packaging/kata-deploy/README.md)
+there will be multiple `configuration.toml` files associated with different 
+hypervisors. Rather than add in the custom Kata kernel, Kata rootfs, and 
+kernel modules to each `configuration.toml` as the default, instead use
+[annotations](https://github.com/kata-containers/kata-containers/blob/stable-2.0/docs/how-to/how-to-load-kernel-modules-with-kata.md)
+in the Kubernetes YAML file to tell Kata which kernel and rootfs to use. The 
+easy way to do this is to use `kata-deploy` which will install the Kata binaries
+to `/opt` and properly configure the `/etc/containerd/config.toml` with annotation 
+support. However, the `configuration.toml` needs to enable support for
+annotations as well. The following configures both QEMU and Cloud Hypervisor
+`configuration.toml` files that are currently available with Kata Containers 
+versions 2.0 and higher.
+
+```bash
+$ sudo sed -i 's/enable_annotations\s=\s\[\]/enable_annotations = [".*"]/' /opt/kata/share/defaults/kata-containers/configuration-qemu.toml
+$ sudo sed -i 's/enable_annotations\s=\s\[\]/enable_annotations = [".*"]/' /opt/kata/share/defaults/kata-containers/configuration-clh.toml
 ```

-Test with CPU acceleration
+Export the OpenSSL image from Docker and import into containerd.
+
+```bash
+$ sudo docker save -o openssl-qat-engine.tar openssl-qat-engine:latest
+$ sudo ctr -n=k8s.io images import openssl-qat-engine.tar
+```
+
+The [Intel® QAT Plugin](https://github.com/intel/intel-device-plugins-for-kubernetes/blob/master/cmd/qat_plugin/README.md)
+needs to be started so that the virtual functions can be discovered and
+used by Kubernetes. 
+
+The following YAML file can be used to start a Kata container with Intel® QAT
+support. If Kata is installed with `kata-deploy`, then the containerd 
+`config.toml` should have all of the Kata runtime classes already 
+populated and annotations supported. To use an Intel® QAT virtual function, the 
+Intel® QAT plugin needs to be started after the VF's are bound to `VFIO-PCI` as 
+described [above](#expose-and-bind-intel-qat-virtual-functions-to-vfio-pci-every-reboot). 
+Edit the following to point to the correct Kata kernel and rootfs location 
+built with Intel® QAT support.
+
+```bash
+$ cat << EOF > kata-openssl-qat.yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  name: kata-openssl-qat
+  labels:
+    app: kata-openssl-qat
+  annotations:
+    io.katacontainers.config.hypervisor.kernel: "$KATA_KERNEL_LOCATION/$KATA_KERNEL_NAME"
+    io.katacontainers.config.hypervisor.image: "$KATA_ROOTFS_LOCATION/kata-containers.img"
+    io.katacontainers.config.hypervisor.kernel_params: "modules-load=usdm_drv,qat_c62xvf"
+spec:
+  runtimeClassName: kata-qemu
+  containers:
+  - name: kata-openssl-qat
+    image: docker.io/library/openssl-qat-engine:latest
+    imagePullPolicy: IfNotPresent
+    resources:
+      limits:
+        qat.intel.com/generic: 1
+        cpu: 1
+    securityContext:
+      capabilities:
+        add: ["IPC_LOCK", "SYS_ADMIN"]
+    volumeMounts:
+      - mountPath: /etc/c6xxvf_dev0.conf
+        name: etc-mount
+      - mountPath: /dev
+        name: dev-mount
+  volumes:
+    - name: dev-mount
+      hostPath:
+        path: /dev
+    - name: etc-mount
+      hostPath:
+        path: $QAT_CONF_LOCATION/c6xxvf_dev0.conf
+EOF
+```
+
+Use `kubectl` to start the pod. Verify that Intel® QAT card acceleration is 
+working with the Intel® QAT engine.
+```bash
+$ kubectl apply -f kata-openssl-qat.yaml
+```

 ```sh
-bash-5.0# openssl speed -elapsed rsa2048
+$ kubectl exec -it kata-openssl-qat -- adf_ctl restart
+Restarting all devices.
+Processing /etc/c6xxvf_dev0.conf
+
+$ kubectl exec -it kata-openssl-qat -- adf_ctl status
+Checking status of all devices.
+There is 1 QAT acceleration device(s) in the system:
+ qat_dev0 - type: c6xxvf,  inst_id: 0,  node_id: 0,  bsf: 0000:01:01.0,  #accel: 1 #engines: 1 state: up
+
+$ kubectl exec -it kata-openssl-qat -- openssl engine -c -t qat-hw
+(qat-hw) Reference implementation of QAT crypto engine v0.6.1
+ [RSA, DSA, DH, AES-128-CBC-HMAC-SHA1, AES-128-CBC-HMAC-SHA256, AES-256-CBC-HMAC-SHA1, AES-256-CBC-HMAC-SHA256, TLS1-PRF, HKDF, X25519, X448]
+     [ available ]
 ```

 ### Troubleshooting
@@ -412,9 +577,9 @@ c6xxvf_dev10.conf  c6xxvf_dev13.conf  c6xxvf_dev2.conf   c6xxvf_dev5.conf c6xxvf
 ```

 * Check `dmesg` inside the container to see if there are any issues with the 
-Intel QAT driver.
+Intel® QAT driver.

-* If there are issues building the OpenSSL Intel QAT container image, then 
+* If there are issues building the OpenSSL Intel® QAT container image, then 
 check to make sure that runc is the default runtime for building container.

 ```sh
@@ -425,17 +590,18 @@ Environment="DOCKER_DEFAULT_RUNTIME=--default-runtime runc"

 ## Optional Scripts

-### Verify Intel QAT card counters are incremented
+### Verify Intel® QAT card counters are incremented

-Use the `lspci` command to figure out which PCI bus the Intel QAT accelerators
-are on. The counters will increase when the accelerator is actively being
-used. To verify QAT is actively accelerating the containerized application,
-use the following instructions to check if any of the counters are
-incrementing. You will have to change the PCI device ID to match your system.
+To check the built-in firmware counters, the Intel® QAT driver has to be compiled 
+and installed on the host; the built-in host driver cannot be relied on. The 
+counters will increase when the accelerator is actively being used. To verify 
+Intel® QAT is actively accelerating the containerized application, use the 
+following instructions to check if any of the counters increment. Make 
+sure to change the PCI Device ID to match what's in the system.

-```sh
+```bash
 $ for i in 0434 0435 37c8 1f18 1f19; do lspci -d 8086:$i; done
 $ sudo watch cat /sys/kernel/debug/qat_c6xx_0000\:b1\:00.0/fw_counters
 $ sudo watch cat /sys/kernel/debug/qat_c6xx_0000\:b3\:00.0/fw_counters
 $ sudo watch cat /sys/kernel/debug/qat_c6xx_0000\:b5\:00.0/fw_counters
-```
+```
--- a/docs/use-cases/zun_kata.md
+++ b/docs/use-cases/zun_kata.md
@@ -10,9 +10,6 @@ Currently, the instructions are based on the following links:

 - https://docs.openstack.org/zun/latest/admin/clear-containers.html

- ../install/ubuntu-installation-guide.md
-
-
 ## Install Git to use with DevStack

 ```sh
@@ -54,7 +51,7 @@ $ zun delete test

 ## Install Kata Containers

-Follow [these instructions](../install/ubuntu-installation-guide.md)
+Follow [these instructions](../install/README.md)
 to install the Kata Containers components.

 ## Update Docker with new Kata Containers runtime
--- a/pkg/logging/src/lib.rs
+++ b/pkg/logging/src/lib.rs
@@ -21,7 +21,12 @@ const LOG_LEVELS: &[(&str, slog::Level)] = &[
 ];

 // XXX: 'writer' param used to make testing possible.
-pub fn create_logger<W>(name: &str, source: &str, level: slog::Level, writer: W) -> slog::Logger
+pub fn create_logger<W>(
+    name: &str,
+    source: &str,
+    level: slog::Level,
+    writer: W,
+) -> (slog::Logger, slog_async::AsyncGuard)
 where
    W: Write + Send + Sync + 'static,
 {
@@ -37,17 +42,21 @@ where
    let filter_drain = RuntimeLevelFilter::new(unique_drain, level).fuse();

    // Ensure the logger is thread-safe
-    let async_drain = slog_async::Async::new(filter_drain).build().fuse();
+    let (async_drain, guard) = slog_async::Async::new(filter_drain)
+        .thread_name("slog-async-logger".into())
+        .build_with_guard();

    // Add some "standard" fields
-    slog::Logger::root(
+    let logger = slog::Logger::root(
        async_drain.fuse(),
        o!("version" => env!("CARGO_PKG_VERSION"),
            "subsystem" => "root",
            "pid" => process::id().to_string(),
            "name" => name.to_string(),
            "source" => source.to_string()),
-    )
+    );
+
+    (logger, guard)
 }

 pub fn get_log_levels() -> Vec<&'static str> {
--- a/snap/snapcraft.yaml
+++ b/snap/snapcraft.yaml
@@ -69,7 +69,7 @@ parts:
      tar -xf ${tarfile} --strip-components=1

  image:
-    after: [godeps]
+    after: [godeps, qemu, kernel]
    plugin: nil
    build-packages:
      - docker.io
@@ -89,6 +89,8 @@ parts:
      export GOROOT=${SNAPCRAFT_STAGE}
      export PATH="${GOROOT}/bin:${PATH}"

+      http_proxy=${http_proxy:-""}
+      https_proxy=${https_proxy:-""}
      if [ -n "$http_proxy" ]; then
        echo "Setting proxy $http_proxy"
        sudo -E systemctl set-environment http_proxy=$http_proxy || true
@@ -169,7 +171,7 @@ parts:
      fi

  kernel:
-    after: [godeps, image]
+    after: [godeps]
    plugin: nil
    build-packages:
      - libelf-dev
@@ -183,8 +185,8 @@ parts:

      cd ${kata_dir}/tools/packaging/kernel

-      # Say 'no' to everithing, fix issues with incomplete .config files
-      yes "n" | ./build-kernel.sh setup
+      # Setup and build kernel
+      ./build-kernel.sh -d setup
      kernel_dir_prefix="kata-linux-"
      cd ${kernel_dir_prefix}*
      version=$(basename ${PWD} | sed 's|'"${kernel_dir_prefix}"'||' | cut -d- -f1)
@@ -206,7 +208,7 @@ parts:

  qemu:
    plugin: make
-    after: [godeps, runtime]
+    after: [godeps]
    build-packages:
      - gcc
      - python3
@@ -226,6 +228,7 @@ parts:
      - libffi-dev
      - libmount-dev
      - libselinux1-dev
+      - ninja-build
    override-build: |
      yq=${SNAPCRAFT_STAGE}/yq
      export GOPATH=${SNAPCRAFT_STAGE}/gopath
@@ -242,10 +245,11 @@ parts:
        ;;

        *)
-          branch="$(${yq} r ${versions_file} assets.hypervisor.qemu.tag)"
+          branch="$(${yq} r ${versions_file} assets.hypervisor.qemu.version)"
          url="$(${yq} r ${versions_file} assets.hypervisor.qemu.url)"
          commit=""
          patches_dir="${kata_dir}/tools/packaging/qemu/patches/$(echo ${branch} | sed -e 's/.[[:digit:]]*$//' -e 's/^v//').x"
+          patches_version_dir="${kata_dir}/tools/packaging/qemu/patches/tag_patches/${branch}"
        ;;
      esac

@@ -258,31 +262,23 @@ parts:
      [ -n "$(ls -A ui/keycodemapdb)" ] || git clone https://github.com/qemu/keycodemapdb ui/keycodemapdb/
      [ -n "$(ls -A capstone)" ] || git clone https://github.com/qemu/capstone capstone

-      # Apply patches
-      for patch in ${patches_dir}/*.patch; do
-        echo "Applying $(basename "$patch") ..."
-        patch \
-          --batch \
-          --forward \
-          --strip 1 \
-          --input "$patch"
-      done
+      # Apply branch patches
+      ${kata_dir}/tools/packaging/scripts/apply_patches.sh "${patches_dir}"
+      ${kata_dir}/tools/packaging/scripts/apply_patches.sh "${patches_version_dir}"

      # Only x86_64 supports libpmem
      [ "$(uname -m)" = "x86_64" ] && sudo apt-get --no-install-recommends install -y apt-utils ca-certificates libpmem-dev libseccomp-dev

      configure_hypervisor=${kata_dir}/tools/packaging/scripts/configure-hypervisor.sh
      chmod +x ${configure_hypervisor}
-      # static build
-      echo "$(${configure_hypervisor} -s qemu) \
-        --disable-rbd
-        --prefix=/snap/${SNAPCRAFT_PROJECT_NAME}/current/usr \
-        --datadir=/snap/${SNAPCRAFT_PROJECT_NAME}/current/usr/share \
-        --libexecdir=/snap/${SNAPCRAFT_PROJECT_NAME}/current/usr/libexec/qemu" \
+      # static build. The --prefix, --libdir, --libexecdir, --datadir arguments are
+      # based on PREFIX and set by configure-hypervisor.sh
+      echo "$(PREFIX=/snap/${SNAPCRAFT_PROJECT_NAME}/current/usr ${configure_hypervisor} -s kata-qemu) \
+        --disable-rbd " \
        | xargs ./configure

      # Copy QEMU configurations (Kconfigs)
-      cp -a ${kata_dir}/tools/packaging/qemu/default-configs/* default-configs/
+      cp -a ${kata_dir}/tools/packaging/qemu/default-configs/* default-configs/devices/

      # build and install
      make -j $(($(nproc)-1))
@@ -293,7 +289,6 @@ parts:
      - -usr/bin/qemu-pr-helper
      - -usr/bin/virtfs-proxy-helper
      - -usr/include/
-      - -usr/libexec/
      - -usr/share/applications/
      - -usr/share/icons/
      - -usr/var/
--- a/src/agent/.gitignore
+++ b/src/agent/.gitignore
@@ -0,0 +1 @@
+tarpaulin-report.html
--- a/src/agent/.rustfmt.toml
+++ b/src/agent/.rustfmt.toml
@@ -0,0 +1 @@
+edition = "2018"
--- a/src/agent/Cargo.lock
+++ b/src/agent/Cargo.lock
--- a/src/agent/Cargo.toml
+++ b/src/agent/Cargo.toml
@@ -9,19 +9,29 @@ oci = { path = "oci" }
 logging = { path = "../../pkg/logging" }
 rustjail = { path = "rustjail" }
 protocols = { path = "protocols" }
-netlink = { path = "netlink", features = ["with-log", "with-agent-handler"] }
 lazy_static = "1.3.0"
-ttrpc = "0.3.0"
+ttrpc = { version = "0.5.0", features = ["async", "protobuf-codec"], default-features = false }
 protobuf = "=2.14.0"
 libc = "0.2.58"
-nix = "0.17.0"
-prctl = "1.0.0"
+nix = "0.21.0"
+capctl = "0.2.0"
 serde_json = "1.0.39"
-signal-hook = "0.1.9"
 scan_fmt = "0.2.3"
 scopeguard = "1.0.0"
 regex = "1"

+async-trait = "0.1.42"
+tokio = { version = "1.2.0", features = ["rt", "rt-multi-thread", "sync", "macros", "io-util", "time", "signal", "io-std", "process", "fs"] }
+futures = "0.3.12"
+netlink-sys = { version = "0.6.0", features = ["tokio_socket",]}
+tokio-vsock = "0.3.1"
+# Because the author has no time to maintain the crate, we switch the dependency to GitHub.
+# Once the new version is released on crates.io, we will switch it back.
+# https://github.com/little-dude/netlink/issues/161
+rtnetlink = { git = "https://github.com/little-dude/netlink", rev = "a9367bc4700496ddebc088110c28f40962923326" }
+netlink-packet-utils = "0.4.0"
+ipnetwork = "0.17.0"
+
 # slog:
 # - Dynamic keys required to allow HashMap keys to be slog::Serialized.
 # - The 'max_*' features allow changing the log level at runtime
@@ -33,16 +43,23 @@ slog-scope = "4.1.2"
 slog-stdlog = "4.0.0"
 log = "0.4.11"

-# for testing
-tempfile = "3.1.0"
 prometheus = { version = "0.9.0", features = ["process"] }
 procfs = "0.7.9"
 anyhow = "1.0.32"
-cgroups = { package = "cgroups-rs", version = "0.2.0" }
+cgroups = { package = "cgroups-rs", version = "0.2.5" }
+
+# Tracing
+tracing = "0.1.26"
+tracing-subscriber = "0.2.18"
+tracing-opentelemetry = "0.13.0"
+opentelemetry = "0.14.0"
+vsock-exporter = { path = "vsock-exporter" }
+
+[dev-dependencies]
+tempfile = "3.1.0"

 [workspace]
 members = [
-    "netlink",
    "oci",
    "protocols",
    "rustjail",
--- a/src/agent/Makefile
+++ b/src/agent/Makefile
@@ -3,6 +3,11 @@
 # SPDX-License-Identifier: Apache-2.0
 #

+# To show variables or targets help on `make help`
+# Use the following format:
+# '##VAR VARIABLE_NAME: help about variable'
+# '##TARGET TARGET_NAME: help about target'
+
 PROJECT_NAME = Kata Containers
 PROJECT_URL = https://github.com/kata-containers
 PROJECT_COMPONENT = kata-agent
@@ -16,16 +21,18 @@ SOURCES := \
 VERSION_FILE := ./VERSION
 VERSION := $(shell grep -v ^\# $(VERSION_FILE))
 COMMIT_NO := $(shell git rev-parse HEAD 2>/dev/null || true)
-COMMIT_NO_SHORT := $(shell git rev-parse --short HEAD 2>/dev/null || true)
 COMMIT := $(if $(shell git status --porcelain --untracked-files=no 2>/dev/null || true),${COMMIT_NO}-dirty,${COMMIT_NO})
 COMMIT_MSG = $(if $(COMMIT),$(COMMIT),unknown)

 # Exported to allow cargo to see it
 export VERSION_COMMIT := $(if $(COMMIT),$(VERSION)-$(COMMIT),$(VERSION))

+##VAR BUILD_TYPE=release|debug type of rust build
 BUILD_TYPE = release

+##VAR ARCH=arch target to build (format: uname -m)
 ARCH = $(shell uname -m)
+##VAR LIBC=musl|gnu libc flavor used in the rust target triple
 LIBC ?= musl
 ifneq ($(LIBC),musl)
    ifeq ($(LIBC),gnu)
@@ -41,6 +48,11 @@ ifeq ($(ARCH), ppc64le)
    $(warning "WARNING: powerpc64le-unknown-linux-musl target is unavailable")
 endif

+ifeq ($(ARCH), s390x)
+    override LIBC = gnu
+    $(warning "WARNING: s390x-unknown-linux-musl target is unavailable")
+endif
+

 EXTRA_RUSTFLAGS :=
 ifeq ($(ARCH), aarch64)
@@ -52,10 +64,12 @@ TRIPLE = $(ARCH)-unknown-linux-$(LIBC)

 TARGET_PATH = target/$(TRIPLE)/$(BUILD_TYPE)/$(TARGET)

+##VAR DESTDIR=<path> is a directory prepended to each installed target file
 DESTDIR :=
+##VAR BINDIR=<path> is a directory for installing executable programs
 BINDIR := /usr/bin

-# Define if agent will be installed as init
+##VAR INIT=yes|no define if agent will be installed as init
 INIT := no

 # Path to systemd unit directory if installed as not init.
@@ -103,6 +117,7 @@ define INSTALL_FILE
 	install -D -m 644 $1 $(DESTDIR)$2/$1 || exit 1;
 endef

+##TARGET default: build code
 default: $(TARGET) show-header

 $(TARGET): $(GENERATED_CODE) $(TARGET_PATH)
@@ -110,42 +125,55 @@ $(TARGET): $(GENERATED_CODE) $(TARGET_PATH)
 $(TARGET_PATH): $(SOURCES) | show-summary
 	@RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE)

+$(GENERATED_FILES): %: %.in
+	@sed $(foreach r,$(GENERATED_REPLACEMENTS),-e 's|@$r@|$($r)|g') "$<" > "$@"
+
+##TARGET optimize: optimized build
 optimize: $(SOURCES) | show-summary show-header
-	@RUSTFLAGS="-C link-arg=-s $(EXTRA_RUSTFLAGS) --deny-warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE)
+	@RUSTFLAGS="-C link-arg=-s $(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE)

-show-header:
-	@printf "%s - version %s (commit %s)\n\n" "$(TARGET)" "$(VERSION)" "$(COMMIT_MSG)"

+##TARGET clippy: run clippy linter
 clippy: $(GENERATED_CODE)
 	cargo clippy --all-targets --all-features --release \
 		-- \
 		-Aclippy::redundant_allocation \
 		-D warnings

-$(GENERATED_FILES): %: %.in
-	@sed $(foreach r,$(GENERATED_REPLACEMENTS),-e 's|@$r@|$($r)|g') "$<" > "$@"
+format:
+	cargo fmt -- --check

-install: build-service
+
+##TARGET install: install agent
+install: install-services
 	@install -D $(TARGET_PATH) $(DESTDIR)/$(BINDIR)/$(TARGET)

+##TARGET clean: clean build
 clean:
 	@cargo clean
 	@rm -f $(GENERATED_FILES)
+	@rm -f tarpaulin-report.html

+##TARGET test: run cargo tests
 test:
 	@cargo test --all --target $(TRIPLE)

-check: test
+##TARGET check: run clippy and format checks
+check: clippy format

+##TARGET run: build and run agent
 run:
 	@cargo run --target $(TRIPLE)

-build-service: $(GENERATED_FILES)
+install-services: $(GENERATED_FILES)
 ifeq ($(INIT),no)
 	@echo "Installing systemd unit files..."
 	$(foreach f,$(UNIT_FILES),$(call INSTALL_FILE,$f,$(UNIT_DIR)))
 endif

+show-header:
+	@printf "%s - version %s (commit %s)\n\n" "$(TARGET)" "$(VERSION)" "$(COMMIT_MSG)"
+
 show-summary: show-header
 	@printf "project:\n"
 	@printf "  name: $(PROJECT_NAME)\n"
@@ -161,7 +189,35 @@ show-summary: show-header
 	@printf "  %s\n" "$(call get_toolchain_version)"
 	@printf "\n"

-help: show-summary
+##TARGET help: show help comments that start with `##VAR` and `##TARGET`
+help: Makefile show-summary
+	@echo "==========================Help============================="
+	@echo "Variables:"
+	@sed -n 's/^##VAR//p' $< | sort
+	@echo ""
+	@echo "Targets:"
+	@sed -n 's/^##TARGET//p' $< | sort
+
+TARPAULIN_ARGS:=-v --workspace
+install-tarpaulin:
+	cargo install cargo-tarpaulin
+
+# Check if cargo tarpaulin is installed (HAS_TARPAULIN is non-empty when `cargo --list` mentions tarpaulin)
+HAS_TARPAULIN:= $(shell cargo --list 2>/dev/null | grep tarpaulin)
+check_tarpaulin:
+ifndef  HAS_TARPAULIN
+	$(error "tarpaulin is not available: please run make install-tarpaulin")
+else
+	$(info OK: tarpaulin installed)
+endif
+
+##TARGET codecov: Generate code coverage report
+codecov: check_tarpaulin
+	cargo tarpaulin $(TARPAULIN_ARGS)
+
+##TARGET codecov-html: Generate code coverage html report
+codecov-html: check_tarpaulin
+	cargo tarpaulin $(TARPAULIN_ARGS) -o Html

 .PHONY: \
 	help \
@@ -169,5 +225,6 @@ help: show-summary
 	show-summary \
 	optimize

+##TARGET generate-protocols: generate/update grpc agent protocols
 generate-protocols:
 	protocols/hack/update-generated-proto.sh all
--- a/src/agent/README.md
+++ b/src/agent/README.md
@@ -49,6 +49,11 @@ $ rustup target add "${arch}-unknown-linux-musl"
 $ sudo ln -s /usr/bin/g++ /bin/musl-g++
 ```

+ppc64le-only: Manually install `protoc`, e.g.
+```bash
+$ sudo dnf install protobuf-compiler
+```
+
 Download the source files in the Kata containers repository and build the agent:
 ```bash
 $ GOPATH="${GOPATH:-$HOME/go}"
--- a/src/agent/VERSION
+++ b/src/agent/VERSION
@@ -1 +0,0 @@
-2.0.0
--- a/src/agent/VERSION
+++ b/src/agent/VERSION
@@ -0,0 +1 @@
+../../VERSION
--- a/src/agent/kata-agent.service.in
+++ b/src/agent/kata-agent.service.in
@@ -15,7 +15,7 @@ Wants=kata-containers.target
 StandardOutput=tty
 Type=simple
 ExecStart=@BINDIR@/@AGENT_NAME@
-LimitNOFILE=infinity
+LimitNOFILE=1048576
 # ExecStop is required for static agent tracing; in all other scenarios
 # the runtime handles shutting down the VM.
 ExecStop=/bin/sync ; /usr/bin/systemctl --force poweroff
--- a/src/agent/netlink/Cargo.toml
+++ b/src/agent/netlink/Cargo.toml
@@ -1,20 +0,0 @@
-[package]
-name = "netlink"
-version = "0.1.0"
-authors = ["The Kata Containers community <kata-dev@lists.katacontainers.io>"]
-edition = "2018"
-
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-
-[dependencies]
-libc = "0.2.58"
-nix = "0.17.0"
-
-protobuf = { version = "=2.14.0", optional = true }
-protocols = { path = "../protocols", optional = true }
-slog = { version = "2.5.2", features = ["dynamic-keys", "max_level_trace", "release_max_level_info"], optional = true }
-slog-scope = { version = "4.1.2", optional = true }
-
-[features]
-with-log = ["slog", "slog-scope"]
-with-agent-handler = ["protobuf", "protocols"]
--- a/src/agent/netlink/src/agent_handler.rs
+++ b/src/agent/netlink/src/agent_handler.rs
@@ -1,572 +0,0 @@
-// Copyright (c) 2020 Ant Financial
-// Copyright (C) 2020 Alibaba Cloud. All rights reserved.
-//
-// SPDX-License-Identifier: Apache-2.0
-//
-
-//! Dedicated Netlink interfaces for Kata agent protocol handler.
-
-use std::convert::TryFrom;
-
-use protobuf::RepeatedField;
-use protocols::types::{ARPNeighbor, IPAddress, IPFamily, Interface, Route};
-
-use super::*;
-
-#[cfg(feature = "with-log")]
-// Convenience macro to obtain the scope logger
-macro_rules! sl {
-    () => {
-        slog_scope::logger().new(o!("subsystem" => "netlink"))
-    };
-}
-
-impl super::RtnlHandle {
-    pub fn update_interface(&mut self, iface: &Interface) -> Result<Interface> {
-        // the reliable way to find link is using hardware address
-        // as filter. However, hardware filter might not be supported
-        // by netlink, we may have to dump link list and the find the
-        // target link. filter using name or family is supported, but
-        // we cannot use that to find target link.
-        // let's try if hardware address filter works. -_-
-
-        let ifinfo = self.find_link_by_hwaddr(iface.hwAddr.as_str())?;
-
-        // bring down interface if it is up
-        if ifinfo.ifi_flags & libc::IFF_UP as u32 != 0 {
-            self.set_link_status(&ifinfo, false)?;
-        }
-
-        // delete all addresses associated with the link
-        let del_addrs: Vec<RtIPAddr> = self.get_link_addresses(&ifinfo)?;
-        self.delete_all_addrs(&ifinfo, del_addrs.as_ref())?;
-
-        // add new ip addresses in request
-        for grpc_addr in &iface.IPAddresses {
-            let rtip = RtIPAddr::try_from(grpc_addr.clone())?;
-            self.add_one_address(&ifinfo, &rtip)?;
-        }
-
-        let mut v: Vec<u8> = vec![0; DEFAULT_NETLINK_BUF_SIZE];
-        // Safe because we have allocated enough buffer space.
-        let nlh = unsafe { &mut *(v.as_mut_ptr() as *mut nlmsghdr) };
-        let ifi = unsafe { &mut *(NLMSG_DATA!(nlh) as *mut ifinfomsg) };
-
-        // set name, set mtu, IFF_NOARP. in one rtnl_talk.
-        nlh.nlmsg_len = NLMSG_LENGTH!(mem::size_of::<ifinfomsg>() as u32) as __u32;
-        nlh.nlmsg_type = RTM_NEWLINK;
-        nlh.nlmsg_flags = NLM_F_REQUEST;
-        self.assign_seqnum(nlh);
-
-        ifi.ifi_family = ifinfo.ifi_family;
-        ifi.ifi_type = ifinfo.ifi_type;
-        ifi.ifi_index = ifinfo.ifi_index;
-        if iface.raw_flags & libc::IFF_NOARP as u32 != 0 {
-            ifi.ifi_change |= libc::IFF_NOARP as u32;
-            ifi.ifi_flags |= libc::IFF_NOARP as u32;
-        }
-
-        // Safe because we have allocated enough buffer space.
-        unsafe {
-            nlh.addattr32(IFLA_MTU, iface.mtu as u32);
-
-            // if str is null terminated, use addattr_var.
-            // otherwise, use addattr_str
-            nlh.addattr_var(IFLA_IFNAME, iface.name.as_ref());
-        }
-
-        self.rtnl_talk(v.as_mut_slice(), false)?;
-
-        // TODO: why the result is ignored here?
-        let _ = self.set_link_status(&ifinfo, true);
-
-        Ok(iface.clone())
-    }
-
-    /// Delete this interface/link per request
-    pub fn remove_interface(&mut self, iface: &Interface) -> Result<Interface> {
-        let ifinfo = self.find_link_by_hwaddr(iface.hwAddr.as_str())?;
-
-        self.set_link_status(&ifinfo, false)?;
-
-        let mut v: Vec<u8> = vec![0; DEFAULT_NETLINK_BUF_SIZE];
-        // Safe because we have allocated enough buffer space.
-        let nlh = unsafe { &mut *(v.as_mut_ptr() as *mut nlmsghdr) };
-        let ifi = unsafe { &mut *(NLMSG_DATA!(nlh) as *mut ifinfomsg) };
-
-        // No attributes needed?
-        nlh.nlmsg_len = NLMSG_LENGTH!(mem::size_of::<ifinfomsg>()) as __u32;
-        nlh.nlmsg_type = RTM_DELLINK;
-        nlh.nlmsg_flags = NLM_F_REQUEST;
-        self.assign_seqnum(nlh);
-
-        ifi.ifi_family = ifinfo.ifi_family;
-        ifi.ifi_index = ifinfo.ifi_index;
-        ifi.ifi_type = ifinfo.ifi_type;
-
-        self.rtnl_talk(v.as_mut_slice(), false)?;
-
-        Ok(iface.clone())
-    }
-
-    pub fn list_interfaces(&mut self) -> Result<Vec<Interface>> {
-        let mut ifaces: Vec<Interface> = Vec::new();
-        let (_slv, lv) = self.dump_all_links()?;
-        let (_sav, av) = self.dump_all_addresses(0)?;
-
-        for link in &lv {
-            // Safe because dump_all_links() returns valid pointers.
-            let nlh = unsafe { &**link };
-            if nlh.nlmsg_type != RTM_NEWLINK && nlh.nlmsg_type != RTM_DELLINK {
-                continue;
-            }
-
-            if nlh.nlmsg_len < NLMSG_SPACE!(mem::size_of::<ifinfomsg>()) {
-                info!(
-                    sl!(),
-                    "invalid nlmsg! nlmsg_len: {}, nlmsg_space: {}",
-                    nlh.nlmsg_len,
-                    NLMSG_SPACE!(mem::size_of::<ifinfomsg>())
-                );
-                break;
-            }
-
-            // Safe because we have just validated available buffer space above.
-            let ifi = unsafe { &*(NLMSG_DATA!(nlh) as *const ifinfomsg) };
-            let rta: *mut rtattr = IFLA_RTA!(ifi as *const ifinfomsg) as *mut rtattr;
-            let rtalen = IFLA_PAYLOAD!(nlh) as u32;
-            let attrs = unsafe { parse_attrs(rta, rtalen, (IFLA_MAX + 1) as usize)? };
-
-            // fill out some fields of Interface,
-            let mut iface: Interface = Interface::default();
-
-            // Safe because parse_attrs() returns valid pointers.
-            unsafe {
-                if !attrs[IFLA_IFNAME as usize].is_null() {
-                    let t = attrs[IFLA_IFNAME as usize];
-                    iface.name = String::from_utf8(getattr_var(t as *const rtattr))?;
-                }
-
-                if !attrs[IFLA_MTU as usize].is_null() {
-                    let t = attrs[IFLA_MTU as usize];
-                    iface.mtu = getattr32(t) as u64;
-                }
-
-                if !attrs[IFLA_ADDRESS as usize].is_null() {
-                    let alen = RTA_PAYLOAD!(attrs[IFLA_ADDRESS as usize]);
-                    let a: *const u8 = RTA_DATA!(attrs[IFLA_ADDRESS as usize]) as *const u8;
-                    iface.hwAddr = parser::format_address(a, alen as u32)?;
-                }
-            }
-
-            // get ip address info from av
-            let mut ads: Vec<IPAddress> = Vec::new();
-            for address in &av {
-                // Safe because dump_all_addresses() returns valid pointers.
-                let alh = unsafe { &**address };
-                if alh.nlmsg_type != RTM_NEWADDR {
-                    continue;
-                }
-
-                let tlen = NLMSG_SPACE!(mem::size_of::<ifaddrmsg>());
-                if alh.nlmsg_len < tlen {
-                    info!(
-                        sl!(),
-                        "invalid nlmsg! nlmsg_len: {}, nlmsg_space: {}", alh.nlmsg_len, tlen
-                    );
-                    break;
-                }
-
-                // Safe becahse we have checked avialable buffer space by NLMSG_SPACE above.
-                let ifa = unsafe { &*(NLMSG_DATA!(alh) as *const ifaddrmsg) };
-                let arta: *mut rtattr = IFA_RTA!(ifa) as *mut rtattr;
-                let artalen = IFA_PAYLOAD!(alh) as u32;
-
-                if ifa.ifa_index as u32 == ifi.ifi_index as u32 {
-                    // found target addresses, parse attributes and fill out Interface
-                    let addrs = unsafe { parse_attrs(arta, artalen, (IFA_MAX + 1) as usize)? };
-
-                    // fill address field of Interface
-                    let mut one: IPAddress = IPAddress::default();
-                    let tattr: *const rtattr = if !addrs[IFA_ADDRESS as usize].is_null() {
-                        addrs[IFA_ADDRESS as usize]
-                    } else {
-                        addrs[IFA_LOCAL as usize]
-                    };
-
-                    one.mask = format!("{}", ifa.ifa_prefixlen);
-                    one.family = IPFamily::v4;
-                    if ifa.ifa_family == libc::AF_INET6 as u8 {
-                        one.family = IPFamily::v6;
-                    }
-
-                    // Safe because parse_attrs() returns valid pointers.
-                    unsafe {
-                        let a: *const u8 = RTA_DATA!(tattr) as *const u8;
-                        let alen = RTA_PAYLOAD!(tattr);
-                        one.address = parser::format_address(a, alen as u32)?;
-                    }
-
-                    ads.push(one);
-                }
-            }
-
-            iface.IPAddresses = RepeatedField::from_vec(ads);
-            ifaces.push(iface);
-        }
-
-        Ok(ifaces)
-    }
-
-    pub fn update_routes(&mut self, rt: &[Route]) -> Result<Vec<Route>> {
-        let rs = self.get_all_routes()?;
-        self.delete_all_routes(&rs)?;
-
-        for grpcroute in rt {
-            if grpcroute.gateway.as_str() == "" {
-                let r = RtRoute::try_from(grpcroute.clone())?;
-                if r.index == -1 {
-                    continue;
-                }
-                self.add_one_route(&r)?;
-            }
-        }
-
-        for grpcroute in rt {
-            if grpcroute.gateway.as_str() != "" {
-                let r = RtRoute::try_from(grpcroute.clone())?;
-                if r.index == -1 {
-                    continue;
-                }
-                self.add_one_route(&r)?;
-            }
-        }
-
-        Ok(rt.to_owned())
-    }
-
-    pub fn list_routes(&mut self) -> Result<Vec<Route>> {
-        // currently, only dump routes from main table for ipv4
-        // ie, rtmsg.rtmsg_family = AF_INET, set RT_TABLE_MAIN
-        // attribute in dump request
-        // Fix Me: think about othe tables, ipv6..
-        let mut rs: Vec<Route> = Vec::new();
-        let (_srv, rv) = self.dump_all_routes()?;
-
-        // parse out routes and store in rs
-        for r in &rv {
-            // Safe because dump_all_routes() returns valid pointers.
-            let nlh = unsafe { &**r };
-            if nlh.nlmsg_type != RTM_NEWROUTE && nlh.nlmsg_type != RTM_DELROUTE {
-                info!(sl!(), "not route message!");
-                continue;
-            }
-            let tlen = NLMSG_SPACE!(mem::size_of::<rtmsg>());
-            if nlh.nlmsg_len < tlen {
-                info!(
-                    sl!(),
-                    "invalid nlmsg! nlmsg_len: {}, nlmsg_spae: {}", nlh.nlmsg_len, tlen
-                );
-                break;
-            }
-
-            // Safe because we have just validated available buffer space above.
-            let rtm = unsafe { &mut *(NLMSG_DATA!(nlh) as *mut rtmsg) };
-            if rtm.rtm_table != RT_TABLE_MAIN as u8 {
-                continue;
-            }
-            let rta: *mut rtattr = RTM_RTA!(rtm) as *mut rtattr;
-            let rtalen = RTM_PAYLOAD!(nlh) as u32;
-            let attrs = unsafe { parse_attrs(rta, rtalen, (RTA_MAX + 1) as usize)? };
-
-            let t = attrs[RTA_TABLE as usize];
-            if !t.is_null() {
-                // Safe because parse_attrs() returns valid pointers
-                let table = unsafe { getattr32(t) };
-                if table != RT_TABLE_MAIN {
-                    continue;
-                }
-            }
-
-            // find source, destination, gateway, scope, and and device name
-            let mut t = attrs[RTA_DST as usize];
-            let mut rte: Route = Route::default();
-
-            // Safe because parse_attrs() returns valid pointers
-            unsafe {
-                // destination
-                if !t.is_null() {
-                    let data: *const u8 = RTA_DATA!(t) as *const u8;
-                    let len = RTA_PAYLOAD!(t) as u32;
-                    rte.dest =
-                        format!("{}/{}", parser::format_address(data, len)?, rtm.rtm_dst_len);
-                }
-
-                // gateway
-                t = attrs[RTA_GATEWAY as usize];
-                if !t.is_null() {
-                    let data: *const u8 = RTA_DATA!(t) as *const u8;
-                    let len = RTA_PAYLOAD!(t) as u32;
-                    rte.gateway = parser::format_address(data, len)?;
-
-                    // for gateway, destination is 0.0.0.0
-                    rte.dest = "0.0.0.0".to_string();
-                }
-
-                // source
-                t = attrs[RTA_SRC as usize];
-                if t.is_null() {
-                    t = attrs[RTA_PREFSRC as usize];
-                }
-                if !t.is_null() {
-                    let data: *const u8 = RTA_DATA!(t) as *const u8;
-                    let len = RTA_PAYLOAD!(t) as u32;
-                    rte.source = parser::format_address(data, len)?;
-
-                    if rtm.rtm_src_len != 0 {
-                        rte.source = format!("{}/{}", rte.source.as_str(), rtm.rtm_src_len);
-                    }
-                }
-
-                // scope
-                rte.scope = rtm.rtm_scope as u32;
-
-                // oif
-                t = attrs[RTA_OIF as usize];
-                if !t.is_null() {
-                    let data = &*(RTA_DATA!(t) as *const i32);
-                    assert_eq!(RTA_PAYLOAD!(t), 4);
-
-                    rte.device = self
-                        .get_name_by_index(*data)
-                        .unwrap_or_else(|_| "unknown".to_string());
-                }
-            }
-
-            rs.push(rte);
-        }
-
-        Ok(rs)
-    }
-
-    pub fn add_arp_neighbors(&mut self, neighs: &[ARPNeighbor]) -> Result<()> {
-        for neigh in neighs {
-            self.add_one_arp_neighbor(&neigh)?;
-        }
-
-        Ok(())
-    }
-
-    pub fn add_one_arp_neighbor(&mut self, neigh: &ARPNeighbor) -> Result<()> {
-        let to_ip = match neigh.toIPAddress.as_ref() {
-            None => return nix_errno(Errno::EINVAL),
-            Some(v) => {
-                if v.address.is_empty() {
-                    return nix_errno(Errno::EINVAL);
-                }
-                v.address.as_ref()
-            }
-        };
-
-        let dev = self.find_link_by_name(&neigh.device)?;
-
-        let mut v: Vec<u8> = vec![0; DEFAULT_NETLINK_BUF_SIZE];
-        // Safe because we have allocated enough buffer space.
-        let nlh = unsafe { &mut *(v.as_mut_ptr() as *mut nlmsghdr) };
-        let ndm = unsafe { &mut *(NLMSG_DATA!(nlh) as *mut ndmsg) };
-
-        nlh.nlmsg_len = NLMSG_LENGTH!(std::mem::size_of::<ndmsg>()) as u32;
-        nlh.nlmsg_type = RTM_NEWNEIGH;
-        nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
-        self.assign_seqnum(nlh);
-
-        ndm.ndm_family = libc::AF_UNSPEC as __u8;
-        ndm.ndm_state = IFA_F_PERMANENT as __u16;
-        // process lladdr
-        if neigh.lladdr != "" {
-            let llabuf = parser::parse_mac_addr(&neigh.lladdr)?;
-
-            // Safe because we have allocated enough buffer space.
-            unsafe { nlh.addattr_var(NDA_LLADDR, llabuf.as_ref()) };
-        }
-
-        let (family, ip_data) = parser::parse_ip_addr_with_family(&to_ip)?;
-        ndm.ndm_family = family;
-        // Safe because we have allocated enough buffer space.
-        unsafe { nlh.addattr_var(NDA_DST, ip_data.as_ref()) };
-
-        // process state
-        if neigh.state != 0 {
-            ndm.ndm_state = neigh.state as __u16;
-        }
-
-        // process flags
-        ndm.ndm_flags = (*ndm).ndm_flags | neigh.flags as __u8;
-
-        // process dev
-        ndm.ndm_ifindex = dev.ifi_index;
-
-        // send
-        self.rtnl_talk(v.as_mut_slice(), false)?;
-
-        Ok(())
-    }
-}
-
-impl TryFrom<IPAddress> for RtIPAddr {
-    type Error = nix::Error;
-
-    fn try_from(ipi: IPAddress) -> std::result::Result<Self, Self::Error> {
-        let ip_family = if ipi.family == IPFamily::v4 {
-            libc::AF_INET
-        } else {
-            libc::AF_INET6
-        } as __u8;
-
-        let ip_mask = parser::parse_u8(ipi.mask.as_str(), 10)?;
-        let addr = parser::parse_ip_addr(ipi.address.as_ref())?;
-
-        Ok(Self {
-            ip_family,
-            ip_mask,
-            addr,
-        })
-    }
-}
-
-impl TryFrom<Route> for RtRoute {
-    type Error = nix::Error;
-
-    fn try_from(r: Route) -> std::result::Result<Self, Self::Error> {
-        // only handle ipv4
-
-        let index = {
-            let mut rh = RtnlHandle::new(NETLINK_ROUTE, 0)?;
-            match rh.find_link_by_name(r.device.as_str()) {
-                Ok(ifi) => ifi.ifi_index,
-                Err(_) => -1,
-            }
-        };
-
-        let (dest, dst_len) = if r.dest.is_empty() {
-            (Some(vec![0 as u8; 4]), 0)
-        } else {
-            let (dst, mask) = parser::parse_cidr(r.dest.as_str())?;
-            (Some(dst), mask)
-        };
-
-        let (source, src_len) = if r.source.is_empty() {
-            (None, 0)
-        } else {
-            let (src, mask) = parser::parse_cidr(r.source.as_str())?;
-            (Some(src), mask)
-        };
-
-        let gateway = if r.gateway.is_empty() {
-            None
-        } else {
-            Some(parser::parse_ip_addr(r.gateway.as_str())?)
-        };
-
-        Ok(Self {
-            dest,
-            source,
-            src_len,
-            dst_len,
-            index,
-            gateway,
-            scope: r.scope as u8,
-            protocol: RTPROTO_UNSPEC,
-        })
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::{RtnlHandle, NETLINK_ROUTE};
-    use protocols::types::IPAddress;
-    use std::process::Command;
-
-    fn clean_env_for_test_add_one_arp_neighbor(dummy_name: &str, ip: &str) {
-        // ip link delete dummy
-        Command::new("ip")
-            .args(&["link", "delete", dummy_name])
-            .output()
-            .expect("prepare: failed to delete dummy");
-
-        // ip neigh del dev dummy ip
-        Command::new("ip")
-            .args(&["neigh", "del", dummy_name, ip])
-            .output()
-            .expect("prepare: failed to delete neigh");
-    }
-
-    fn prepare_env_for_test_add_one_arp_neighbor(dummy_name: &str, ip: &str) {
-        clean_env_for_test_add_one_arp_neighbor(dummy_name, ip);
-        // modprobe dummy
-        Command::new("modprobe")
-            .arg("dummy")
-            .output()
-            .expect("failed to run modprobe dummy");
-
-        // ip link add dummy type dummy
-        Command::new("ip")
-            .args(&["link", "add", dummy_name, "type", "dummy"])
-            .output()
-            .expect("failed to add dummy interface");
-
-        // ip addr add 192.168.0.2/16 dev dummy
-        Command::new("ip")
-            .args(&["addr", "add", "192.168.0.2/16", "dev", dummy_name])
-            .output()
-            .expect("failed to add ip for dummy");
-
-        // ip link set dummy up;
-        Command::new("ip")
-            .args(&["link", "set", dummy_name, "up"])
-            .output()
-            .expect("failed to up dummy");
-    }
-
-    #[test]
-    fn test_add_one_arp_neighbor() {
-        // skip_if_not_root
-        if !nix::unistd::Uid::effective().is_root() {
-            println!("INFO: skipping {} which needs root", module_path!());
-            return;
-        }
-
-        let mac = "6a:92:3a:59:70:aa";
-        let to_ip = "169.254.1.1";
-        let dummy_name = "dummy_for_arp";
-
-        prepare_env_for_test_add_one_arp_neighbor(dummy_name, to_ip);
-
-        let mut ip_address = IPAddress::new();
-        ip_address.set_address(to_ip.to_string());
-
-        let mut neigh = ARPNeighbor::new();
-        neigh.set_toIPAddress(ip_address);
-        neigh.set_device(dummy_name.to_string());
-        neigh.set_lladdr(mac.to_string());
-        neigh.set_state(0x80);
-
-        let mut rtnl = RtnlHandle::new(NETLINK_ROUTE, 0).unwrap();
-
-        rtnl.add_one_arp_neighbor(&neigh).unwrap();
-
-        // ip neigh show dev dummy ip
-        let stdout = Command::new("ip")
-            .args(&["neigh", "show", "dev", dummy_name, to_ip])
-            .output()
-            .expect("failed to show neigh")
-            .stdout;
-
-        let stdout = std::str::from_utf8(&stdout).expect("failed to conveert stdout");
-
-        assert_eq!(stdout, format!("{} lladdr {} PERMANENT\n", to_ip, mac));
-
-        clean_env_for_test_add_one_arp_neighbor(dummy_name, to_ip);
-    }
-}
--- a/src/agent/netlink/src/lib.rs
+++ b/src/agent/netlink/src/lib.rs
--- a/src/agent/netlink/src/parser.rs
+++ b/src/agent/netlink/src/parser.rs
@@ -1,201 +0,0 @@
-// Copyright (c) 2019 Ant Financial
-//
-// SPDX-License-Identifier: Apache-2.0
-
-//! Parser for IPv4/IPv6/MAC addresses.
-
-use std::net::{Ipv4Addr, Ipv6Addr};
-use std::str::FromStr;
-
-use super::{Errno, Result, __u8, nix_errno};
-
-#[inline]
-pub(crate) fn parse_u8(s: &str, radix: u32) -> Result<u8> {
-    if radix >= 2 && radix <= 36 {
-        u8::from_str_radix(s, radix).map_err(|_| nix::Error::Sys(Errno::EINVAL))
-    } else {
-        u8::from_str(s).map_err(|_| nix::Error::Sys(Errno::EINVAL))
-    }
-}
-
-pub fn parse_ipv4_addr(s: &str) -> Result<Vec<u8>> {
-    match Ipv4Addr::from_str(s) {
-        Ok(v) => Ok(Vec::from(v.octets().as_ref())),
-        Err(_e) => nix_errno(Errno::EINVAL),
-    }
-}
-
-pub fn parse_ip_addr(s: &str) -> Result<Vec<u8>> {
-    if let Ok(v6) = Ipv6Addr::from_str(s) {
-        Ok(Vec::from(v6.octets().as_ref()))
-    } else {
-        parse_ipv4_addr(s)
-    }
-}
-
-pub fn parse_ip_addr_with_family(ip_address: &str) -> Result<(__u8, Vec<u8>)> {
-    if let Ok(v6) = Ipv6Addr::from_str(ip_address) {
-        Ok((libc::AF_INET6 as __u8, Vec::from(v6.octets().as_ref())))
-    } else {
-        parse_ipv4_addr(ip_address).map(|v| (libc::AF_INET as __u8, v))
-    }
-}
-
-pub fn parse_ipv4_cidr(s: &str) -> Result<(Vec<u8>, u8)> {
-    let fields: Vec<&str> = s.split('/').collect();
-
-    if fields.len() != 2 {
-        nix_errno(Errno::EINVAL)
-    } else {
-        Ok((parse_ipv4_addr(fields[0])?, parse_u8(fields[1], 10)?))
-    }
-}
-
-pub fn parse_cidr(s: &str) -> Result<(Vec<u8>, u8)> {
-    let fields: Vec<&str> = s.split('/').collect();
-
-    if fields.len() != 2 {
-        nix_errno(Errno::EINVAL)
-    } else {
-        Ok((parse_ip_addr(fields[0])?, parse_u8(fields[1], 10)?))
-    }
-}
-
-pub fn parse_mac_addr(hwaddr: &str) -> Result<Vec<u8>> {
-    let fields: Vec<&str> = hwaddr.split(':').collect();
-
-    if fields.len() != 6 {
-        nix_errno(Errno::EINVAL)
-    } else {
-        Ok(vec![
-            parse_u8(fields[0], 16)?,
-            parse_u8(fields[1], 16)?,
-            parse_u8(fields[2], 16)?,
-            parse_u8(fields[3], 16)?,
-            parse_u8(fields[4], 16)?,
-            parse_u8(fields[5], 16)?,
-        ])
-    }
-}
-
-/// Format an IPv4/IPv6/MAC address.
-///
-/// # Safety
-/// Caller needs to ensure that addr and len are valid.
-pub unsafe fn format_address(addr: *const u8, len: u32) -> Result<String> {
-    let mut a: String;
-    if len == 4 {
-        // ipv4
-        let mut i = 1;
-        let mut p = addr as i64;
-
-        a = format!("{}", *(p as *const u8));
-        while i < len {
-            p += 1;
-            i += 1;
-            a.push_str(format!(".{}", *(p as *const u8)).as_str());
-        }
-
-        return Ok(a);
-    }
-
-    if len == 6 {
-        // hwaddr
-        let mut i = 1;
-        let mut p = addr as i64;
-
-        a = format!("{:0>2X}", *(p as *const u8));
-        while i < len {
-            p += 1;
-            i += 1;
-            a.push_str(format!(":{:0>2X}", *(p as *const u8)).as_str());
-        }
-
-        return Ok(a);
-    }
-
-    if len == 16 {
-        // ipv6
-        let p = addr as *const u8 as *const libc::c_void;
-        let mut ar: [u8; 16] = [0; 16];
-        let mut v: Vec<u8> = vec![0; 16];
-        let dp: *mut libc::c_void = v.as_mut_ptr() as *mut libc::c_void;
-        libc::memcpy(dp, p, 16);
-
-        ar.copy_from_slice(v.as_slice());
-
-        return Ok(Ipv6Addr::from(ar).to_string());
-    }
-
-    nix_errno(Errno::EINVAL)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use libc;
-
-    #[test]
-    fn test_ip_addr() {
-        let ip = parse_ipv4_addr("1.2.3.4").unwrap();
-        assert_eq!(ip, vec![0x1u8, 0x2u8, 0x3u8, 0x4u8]);
-        parse_ipv4_addr("1.2.3.4.5").unwrap_err();
-        parse_ipv4_addr("1.2.3-4").unwrap_err();
-        parse_ipv4_addr("1.2.3.a").unwrap_err();
-        parse_ipv4_addr("1.2.3.x").unwrap_err();
-        parse_ipv4_addr("-1.2.3.4").unwrap_err();
-        parse_ipv4_addr("+1.2.3.4").unwrap_err();
-
-        let (family, _) = parse_ip_addr_with_family("192.168.1.1").unwrap();
-        assert_eq!(family, libc::AF_INET as __u8);
-
-        let (family, ip) =
-            parse_ip_addr_with_family("2001:0db8:85a3:0000:0000:8a2e:0370:7334").unwrap();
-        assert_eq!(family, libc::AF_INET6 as __u8);
-        assert_eq!(ip.len(), 16);
-        parse_ip_addr_with_family("2001:0db8:85a3:0000:0000:8a2e:0370:73345").unwrap_err();
-
-        let ip = parse_ip_addr("::1").unwrap();
-        assert_eq!(ip[0], 0x0);
-        assert_eq!(ip[15], 0x1);
-    }
-
-    #[test]
-    fn test_parse_cidr() {
-        let (_, mask) = parse_ipv4_cidr("1.2.3.4/31").unwrap();
-        assert_eq!(mask, 31);
-
-        parse_ipv4_cidr("1.2.3/4/31").unwrap_err();
-        parse_ipv4_cidr("1.2.3.4/f").unwrap_err();
-        parse_ipv4_cidr("1.2.3/8").unwrap_err();
-        parse_ipv4_cidr("1.2.3.4.8").unwrap_err();
-
-        let (ip, mask) = parse_cidr("2001:db8:a::123/64").unwrap();
-        assert_eq!(mask, 64);
-        assert_eq!(ip[0], 0x20);
-        assert_eq!(ip[15], 0x23);
-    }
-
-    #[test]
-    fn test_parse_mac_addr() {
-        let mac = parse_mac_addr("FF:FF:FF:FF:FF:FE").unwrap();
-        assert_eq!(mac.len(), 6);
-        assert_eq!(mac[0], 0xff);
-        assert_eq!(mac[5], 0xfe);
-
-        parse_mac_addr("FF:FF:FF:FF:FF:FE:A0").unwrap_err();
-        parse_mac_addr("FF:FF:FF:FF:FF:FX").unwrap_err();
-        parse_mac_addr("FF:FF:FF:FF:FF").unwrap_err();
-    }
-
-    #[test]
-    fn test_format_address() {
-        let buf = [1u8, 2u8, 3u8, 4u8];
-        let addr = unsafe { format_address(&buf as *const u8, 4).unwrap() };
-        assert_eq!(addr, "1.2.3.4");
-
-        let buf = [1u8, 2u8, 3u8, 4u8, 5u8, 6u8];
-        let addr = unsafe { format_address(&buf as *const u8, 6).unwrap() };
-        assert_eq!(addr, "01:02:03:04:05:06");
-    }
-}
--- a/src/agent/oci/src/lib.rs
+++ b/src/agent/oci/src/lib.rs
@@ -8,7 +8,7 @@ extern crate serde;
 extern crate serde_derive;
 extern crate serde_json;

-use libc::mode_t;
+use libc::{self, mode_t};
 use std::collections::HashMap;

 mod serialize;
@@ -27,6 +27,10 @@ where
    *d == T::default()
 }

+fn default_seccomp_errno() -> u32 {
+    libc::EPERM as u32
+}
+
 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
 pub struct Spec {
    #[serde(
@@ -54,7 +58,7 @@ pub struct Spec {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub windows: Option<Windows<String>>,
    #[serde(skip_serializing_if = "Option::is_none")]
-    pub vm: Option<VM>,
+    pub vm: Option<Vm>,
 }

 impl Spec {
@@ -67,7 +71,7 @@ impl Spec {
    }
 }

-pub type LinuxRlimit = POSIXRlimit;
+pub type LinuxRlimit = PosixRlimit;

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
 pub struct Process {
@@ -89,7 +93,7 @@ pub struct Process {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub capabilities: Option<LinuxCapabilities>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
-    pub rlimits: Vec<POSIXRlimit>,
+    pub rlimits: Vec<PosixRlimit>,
    #[serde(default, rename = "noNewPrivileges")]
    pub no_new_privileges: bool,
    #[serde(
@@ -195,9 +199,9 @@ pub struct Hooks {
 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
 pub struct Linux {
    #[serde(default, rename = "uidMappings", skip_serializing_if = "Vec::is_empty")]
-    pub uid_mappings: Vec<LinuxIDMapping>,
+    pub uid_mappings: Vec<LinuxIdMapping>,
    #[serde(default, rename = "gidMappings", skip_serializing_if = "Vec::is_empty")]
-    pub gid_mappings: Vec<LinuxIDMapping>,
+    pub gid_mappings: Vec<LinuxIdMapping>,
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub sysctl: HashMap<String, String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
@@ -257,7 +261,7 @@ pub const UTSNAMESPACE: &str = "uts";
 pub const CGROUPNAMESPACE: &str = "cgroup";

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct LinuxIDMapping {
+pub struct LinuxIdMapping {
    #[serde(default, rename = "containerID")]
    pub container_id: u32,
    #[serde(default, rename = "hostID")]
@@ -267,7 +271,7 @@ pub struct LinuxIDMapping {
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct POSIXRlimit {
+pub struct PosixRlimit {
    #[serde(default)]
    pub r#type: String,
    #[serde(default)]
@@ -293,7 +297,7 @@ pub struct LinuxInterfacePriority {
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct LinuxBlockIODevice {
+pub struct LinuxBlockIoDevice {
    #[serde(default)]
    pub major: i64,
    #[serde(default)]
@@ -303,7 +307,7 @@ pub struct LinuxBlockIODevice {
 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
 pub struct LinuxWeightDevice {
    #[serde(flatten)]
-    pub blk: LinuxBlockIODevice,
+    pub blk: LinuxBlockIoDevice,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub weight: Option<u16>,
    #[serde(
@@ -317,13 +321,13 @@ pub struct LinuxWeightDevice {
 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
 pub struct LinuxThrottleDevice {
    #[serde(flatten)]
-    pub blk: LinuxBlockIODevice,
+    pub blk: LinuxBlockIoDevice,
    #[serde(default)]
    pub rate: u64,
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct LinuxBlockIO {
+pub struct LinuxBlockIo {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub weight: Option<u16>,
    #[serde(
@@ -387,7 +391,7 @@ pub struct LinuxMemory {
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct LinuxCPU {
+pub struct LinuxCpu {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub shares: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
@@ -449,11 +453,11 @@ pub struct LinuxResources {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub memory: Option<LinuxMemory>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub cpu: Option<LinuxCPU>,
+    pub cpu: Option<LinuxCpu>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub pids: Option<LinuxPids>,
    #[serde(skip_serializing_if = "Option::is_none", rename = "blockIO")]
-    pub block_io: Option<LinuxBlockIO>,
+    pub block_io: Option<LinuxBlockIo>,
    #[serde(
        default,
        skip_serializing_if = "Vec::is_empty",
@@ -513,7 +517,7 @@ pub struct Solaris {
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub anet: Vec<SolarisAnet>,
    #[serde(default, skip_serializing_if = "Option::is_none", rename = "cappedCPU")]
-    pub capped_cpu: Option<SolarisCappedCPU>,
+    pub capped_cpu: Option<SolarisCappedCpu>,
    #[serde(
        default,
        skip_serializing_if = "Option::is_none",
@@ -523,7 +527,7 @@ pub struct Solaris {
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct SolarisCappedCPU {
+pub struct SolarisCappedCpu {
    #[serde(default, skip_serializing_if = "String::is_empty")]
    pub ncpus: String,
 }
@@ -601,7 +605,7 @@ pub struct WindowsResources {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub memory: Option<WindowsMemoryResources>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub cpu: Option<WindowsCPUResources>,
+    pub cpu: Option<WindowsCpuResources>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub storage: Option<WindowsStorageResources>,
 }
@@ -613,7 +617,7 @@ pub struct WindowsMemoryResources {
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct WindowsCPUResources {
+pub struct WindowsCpuResources {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub count: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
@@ -671,14 +675,14 @@ pub struct WindowsHyperV {
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct VM {
-    pub hypervisor: VMHypervisor,
-    pub kernel: VMKernel,
-    pub image: VMImage,
+pub struct Vm {
+    pub hypervisor: VmHypervisor,
+    pub kernel: VmKernel,
+    pub image: VmImage,
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct VMHypervisor {
+pub struct VmHypervisor {
    #[serde(default)]
    pub path: String,
    #[serde(default, skip_serializing_if = "String::is_empty")]
@@ -686,7 +690,7 @@ pub struct VMHypervisor {
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct VMKernel {
+pub struct VmKernel {
    #[serde(default)]
    pub path: String,
    #[serde(default, skip_serializing_if = "String::is_empty")]
@@ -696,7 +700,7 @@ pub struct VMKernel {
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct VMImage {
+pub struct VmImage {
    #[serde(default)]
    pub path: String,
    #[serde(default)]
@@ -710,6 +714,8 @@ pub struct LinuxSeccomp {
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub architectures: Vec<Arch>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub flags: Vec<LinuxSeccompFlag>,
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub syscalls: Vec<LinuxSyscall>,
 }

@@ -733,14 +739,20 @@ pub const ARCHS390: &str = "SCMP_ARCH_S390";
 pub const ARCHS390X: &str = "SCMP_ARCH_S390X";
 pub const ARCHPARISC: &str = "SCMP_ARCH_PARISC";
 pub const ARCHPARISC64: &str = "SCMP_ARCH_PARISC64";
+pub const ARCHRISCV64: &str = "SCMP_ARCH_RISCV64";
+
+pub type LinuxSeccompFlag = String;

 pub type LinuxSeccompAction = String;

 pub const ACTKILL: &str = "SCMP_ACT_KILL";
+pub const ACTKILLPROCESS: &str = "SCMP_ACT_KILL_PROCESS";
+pub const ACTKILLTHREAD: &str = "SCMP_ACT_KILL_THREAD";
 pub const ACTTRAP: &str = "SCMP_ACT_TRAP";
 pub const ACTERRNO: &str = "SCMP_ACT_ERRNO";
 pub const ACTTRACE: &str = "SCMP_ACT_TRACE";
 pub const ACTALLOW: &str = "SCMP_ACT_ALLOW";
+pub const ACTLOG: &str = "SCMP_ACT_LOG";

 pub type LinuxSeccompOperator = String;

@@ -770,6 +782,8 @@ pub struct LinuxSyscall {
    pub names: Vec<String>,
    #[serde(default, skip_serializing_if = "String::is_empty")]
    pub action: LinuxSeccompAction,
+    #[serde(default = "default_seccomp_errno", rename = "errnoRet")]
+    pub errno_ret: u32,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub args: Vec<LinuxSeccompArg>,
 }
@@ -784,7 +798,17 @@ pub struct LinuxIntelRdt {
    pub l3_cache_schema: String,
 }

-#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
+#[derive(Debug, Serialize, Deserialize, Copy, Clone, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum ContainerState {
+    Creating,
+    Created,
+    Running,
+    Stopped,
+    Paused,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
 pub struct State {
    #[serde(
        default,
@@ -794,8 +818,7 @@ pub struct State {
    pub version: String,
    #[serde(default, skip_serializing_if = "String::is_empty")]
    pub id: String,
-    #[serde(default, skip_serializing_if = "String::is_empty")]
-    pub status: String,
+    pub status: ContainerState,
    #[serde(default)]
    pub pid: i32,
    #[serde(default, skip_serializing_if = "String::is_empty")]
@@ -806,6 +829,8 @@ pub struct State {

 #[cfg(test)]
 mod tests {
+    use super::*;
+
    #[test]
    fn test_deserialize_state() {
        let data = r#"{
@@ -818,10 +843,10 @@ mod tests {
                "myKey": "myValue"
            }
        }"#;
-        let expected = crate::State {
+        let expected = State {
            version: "0.2.0".to_string(),
            id: "oci-container1".to_string(),
-            status: "running".to_string(),
+            status: ContainerState::Running,
            pid: 4422,
            bundle: "/containers/redis".to_string(),
            annotations: [("myKey".to_string(), "myValue".to_string())]
@@ -1246,12 +1271,12 @@ mod tests {
                    ambient: vec!["CAP_NET_BIND_SERVICE".to_string()],
                }),
                rlimits: vec![
-                    crate::POSIXRlimit {
+                    crate::PosixRlimit {
                        r#type: "RLIMIT_CORE".to_string(),
                        hard: 1024,
                        soft: 1024,
                    },
-                    crate::POSIXRlimit {
+                    crate::PosixRlimit {
                        r#type: "RLIMIT_NOFILE".to_string(),
                        hard: 1024,
                        soft: 1024,
@@ -1383,12 +1408,12 @@ mod tests {
            .cloned()
            .collect(),
            linux: Some(crate::Linux {
-                uid_mappings: vec![crate::LinuxIDMapping {
+                uid_mappings: vec![crate::LinuxIdMapping {
                    container_id: 0,
                    host_id: 1000,
                    size: 32000,
                }],
-                gid_mappings: vec![crate::LinuxIDMapping {
+                gid_mappings: vec![crate::LinuxIdMapping {
                    container_id: 0,
                    host_id: 1000,
                    size: 32000,
@@ -1433,7 +1458,7 @@ mod tests {
                        swappiness: Some(0),
                        disable_oom_killer: Some(false),
                    }),
-                    cpu: Some(crate::LinuxCPU {
+                    cpu: Some(crate::LinuxCpu {
                        shares: Some(1024),
                        quota: Some(1000000),
                        period: Some(500000),
@@ -1443,17 +1468,17 @@ mod tests {
                        mems: "0-7".to_string(),
                    }),
                    pids: Some(crate::LinuxPids { limit: 32771 }),
-                    block_io: Some(crate::LinuxBlockIO {
+                    block_io: Some(crate::LinuxBlockIo {
                        weight: Some(10),
                        leaf_weight: Some(10),
                        weight_device: vec![
                            crate::LinuxWeightDevice {
-                                blk: crate::LinuxBlockIODevice { major: 8, minor: 0 },
+                                blk: crate::LinuxBlockIoDevice { major: 8, minor: 0 },
                                weight: Some(500),
                                leaf_weight: Some(300),
                            },
                            crate::LinuxWeightDevice {
-                                blk: crate::LinuxBlockIODevice {
+                                blk: crate::LinuxBlockIoDevice {
                                    major: 8,
                                    minor: 16,
                                },
@@ -1462,13 +1487,13 @@ mod tests {
                            },
                        ],
                        throttle_read_bps_device: vec![crate::LinuxThrottleDevice {
-                            blk: crate::LinuxBlockIODevice { major: 8, minor: 0 },
+                            blk: crate::LinuxBlockIoDevice { major: 8, minor: 0 },
                            rate: 600,
                        }],
                        throttle_write_bps_device: vec![],
                        throttle_read_iops_device: vec![],
                        throttle_write_iops_device: vec![crate::LinuxThrottleDevice {
-                            blk: crate::LinuxBlockIODevice {
+                            blk: crate::LinuxBlockIoDevice {
                                major: 8,
                                minor: 16,
                            },
@@ -1554,9 +1579,11 @@ mod tests {
                seccomp: Some(crate::LinuxSeccomp {
                    default_action: "SCMP_ACT_ALLOW".to_string(),
                    architectures: vec!["SCMP_ARCH_X86".to_string(), "SCMP_ARCH_X32".to_string()],
+                    flags: vec![],
                    syscalls: vec![crate::LinuxSyscall {
                        names: vec!["getcwd".to_string(), "chmod".to_string()],
                        action: "SCMP_ACT_ERRNO".to_string(),
+                        errno_ret: crate::default_seccomp_errno(),
                        args: vec![],
                    }],
                }),
--- a/src/agent/protocols/Cargo.toml
+++ b/src/agent/protocols/Cargo.toml
@@ -5,9 +5,9 @@ authors = ["The Kata Containers community <kata-dev@lists.katacontainers.io>"]
 edition = "2018"

 [dependencies]
-ttrpc = "0.3.0"
+ttrpc = { version = "0.5.0", features = ["async"] }
+async-trait = "0.1.42"
 protobuf = "=2.14.0"
-futures = "0.1.27"

 [build-dependencies]
-ttrpc-codegen = "0.1.2"
+ttrpc-codegen = "0.2.0"
--- a/src/agent/protocols/build.rs
+++ b/src/agent/protocols/build.rs
@@ -3,8 +3,8 @@
 // SPDX-License-Identifier: Apache-2.0
 //

-use std::fs::File;
-use std::io::{Read, Write};
+use std::fs;
+use ttrpc_codegen::{Codegen, Customize};

 fn main() {
    let protos = vec![
@@ -15,16 +15,15 @@ fn main() {
        "protos/oci.proto",
    ];

-    // Tell Cargo that if the .proto files changed, to rerun this build script.
-    protos
-        .iter()
-        .for_each(|p| println!("cargo:rerun-if-changed={}", &p));
-
-    ttrpc_codegen::Codegen::new()
+    Codegen::new()
        .out_dir("src")
        .inputs(&protos)
        .include("protos")
        .rust_protobuf()
+        .customize(Customize {
+            async_server: true,
+            ..Default::default()
+        })
        .run()
        .expect("Gen codes failed.");

@@ -40,16 +39,6 @@ fn main() {
 }

 fn replace_text_in_file(file_name: &str, from: &str, to: &str) -> Result<(), std::io::Error> {
-    let mut src = File::open(file_name)?;
-    let mut contents = String::new();
-    src.read_to_string(&mut contents).unwrap();
-    drop(src);
-
-    let new_contents = contents.replace(from, to);
-
-    let mut dst = File::create(&file_name)?;
-    dst.write_all(new_contents.as_bytes())?;
-
-    Ok(())
+    let new_contents = fs::read_to_string(file_name)?.replace(from, to);
+    fs::write(&file_name, new_contents.as_bytes())
 }
-
--- a/src/agent/protocols/hack/update-generated-proto.sh
+++ b/src/agent/protocols/hack/update-generated-proto.sh
@@ -65,7 +65,7 @@ $GOPATH/src/github.com/kata-containers/kata-containers/src/agent/protocols/proto
 }

 if [ "$(basename $(pwd))" != "agent" ]; then
-	die "Please go to directory of protocols before execute this shell"
+	die "Please go to root directory of agent before execute this shell"
 fi

 # Protocol buffer files required to generate golang/rust bindings.
--- a/src/agent/protocols/protos/agent.proto
+++ b/src/agent/protocols/protos/agent.proto
@@ -32,7 +32,6 @@ service AgentService {
 	rpc ExecProcess(ExecProcessRequest) returns (google.protobuf.Empty);
 	rpc SignalProcess(SignalProcessRequest) returns (google.protobuf.Empty);
 	rpc WaitProcess(WaitProcessRequest) returns (WaitProcessResponse); // wait & reap like waitpid(2)
-	rpc ListProcesses(ListProcessesRequest) returns (ListProcessesResponse);
 	rpc UpdateContainer(UpdateContainerRequest) returns (google.protobuf.Empty);
 	rpc StatsContainer(StatsContainerRequest) returns (StatsContainerResponse);
 	rpc PauseContainer(PauseContainerRequest) returns (google.protobuf.Empty);
@@ -126,18 +125,6 @@ message WaitProcessResponse {
 	int32 status = 1;
 }

-// ListProcessesRequest contains the options used to list running processes inside the container
-message ListProcessesRequest {
-	string container_id = 1;
-	string format = 2;
-	repeated string args = 3;
-}
-
-// ListProcessesResponse represents the list of running processes inside the container
-message ListProcessesResponse {
-	bytes process_list = 1;
-}
-
 message UpdateContainerRequest {
 	string container_id = 1;
 	LinuxResources resources = 2;
--- a/src/agent/protocols/protos/oci.proto
+++ b/src/agent/protocols/protos/oci.proto
@@ -441,7 +441,8 @@ message LinuxInterfacePriority {
 message LinuxSeccomp {
 	string DefaultAction = 1;
 	repeated string Architectures = 2;
-	repeated LinuxSyscall Syscalls = 3  [(gogoproto.nullable) = false];
+	repeated string Flags = 3;
+	repeated LinuxSyscall Syscalls = 4  [(gogoproto.nullable) = false];
 }

 message LinuxSeccompArg {
@@ -454,7 +455,10 @@ message LinuxSeccompArg {
 message LinuxSyscall {
 	repeated string Names = 1;
 	string Action = 2;
-	repeated LinuxSeccompArg Args = 3  [(gogoproto.nullable) = false];
+	oneof ErrnoRet {
+		uint32 errnoret = 3;
+	}
+	repeated LinuxSeccompArg Args = 4  [(gogoproto.nullable) = false];
 }

 message LinuxIntelRdt {
--- a/src/agent/protocols/protos/types.proto
+++ b/src/agent/protocols/protos/types.proto
@@ -29,10 +29,8 @@ message Interface {
 	uint64 mtu = 4;
 	string hwAddr = 5;

-	// pciAddr is the PCI address in the format  "bridgeAddr/deviceAddr".
-	// Here, bridgeAddr is the address at which the bridge is attached on the root bus,
-	// while deviceAddr is the address at which the network device is attached on the bridge.
-	string pciAddr = 6;
+	// PCI path for the device (see the pci::Path (Rust) or types.PciPath (Go) type for format details)
+	string pciPath = 6;

 	// Type defines the type of interface described by this structure.
 	// The expected values are the one that are defined by the netlink
--- a/src/agent/rustjail/Cargo.toml
+++ b/src/agent/rustjail/Cargo.toml
@@ -10,23 +10,27 @@ serde_json = "1.0.39"
 serde_derive = "1.0.91"
 oci = { path = "../oci" }
 protocols = { path ="../protocols" }
-caps = "0.3.0"
-nix = "0.17.0"
+caps = "0.5.0"
+nix = "0.21.0"
 scopeguard = "1.0.0"
-prctl = "1.0.0"
+capctl = "0.2.0"
 lazy_static = "1.3.0"
 libc = "0.2.58"
-protobuf = "2.8.1"
+protobuf = "=2.14.0"
 slog = "2.5.2"
 slog-scope = "4.1.2"
 scan_fmt = "0.2"
 regex = "1.1"
 path-absolutize = "1.2.0"
-dirs = "3.0.1"
 anyhow = "1.0.32"
-cgroups = { package = "cgroups-rs", version = "0.2.0" }
-tempfile = "3.1.0"
-epoll = "4.3.1"
+cgroups = { package = "cgroups-rs", version = "0.2.5" }
+rlimit = "0.5.3"
+
+tokio = { version = "1.2.0", features = ["sync", "io-util", "process", "time", "macros"] }
+futures = "0.3"
+async-trait = "0.1.31"
+inotify = "0.9.2"

 [dev-dependencies]
 serial_test = "0.5.0"
+tempfile = "3.1.0"
--- a/src/agent/rustjail/src/capabilities.rs
+++ b/src/agent/rustjail/src/capabilities.rs
@@ -9,97 +9,44 @@
 use crate::log_child;
 use crate::sync::write_count;
 use anyhow::{anyhow, Result};
-use caps::{self, CapSet, Capability, CapsHashSet};
+use caps::{self, runtime, CapSet, Capability, CapsHashSet};
 use oci::LinuxCapabilities;
-use std::collections::HashMap;
 use std::os::unix::io::RawFd;
-
-lazy_static! {
-    pub static ref CAPSMAP: HashMap<String, Capability> = {
-        let mut m = HashMap::new();
-        m.insert("CAP_CHOWN".to_string(), Capability::CAP_CHOWN);
-        m.insert("CAP_DAC_OVERRIDE".to_string(), Capability::CAP_DAC_OVERRIDE);
-        m.insert(
-            "CAP_DAC_READ_SEARCH".to_string(),
-            Capability::CAP_DAC_READ_SEARCH,
-        );
-        m.insert("CAP_FOWNER".to_string(), Capability::CAP_FOWNER);
-        m.insert("CAP_FSETID".to_string(), Capability::CAP_FSETID);
-        m.insert("CAP_KILL".to_string(), Capability::CAP_KILL);
-        m.insert("CAP_SETGID".to_string(), Capability::CAP_SETGID);
-        m.insert("CAP_SETUID".to_string(), Capability::CAP_SETUID);
-        m.insert("CAP_SETPCAP".to_string(), Capability::CAP_SETPCAP);
-        m.insert(
-            "CAP_LINUX_IMMUTABLE".to_string(),
-            Capability::CAP_LINUX_IMMUTABLE,
-        );
-        m.insert(
-            "CAP_NET_BIND_SERVICE".to_string(),
-            Capability::CAP_NET_BIND_SERVICE,
-        );
-        m.insert(
-            "CAP_NET_BROADCAST".to_string(),
-            Capability::CAP_NET_BROADCAST,
-        );
-        m.insert("CAP_NET_ADMIN".to_string(), Capability::CAP_NET_ADMIN);
-        m.insert("CAP_NET_RAW".to_string(), Capability::CAP_NET_RAW);
-        m.insert("CAP_IPC_LOCK".to_string(), Capability::CAP_IPC_LOCK);
-        m.insert("CAP_IPC_OWNER".to_string(), Capability::CAP_IPC_OWNER);
-        m.insert("CAP_SYS_MODULE".to_string(), Capability::CAP_SYS_MODULE);
-        m.insert("CAP_SYS_RAWIO".to_string(), Capability::CAP_SYS_RAWIO);
-        m.insert("CAP_SYS_CHROOT".to_string(), Capability::CAP_SYS_CHROOT);
-        m.insert("CAP_SYS_PTRACE".to_string(), Capability::CAP_SYS_PTRACE);
-        m.insert("CAP_SYS_PACCT".to_string(), Capability::CAP_SYS_PACCT);
-        m.insert("CAP_SYS_ADMIN".to_string(), Capability::CAP_SYS_ADMIN);
-        m.insert("CAP_SYS_BOOT".to_string(), Capability::CAP_SYS_BOOT);
-        m.insert("CAP_SYS_NICE".to_string(), Capability::CAP_SYS_NICE);
-        m.insert("CAP_SYS_RESOURCE".to_string(), Capability::CAP_SYS_RESOURCE);
-        m.insert("CAP_SYS_TIME".to_string(), Capability::CAP_SYS_TIME);
-        m.insert(
-            "CAP_SYS_TTY_CONFIG".to_string(),
-            Capability::CAP_SYS_TTY_CONFIG,
-        );
-        m.insert("CAP_MKNOD".to_string(), Capability::CAP_MKNOD);
-        m.insert("CAP_LEASE".to_string(), Capability::CAP_LEASE);
-        m.insert("CAP_AUDIT_WRITE".to_string(), Capability::CAP_AUDIT_WRITE);
-        m.insert("CAP_AUDIT_CONTROL".to_string(), Capability::CAP_AUDIT_WRITE);
-        m.insert("CAP_SETFCAP".to_string(), Capability::CAP_SETFCAP);
-        m.insert("CAP_MAC_OVERRIDE".to_string(), Capability::CAP_MAC_OVERRIDE);
-        m.insert("CAP_SYSLOG".to_string(), Capability::CAP_SYSLOG);
-        m.insert("CAP_WAKE_ALARM".to_string(), Capability::CAP_WAKE_ALARM);
-        m.insert(
-            "CAP_BLOCK_SUSPEND".to_string(),
-            Capability::CAP_BLOCK_SUSPEND,
-        );
-        m.insert("CAP_AUDIT_READ".to_string(), Capability::CAP_AUDIT_READ);
-        m
-    };
-}
+use std::str::FromStr;

 fn to_capshashset(cfd_log: RawFd, caps: &[String]) -> CapsHashSet {
    let mut r = CapsHashSet::new();

    for cap in caps.iter() {
-        let c = CAPSMAP.get(cap);
-
-        if c.is_none() {
-            log_child!(cfd_log, "{} is not a cap", cap);
-            continue;
-        }
-
-        r.insert(*c.unwrap());
+        match Capability::from_str(cap) {
+            Err(_) => {
+                log_child!(cfd_log, "{} is not a cap", cap);
+                continue;
+            }
+            Ok(c) => r.insert(c),
+        };
    }

    r
 }

+pub fn get_all_caps() -> CapsHashSet {
+    let mut caps_set =
+        runtime::procfs_all_supported(None).unwrap_or_else(|_| runtime::thread_all_supported());
+    if caps_set.is_empty() {
+        caps_set = caps::all();
+    }
+    caps_set
+}
+
 pub fn reset_effective() -> Result<()> {
-    caps::set(None, CapSet::Effective, caps::all()).map_err(|e| anyhow!(e.to_string()))?;
+    let all = get_all_caps();
+    caps::set(None, CapSet::Effective, &all).map_err(|e| anyhow!(e.to_string()))?;
    Ok(())
 }

 pub fn drop_privileges(cfd_log: RawFd, caps: &LinuxCapabilities) -> Result<()> {
-    let all = caps::all();
+    let all = get_all_caps();

    for c in all.difference(&to_capshashset(cfd_log, caps.bounding.as_ref())) {
        caps::drop(None, CapSet::Bounding, *c).map_err(|e| anyhow!(e.to_string()))?;
@@ -108,26 +55,26 @@ pub fn drop_privileges(cfd_log: RawFd, caps: &LinuxCapabilities) -> Result<()> {
    caps::set(
        None,
        CapSet::Effective,
-        to_capshashset(cfd_log, caps.effective.as_ref()),
+        &to_capshashset(cfd_log, caps.effective.as_ref()),
    )
    .map_err(|e| anyhow!(e.to_string()))?;
    caps::set(
        None,
        CapSet::Permitted,
-        to_capshashset(cfd_log, caps.permitted.as_ref()),
+        &to_capshashset(cfd_log, caps.permitted.as_ref()),
    )
    .map_err(|e| anyhow!(e.to_string()))?;
    caps::set(
        None,
        CapSet::Inheritable,
-        to_capshashset(cfd_log, caps.inheritable.as_ref()),
+        &to_capshashset(cfd_log, caps.inheritable.as_ref()),
    )
    .map_err(|e| anyhow!(e.to_string()))?;

    let _ = caps::set(
        None,
        CapSet::Ambient,
-        to_capshashset(cfd_log, caps.ambient.as_ref()),
+        &to_capshashset(cfd_log, caps.ambient.as_ref()),
    )
    .map_err(|_| log_child!(cfd_log, "failed to set ambient capability"));

--- a/src/agent/rustjail/src/cgroups/fs/mod.rs
+++ b/src/agent/rustjail/src/cgroups/fs/mod.rs
@@ -24,7 +24,7 @@ use anyhow::{anyhow, Context, Result};
 use libc::{self, pid_t};
 use nix::errno::Errno;
 use oci::{
-    LinuxBlockIO, LinuxCPU, LinuxDevice, LinuxDeviceCgroup, LinuxHugepageLimit, LinuxMemory,
+    LinuxBlockIo, LinuxCpu, LinuxDevice, LinuxDeviceCgroup, LinuxHugepageLimit, LinuxMemory,
    LinuxNetwork, LinuxPids, LinuxResources,
 };

@@ -37,6 +37,8 @@ use std::collections::HashMap;
 use std::fs;
 use std::path::Path;

+const GUEST_CPUS_PATH: &str = "/sys/devices/system/cpu/online";
+
 // Convenience macro to obtain the scope logger
 macro_rules! sl {
    () => {
@@ -60,7 +62,6 @@ pub struct Manager {
    pub cpath: String,
    #[serde(skip)]
    cgroup: cgroups::Cgroup,
-    relative_paths: HashMap<String, String>,
 }

 // set_resource is used to set reources by cgroup controller.
@@ -104,21 +105,21 @@ impl CgroupManager for Manager {

        // set block_io resources
        if let Some(blkio) = &r.block_io {
-            set_block_io_resources(&self.cgroup, blkio, res)?;
+            set_block_io_resources(&self.cgroup, blkio, res);
        }

        // set hugepages resources
        if !r.hugepage_limits.is_empty() {
-            set_hugepages_resources(&self.cgroup, &r.hugepage_limits, res)?;
+            set_hugepages_resources(&self.cgroup, &r.hugepage_limits, res);
        }

        // set network resources
        if let Some(network) = &r.network {
-            set_network_resources(&self.cgroup, network, res)?;
+            set_network_resources(&self.cgroup, network, res);
        }

        // set devices resources
-        set_devices_resources(&self.cgroup, &r.devices, res)?;
+        set_devices_resources(&self.cgroup, &r.devices, res);
        info!(sl!(), "resources after processed {:?}", res);

        // apply resources
@@ -199,7 +200,7 @@ fn set_network_resources(
    _cg: &cgroups::Cgroup,
    network: &LinuxNetwork,
    res: &mut cgroups::Resources,
-) -> Result<()> {
+) {
    info!(sl!(), "cgroup manager set network");

    // set classid
@@ -220,14 +221,13 @@ fn set_network_resources(
    }

    res.network.priorities = priorities;
-    Ok(())
 }

 fn set_devices_resources(
    _cg: &cgroups::Cgroup,
    device_resources: &[LinuxDeviceCgroup],
    res: &mut cgroups::Resources,
-) -> Result<()> {
+) {
    info!(sl!(), "cgroup manager set devices");
    let mut devices = vec![];

@@ -250,15 +250,13 @@ fn set_devices_resources(
    }

    res.devices.devices = devices;
-
-    Ok(())
 }

 fn set_hugepages_resources(
    _cg: &cgroups::Cgroup,
    hugepage_limits: &[LinuxHugepageLimit],
    res: &mut cgroups::Resources,
-) -> Result<()> {
+) {
    info!(sl!(), "cgroup manager set hugepage");
    let mut limits = vec![];

@@ -270,41 +268,25 @@ fn set_hugepages_resources(
        limits.push(hr);
    }
    res.hugepages.limits = limits;
-
-    Ok(())
 }

 fn set_block_io_resources(
-    cg: &cgroups::Cgroup,
-    blkio: &LinuxBlockIO,
+    _cg: &cgroups::Cgroup,
+    blkio: &LinuxBlockIo,
    res: &mut cgroups::Resources,
-) -> Result<()> {
+) {
    info!(sl!(), "cgroup manager set block io");

-    if cg.v2() {
-        res.blkio.weight = convert_blk_io_to_v2_value(blkio.weight);
-        res.blkio.leaf_weight = convert_blk_io_to_v2_value(blkio.leaf_weight);
-    } else {
-        res.blkio.weight = blkio.weight;
-        res.blkio.leaf_weight = blkio.leaf_weight;
-    }
+    res.blkio.weight = blkio.weight;
+    res.blkio.leaf_weight = blkio.leaf_weight;

    let mut blk_device_resources = vec![];
    for d in blkio.weight_device.iter() {
-        let (w, lw) = if cg.v2() {
-            (
-                convert_blk_io_to_v2_value(blkio.weight),
-                convert_blk_io_to_v2_value(blkio.leaf_weight),
-            )
-        } else {
-            (blkio.weight, blkio.leaf_weight)
-        };
-
        let dr = BlkIoDeviceResource {
            major: d.blk.major as u64,
            minor: d.blk.minor as u64,
-            weight: w,
-            leaf_weight: lw,
+            weight: blkio.weight,
+            leaf_weight: blkio.leaf_weight,
        };
        blk_device_resources.push(dr);
    }
@@ -318,11 +300,9 @@ fn set_block_io_resources(
        build_blk_io_device_throttle_resource(&blkio.throttle_read_iops_device);
    res.blkio.throttle_write_iops_device =
        build_blk_io_device_throttle_resource(&blkio.throttle_write_iops_device);
-
-    Ok(())
 }

-fn set_cpu_resources(cg: &cgroups::Cgroup, cpu: &LinuxCPU) -> Result<()> {
+fn set_cpu_resources(cg: &cgroups::Cgroup, cpu: &LinuxCpu) -> Result<()> {
    info!(sl!(), "cgroup manager set cpu");

    let cpuset_controller: &CpuSetController = cg.controller_of().unwrap();
@@ -369,14 +349,34 @@ fn set_memory_resources(cg: &cgroups::Cgroup, memory: &LinuxMemory, update: bool
        mem_controller.set_kmem_limit(-1)?;
    }

-    set_resource!(mem_controller, set_limit, memory, limit);
-    set_resource!(mem_controller, set_soft_limit, memory, reservation);
-    set_resource!(mem_controller, set_kmem_limit, memory, kernel);
-    set_resource!(mem_controller, set_tcp_limit, memory, kernel_tcp);
+    // If the memory limit is updated to -1 (unlimited memory), we
+    // should also set swap to -1.
+    let mut swap = memory.swap.unwrap_or(0);
+    if memory.limit == Some(-1) {
+        swap = -1;
+    }

-    if let Some(swap) = memory.swap {
-        // set memory swap
-        let swap = if cg.v2() {
+    if memory.limit.is_some() && swap != 0 {
+        let memstat = get_memory_stats(cg)
+            .into_option()
+            .ok_or_else(|| anyhow!("failed to get the cgroup memory stats"))?;
+        let memusage = memstat.get_usage();
+
+        // When updating the memory limit, the kernel checks the current memory limit
+        // against the new swap setting. If the current memory limit is larger than
+        // the new swap, set the limit first; otherwise the kernel would complain and
+        // refuse the update. Conversely, if the current memory limit is smaller than
+        // the new swap, set the swap first and then set the memory limit.
+        if swap == -1 || memusage.get_limit() < swap as u64 {
+            mem_controller.set_memswap_limit(swap)?;
+            set_resource!(mem_controller, set_limit, memory, limit);
+        } else {
+            set_resource!(mem_controller, set_limit, memory, limit);
+            mem_controller.set_memswap_limit(swap)?;
+        }
+    } else {
+        set_resource!(mem_controller, set_limit, memory, limit);
+        swap = if cg.v2() {
            convert_memory_swap_to_v2_value(swap, memory.limit.unwrap_or(0))?
        } else {
            swap
@@ -386,8 +386,12 @@ fn set_memory_resources(cg: &cgroups::Cgroup, memory: &LinuxMemory, update: bool
        }
    }

+    set_resource!(mem_controller, set_soft_limit, memory, reservation);
+    set_resource!(mem_controller, set_kmem_limit, memory, kernel);
+    set_resource!(mem_controller, set_tcp_limit, memory, kernel_tcp);
+
    if let Some(swappiness) = memory.swappiness {
-        if swappiness >= 0 && swappiness <= 100 {
+        if (0..=100).contains(&swappiness) {
            mem_controller.set_swappiness(swappiness as u64)?;
        } else {
            return Err(anyhow!(
@@ -509,63 +513,61 @@ lazy_static! {
    };

    pub static ref DEFAULT_ALLOWED_DEVICES: Vec<LinuxDeviceCgroup> = {
-        let mut v = Vec::new();
+        vec![
+            // all mknod to all char devices
+            LinuxDeviceCgroup {
+                allow: true,
+                r#type: "c".to_string(),
+                major: Some(WILDCARD),
+                minor: Some(WILDCARD),
+                access: "m".to_string(),
+            },

-        // all mknod to all char devices
-        v.push(LinuxDeviceCgroup {
-            allow: true,
-            r#type: "c".to_string(),
-            major: Some(WILDCARD),
-            minor: Some(WILDCARD),
-            access: "m".to_string(),
-        });
+            // all mknod to all block devices
+            LinuxDeviceCgroup {
+                allow: true,
+                r#type: "b".to_string(),
+                major: Some(WILDCARD),
+                minor: Some(WILDCARD),
+                access: "m".to_string(),
+            },

-        // all mknod to all block devices
-        v.push(LinuxDeviceCgroup {
-            allow: true,
-            r#type: "b".to_string(),
-            major: Some(WILDCARD),
-            minor: Some(WILDCARD),
-            access: "m".to_string(),
-        });
+            // all read/write/mknod to char device /dev/console
+            LinuxDeviceCgroup {
+                allow: true,
+                r#type: "c".to_string(),
+                major: Some(5),
+                minor: Some(1),
+                access: "rwm".to_string(),
+            },

-        // all read/write/mknod to char device /dev/console
-        v.push(LinuxDeviceCgroup {
-            allow: true,
-            r#type: "c".to_string(),
-            major: Some(5),
-            minor: Some(1),
-            access: "rwm".to_string(),
-        });
+            // all read/write/mknod to char device /dev/pts/<N>
+            LinuxDeviceCgroup {
+                allow: true,
+                r#type: "c".to_string(),
+                major: Some(136),
+                minor: Some(WILDCARD),
+                access: "rwm".to_string(),
+            },

-        // all read/write/mknod to char device /dev/pts/<N>
-        v.push(LinuxDeviceCgroup {
-            allow: true,
-            r#type: "c".to_string(),
-            major: Some(136),
-            minor: Some(WILDCARD),
-            access: "rwm".to_string(),
-        });
+            // all read/write/mknod to char device /dev/ptmx
+            LinuxDeviceCgroup {
+                allow: true,
+                r#type: "c".to_string(),
+                major: Some(5),
+                minor: Some(2),
+                access: "rwm".to_string(),
+            },

-        // all read/write/mknod to char device /dev/ptmx
-        v.push(LinuxDeviceCgroup {
-            allow: true,
-            r#type: "c".to_string(),
-            major: Some(5),
-            minor: Some(2),
-            access: "rwm".to_string(),
-        });
-
-        // all read/write/mknod to char device /dev/net/tun
-        v.push(LinuxDeviceCgroup {
-            allow: true,
-            r#type: "c".to_string(),
-            major: Some(10),
-            minor: Some(200),
-            access: "rwm".to_string(),
-        });
-
-        v
+            // all read/write/mknod to char device /dev/net/tun
+            LinuxDeviceCgroup {
+                allow: true,
+                r#type: "c".to_string(),
+                major: Some(10),
+                minor: Some(200),
+                access: "rwm".to_string(),
+            },
+        ]
    };
 }

@@ -946,38 +948,28 @@ pub fn get_mounts() -> Result<HashMap<String, String>> {
    Ok(m)
 }

-fn new_cgroup(
-    h: Box<dyn cgroups::Hierarchy>,
-    path: &str,
-    relative_paths: HashMap<String, String>,
-) -> Cgroup {
+fn new_cgroup(h: Box<dyn cgroups::Hierarchy>, path: &str) -> Cgroup {
    let valid_path = path.trim_start_matches('/').to_string();
-    cgroups::Cgroup::new_with_relative_paths(h, valid_path.as_str(), relative_paths)
+    cgroups::Cgroup::new(h, valid_path.as_str())
 }

 impl Manager {
    pub fn new(cpath: &str) -> Result<Self> {
        let mut m = HashMap::new();
-        let mut relative_paths = HashMap::new();

        let paths = get_paths()?;
        let mounts = get_mounts()?;

-        for (key, value) in &paths {
+        for key in paths.keys() {
            let mnt = mounts.get(key);

            if mnt.is_none() {
                continue;
            }

-            let p = if value == "/" {
-                format!("{}/{}", mnt.unwrap(), cpath)
-            } else {
-                format!("{}{}/{}", mnt.unwrap(), value, cpath)
-            };
+            let p = format!("{}/{}", mnt.unwrap(), cpath);

            m.insert(key.to_string(), p);
-            relative_paths.insert(key.to_string(), value.to_string());
        }

        Ok(Self {
@@ -985,13 +977,12 @@ impl Manager {
            mounts,
            // rels: paths,
            cpath: cpath.to_string(),
-            cgroup: new_cgroup(cgroups::hierarchies::auto(), cpath, relative_paths.clone()),
-            relative_paths,
+            cgroup: new_cgroup(cgroups::hierarchies::auto(), cpath),
        })
    }

    pub fn update_cpuset_path(&self, guest_cpuset: &str, container_cpuset: &str) -> Result<()> {
-        if guest_cpuset == "" {
+        if guest_cpuset.is_empty() {
            return Ok(());
        }
        info!(sl!(), "update_cpuset_path to: {}", guest_cpuset);
@@ -1031,11 +1022,7 @@ impl Manager {
                .unwrap()
                .trim_start_matches(root_path.to_str().unwrap());
            info!(sl!(), "updating cpuset for parent path {:?}", &r_path);
-            let cg = new_cgroup(
-                cgroups::hierarchies::auto(),
-                &r_path,
-                self.relative_paths.clone(),
-            );
+            let cg = new_cgroup(cgroups::hierarchies::auto(), &r_path);
            let cpuset_controller: &CpuSetController = cg.controller_of().unwrap();
            cpuset_controller.set_cpus(guest_cpuset)?;
        }
@@ -1064,23 +1051,10 @@ impl Manager {
    }
 }

+// get the guest's online cpus.
 pub fn get_guest_cpuset() -> Result<String> {
-    // for cgroup v2
-    if cgroups::hierarchies::is_cgroup2_unified_mode() {
-        let c = fs::read_to_string("/sys/fs/cgroup/cpuset.cpus.effective")?;
-        return Ok(c);
-    }
-
-    // for cgroup v1
-    let m = get_mounts()?;
-    if m.get("cpuset").is_none() {
-        warn!(sl!(), "no cpuset cgroup!");
-        return Err(nix::Error::Sys(Errno::ENOENT).into());
-    }
-
-    let p = format!("{}/cpuset.cpus", m.get("cpuset").unwrap());
-    let c = fs::read_to_string(p.as_str())?;
-    Ok(c)
+    let c = fs::read_to_string(GUEST_CPUS_PATH)?;
+    Ok(c.trim().to_string())
 }

 // Since the OCI spec is designed for cgroup v1, in some cases
@@ -1123,20 +1097,6 @@ fn convert_memory_swap_to_v2_value(memory_swap: i64, memory: i64) -> Result<i64>
    Ok(memory_swap - memory)
 }

-// Since the OCI spec is designed for cgroup v1, in some cases
-// there is need to convert from the cgroup v1 configuration to cgroup v2
-// the formula for BlkIOWeight is y = (1 + (x - 10) * 9999 / 990)
-// convert linearly from [10-1000] to [1-10000]
-// https://github.com/opencontainers/runc/blob/a5847db387ae28c0ca4ebe4beee1a76900c86414/libcontainer/cgroups/utils.go#L382
-fn convert_blk_io_to_v2_value(blk_io_weight: Option<u16>) -> Option<u16> {
-    let v = blk_io_weight.unwrap_or(0);
-    if v != 0 {
-        return None;
-    }
-
-    Some(1 + (v - 10) * 9999 / 990 as u16)
-}
-
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/src/agent/rustjail/src/cgroups/notifier.rs
+++ b/src/agent/rustjail/src/cgroups/notifier.rs
@@ -3,16 +3,18 @@
 // SPDX-License-Identifier: Apache-2.0
 //

-use anyhow::{anyhow, Result};
+use anyhow::{anyhow, Context, Result};
 use eventfd::{eventfd, EfdFlags};
 use nix::sys::eventfd;
-use nix::sys::inotify::{AddWatchFlags, InitFlags, Inotify};
 use std::fs::{self, File};
-use std::io::Read;
 use std::os::unix::io::{AsRawFd, FromRawFd};
-use std::path::{Path, PathBuf};
-use std::sync::mpsc::{self, Receiver};
-use std::thread;
+use std::path::Path;
+
+use crate::pipestream::PipeStream;
+use futures::StreamExt as _;
+use inotify::{Inotify, WatchMask};
+use tokio::io::AsyncReadExt;
+use tokio::sync::mpsc::{channel, Receiver};

 // Convenience macro to obtain the scope logger
 macro_rules! sl {
@@ -21,11 +23,11 @@ macro_rules! sl {
    };
 }

-pub fn notify_oom(cid: &str, cg_dir: String) -> Result<Receiver<String>> {
+pub async fn notify_oom(cid: &str, cg_dir: String) -> Result<Receiver<String>> {
    if cgroups::hierarchies::is_cgroup2_unified_mode() {
-        return notify_on_oom_v2(cid, cg_dir);
+        return notify_on_oom_v2(cid, cg_dir).await;
    }
-    notify_on_oom(cid, cg_dir)
+    notify_on_oom(cid, cg_dir).await
 }

 // get_value_from_cgroup parse cgroup file with `Flat keyed`
@@ -33,7 +35,7 @@ pub fn notify_oom(cid: &str, cg_dir: String) -> Result<Receiver<String>> {
 // Flat keyed file format:
 //   KEY0 VAL0\n
 //   KEY1 VAL1\n
-fn get_value_from_cgroup(path: &PathBuf, key: &str) -> Result<i64> {
+fn get_value_from_cgroup(path: &Path, key: &str) -> Result<i64> {
    let content = fs::read_to_string(path)?;
    info!(
        sl!(),
@@ -52,11 +54,11 @@ fn get_value_from_cgroup(path: &PathBuf, key: &str) -> Result<i64> {

 // notify_on_oom returns channel on which you can expect event about OOM,
 // if process died without OOM this channel will be closed.
-pub fn notify_on_oom_v2(containere_id: &str, cg_dir: String) -> Result<Receiver<String>> {
-    register_memory_event_v2(containere_id, cg_dir, "memory.events", "cgroup.events")
+pub async fn notify_on_oom_v2(containere_id: &str, cg_dir: String) -> Result<Receiver<String>> {
+    register_memory_event_v2(containere_id, cg_dir, "memory.events", "cgroup.events").await
 }

-fn register_memory_event_v2(
+async fn register_memory_event_v2(
    containere_id: &str,
    cg_dir: String,
    memory_event_name: &str,
@@ -73,49 +75,49 @@ fn register_memory_event_v2(
        "register_memory_event_v2 cgroup_event_control_path: {:?}", &cgroup_event_control_path
    );

-    let fd = Inotify::init(InitFlags::empty()).unwrap();
+    let mut inotify = Inotify::init().context("Failed to initialize inotify")?;

    // watching oom kill
-    let ev_fd = fd
-        .add_watch(&event_control_path, AddWatchFlags::IN_MODIFY)
-        .unwrap();
+    let ev_wd = inotify.add_watch(&event_control_path, WatchMask::MODIFY)?;
    // Because no `unix.IN_DELETE|unix.IN_DELETE_SELF` event for cgroup file system, so watching all process exited
-    let cg_fd = fd
-        .add_watch(&cgroup_event_control_path, AddWatchFlags::IN_MODIFY)
-        .unwrap();
-    info!(sl!(), "ev_fd: {:?}", ev_fd);
-    info!(sl!(), "cg_fd: {:?}", cg_fd);
+    let cg_wd = inotify.add_watch(&cgroup_event_control_path, WatchMask::MODIFY)?;

-    let (sender, receiver) = mpsc::channel();
+    info!(sl!(), "ev_wd: {:?}", ev_wd);
+    info!(sl!(), "cg_wd: {:?}", cg_wd);
+
+    let (sender, receiver) = channel(100);
    let containere_id = containere_id.to_string();

-    thread::spawn(move || {
-        loop {
-            let events = fd.read_events().unwrap();
+    tokio::spawn(async move {
+        let mut buffer = [0; 32];
+        let mut stream = inotify
+            .event_stream(&mut buffer)
+            .expect("create inotify event stream failed");
+
+        while let Some(event_or_error) = stream.next().await {
+            let event = event_or_error.unwrap();
            info!(
                sl!(),
-                "container[{}] get events for container: {:?}", &containere_id, &events
+                "container[{}] get event for container: {:?}", &containere_id, &event
            );
+            // info!("is1: {}", event.wd == wd1);
+            info!(sl!(), "event.wd: {:?}", event.wd);

-            for event in events {
-                if event.mask & AddWatchFlags::IN_MODIFY != AddWatchFlags::IN_MODIFY {
-                    continue;
+            if event.wd == ev_wd {
+                let oom = get_value_from_cgroup(&event_control_path, "oom_kill");
+                if oom.unwrap_or(0) > 0 {
+                    let _ = sender.send(containere_id.clone()).await.map_err(|e| {
+                        error!(sl!(), "send containere_id failed, error: {:?}", e);
+                    });
+                    return;
                }
-                info!(sl!(), "event.wd: {:?}", event.wd);
-
-                if event.wd == ev_fd {
-                    let oom = get_value_from_cgroup(&event_control_path, "oom_kill");
-                    if oom.unwrap_or(0) > 0 {
-                        sender.send(containere_id.clone()).unwrap();
-                        return;
-                    }
-                } else if event.wd == cg_fd {
-                    let pids = get_value_from_cgroup(&cgroup_event_control_path, "populated");
-                    if pids.unwrap_or(-1) == 0 {
-                        return;
-                    }
+            } else if event.wd == cg_wd {
+                let pids = get_value_from_cgroup(&cgroup_event_control_path, "populated");
+                if pids.unwrap_or(-1) == 0 {
+                    return;
                }
            }
+
            // When a cgroup is destroyed, an event is sent to eventfd.
            // So if the control path is gone, return instead of notifying.
            if !Path::new(&event_control_path).exists() {
@@ -129,17 +131,17 @@ fn register_memory_event_v2(

 // notify_on_oom returns channel on which you can expect event about OOM,
 // if process died without OOM this channel will be closed.
-fn notify_on_oom(cid: &str, dir: String) -> Result<Receiver<String>> {
-    if dir == "" {
+async fn notify_on_oom(cid: &str, dir: String) -> Result<Receiver<String>> {
+    if dir.is_empty() {
        return Err(anyhow!("memory controller missing"));
    }

-    register_memory_event(cid, dir, "memory.oom_control", "")
+    register_memory_event(cid, dir, "memory.oom_control", "").await
 }

 // level is one of "low", "medium", or "critical"
-fn notify_memory_pressure(cid: &str, dir: String, level: &str) -> Result<Receiver<String>> {
-    if dir == "" {
+async fn notify_memory_pressure(cid: &str, dir: String, level: &str) -> Result<Receiver<String>> {
+    if dir.is_empty() {
        return Err(anyhow!("memory controller missing"));
    }

@@ -147,10 +149,10 @@ fn notify_memory_pressure(cid: &str, dir: String, level: &str) -> Result<Receive
        return Err(anyhow!("invalid pressure level {}", level));
    }

-    register_memory_event(cid, dir, "memory.pressure_level", level)
+    register_memory_event(cid, dir, "memory.pressure_level", level).await
 }

-fn register_memory_event(
+async fn register_memory_event(
    cid: &str,
    cg_dir: String,
    event_name: &str,
@@ -163,7 +165,7 @@ fn register_memory_event(

    let event_control_path = Path::new(&cg_dir).join("cgroup.event_control");
    let data;
-    if arg == "" {
+    if arg.is_empty() {
        data = format!("{} {}", eventfd, event_file.as_raw_fd());
    } else {
        data = format!("{} {} {}", eventfd, event_file.as_raw_fd(), arg);
@@ -171,15 +173,16 @@ fn register_memory_event(

    fs::write(&event_control_path, data)?;

-    let mut eventfd_file = unsafe { File::from_raw_fd(eventfd) };
+    let mut eventfd_stream = unsafe { PipeStream::from_raw_fd(eventfd) };

-    let (sender, receiver) = mpsc::channel();
+    let (sender, receiver) = tokio::sync::mpsc::channel(100);
    let containere_id = cid.to_string();

-    thread::spawn(move || {
+    tokio::spawn(async move {
        loop {
-            let mut buf = [0; 8];
-            match eventfd_file.read(&mut buf) {
+            let sender = sender.clone();
+            let mut buf = [0u8; 8];
+            match eventfd_stream.read(&mut buf).await {
                Err(err) => {
                    warn!(sl!(), "failed to read from eventfd: {:?}", err);
                    return;
@@ -188,7 +191,10 @@ fn register_memory_event(
                    let content = fs::read_to_string(path.clone());
                    info!(
                        sl!(),
-                        "OOM event for container: {}, content: {:?}", &containere_id, content
+                        "cgroup event for container: {}, path: {:?}, content: {:?}",
+                        &containere_id,
+                        &path,
+                        content
                    );
                }
            }
@@ -198,7 +204,10 @@ fn register_memory_event(
            if !Path::new(&event_control_path).exists() {
                return;
            }
-            sender.send(containere_id.clone()).unwrap();
+
+            let _ = sender.send(containere_id.clone()).await.map_err(|e| {
+                error!(sl!(), "send containere_id failed, error: {:?}", e);
+            });
        }
    });

--- a/src/agent/rustjail/src/configs/device.rs
+++ b/src/agent/rustjail/src/configs/device.rs
@@ -1,56 +0,0 @@
-// Copyright (c) 2019 Ant Financial
-//
-// SPDX-License-Identifier: Apache-2.0
-//
-
-use libc::*;
-use serde;
-#[macro_use]
-use serde_derive;
-use serde_json;
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Device {
-    #[serde(default)]
-    r#type: char,
-    #[serde(default)]
-    path: String,
-    #[serde(default)]
-    major: i64,
-    #[serde(default)]
-    minor: i64,
-    #[serde(default)]
-    permissions: String,
-    #[serde(default)]
-    file_mode: mode_t,
-    #[serde(default)]
-    uid: i32,
-    #[serde(default)]
-    gid: i32,
-    #[serde(default)]
-    allow: bool,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct BlockIODevice {
-    #[serde(default)]
-    major: i64,
-    #[serde(default)]
-    minor: i64,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct WeightDevice {
-    block: BlockIODevice,
-    #[serde(default)]
-    weight: u16,
-    #[serde(default, rename = "leafWeight")]
-    leaf_weight: u16,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct ThrottleDevice {
-    block: BlockIODevice,
-    #[serde(default)]
-    rate: u64,
-}
--- a/src/agent/rustjail/src/configs/mod.rs
+++ b/src/agent/rustjail/src/configs/mod.rs
@@ -1,368 +0,0 @@
-// Copyright (c) 2019 Ant Financial
-//
-// SPDX-License-Identifier: Apache-2.0
-//
-
-use serde;
-#[macro_use]
-use serde_derive;
-use serde_json;
-
-use protocols::oci::State as OCIState;
-
-use std::collections::HashMap;
-use std::fmt;
-use std::path::PathBuf;
-use std::time::Duration;
-
-use nix::unistd;
-
-use self::device::{Device, ThrottleDevice, WeightDevice};
-use self::namespaces::Namespaces;
-use crate::specconv::CreateOpts;
-
-pub mod device;
-pub mod namespaces;
-pub mod validator;
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Rlimit {
-    #[serde(default)]
-    r#type: i32,
-    #[serde(default)]
-    hard: i32,
-    #[serde(default)]
-    soft: i32,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct IDMap {
-    #[serde(default)]
-    container_id: i32,
-    #[serde(default)]
-    host_id: i32,
-    #[serde(default)]
-    size: i32,
-}
-
-type Action = i32;
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Seccomp {
-    #[serde(default)]
-    default_action: Action,
-    #[serde(default)]
-    architectures: Vec<String>,
-    #[serde(default)]
-    syscalls: Vec<Syscall>,
-}
-
-type Operator = i32;
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Arg {
-    #[serde(default)]
-    index: u32,
-    #[serde(default)]
-    value: u64,
-    #[serde(default)]
-    value_two: u64,
-    #[serde(default)]
-    op: Operator,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Syscall {
-    #[serde(default, skip_serializing_if = "String::is_empty")]
-    name: String,
-    #[serde(default)]
-    action: Action,
-    #[serde(default, skip_serializing_if = "Vec::is_empty")]
-    args: Vec<Arg>,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Config<'a> {
-    #[serde(default)]
-    no_pivot_root: bool,
-    #[serde(default)]
-    parent_death_signal: i32,
-    #[serde(default)]
-    rootfs: String,
-    #[serde(default)]
-    readonlyfs: bool,
-    #[serde(default, rename = "rootPropagation")]
-    root_propagation: i32,
-    #[serde(default)]
-    mounts: Vec<Mount>,
-    #[serde(default)]
-    devices: Vec<Device>,
-    #[serde(default)]
-    mount_label: String,
-    #[serde(default)]
-    hostname: String,
-    #[serde(default)]
-    namespaces: Namespaces,
-    #[serde(default)]
-    capabilities: Option<Capabilities>,
-    #[serde(default)]
-    networks: Vec<Network>,
-    #[serde(default)]
-    routes: Vec<Route>,
-    #[serde(default)]
-    cgroups: Option<Cgroup<'a>>,
-    #[serde(default, skip_serializing_if = "String::is_empty")]
-    apparmor_profile: String,
-    #[serde(default, skip_serializing_if = "String::is_empty")]
-    process_label: String,
-    #[serde(default, skip_serializing_if = "Vec::is_empty")]
-    rlimits: Vec<Rlimit>,
-    #[serde(default)]
-    oom_score_adj: Option<i32>,
-    #[serde(default)]
-    uid_mappings: Vec<IDMap>,
-    #[serde(default)]
-    gid_mappings: Vec<IDMap>,
-    #[serde(default)]
-    mask_paths: Vec<String>,
-    #[serde(default)]
-    readonly_paths: Vec<String>,
-    #[serde(default)]
-    sysctl: HashMap<String, String>,
-    #[serde(default)]
-    seccomp: Option<Seccomp>,
-    #[serde(default)]
-    no_new_privileges: bool,
-    hooks: Option<Hooks>,
-    #[serde(default)]
-    version: String,
-    #[serde(default)]
-    labels: Vec<String>,
-    #[serde(default)]
-    no_new_keyring: bool,
-    #[serde(default)]
-    intel_rdt: Option<IntelRdt>,
-    #[serde(default)]
-    rootless_euid: bool,
-    #[serde(default)]
-    rootless_cgroups: bool,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Hooks {
-    prestart: Vec<Box<Hook>>,
-    poststart: Vec<Box<Hook>>,
-    poststop: Vec<Box<Hook>>,
-}
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Capabilities {
-    bounding: Vec<String>,
-    effective: Vec<String>,
-    inheritable: Vec<String>,
-    permitted: Vec<String>,
-    ambient: Vec<String>,
-}
-
-pub trait Hook {
-    fn run(&self, state: &OCIState) -> Result<()>;
-}
-
-pub struct FuncHook {
-    // run: fn(&OCIState) -> Result<()>,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Command {
-    #[serde(default)]
-    path: String,
-    #[serde(default)]
-    args: Vec<String>,
-    #[serde(default)]
-    env: Vec<String>,
-    #[serde(default)]
-    dir: String,
-    #[serde(default)]
-    timeout: Duration,
-}
-
-pub struct CommandHook {
-    command: Command,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Mount {
-    #[serde(default)]
-    source: String,
-    #[serde(default)]
-    destination: String,
-    #[serde(default)]
-    device: String,
-    #[serde(default)]
-    flags: i32,
-    #[serde(default)]
-    propagation_flags: Vec<i32>,
-    #[serde(default)]
-    data: String,
-    #[serde(default)]
-    relabel: String,
-    #[serde(default)]
-    extensions: i32,
-    #[serde(default)]
-    premount_cmds: Vec<Command>,
-    #[serde(default)]
-    postmount_cmds: Vec<Command>,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct HugepageLimit {
-    #[serde(default)]
-    page_size: String,
-    #[serde(default)]
-    limit: u64,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct IntelRdt {
-    #[serde(default, skip_serializing_if = "String::is_empty")]
-    l3_cache_schema: String,
-    #[serde(
-        default,
-        rename = "memBwSchema",
-        skip_serializing_if = "String::is_empty"
-    )]
-    mem_bw_schema: String,
-}
-
-pub type FreezerState = String;
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Cgroup<'a> {
-    #[serde(default, skip_serializing_if = "String::is_empty")]
-    name: String,
-    #[serde(default, skip_serializing_if = "String::is_empty")]
-    parent: String,
-    #[serde(default)]
-    path: String,
-    #[serde(default)]
-    scope_prefix: String,
-    paths: HashMap<String, String>,
-    resource: &'a Resources<'a>,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Resources<'a> {
-    #[serde(default)]
-    allow_all_devices: bool,
-    #[serde(default, skip_serializing_if = "Vec::is_empty")]
-    allowed_devices: Vec<&'a Device>,
-    #[serde(default, skip_serializing_if = "Vec::is_empty")]
-    denied_devices: Vec<&'a Device>,
-    #[serde(default)]
-    devices: Vec<&'a Device>,
-    #[serde(default)]
-    memory: i64,
-    #[serde(default)]
-    memory_reservation: i64,
-    #[serde(default)]
-    memory_swap: i64,
-    #[serde(default)]
-    kernel_memory: i64,
-    #[serde(default)]
-    kernel_memory_tcp: i64,
-    #[serde(default)]
-    cpu_shares: u64,
-    #[serde(default)]
-    cpu_quota: i64,
-    #[serde(default)]
-    cpu_period: u64,
-    #[serde(default)]
-    cpu_rt_quota: i64,
-    #[serde(default)]
-    cpu_rt_period: u64,
-    #[serde(default)]
-    cpuset_cpus: String,
-    #[serde(default)]
-    cpuset_mems: String,
-    #[serde(default)]
-    pids_limit: i64,
-    #[serde(default)]
-    blkio_weight: u64,
-    #[serde(default)]
-    blkio_leaf_weight: u64,
-    #[serde(default)]
-    blkio_weight_device: Vec<&'a WeightDevice>,
-    #[serde(default)]
-    blkio_throttle_read_bps_device: Vec<&'a ThrottleDevice>,
-    #[serde(default)]
-    blkio_throttle_write_bps_device: Vec<&'a ThrottleDevice>,
-    #[serde(default)]
-    blkio_throttle_read_iops_device: Vec<&'a ThrottleDevice>,
-    #[serde(default)]
-    blkio_throttle_write_iops_device: Vec<&'a ThrottleDevice>,
-    #[serde(default)]
-    freezer: FreezerState,
-    #[serde(default)]
-    hugetlb_limit: Vec<&'a HugepageLimit>,
-    #[serde(default)]
-    oom_kill_disable: bool,
-    #[serde(default)]
-    memory_swapiness: u64,
-    #[serde(default)]
-    net_prio_ifpriomap: Vec<&'a IfPrioMap>,
-    #[serde(default)]
-    net_cls_classid_u: u32,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Network {
-    #[serde(default)]
-    r#type: String,
-    #[serde(default)]
-    name: String,
-    #[serde(default)]
-    bridge: String,
-    #[serde(default)]
-    mac_address: String,
-    #[serde(default)]
-    address: String,
-    #[serde(default)]
-    gateway: String,
-    #[serde(default)]
-    ipv6_address: String,
-    #[serde(default)]
-    ipv6_gateway: String,
-    #[serde(default)]
-    mtu: i32,
-    #[serde(default)]
-    txqueuelen: i32,
-    #[serde(default)]
-    host_interface_name: String,
-    #[serde(default)]
-    hairpin_mode: bool,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Route {
-    #[serde(default)]
-    destination: String,
-    #[serde(default)]
-    source: String,
-    #[serde(default)]
-    gateway: String,
-    #[serde(default)]
-    interface_name: String,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct IfPrioMap {
-    #[serde(default)]
-    interface: String,
-    #[serde(default)]
-    priority: i32,
-}
-
-impl IfPrioMap {
-    fn cgroup_string(&self) -> String {
-        format!("{} {}", self.interface, self.priority)
-    }
-}
--- a/src/agent/rustjail/src/configs/namespaces.rs
+++ b/src/agent/rustjail/src/configs/namespaces.rs
@@ -1,46 +0,0 @@
-// Copyright (c) 2019 Ant Financial
-//
-// SPDX-License-Identifier: Apache-2.0
-//
-
-use serde;
-#[macro_use]
-use serde_derive;
-use serde_json;
-
-use std::collections::HashMap;
-#[macro_use]
-use lazy_static;
-
-pub type NamespaceType = String;
-pub type Namespaces = Vec<Namespace>;
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Namespace {
-    #[serde(default)]
-    r#type: NamespaceType,
-    #[serde(default)]
-    path: String,
-}
-
-pub const NEWNET: &'static str = "NEWNET";
-pub const NEWPID: &'static str = "NEWPID";
-pub const NEWNS: &'static str = "NEWNS";
-pub const NEWUTS: &'static str = "NEWUTS";
-pub const NEWUSER: &'static str = "NEWUSER";
-pub const NEWCGROUP: &'static str = "NEWCGROUP";
-pub const NEWIPC: &'static str = "NEWIPC";
-
-lazy_static! {
-    static ref TYPETONAME: HashMap<&'static str, &'static str> = {
-        let mut m = HashMap::new();
-        m.insert("pid", "pid");
-        m.insert("network", "net");
-        m.insert("mount", "mnt");
-        m.insert("user", "user");
-        m.insert("uts", "uts");
-        m.insert("ipc", "ipc");
-        m.insert("cgroup", "cgroup");
-        m
-    };
-}
--- a/src/agent/rustjail/src/configs/validator.rs
+++ b/src/agent/rustjail/src/configs/validator.rs
@@ -1,23 +0,0 @@
-// Copyright (c) 2019 Ant Financial
-//
-// SPDX-License-Identifier: Apache-2.0
-//
-
-use crate::configs::Config;
-use std::io::Result;
-
-pub trait Validator {
-    fn validate(&self, config: &Config) -> Result<()> {
-        Ok(())
-    }
-}
-
-pub struct ConfigValidator {}
-
-impl Validator for ConfigValidator {}
-
-impl ConfigValidator {
-    fn new() -> Self {
-        ConfigValidator {}
-    }
-}
--- a/src/agent/rustjail/src/container.rs
+++ b/src/agent/rustjail/src/container.rs
--- a/src/agent/rustjail/src/lib.rs
+++ b/src/agent/rustjail/src/lib.rs
@@ -23,7 +23,7 @@ extern crate caps;
 extern crate protocols;
 #[macro_use]
 extern crate scopeguard;
-extern crate prctl;
+extern crate capctl;
 #[macro_use]
 extern crate lazy_static;
 extern crate libc;
@@ -40,41 +40,24 @@ pub mod capabilities;
 pub mod cgroups;
 pub mod container;
 pub mod mount;
+pub mod pipestream;
 pub mod process;
-pub mod reaper;
 pub mod specconv;
 pub mod sync;
+pub mod sync_with_async;
+pub mod utils;
 pub mod validator;

-// pub mod factory;
-//pub mod configs;
-// pub mod devices;
-// pub mod init;
-// pub mod rootfs;
-// pub mod capabilities;
-// pub mod console;
-// pub mod stats;
-// pub mod user;
-//pub mod intelrdt;
-
-// construtc ociSpec from grpcSpec, which is needed for hook
-// execution. since hooks read config.json
-
-use oci::{
-    Box as ociBox, Hooks as ociHooks, Linux as ociLinux, LinuxCapabilities as ociLinuxCapabilities,
-    Mount as ociMount, POSIXRlimit as ociPOSIXRlimit, Process as ociProcess, Root as ociRoot,
-    Spec as ociSpec, User as ociUser,
-};
-use protocols::oci::{
-    Hooks as grpcHooks, Linux as grpcLinux, Mount as grpcMount, Process as grpcProcess,
-    Root as grpcRoot, Spec as grpcSpec,
-};
 use std::collections::HashMap;

-pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess {
+use protocols::oci as grpc;
+
+// Construct ociSpec from grpc::Spec, which is needed for hook
+// execution, since hooks read config.json.
+pub fn process_grpc_to_oci(p: &grpc::Process) -> oci::Process {
    let console_size = if p.ConsoleSize.is_some() {
        let c = p.ConsoleSize.as_ref().unwrap();
-        Some(ociBox {
+        Some(oci::Box {
            height: c.Height,
            width: c.Width,
        })
@@ -84,14 +67,14 @@ pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess {

    let user = if p.User.is_some() {
        let u = p.User.as_ref().unwrap();
-        ociUser {
+        oci::User {
            uid: u.UID,
            gid: u.GID,
            additional_gids: u.AdditionalGids.clone(),
            username: u.Username.clone(),
        }
    } else {
-        ociUser {
+        oci::User {
            uid: 0,
            gid: 0,
            additional_gids: vec![],
@@ -102,7 +85,7 @@ pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess {
    let capabilities = if p.Capabilities.is_some() {
        let cap = p.Capabilities.as_ref().unwrap();

-        Some(ociLinuxCapabilities {
+        Some(oci::LinuxCapabilities {
            bounding: cap.Bounding.clone().into_vec(),
            effective: cap.Effective.clone().into_vec(),
            inheritable: cap.Inheritable.clone().into_vec(),
@@ -116,7 +99,7 @@ pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess {
    let rlimits = {
        let mut r = Vec::new();
        for lm in p.Rlimits.iter() {
-            r.push(ociPOSIXRlimit {
+            r.push(oci::PosixRlimit {
                r#type: lm.Type.clone(),
                hard: lm.Hard,
                soft: lm.Soft,
@@ -125,7 +108,7 @@ pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess {
        r
    };

-    ociProcess {
+    oci::Process {
        terminal: p.Terminal,
        console_size,
        user,
@@ -141,15 +124,15 @@ pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess {
    }
 }

-fn root_grpc_to_oci(root: &grpcRoot) -> ociRoot {
-    ociRoot {
+fn root_grpc_to_oci(root: &grpc::Root) -> oci::Root {
+    oci::Root {
        path: root.Path.clone(),
        readonly: root.Readonly,
    }
 }

-fn mount_grpc_to_oci(m: &grpcMount) -> ociMount {
-    ociMount {
+fn mount_grpc_to_oci(m: &grpc::Mount) -> oci::Mount {
+    oci::Mount {
        destination: m.destination.clone(),
        r#type: m.field_type.clone(),
        source: m.source.clone(),
@@ -157,13 +140,12 @@ fn mount_grpc_to_oci(m: &grpcMount) -> ociMount {
    }
 }

-use oci::Hook as ociHook;
 use protocols::oci::Hook as grpcHook;

-fn hook_grpc_to_oci(h: &[grpcHook]) -> Vec<ociHook> {
+fn hook_grpc_to_oci(h: &[grpcHook]) -> Vec<oci::Hook> {
    let mut r = Vec::new();
    for e in h.iter() {
-        r.push(ociHook {
+        r.push(oci::Hook {
            path: e.Path.clone(),
            args: e.Args.clone().into_vec(),
            env: e.Env.clone().into_vec(),
@@ -173,39 +155,29 @@ fn hook_grpc_to_oci(h: &[grpcHook]) -> Vec<ociHook> {
    r
 }

-fn hooks_grpc_to_oci(h: &grpcHooks) -> ociHooks {
+fn hooks_grpc_to_oci(h: &grpc::Hooks) -> oci::Hooks {
    let prestart = hook_grpc_to_oci(h.Prestart.as_ref());

    let poststart = hook_grpc_to_oci(h.Poststart.as_ref());

    let poststop = hook_grpc_to_oci(h.Poststop.as_ref());

-    ociHooks {
+    oci::Hooks {
        prestart,
        poststart,
        poststop,
    }
 }

-use oci::{
-    LinuxDevice as ociLinuxDevice, LinuxIDMapping as ociLinuxIDMapping,
-    LinuxIntelRdt as ociLinuxIntelRdt, LinuxNamespace as ociLinuxNamespace,
-    LinuxResources as ociLinuxResources, LinuxSeccomp as ociLinuxSeccomp,
-};
-use protocols::oci::{
-    LinuxIDMapping as grpcLinuxIDMapping, LinuxResources as grpcLinuxResources,
-    LinuxSeccomp as grpcLinuxSeccomp,
-};
-
-fn idmap_grpc_to_oci(im: &grpcLinuxIDMapping) -> ociLinuxIDMapping {
-    ociLinuxIDMapping {
+fn idmap_grpc_to_oci(im: &grpc::LinuxIDMapping) -> oci::LinuxIdMapping {
+    oci::LinuxIdMapping {
        container_id: im.ContainerID,
        host_id: im.HostID,
        size: im.Size,
    }
 }

-fn idmaps_grpc_to_oci(ims: &[grpcLinuxIDMapping]) -> Vec<ociLinuxIDMapping> {
+fn idmaps_grpc_to_oci(ims: &[grpc::LinuxIDMapping]) -> Vec<oci::LinuxIdMapping> {
    let mut r = Vec::new();
    for im in ims.iter() {
        r.push(idmap_grpc_to_oci(im));
@@ -213,24 +185,13 @@ fn idmaps_grpc_to_oci(ims: &[grpcLinuxIDMapping]) -> Vec<ociLinuxIDMapping> {
    r
 }

-use oci::{
-    LinuxBlockIO as ociLinuxBlockIO, LinuxBlockIODevice as ociLinuxBlockIODevice,
-    LinuxCPU as ociLinuxCPU, LinuxDeviceCgroup as ociLinuxDeviceCgroup,
-    LinuxHugepageLimit as ociLinuxHugepageLimit,
-    LinuxInterfacePriority as ociLinuxInterfacePriority, LinuxMemory as ociLinuxMemory,
-    LinuxNetwork as ociLinuxNetwork, LinuxPids as ociLinuxPids,
-    LinuxThrottleDevice as ociLinuxThrottleDevice, LinuxWeightDevice as ociLinuxWeightDevice,
-};
-use protocols::oci::{
-    LinuxBlockIO as grpcLinuxBlockIO, LinuxThrottleDevice as grpcLinuxThrottleDevice,
-    LinuxWeightDevice as grpcLinuxWeightDevice,
-};
-
-fn throttle_devices_grpc_to_oci(tds: &[grpcLinuxThrottleDevice]) -> Vec<ociLinuxThrottleDevice> {
+fn throttle_devices_grpc_to_oci(
+    tds: &[grpc::LinuxThrottleDevice],
+) -> Vec<oci::LinuxThrottleDevice> {
    let mut r = Vec::new();
    for td in tds.iter() {
-        r.push(ociLinuxThrottleDevice {
-            blk: ociLinuxBlockIODevice {
+        r.push(oci::LinuxThrottleDevice {
+            blk: oci::LinuxBlockIoDevice {
                major: td.Major,
                minor: td.Minor,
            },
@@ -240,11 +201,11 @@ fn throttle_devices_grpc_to_oci(tds: &[grpcLinuxThrottleDevice]) -> Vec<ociLinux
    r
 }

-fn weight_devices_grpc_to_oci(wds: &[grpcLinuxWeightDevice]) -> Vec<ociLinuxWeightDevice> {
+fn weight_devices_grpc_to_oci(wds: &[grpc::LinuxWeightDevice]) -> Vec<oci::LinuxWeightDevice> {
    let mut r = Vec::new();
    for wd in wds.iter() {
-        r.push(ociLinuxWeightDevice {
-            blk: ociLinuxBlockIODevice {
+        r.push(oci::LinuxWeightDevice {
+            blk: oci::LinuxBlockIoDevice {
                major: wd.Major,
                minor: wd.Minor,
            },
@@ -255,7 +216,7 @@ fn weight_devices_grpc_to_oci(wds: &[grpcLinuxWeightDevice]) -> Vec<ociLinuxWeig
    r
 }

-fn blockio_grpc_to_oci(blk: &grpcLinuxBlockIO) -> ociLinuxBlockIO {
+fn blockio_grpc_to_oci(blk: &grpc::LinuxBlockIO) -> oci::LinuxBlockIo {
    let weight_device = weight_devices_grpc_to_oci(blk.WeightDevice.as_ref());
    let throttle_read_bps_device = throttle_devices_grpc_to_oci(blk.ThrottleReadBpsDevice.as_ref());
    let throttle_write_bps_device =
@@ -265,7 +226,7 @@ fn blockio_grpc_to_oci(blk: &grpcLinuxBlockIO) -> ociLinuxBlockIO {
    let throttle_write_iops_device =
        throttle_devices_grpc_to_oci(blk.ThrottleWriteIOPSDevice.as_ref());

-    ociLinuxBlockIO {
+    oci::LinuxBlockIo {
        weight: Some(blk.Weight as u16),
        leaf_weight: Some(blk.LeafWeight as u16),
        weight_device,
@@ -276,7 +237,7 @@ fn blockio_grpc_to_oci(blk: &grpcLinuxBlockIO) -> ociLinuxBlockIO {
    }
 }

-pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {
+pub fn resources_grpc_to_oci(res: &grpc::LinuxResources) -> oci::LinuxResources {
    let devices = {
        let mut d = Vec::new();
        for dev in res.Devices.iter() {
@@ -291,7 +252,7 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {
            } else {
                Some(dev.Minor)
            };
-            d.push(ociLinuxDeviceCgroup {
+            d.push(oci::LinuxDeviceCgroup {
                allow: dev.Allow,
                r#type: dev.Type.clone(),
                major,
@@ -304,7 +265,7 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {

    let memory = if res.Memory.is_some() {
        let mem = res.Memory.as_ref().unwrap();
-        Some(ociLinuxMemory {
+        Some(oci::LinuxMemory {
            limit: Some(mem.Limit),
            reservation: Some(mem.Reservation),
            swap: Some(mem.Swap),
@@ -319,7 +280,7 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {

    let cpu = if res.CPU.is_some() {
        let c = res.CPU.as_ref().unwrap();
-        Some(ociLinuxCPU {
+        Some(oci::LinuxCpu {
            shares: Some(c.Shares),
            quota: Some(c.Quota),
            period: Some(c.Period),
@@ -334,7 +295,7 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {

    let pids = if res.Pids.is_some() {
        let p = res.Pids.as_ref().unwrap();
-        Some(ociLinuxPids { limit: p.Limit })
+        Some(oci::LinuxPids { limit: p.Limit })
    } else {
        None
    };
@@ -350,7 +311,7 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {
    let hugepage_limits = {
        let mut r = Vec::new();
        for hl in res.HugepageLimits.iter() {
-            r.push(ociLinuxHugepageLimit {
+            r.push(oci::LinuxHugepageLimit {
                page_size: hl.Pagesize.clone(),
                limit: hl.Limit,
            });
@@ -363,14 +324,14 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {
        let priorities = {
            let mut r = Vec::new();
            for pr in net.Priorities.iter() {
-                r.push(ociLinuxInterfacePriority {
+                r.push(oci::LinuxInterfacePriority {
                    name: pr.Name.clone(),
                    priority: pr.Priority,
                });
            }
            r
        };
-        Some(ociLinuxNetwork {
+        Some(oci::LinuxNetwork {
            class_id: Some(net.ClassID),
            priorities,
        })
@@ -378,7 +339,7 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {
        None
    };

-    ociLinuxResources {
+    oci::LinuxResources {
        devices,
        memory,
        cpu,
@@ -390,17 +351,22 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {
    }
 }

-use oci::{LinuxSeccompArg as ociLinuxSeccompArg, LinuxSyscall as ociLinuxSyscall};
-
-fn seccomp_grpc_to_oci(sec: &grpcLinuxSeccomp) -> ociLinuxSeccomp {
+fn seccomp_grpc_to_oci(sec: &grpc::LinuxSeccomp) -> oci::LinuxSeccomp {
    let syscalls = {
        let mut r = Vec::new();

        for sys in sec.Syscalls.iter() {
            let mut args = Vec::new();
+            let errno_ret: u32;
+
+            if sys.has_errnoret() {
+                errno_ret = sys.get_errnoret();
+            } else {
+                errno_ret = libc::EPERM as u32;
+            }

            for arg in sys.Args.iter() {
-                args.push(ociLinuxSeccompArg {
+                args.push(oci::LinuxSeccompArg {
                    index: arg.Index as u32,
                    value: arg.Value,
                    value_two: arg.ValueTwo,
@@ -408,23 +374,25 @@ fn seccomp_grpc_to_oci(sec: &grpcLinuxSeccomp) -> ociLinuxSeccomp {
                });
            }

-            r.push(ociLinuxSyscall {
+            r.push(oci::LinuxSyscall {
                names: sys.Names.clone().into_vec(),
                action: sys.Action.clone(),
+                errno_ret,
                args,
            });
        }
        r
    };

-    ociLinuxSeccomp {
+    oci::LinuxSeccomp {
        default_action: sec.DefaultAction.clone(),
        architectures: sec.Architectures.clone().into_vec(),
+        flags: sec.Flags.clone().into_vec(),
        syscalls,
    }
 }

-fn linux_grpc_to_oci(l: &grpcLinux) -> ociLinux {
+fn linux_grpc_to_oci(l: &grpc::Linux) -> oci::Linux {
    let uid_mappings = idmaps_grpc_to_oci(l.UIDMappings.as_ref());
    let gid_mappings = idmaps_grpc_to_oci(l.GIDMappings.as_ref());

@@ -444,7 +412,7 @@ fn linux_grpc_to_oci(l: &grpcLinux) -> ociLinux {
        let mut r = Vec::new();

        for ns in l.Namespaces.iter() {
-            r.push(ociLinuxNamespace {
+            r.push(oci::LinuxNamespace {
                r#type: ns.Type.clone(),
                path: ns.Path.clone(),
            });
@@ -456,7 +424,7 @@ fn linux_grpc_to_oci(l: &grpcLinux) -> ociLinux {
        let mut r = Vec::new();

        for d in l.Devices.iter() {
-            r.push(ociLinuxDevice {
+            r.push(oci::LinuxDevice {
                path: d.Path.clone(),
                r#type: d.Type.clone(),
                major: d.Major,
@@ -472,14 +440,14 @@ fn linux_grpc_to_oci(l: &grpcLinux) -> ociLinux {
    let intel_rdt = if l.IntelRdt.is_some() {
        let rdt = l.IntelRdt.as_ref().unwrap();

-        Some(ociLinuxIntelRdt {
+        Some(oci::LinuxIntelRdt {
            l3_cache_schema: rdt.L3CacheSchema.clone(),
        })
    } else {
        None
    };

-    ociLinux {
+    oci::Linux {
        uid_mappings,
        gid_mappings,
        sysctl: l.Sysctl.clone(),
@@ -496,11 +464,11 @@ fn linux_grpc_to_oci(l: &grpcLinux) -> ociLinux {
    }
 }

-fn linux_oci_to_grpc(_l: &ociLinux) -> grpcLinux {
-    grpcLinux::default()
+fn linux_oci_to_grpc(_l: &oci::Linux) -> grpc::Linux {
+    grpc::Linux::default()
 }

-pub fn grpc_to_oci(grpc: &grpcSpec) -> ociSpec {
+pub fn grpc_to_oci(grpc: &grpc::Spec) -> oci::Spec {
    // process
    let process = if grpc.Process.is_some() {
        Some(process_grpc_to_oci(grpc.Process.as_ref().unwrap()))
@@ -538,7 +506,7 @@ pub fn grpc_to_oci(grpc: &grpcSpec) -> ociSpec {
        None
    };

-    ociSpec {
+    oci::Spec {
        version: grpc.Version.clone(),
        process,
        root,
--- a/src/agent/rustjail/src/mount.rs
+++ b/src/agent/rustjail/src/mount.rs
@@ -52,10 +52,12 @@ const MOUNTINFOFORMAT: &str = "{d} {d} {d}:{d} {} {} {} {}";
 const PROC_PATH: &str = "/proc";

 // since libc didn't defined this const for musl, thus redefined it here.
-#[cfg(all(target_os = "linux", target_env = "gnu"))]
+#[cfg(all(target_os = "linux", target_env = "gnu", not(target_arch = "s390x")))]
 const PROC_SUPER_MAGIC: libc::c_long = 0x00009fa0;
 #[cfg(all(target_os = "linux", target_env = "musl"))]
 const PROC_SUPER_MAGIC: libc::c_ulong = 0x00009fa0;
+#[cfg(all(target_os = "linux", target_env = "gnu", target_arch = "s390x"))]
+const PROC_SUPER_MAGIC: libc::c_uint = 0x00009fa0;

 lazy_static! {
    static ref PROPAGATION: HashMap<&'static str, MsFlags> = {
@@ -66,6 +68,8 @@ lazy_static! {
        m.insert("rprivate", MsFlags::MS_PRIVATE | MsFlags::MS_REC);
        m.insert("slave", MsFlags::MS_SLAVE);
        m.insert("rslave", MsFlags::MS_SLAVE | MsFlags::MS_REC);
+        m.insert("unbindable", MsFlags::MS_UNBINDABLE);
+        m.insert("runbindable", MsFlags::MS_UNBINDABLE | MsFlags::MS_REC);
        m
    };
    static ref OPTIONS: HashMap<&'static str, (bool, MsFlags)> = {
@@ -91,17 +95,6 @@ lazy_static! {
        m.insert("nodiratime", (false, MsFlags::MS_NODIRATIME));
        m.insert("bind", (false, MsFlags::MS_BIND));
        m.insert("rbind", (false, MsFlags::MS_BIND | MsFlags::MS_REC));
-        m.insert("unbindable", (false, MsFlags::MS_UNBINDABLE));
-        m.insert(
-            "runbindable",
-            (false, MsFlags::MS_UNBINDABLE | MsFlags::MS_REC),
-        );
-        m.insert("private", (false, MsFlags::MS_PRIVATE));
-        m.insert("rprivate", (false, MsFlags::MS_PRIVATE | MsFlags::MS_REC));
-        m.insert("shared", (false, MsFlags::MS_SHARED));
-        m.insert("rshared", (false, MsFlags::MS_SHARED | MsFlags::MS_REC));
-        m.insert("slave", (false, MsFlags::MS_SLAVE));
-        m.insert("rslave", (false, MsFlags::MS_SLAVE | MsFlags::MS_REC));
        m.insert("relatime", (false, MsFlags::MS_RELATIME));
        m.insert("norelatime", (true, MsFlags::MS_RELATIME));
        m.insert("strictatime", (false, MsFlags::MS_STRICTATIME));
@@ -190,7 +183,7 @@ pub fn init_rootfs(

    let mut bind_mount_dev = false;
    for m in &spec.mounts {
-        let (mut flags, data) = parse_mount(&m);
+        let (mut flags, pgflags, data) = parse_mount(&m);
        if !m.destination.starts_with('/') || m.destination.contains("..") {
            return Err(anyhow!(
                "the mount destination {} is invalid",
@@ -232,13 +225,15 @@ pub fn init_rootfs(
            // effective.
            // first check that we have non-default options required before attempting a
            // remount
-            if m.r#type == "bind" {
-                for o in &m.options {
-                    if let Some(fl) = PROPAGATION.get(o.as_str()) {
-                        let dest = format!("{}{}", &rootfs, &m.destination);
-                        mount(None::<&str>, dest.as_str(), None::<&str>, *fl, None::<&str>)?;
-                    }
-                }
+            if m.r#type == "bind" && !pgflags.is_empty() {
+                let dest = secure_join(rootfs, &m.destination);
+                mount(
+                    None::<&str>,
+                    dest.as_str(),
+                    None::<&str>,
+                    pgflags,
+                    None::<&str>,
+                )?;
            }
        }
    }
@@ -655,26 +650,73 @@ pub fn ms_move_root(rootfs: &str) -> Result<bool> {
    Ok(true)
 }

-fn parse_mount(m: &Mount) -> (MsFlags, String) {
+fn parse_mount(m: &Mount) -> (MsFlags, MsFlags, String) {
    let mut flags = MsFlags::empty();
+    let mut pgflags = MsFlags::empty();
    let mut data = Vec::new();

    for o in &m.options {
-        match OPTIONS.get(o.as_str()) {
-            Some(v) => {
-                let (clear, fl) = *v;
-                if clear {
-                    flags &= !fl;
-                } else {
-                    flags |= fl;
-                }
+        if let Some(v) = OPTIONS.get(o.as_str()) {
+            let (clear, fl) = *v;
+            if clear {
+                flags &= !fl;
+            } else {
+                flags |= fl;
            }
-
-            None => data.push(o.clone()),
+        } else if let Some(fl) = PROPAGATION.get(o.as_str()) {
+            pgflags |= *fl;
+        } else {
+            data.push(o.clone());
        }
    }

-    (flags, data.join(","))
+    (flags, pgflags, data.join(","))
+}
+
+// Joins `unsafe_path` onto `rootfs` and returns a path that is at or below `rootfs`.
+//
+// Parameters:
+//
+// - `rootfs` is the absolute path to the root of the container's root filesystem directory.
+// - `unsafe_path` is a path inside the container. It is unsafe because it may try to "escape"
+//   from the container's rootfs through "../" path elements or through symlinks.
+//
+// Every ".." element of `unsafe_path` is dropped. When a path element is a symlink, its target
+// is re-rooted under `rootfs` (absolute target) or resolved element by element and reset to
+// `rootfs` as soon as it resolves outside of it (relative target).
+//
+// Panics if the resulting path is not valid UTF-8.
+fn secure_join(rootfs: &str, unsafe_path: &str) -> String {
+    let mut path = PathBuf::from(format!("{}/", rootfs));
+    let unsafe_p = Path::new(&unsafe_path);
+
+    for it in unsafe_p.iter() {
+        let it_p = Path::new(&it);
+
+        // pushing a rooted element would replace `path` entirely, so ignore "/"
+        if it_p.has_root() {
+            continue;
+        };
+
+        path.push(it);
+        if let Ok(v) = path.read_link() {
+            if v.is_absolute() {
+                // Re-root the target under rootfs, keeping only its normal elements, so
+                // that a target such as "/../../etc" cannot climb out of rootfs.
+                path = PathBuf::from(format!("{}/", rootfs));
+                path.extend(
+                    v.components()
+                        .filter(|c| matches!(c, std::path::Component::Normal(_))),
+                );
+            } else {
+                path.pop();
+                for it in v.iter() {
+                    path.push(it);
+                    // canonicalize() fails for paths that do not exist; those are kept
+                    // as they are, exactly like paths that do not need resolving.
+                    if let Ok(resolved) = path.canonicalize() {
+                        path = resolved;
+                        if !path.starts_with(rootfs) {
+                            path = PathBuf::from(rootfs.to_string());
+                        }
+                    }
+                }
+            }
+        }
+        // skip any ".."
+        if path.ends_with("..") {
+            path.pop();
+        }
+    }
+
+    path.to_str().unwrap().to_string()
+}

 fn mount_from(
@@ -686,14 +728,14 @@ fn mount_from(
    _label: &str,
 ) -> Result<()> {
    let d = String::from(data);
-    let dest = format!("{}{}", rootfs, &m.destination);
+    let dest = secure_join(rootfs, &m.destination);

    let src = if m.r#type.as_str() == "bind" {
        let src = fs::canonicalize(m.source.as_str())?;
-        let dir = if src.is_file() {
-            Path::new(&dest).parent().unwrap()
-        } else {
+        let dir = if src.is_dir() {
            Path::new(&dest)
+        } else {
+            Path::new(&dest).parent().unwrap()
        };

        let _ = fs::create_dir_all(&dir).map_err(|e| {
@@ -706,7 +748,7 @@ fn mount_from(
        });

        // make sure file exists so we can bind over it
-        if src.is_file() {
+        if !src.is_dir() {
            let _ = OpenOptions::new().create(true).write(true).open(&dest);
        }
        src.to_str().unwrap().to_string()
@@ -874,7 +916,7 @@ pub fn finish_rootfs(cfd_log: RawFd, spec: &Spec) -> Result<()> {

    for m in spec.mounts.iter() {
        if m.destination == "/dev" {
-            let (flags, _) = parse_mount(m);
+            let (flags, _, _) = parse_mount(m);
            if flags.contains(MsFlags::MS_RDONLY) {
                mount(
                    Some("/dev"),
@@ -970,6 +1012,10 @@ fn readonly_path(path: &str) -> Result<()> {
 mod tests {
    use super::*;
    use crate::skip_if_not_root;
+    use std::fs::create_dir;
+    use std::fs::create_dir_all;
+    use std::fs::remove_dir_all;
+    use std::os::unix::fs;
    use std::os::unix::io::AsRawFd;
    use tempfile::tempdir;

@@ -999,7 +1045,7 @@ mod tests {
        );

        let rootfs = tempdir().unwrap();
-        let ret = fs::create_dir(rootfs.path().join("dev"));
+        let ret = create_dir(rootfs.path().join("dev"));
        assert!(ret.is_ok(), "Got: {:?}", ret);

        spec.root = Some(oci::Root {
@@ -1010,8 +1056,8 @@ mod tests {
        // there is no spec.mounts, but should pass
        let ret = init_rootfs(stdout_fd, &spec, &cpath, &mounts, true);
        assert!(ret.is_ok(), "Should pass. Got: {:?}", ret);
-        let _ = fs::remove_dir_all(rootfs.path().join("dev"));
-        let _ = fs::create_dir(rootfs.path().join("dev"));
+        let _ = remove_dir_all(rootfs.path().join("dev"));
+        let _ = create_dir(rootfs.path().join("dev"));

        // Adding bad mount point to spec.mounts
        spec.mounts.push(oci::Mount {
@@ -1029,8 +1075,8 @@ mod tests {
            ret
        );
        spec.mounts.pop();
-        let _ = fs::remove_dir_all(rootfs.path().join("dev"));
-        let _ = fs::create_dir(rootfs.path().join("dev"));
+        let _ = remove_dir_all(rootfs.path().join("dev"));
+        let _ = create_dir(rootfs.path().join("dev"));

        // mounting a cgroup
        spec.mounts.push(oci::Mount {
@@ -1043,8 +1089,8 @@ mod tests {
        let ret = init_rootfs(stdout_fd, &spec, &cpath, &mounts, true);
        assert!(ret.is_ok(), "Should pass. Got: {:?}", ret);
        spec.mounts.pop();
-        let _ = fs::remove_dir_all(rootfs.path().join("dev"));
-        let _ = fs::create_dir(rootfs.path().join("dev"));
+        let _ = remove_dir_all(rootfs.path().join("dev"));
+        let _ = create_dir(rootfs.path().join("dev"));

        // mounting /dev
        spec.mounts.push(oci::Mount {
@@ -1081,11 +1127,11 @@ mod tests {
        cgroup_mounts.insert("cpu".to_string(), "cpu".to_string());
        cgroup_mounts.insert("memory".to_string(), "memory".to_string());

-        let ret = fs::create_dir_all(tempdir.path().join("cgroups"));
+        let ret = create_dir_all(tempdir.path().join("cgroups"));
        assert!(ret.is_ok(), "Should pass. Got {:?}", ret);
-        let ret = fs::create_dir_all(tempdir.path().join("cpu"));
+        let ret = create_dir_all(tempdir.path().join("cpu"));
        assert!(ret.is_ok(), "Should pass. Got {:?}", ret);
-        let ret = fs::create_dir_all(tempdir.path().join("memory"));
+        let ret = create_dir_all(tempdir.path().join("memory"));
        assert!(ret.is_ok(), "Should pass. Got {:?}", ret);

        let ret = mount_cgroups(
@@ -1233,4 +1279,89 @@ mod tests {

        assert!(check_proc_mount(&mount).is_err());
    }
+
+    #[test]
+    fn test_secure_join() {
+        #[derive(Debug)]
+        struct TestData<'a> {
+            name: &'a str,
+            rootfs: &'a str,
+            unsafe_path: &'a str,
+            symlink_path: &'a str,
+            result: &'a str,
+        }
+
+        // create a temporary directory to simulate a container rootfs containing symlinks
+        let rootfs_dir = tempdir().expect("failed to create tmpdir");
+        let rootfs_path = rootfs_dir.path().to_str().unwrap();
+
+        let tests = &[
+            TestData {
+                name: "rootfs_not_exist",
+                rootfs: "/home/rootfs",
+                unsafe_path: "a/b/c",
+                symlink_path: "",
+                result: "/home/rootfs/a/b/c",
+            },
+            TestData {
+                name: "relative_path",
+                rootfs: "/home/rootfs",
+                unsafe_path: "../../../a/b/c",
+                symlink_path: "",
+                result: "/home/rootfs/a/b/c",
+            },
+            TestData {
+                name: "skip any ..",
+                rootfs: "/home/rootfs",
+                unsafe_path: "../../../a/../../b/../../c",
+                symlink_path: "",
+                result: "/home/rootfs/a/b/c",
+            },
+            TestData {
+                name: "rootfs is null",
+                rootfs: "",
+                unsafe_path: "",
+                symlink_path: "",
+                result: "/",
+            },
+            TestData {
+                name: "relative softlink beyond container rootfs",
+                rootfs: rootfs_path,
+                unsafe_path: "1",
+                symlink_path: "../../../",
+                result: rootfs_path,
+            },
+            TestData {
+                name: "abs softlink points to the non-exist directory",
+                rootfs: rootfs_path,
+                unsafe_path: "2",
+                symlink_path: "/dddd",
+                result: &format!("{}/dddd", rootfs_path).as_str().to_owned(),
+            },
+            TestData {
+                name: "abs softlink points to the root",
+                rootfs: rootfs_path,
+                unsafe_path: "3",
+                symlink_path: "/",
+                result: &format!("{}/", rootfs_path).as_str().to_owned(),
+            },
+        ];
+
+        for (i, t) in tests.iter().enumerate() {
+            // Create a string containing details of the test
+            let msg = format!("test[{}]: {:?}", i, t);
+
+            // if a symlink target is given, create the symlink inside the rootfs first
+            if t.symlink_path != "" {
+                fs::symlink(t.symlink_path, format!("{}/{}", t.rootfs, t.unsafe_path)).unwrap();
+            }
+            let result = secure_join(t.rootfs, t.unsafe_path);
+
+            // Update the test details string with the results of the call
+            let msg = format!("{}, result: {:?}", msg, result);
+
+            // Perform the checks
+            assert!(result == t.result, "{}", msg);
+        }
+    }
 }
--- a/src/agent/rustjail/src/pipestream.rs
+++ b/src/agent/rustjail/src/pipestream.rs
@@ -0,0 +1,203 @@
+// Copyright (c) 2020 Ant Group
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+//! Async support for pipes and other objects backed by a file descriptor
+
+use nix::unistd;
+use std::{
+    fmt, io,
+    io::{Read, Result, Write},
+    mem,
+    os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, RawFd},
+    pin::Pin,
+    task::{Context, Poll},
+};
+
+use futures::ready;
+use tokio::io::{unix::AsyncFd, AsyncRead, AsyncWrite, ReadBuf};
+
+// Puts `fd` into non-blocking mode.
+//
+// F_SETFL replaces the whole set of file status flags, so the current flags are read
+// with F_GETFL first and O_NONBLOCK is OR-ed into them; passing O_NONBLOCK alone would
+// silently clear flags such as O_APPEND. Failures are not reported to the caller.
+fn set_nonblocking(fd: RawFd) {
+    unsafe {
+        let flags = libc::fcntl(fd, libc::F_GETFL);
+        // F_GETFL returns -1 on failure; leave the descriptor untouched in that case.
+        if flags != -1 {
+            libc::fcntl(fd, libc::F_SETFL, flags | libc::O_NONBLOCK);
+        }
+    }
+}
+
+// Owner of a raw file descriptor; the descriptor is closed when the value is dropped.
+struct StreamFd(RawFd);
+
+impl io::Read for &StreamFd {
+    // Reads straight from the descriptor, converting the nix errno into an io::Error.
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        unistd::read(self.0, buf).map_err(|err| err.as_errno().unwrap().into())
+    }
+}
+
+impl io::Write for &StreamFd {
+    // Writes straight to the descriptor, converting the nix errno into an io::Error.
+    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+        unistd::write(self.0, buf).map_err(|err| err.as_errno().unwrap().into())
+    }
+
+    // write() goes directly to the descriptor, so there is nothing to flush.
+    fn flush(&mut self) -> io::Result<()> {
+        Ok(())
+    }
+}
+
+impl StreamFd {
+    // Closes the descriptor, converting the nix errno into an io::Error.
+    fn close(&mut self) -> io::Result<()> {
+        unistd::close(self.0).map_err(|err| err.as_errno().unwrap().into())
+    }
+}
+
+impl Drop for StreamFd {
+    fn drop(&mut self) {
+        // An error from close cannot be returned from drop, so it is discarded.
+        let _ = self.close();
+    }
+}
+
+impl AsRawFd for StreamFd {
+    fn as_raw_fd(&self) -> RawFd {
+        self.0
+    }
+}
+
+// Async wrapper around a raw file descriptor, built on tokio's `AsyncFd`.
+pub struct PipeStream(AsyncFd<StreamFd>);
+
+impl PipeStream {
+    // Switches `fd` to non-blocking mode and wraps it in an `AsyncFd`.
+    //
+    // The descriptor is handed to a `StreamFd`, which closes it on drop.
+    // Returns the error from `AsyncFd::new` if the wrapping fails.
+    pub fn new(fd: RawFd) -> Result<Self> {
+        set_nonblocking(fd);
+        Ok(Self(AsyncFd::new(StreamFd(fd))?))
+    }
+
+    // Same as `new`, but goes through `from_raw_fd`, which unwraps the result
+    // of `new` and therefore panics instead of returning an error.
+    pub fn from_fd(fd: RawFd) -> Self {
+        unsafe { Self::from_raw_fd(fd) }
+    }
+}
+
+impl AsRawFd for PipeStream {
+    // Returns the descriptor number by delegating to the inner `AsyncFd`.
+    fn as_raw_fd(&self) -> RawFd {
+        self.0.as_raw_fd()
+    }
+}
+
+impl IntoRawFd for PipeStream {
+    // Consumes the stream and returns its descriptor without closing it.
+    fn into_raw_fd(self) -> RawFd {
+        // into_inner() deregisters the descriptor from the reactor. Forgetting the
+        // whole PipeStream would skip that step and leave a stale registration
+        // behind for a descriptor this stream no longer owns.
+        let stream = self.0.into_inner();
+        let fd = stream.0;
+        // Skip StreamFd's Drop, which would close the descriptor being handed out.
+        mem::forget(stream);
+        fd
+    }
+}
+
+impl FromRawFd for PipeStream {
+    // Wraps `fd` via `PipeStream::new`; panics if `new` returns an error.
+    unsafe fn from_raw_fd(fd: RawFd) -> Self {
+        Self::new(fd).unwrap()
+    }
+}
+
+impl fmt::Debug for PipeStream {
+    // Formats the stream as `PipeStream(<fd number>)`.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let fd = self.as_raw_fd();
+        write!(f, "PipeStream({})", fd)
+    }
+}
+
+impl AsyncRead for PipeStream {
+    // Reads from the descriptor into the unfilled part of `buf` once the
+    // descriptor is reported readable; `ready!` returns Pending until then.
+    fn poll_read(
+        self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+        buf: &mut ReadBuf<'_>,
+    ) -> Poll<Result<()>> {
+        let b;
+        // View the unfilled, possibly uninitialized region of `buf` as a plain
+        // byte slice so that `read` can write into it.
+        unsafe {
+            b = &mut *(buf.unfilled_mut() as *mut [mem::MaybeUninit<u8>] as *mut [u8]);
+        };
+
+        loop {
+            let mut guard = ready!(self.0.poll_read_ready(cx))?;
+
+            match guard.try_io(|inner| inner.get_ref().read(b)) {
+                Ok(Ok(n)) => {
+                    // `read` wrote `n` bytes: mark them initialized, then filled.
+                    unsafe {
+                        buf.assume_init(n);
+                    }
+                    buf.advance(n);
+                    return Ok(()).into();
+                }
+                Ok(Err(e)) => return Err(e).into(),
+                // The read would have blocked; poll for readiness again.
+                Err(_would_block) => {
+                    continue;
+                }
+            }
+        }
+    }
+}
+
+impl AsyncWrite for PipeStream {
+    // Writes `buf` to the descriptor once it is reported writable and returns
+    // the result of that single write; `ready!` returns Pending until then.
+    fn poll_write(
+        self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+        buf: &[u8],
+    ) -> Poll<io::Result<usize>> {
+        loop {
+            let mut guard = ready!(self.0.poll_write_ready(cx))?;
+
+            match guard.try_io(|inner| inner.get_ref().write(buf)) {
+                Ok(result) => return Poll::Ready(result),
+                // The write would have blocked; poll for readiness again.
+                Err(_would_block) => continue,
+            }
+        }
+    }
+
+    // Always ready: writes go directly to the descriptor.
+    fn poll_flush(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<io::Result<()>> {
+        Poll::Ready(Ok(()))
+    }
+
+    fn poll_shutdown(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<io::Result<()>> {
+        // It is important that shutdown does nothing.
+        // The only correct way to shut a pipe down is to drop it.
+        // Closing the fd here would make this PipeStream conflict with any other
+        // PipeStream that ends up with the same fd number while both are registered.
+        Poll::Ready(Ok(()))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use nix::fcntl::OFlag;
+    use tokio::io::{AsyncReadExt, AsyncWriteExt};
+
+    #[tokio::test]
+    // Shutdown should never close the inner fd.
+    async fn test_pipestream_shutdown() {
+        let (_, wfd1) = unistd::pipe2(OFlag::O_CLOEXEC).unwrap();
+        let mut writer1 = PipeStream::new(wfd1).unwrap();
+
+        // If shutdown closed the fd, the fd number would be reused below
+        // and the test would fail.
+        let _ = writer1.shutdown().await.unwrap();
+
+        // let _ = unistd::close(wfd1);
+
+        let (rfd2, wfd2) = unistd::pipe2(OFlag::O_CLOEXEC).unwrap(); // reuse fd number, rfd2 == wfd1
+
+        let mut reader2 = PipeStream::new(rfd2).unwrap();
+        let mut writer2 = PipeStream::new(wfd2).unwrap();
+
+        // Dropping writer1 deregisters its fd; if reader2 shared that fd number,
+        // reader2 would be deregistered from epoll as well.
+        drop(writer1);
+
+        let _ = writer2.write(b"1").await;
+
+        let mut content = vec![0u8; 1];
+        // This read blocks forever if shutdown closed the fd.
+        let _ = reader2.read(&mut content).await;
+    }
+}
--- a/src/agent/rustjail/src/process.rs
+++ b/src/agent/rustjail/src/process.rs
@@ -6,7 +6,7 @@
 use libc::pid_t;
 use std::fs::File;
 use std::os::unix::io::RawFd;
-use std::sync::mpsc::Sender;
+use tokio::sync::mpsc::Sender;

 use nix::fcntl::{fcntl, FcntlArg, OFlag};
 use nix::sys::signal::{self, Signal};
@@ -14,18 +14,38 @@ use nix::sys::wait::{self, WaitStatus};
 use nix::unistd::{self, Pid};
 use nix::Result;

-use crate::reaper::Epoller;
 use oci::Process as OCIProcess;
 use slog::Logger;

+use crate::pipestream::PipeStream;
+use std::collections::HashMap;
+use std::sync::Arc;
+use tokio::io::{split, ReadHalf, WriteHalf};
+use tokio::sync::Mutex;
+use tokio::sync::Notify;
+
+#[derive(Debug, PartialEq, Eq, Hash, Clone)]
+pub enum StreamType {
+    Stdin, // resolved to `Process::stdin`
+    Stdout, // resolved to `Process::stdout`
+    Stderr, // resolved to `Process::stderr`
+    TermMaster, // resolved to `Process::term_master`
+    ParentStdin, // resolved to `Process::parent_stdin`
+    ParentStdout, // resolved to `Process::parent_stdout`
+    ParentStderr, // resolved to `Process::parent_stderr`
+}
+
+type Reader = Arc<Mutex<ReadHalf<PipeStream>>>;
+type Writer = Arc<Mutex<WriteHalf<PipeStream>>>;
+
 #[derive(Debug)]
 pub struct Process {
    pub exec_id: String,
    pub stdin: Option<RawFd>,
    pub stdout: Option<RawFd>,
    pub stderr: Option<RawFd>,
-    pub exit_pipe_r: Option<RawFd>,
-    pub exit_pipe_w: Option<RawFd>,
+    pub exit_tx: Option<tokio::sync::watch::Sender<bool>>,
+    pub exit_rx: Option<tokio::sync::watch::Receiver<bool>>,
    pub extra_files: Vec<File>,
    pub term_master: Option<RawFd>,
    pub tty: bool,
@@ -41,7 +61,10 @@ pub struct Process {
    pub exit_watchers: Vec<Sender<i32>>,
    pub oci: OCIProcess,
    pub logger: Logger,
-    pub epoller: Option<Epoller>,
+    pub term_exit_notifier: Arc<Notify>,
+
+    readers: HashMap<StreamType, Reader>,
+    writers: HashMap<StreamType, Writer>,
 }

 pub trait ProcessOperations {
@@ -73,14 +96,15 @@ impl Process {
        pipe_size: i32,
    ) -> Result<Self> {
        let logger = logger.new(o!("subsystem" => "process"));
+        let (exit_tx, exit_rx) = tokio::sync::watch::channel(false);

        let mut p = Process {
            exec_id: String::from(id),
            stdin: None,
            stdout: None,
            stderr: None,
-            exit_pipe_w: None,
-            exit_pipe_r: None,
+            exit_tx: Some(exit_tx),
+            exit_rx: Some(exit_rx),
            extra_files: Vec::new(),
            tty: ocip.terminal,
            term_master: None,
@@ -93,7 +117,9 @@ impl Process {
            exit_watchers: Vec::new(),
            oci: ocip.clone(),
            logger: logger.clone(),
-            epoller: None,
+            term_exit_notifier: Arc::new(Notify::new()),
+            readers: HashMap::new(),
+            writers: HashMap::new(),
        };

        info!(logger, "before create console socket!");
@@ -116,27 +142,58 @@ impl Process {
        Ok(p)
    }

-    pub fn close_epoller(&mut self) {
-        if let Some(epoller) = self.epoller.take() {
-            epoller.close();
+    pub fn notify_term_close(&mut self) {
+        // Signal the terminal-exit notifier directly through the shared handle.
+        self.term_exit_notifier.notify_one();
+    }
+
+    fn get_fd(&self, stream_type: &StreamType) -> Option<RawFd> {
+        match stream_type {
+            StreamType::Stdin => self.stdin,
+            StreamType::Stdout => self.stdout,
+            StreamType::Stderr => self.stderr,
+            StreamType::TermMaster => self.term_master,
+            StreamType::ParentStdin => self.parent_stdin,
+            StreamType::ParentStdout => self.parent_stdout,
+            StreamType::ParentStderr => self.parent_stderr,
        }
    }

-    pub fn create_epoller(&mut self) -> anyhow::Result<()> {
-        match self.term_master {
-            Some(term_master) => {
-                // add epoller to process
-                let epoller = Epoller::new(&self.logger, term_master)?;
-                self.epoller = Some(epoller)
-            }
-            None => {
-                info!(
-                    self.logger,
-                    "try to add epoller to a process without a term master fd"
-                );
-            }
+    // Wrap the fd behind `stream_type` in a PipeStream, split it, and cache both
+    // halves for later lookups. Returns None when no fd is set for that stream.
+    fn get_stream_and_store(&mut self, stream_type: StreamType) -> Option<(Reader, Writer)> {
+        let raw_fd = self.get_fd(&stream_type)?;
+        let (read_half, write_half) = split(PipeStream::from_fd(raw_fd));
+
+        let shared_reader: Reader = Arc::new(Mutex::new(read_half));
+        let shared_writer: Writer = Arc::new(Mutex::new(write_half));
+        self.readers.insert(stream_type.clone(), Arc::clone(&shared_reader));
+        self.writers.insert(stream_type, Arc::clone(&shared_writer));
+
+        Some((shared_reader, shared_writer))
+    }
+
+    pub fn get_reader(&mut self, stream_type: StreamType) -> Option<Reader> {
+        if let Some(reader) = self.readers.get(&stream_type) {
+            return Some(reader.clone());
        }
-        Ok(())
+
+        let (reader, _) = self.get_stream_and_store(stream_type)?;
+        Some(reader)
+    }
+
+    // Return the cached writer for `stream_type`, creating the stream on first use.
+    pub fn get_writer(&mut self, stream_type: StreamType) -> Option<Writer> {
+        let cached = self.writers.get(&stream_type).cloned();
+        if cached.is_some() {
+            return cached;
+        }
+        self.get_stream_and_store(stream_type).map(|pair| pair.1)
+    }
+
+    pub fn close_stream(&mut self, stream_type: StreamType) {
+        let _ = self.readers.remove(&stream_type);
+        let _ = self.writers.remove(&stream_type);
    }
 }

@@ -195,7 +252,6 @@ mod tests {

        // -1 by default
        assert_eq!(process.pid, -1);
-        assert!(process.wait().is_err());
        // signal to every process in the process
        // group of the calling process.
        process.pid = 0;
--- a/src/agent/rustjail/src/reaper.rs
+++ b/src/agent/rustjail/src/reaper.rs
@@ -1,150 +0,0 @@
-// Copyright (c) 2020 Ant Group
-//
-// SPDX-License-Identifier: Apache-2.0
-//
-
-use nix::fcntl::OFlag;
-use slog::Logger;
-
-use nix::unistd;
-use std::os::unix::io::RawFd;
-
-use anyhow::Result;
-
-const MAX_EVENTS: usize = 2;
-
-#[derive(Debug, Clone)]
-pub struct Epoller {
-    logger: Logger,
-    epoll_fd: RawFd,
-    // rfd and wfd are a pipe's files two ends, this pipe is
-    // used to sync between the readStdio and the process exits.
-    // once the process exits, it will close one end to notify
-    // the readStdio that the process has exited and it should not
-    // wait on the process's terminal which has been inherited
-    // by it's children and hasn't exited.
-    rfd: RawFd,
-    wfd: RawFd,
-}
-
-impl Epoller {
-    pub fn new(logger: &Logger, fd: RawFd) -> Result<Epoller> {
-        let epoll_fd = epoll::create(true)?;
-        let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC)?;
-
-        let mut epoller = Self {
-            logger: logger.clone(),
-            epoll_fd,
-            rfd,
-            wfd,
-        };
-
-        epoller.add(rfd)?;
-        epoller.add(fd)?;
-
-        Ok(epoller)
-    }
-
-    pub fn close_wfd(&self) {
-        let _ = unistd::close(self.wfd);
-    }
-
-    pub fn close(&self) {
-        let _ = unistd::close(self.rfd);
-        let _ = unistd::close(self.wfd);
-        let _ = unistd::close(self.epoll_fd);
-    }
-
-    fn add(&mut self, fd: RawFd) -> Result<()> {
-        info!(self.logger, "Epoller add fd {}", fd);
-        // add creates an epoll which is used to monitor the process's pty's master and
-        // one end of its exit notify pipe. Those files will be registered with level-triggered
-        // notification.
-        epoll::ctl(
-            self.epoll_fd,
-            epoll::ControlOptions::EPOLL_CTL_ADD,
-            fd,
-            epoll::Event::new(
-                epoll::Events::EPOLLHUP
-                    | epoll::Events::EPOLLIN
-                    | epoll::Events::EPOLLERR
-                    | epoll::Events::EPOLLRDHUP,
-                fd as u64,
-            ),
-        )?;
-
-        Ok(())
-    }
-
-    // There will be three cases on the epoller once it poll:
-    // a: only pty's master get an event(other than self.rfd);
-    // b: only the pipe get an event(self.rfd);
-    // c: both of pty and pipe have event occur;
-    // for case a, it means there is output in process's terminal and what needed to do is
-    // just read the terminal and send them out; for case b, it means the process has exited
-    // and there is no data in the terminal, thus just return the "EOF" to end the io;
-    // for case c, it means the process has exited but there is some data in the terminal which
-    // hasn't been send out, thus it should send those data out first and then send "EOF" last to
-    // end the io.
-    pub fn poll(&self) -> Result<RawFd> {
-        let mut rfd = self.rfd;
-        let mut epoll_events = vec![epoll::Event::new(epoll::Events::empty(), 0); MAX_EVENTS];
-
-        loop {
-            let event_count = match epoll::wait(self.epoll_fd, -1, epoll_events.as_mut_slice()) {
-                Ok(ec) => ec,
-                Err(e) => {
-                    info!(self.logger, "loop wait err {:?}", e);
-                    // EINTR: The call was interrupted by a signal handler before either
-                    // any of the requested events occurred or the timeout expired
-                    if e.kind() == std::io::ErrorKind::Interrupted {
-                        continue;
-                    }
-                    return Err(e.into());
-                }
-            };
-
-            for event in epoll_events.iter().take(event_count) {
-                let fd = event.data as i32;
-                // fd has been assigned with one end of process's exited pipe by default, and
-                // here to check is there any event occur on process's terminal, if "yes", it
-                // should be dealt first, otherwise, it means the process has exited and there
-                // is nothing left in the process's terminal needed to be read.
-                if fd != rfd {
-                    rfd = fd;
-                    break;
-                }
-            }
-            break;
-        }
-
-        Ok(rfd)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::Epoller;
-    use nix::fcntl::OFlag;
-    use nix::unistd;
-    use std::thread;
-
-    #[test]
-    fn test_epoller_poll() {
-        let logger = slog::Logger::root(slog::Discard, o!());
-        let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC).unwrap();
-        let epoller = Epoller::new(&logger, rfd).unwrap();
-
-        let child = thread::spawn(move || {
-            let _ = unistd::write(wfd, "temporary file's content".as_bytes());
-        });
-
-        // wait write to finish
-        let _ = child.join();
-
-        let fd = epoller.poll().unwrap();
-        assert_eq!(fd, rfd, "Should get rfd");
-
-        epoller.close();
-    }
-}
--- a/src/agent/rustjail/src/sync.rs
+++ b/src/agent/rustjail/src/sync.rs
@@ -14,8 +14,8 @@ pub const SYNC_SUCCESS: i32 = 1;
 pub const SYNC_FAILED: i32 = 2;
 pub const SYNC_DATA: i32 = 3;

-const DATA_SIZE: usize = 100;
-const MSG_SIZE: usize = mem::size_of::<i32>();
+pub const DATA_SIZE: usize = 100;
+pub const MSG_SIZE: usize = mem::size_of::<i32>();

 #[macro_export]
 macro_rules! log_child {
--- a/src/agent/rustjail/src/sync_with_async.rs
+++ b/src/agent/rustjail/src/sync_with_async.rs
@@ -0,0 +1,140 @@
+// Copyright (c) 2020 Ant Group
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+//! The async version of sync module used for IPC
+
+use crate::pipestream::PipeStream;
+use anyhow::{anyhow, Result};
+use nix::errno::Errno;
+use tokio::io::{AsyncReadExt, AsyncWriteExt};
+
+use crate::sync::{DATA_SIZE, MSG_SIZE, SYNC_DATA, SYNC_FAILED, SYNC_SUCCESS};
+
+/// Write the first `count` bytes of `buf` to `pipe_w`, retrying on EINTR.
+/// Returns the number of bytes written, which equals `count` on success.
+/// Fails if the pipe stops accepting data or `buf` holds fewer than `count` bytes.
+async fn write_count(pipe_w: &mut PipeStream, buf: &[u8], count: usize) -> Result<usize> {
+    // Never write past `count`, and never slice past the end of `buf`.
+    let end = count.min(buf.len());
+    let mut len = 0;
+
+    while len < count {
+        match pipe_w.write(&buf[len..end]).await {
+            // No progress was made: fail instead of spinning forever.
+            Ok(0) => return Err(anyhow!("short write: {} of {} bytes", len, count)),
+            Ok(l) => len += l,
+            // Only EINTR is retried; errors without an OS code are returned too.
+            Err(e) if e.raw_os_error() != Some(Errno::EINTR as i32) => return Err(e.into()),
+            Err(_) => (),
+        }
+    }
+
+    Ok(len)
+}
+
+/// Read up to `count` bytes from `pipe_r`, retrying reads interrupted by EINTR.
+/// The result is shorter than `count` only when EOF (a zero-length read) comes first.
+async fn read_count(pipe_r: &mut PipeStream, count: usize) -> Result<Vec<u8>> {
+    let mut v: Vec<u8> = vec![0; count];
+    let mut len = 0;
+
+    loop {
+        match pipe_r.read(&mut v[len..]).await {
+            Ok(l) => {
+                len += l;
+                if len == count || l == 0 {
+                    break;
+                }
+            }
+            // Errors without an OS error code are returned, not unwrapped.
+            Err(e) if e.raw_os_error() != Some(Errno::EINTR as i32) => return Err(e.into()),
+            Err(_) => (),
+        }
+    }
+
+    v.truncate(len);
+    Ok(v)
+}
+
+/// Read one sync message from `pipe_r` and return its payload.
+/// `SYNC_SUCCESS` yields an empty vector, `SYNC_DATA` yields the length-prefixed
+/// payload, and `SYNC_FAILED` becomes an error carrying the peer's message.
+pub async fn read_async(pipe_r: &mut PipeStream) -> Result<Vec<u8>> {
+    let buf = read_count(pipe_r, MSG_SIZE).await?;
+    if buf.len() != MSG_SIZE {
+        return Err(anyhow!(
+            "process: {} failed to receive async message from peer: got msg length: {}, expected: {}",
+            std::process::id(),
+            buf.len(),
+            MSG_SIZE
+        ));
+    }
+    let buf_array: [u8; MSG_SIZE] = [buf[0], buf[1], buf[2], buf[3]];
+    let msg: i32 = i32::from_be_bytes(buf_array);
+    match msg {
+        SYNC_SUCCESS => Ok(Vec::new()),
+        SYNC_DATA => {
+            let buf = read_count(pipe_r, MSG_SIZE).await?;
+            // A short read (peer closed the pipe) must not be indexed blindly.
+            if buf.len() != MSG_SIZE {
+                return Err(anyhow!("error in receive sync data length"));
+            }
+            let msg_length = i32::from_be_bytes([buf[0], buf[1], buf[2], buf[3]]);
+            // A negative length would turn into a huge allocation once cast.
+            if msg_length < 0 {
+                return Err(anyhow!("invalid sync data length: {}", msg_length));
+            }
+            read_count(pipe_r, msg_length as usize).await
+        }
+        SYNC_FAILED => {
+            let mut error_buf = vec![];
+            // The error text has no length prefix: read until a short chunk.
+            loop {
+                let buf = read_count(pipe_r, DATA_SIZE).await?;
+                error_buf.extend(&buf);
+                if buf.len() != DATA_SIZE {
+                    break;
+                }
+            }
+
+            let error_str = String::from_utf8(error_buf).map_err(|e| {
+                anyhow!(e).context("receive error message from child process failed")
+            })?;
+            Err(anyhow!(error_str))
+        }
+        _ => Err(anyhow!("error in receive sync message")),
+    }
+}
+
+/// Send a sync message of type `msg_type` to the peer over `pipe_w`.
+///
+/// `SYNC_FAILED` is followed by the raw bytes of `data_str`; `SYNC_DATA` is
+/// followed by a big-endian `i32` length and then the bytes of `data_str`.
+/// Every other message type is sent as the bare type word.
+pub async fn write_async(pipe_w: &mut PipeStream, msg_type: i32, data_str: &str) -> Result<()> {
+    let header = msg_type.to_be_bytes();
+    if write_count(pipe_w, &header, MSG_SIZE).await? != MSG_SIZE {
+        return Err(anyhow!("error in send sync message"));
+    }
+
+    let payload = data_str.as_bytes();
+    if msg_type == SYNC_DATA {
+        // The length prefix goes out before the payload itself.
+        let length: i32 = data_str.len() as i32;
+        write_count(pipe_w, &length.to_be_bytes(), MSG_SIZE)
+            .await
+            .map_err(|e| anyhow!(e).context("error in send message to process"))?;
+    }
+
+    // Both payload-carrying message types finish with the bytes of `data_str`.
+    if msg_type == SYNC_FAILED || msg_type == SYNC_DATA {
+        write_count(pipe_w, payload, payload.len())
+            .await
+            .map_err(|e| anyhow!(e).context("error in send message to process"))?;
+    }
+
+    Ok(())
+}
--- a/src/agent/rustjail/src/utils.rs
+++ b/src/agent/rustjail/src/utils.rs
@@ -0,0 +1,119 @@
+// Copyright (c) 2021 Ant Group
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+use anyhow::{anyhow, Context, Result};
+use libc::gid_t;
+use libc::uid_t;
+use std::fs::File;
+use std::io::{BufRead, BufReader};
+
+const PASSWD_FILE: &str = "/etc/passwd";
+
+// One parsed line of a passwd-format file such as /etc/passwd
+#[derive(Debug, PartialEq, PartialOrd)]
+pub struct PasswdEntry {
+    // user name (field 1)
+    pub name: String,
+    // user password (field 2)
+    pub passwd: String,
+    // user id (field 3)
+    pub uid: uid_t,
+    // group id (field 4)
+    pub gid: gid_t,
+    // user information (field 5)
+    pub gecos: String,
+    // home directory (field 6)
+    pub dir: String,
+    // user's shell (field 7)
+    pub shell: String,
+}
+
+// Get the entry for a given `uid` from the passwd-format file at `path`.
+//
+// Lines starting with '#', lines that do not split into exactly seven
+// ':'-separated fields and lines whose uid field is not a number are
+// skipped; the first remaining line whose uid matches is returned.
+// A gid field that fails to parse is stored as 0.
+//
+// Fails if the file cannot be opened or read, or if no line matches `uid`.
+fn get_entry_by_uid(uid: uid_t, path: &str) -> Result<PasswdEntry> {
+    let file = File::open(path).with_context(|| format!("open file {}", path))?;
+    let reader = BufReader::new(file);
+
+    for line in reader.lines() {
+        let line = line.map_err(|e| anyhow!("failed to read file {} with {:?}", path, e))?;
+
+        // Comment lines carry no entry.
+        if line.starts_with('#') {
+            continue;
+        }
+
+        // Fields are trimmed, so surrounding whitespace is ignored.
+        let parts: Vec<&str> = line.split(':').map(|part| part.trim()).collect();
+        if parts.len() != 7 {
+            continue;
+        }
+
+        // Skip entries whose uid is malformed or belongs to another user.
+        let new_uid: uid_t = match parts[2].parse() {
+            Ok(id) => id,
+            Err(_e) => continue,
+        };
+        if uid != new_uid {
+            continue;
+        }
+
+        return Ok(PasswdEntry {
+            name: parts[0].to_string(),
+            passwd: parts[1].to_string(),
+            uid: new_uid,
+            gid: parts[3].parse().unwrap_or(0),
+            gecos: parts[4].to_string(),
+            dir: parts[5].to_string(),
+            shell: parts[6].to_string(),
+        });
+    }
+
+    // Running out of lines means the uid is absent, which is not the same
+    // as the file being empty, so the error names the uid that was missing.
+    Err(anyhow!("no entry for uid {} in file {}", uid, path))
+}
+
+pub fn home_dir(uid: uid_t) -> Result<String> {
+    Ok(get_entry_by_uid(uid, PASSWD_FILE)?.dir) // home directory field of the matching entry
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::io::Write;
+    use tempfile::Builder;
+
+    // Look up uids in a hand-written passwd file that also contains a
+    // commented-out line and a line with too few fields.
+    #[test]
+    fn test_get_entry_by_uid() {
+        let tmpdir = Builder::new().tempdir().unwrap();
+        let tmpdir_path = tmpdir.path().to_str().unwrap();
+        let temp_passwd = format!("{}/passwd", tmpdir_path);
+
+        let mut tempf = File::create(temp_passwd.as_str()).unwrap();
+        writeln!(tempf, "root:x:0:0:root:/root0:/bin/bash").unwrap();
+        writeln!(tempf, "root:x:1:0:root:/root1:/bin/bash").unwrap();
+        writeln!(tempf, "#root:x:1:0:root:/rootx:/bin/bash").unwrap();
+        writeln!(tempf, "root:x:2:0:root:/root2:/bin/bash").unwrap();
+        writeln!(tempf, "root:x:3:0:root:/root3").unwrap();
+        writeln!(tempf, "root:x:3:0:root:/root3:/bin/bash").unwrap();
+
+        // Every uid present in the file resolves to its home directory.
+        let expected = [(0, "/root0"), (1, "/root1"), (2, "/root2"), (3, "/root3")];
+        for (uid, dir) in expected.iter() {
+            let entry = get_entry_by_uid(*uid, temp_passwd.as_str()).unwrap();
+            assert_eq!(entry.dir.as_str(), *dir);
+        }
+
+        // A uid that appears on no line must be reported as an error.
+        assert!(get_entry_by_uid(4, temp_passwd.as_str()).is_err());
+    }
+}
--- a/src/agent/rustjail/src/validator.rs
+++ b/src/agent/rustjail/src/validator.rs
@@ -6,7 +6,7 @@
 use crate::container::Config;
 use anyhow::{anyhow, Context, Error, Result};
 use nix::errno::Errno;
-use oci::{Linux, LinuxIDMapping, LinuxNamespace, Spec};
+use oci::{Linux, LinuxIdMapping, LinuxNamespace, Spec};
 use std::collections::HashMap;
 use std::path::{Component, PathBuf};

@@ -28,16 +28,6 @@ fn contain_namespace(nses: &[LinuxNamespace], key: &str) -> bool {
    false
 }

-fn get_namespace_path(nses: &[LinuxNamespace], key: &str) -> Result<String> {
-    for ns in nses {
-        if ns.r#type.as_str() == key {
-            return Ok(ns.path.clone());
-        }
-    }
-
-    Err(einval())
-}
-
 fn rootfs(root: &str) -> Result<()> {
    let path = PathBuf::from(root);
    // not absolute path or not exists
@@ -78,12 +68,8 @@ fn rootfs(root: &str) -> Result<()> {
    Ok(())
 }

-fn network(_oci: &Spec) -> Result<()> {
-    Ok(())
-}
-
 fn hostname(oci: &Spec) -> Result<()> {
-    if oci.hostname.is_empty() || oci.hostname == "" {
+    if oci.hostname.is_empty() {
        return Ok(());
    }

@@ -111,7 +97,7 @@ fn security(oci: &Spec) -> Result<()> {
    Ok(())
 }

-fn idmapping(maps: &[LinuxIDMapping]) -> Result<()> {
+fn idmapping(maps: &[LinuxIdMapping]) -> Result<()> {
    for map in maps {
        if map.size > 0 {
            return Ok(());
@@ -170,31 +156,6 @@ lazy_static! {
    };
 }

-fn check_host_ns(path: &str) -> Result<()> {
-    let cpath = PathBuf::from(path);
-    let hpath = PathBuf::from("/proc/self/ns/net");
-
-    let real_hpath = hpath
-        .read_link()
-        .context(format!("read link {:?}", hpath))?;
-    let meta = cpath
-        .symlink_metadata()
-        .context(format!("symlink metadata {:?}", cpath))?;
-    let file_type = meta.file_type();
-
-    if !file_type.is_symlink() {
-        return Ok(());
-    }
-    let real_cpath = cpath
-        .read_link()
-        .context(format!("read link {:?}", cpath))?;
-    if real_cpath == real_hpath {
-        return Err(einval());
-    }
-
-    Ok(())
-}
-
 fn sysctl(oci: &Spec) -> Result<()> {
    let linux = get_linux(oci)?;

@@ -242,7 +203,7 @@ fn rootless_euid_mapping(oci: &Spec) -> Result<()> {
    Ok(())
 }

-fn has_idmapping(maps: &[LinuxIDMapping], id: u32) -> bool {
+fn has_idmapping(maps: &[LinuxIdMapping], id: u32) -> bool {
    for map in maps {
        if id >= map.container_id && id < map.container_id + map.size {
            return true;
@@ -301,7 +262,6 @@ pub fn validate(conf: &Config) -> Result<()> {
    };

    rootfs(root).context("rootfs")?;
-    network(oci).context("network")?;
    hostname(oci).context("hostname")?;
    security(oci).context("security")?;
    usernamespace(oci).context("usernamespace")?;
@@ -339,19 +299,6 @@ mod tests {
        assert_eq!(contain_namespace(&namespaces, ""), false);
        assert_eq!(contain_namespace(&namespaces, "Net"), false);
        assert_eq!(contain_namespace(&namespaces, "ipc"), false);
-
-        assert_eq!(
-            get_namespace_path(&namespaces, "net").unwrap(),
-            "/sys/cgroups/net"
-        );
-        assert_eq!(
-            get_namespace_path(&namespaces, "uts").unwrap(),
-            "/sys/cgroups/uts"
-        );
-
-        get_namespace_path(&namespaces, "").unwrap_err();
-        get_namespace_path(&namespaces, "Uts").unwrap_err();
-        get_namespace_path(&namespaces, "ipc").unwrap_err();
    }

    #[test]
@@ -446,7 +393,7 @@ mod tests {
        usernamespace(&spec).unwrap();

        let mut linux = Linux::default();
-        linux.uid_mappings = vec![LinuxIDMapping {
+        linux.uid_mappings = vec![LinuxIdMapping {
            container_id: 0,
            host_id: 1000,
            size: 0,
@@ -455,7 +402,7 @@ mod tests {
        usernamespace(&spec).unwrap_err();

        let mut linux = Linux::default();
-        linux.uid_mappings = vec![LinuxIDMapping {
+        linux.uid_mappings = vec![LinuxIdMapping {
            container_id: 0,
            host_id: 1000,
            size: 100,
@@ -502,12 +449,12 @@ mod tests {
                path: "/sys/cgroups/user".to_owned(),
            },
        ];
-        linux.uid_mappings = vec![LinuxIDMapping {
+        linux.uid_mappings = vec![LinuxIdMapping {
            container_id: 0,
            host_id: 1000,
            size: 1000,
        }];
-        linux.gid_mappings = vec![LinuxIDMapping {
+        linux.gid_mappings = vec![LinuxIdMapping {
            container_id: 0,
            host_id: 1000,
            size: 1000,
@@ -533,12 +480,6 @@ mod tests {
        rootless_euid(&spec).unwrap();
    }

-    #[test]
-    fn test_check_host_ns() {
-        check_host_ns("/proc/self/ns/net").unwrap_err();
-        check_host_ns("/proc/sys/net/ipv4/tcp_sack").unwrap();
-    }
-
    #[test]
    fn test_sysctl() {
        let mut spec = Spec::default();
--- a/src/agent/src/config.rs
+++ b/src/agent/src/config.rs
@@ -2,14 +2,18 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 //
+use crate::tracer;
 use anyhow::{anyhow, Result};
 use std::env;
 use std::fs;
 use std::time;
+use tracing::instrument;

 const DEBUG_CONSOLE_FLAG: &str = "agent.debug_console";
 const DEV_MODE_FLAG: &str = "agent.devmode";
+const TRACE_MODE_OPTION: &str = "agent.trace";
 const LOG_LEVEL_OPTION: &str = "agent.log";
+const SERVER_ADDR_OPTION: &str = "agent.server_addr";
 const HOTPLUG_TIMOUT_OPTION: &str = "agent.hotplug_timeout";
 const DEBUG_CONSOLE_VPORT_OPTION: &str = "agent.debug_console_vport";
 const LOG_VPORT_OPTION: &str = "agent.log_vport";
@@ -25,13 +29,26 @@ const VSOCK_PORT: u16 = 1024;
 // Environment variables used for development and testing
 const SERVER_ADDR_ENV_VAR: &str = "KATA_AGENT_SERVER_ADDR";
 const LOG_LEVEL_ENV_VAR: &str = "KATA_AGENT_LOG_LEVEL";
+const TRACE_TYPE_ENV_VAR: &str = "KATA_AGENT_TRACE_TYPE";

-// FIXME: unused
-const TRACE_MODE_FLAG: &str = "agent.trace";
-const USE_VSOCK_FLAG: &str = "agent.use_vsock";
+const ERR_INVALID_LOG_LEVEL: &str = "invalid log level";
+const ERR_INVALID_LOG_LEVEL_PARAM: &str = "invalid log level parameter";
+const ERR_INVALID_GET_VALUE_PARAM: &str = "expected name=value";
+const ERR_INVALID_GET_VALUE_NO_NAME: &str = "name=value parameter missing name";
+const ERR_INVALID_GET_VALUE_NO_VALUE: &str = "name=value parameter missing value";
+const ERR_INVALID_LOG_LEVEL_KEY: &str = "invalid log level key name";
+
+const ERR_INVALID_HOTPLUG_TIMEOUT: &str = "invalid hotplug timeout parameter";
+const ERR_INVALID_HOTPLUG_TIMEOUT_PARAM: &str = "unable to parse hotplug timeout";
+const ERR_INVALID_HOTPLUG_TIMEOUT_KEY: &str = "invalid hotplug timeout key name";
+
+const ERR_INVALID_CONTAINER_PIPE_SIZE: &str = "invalid container pipe size parameter";
+const ERR_INVALID_CONTAINER_PIPE_SIZE_PARAM: &str = "unable to parse container pipe size";
+const ERR_INVALID_CONTAINER_PIPE_SIZE_KEY: &str = "invalid container pipe size key name";
+const ERR_INVALID_CONTAINER_PIPE_NEGATIVE: &str = "container pipe size should not be negative";

 #[derive(Debug)]
-pub struct agentConfig {
+pub struct AgentConfig {
    pub debug_console: bool,
    pub dev_mode: bool,
    pub log_level: slog::Level,
@@ -41,6 +58,7 @@ pub struct agentConfig {
    pub container_pipe_size: i32,
    pub server_addr: String,
    pub unified_cgroup_hierarchy: bool,
+    pub tracing: tracer::TraceType,
 }

 // parse_cmdline_param parse commandline parameters.
@@ -73,9 +91,9 @@ macro_rules! parse_cmdline_param {
    };
 }

-impl agentConfig {
-    pub fn new() -> agentConfig {
-        agentConfig {
+impl AgentConfig {
+    pub fn new() -> AgentConfig {
+        AgentConfig {
            debug_console: false,
            dev_mode: false,
            log_level: DEFAULT_LOG_LEVEL,
@@ -85,9 +103,11 @@ impl agentConfig {
            container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
            server_addr: format!("{}:{}", VSOCK_ADDR, VSOCK_PORT),
            unified_cgroup_hierarchy: false,
+            tracing: tracer::TraceType::Disabled,
        }
    }

+    #[instrument]
    pub fn parse_cmdline(&mut self, file: &str) -> Result<()> {
        let cmdline = fs::read_to_string(file)?;
        let params: Vec<&str> = cmdline.split_ascii_whitespace().collect();
@@ -96,8 +116,23 @@ impl agentConfig {
            parse_cmdline_param!(param, DEBUG_CONSOLE_FLAG, self.debug_console);
            parse_cmdline_param!(param, DEV_MODE_FLAG, self.dev_mode);

+            // Support "bare" tracing option for backwards compatibility with
+            // Kata 1.x.
+            if param == &TRACE_MODE_OPTION {
+                self.tracing = tracer::TraceType::Isolated;
+                continue;
+            }
+
+            parse_cmdline_param!(param, TRACE_MODE_OPTION, self.tracing, get_trace_type);
+
            // parse cmdline options
            parse_cmdline_param!(param, LOG_LEVEL_OPTION, self.log_level, get_log_level);
+            parse_cmdline_param!(
+                param,
+                SERVER_ADDR_OPTION,
+                self.server_addr,
+                get_string_value
+            );

            // ensure the timeout is a positive value
            parse_cmdline_param!(
@@ -105,7 +140,7 @@ impl agentConfig {
                HOTPLUG_TIMOUT_OPTION,
                self.hotplug_timeout,
                get_hotplug_timeout,
-                |hotplugTimeout: time::Duration| hotplugTimeout.as_secs() > 0
+                |hotplug_timeout: time::Duration| hotplug_timeout.as_secs() > 0
            );

            // vsock port should be positive values
@@ -148,10 +183,17 @@ impl agentConfig {
            }
        }

+        if let Ok(value) = env::var(TRACE_TYPE_ENV_VAR) {
+            if let Ok(result) = value.parse::<tracer::TraceType>() {
+                self.tracing = result;
+            }
+        }
+
        Ok(())
    }
 }

+#[instrument]
 fn get_vsock_port(p: &str) -> Result<i32> {
    let fields: Vec<&str> = p.split('=').collect();
    if fields.len() != 2 {
@@ -166,6 +208,7 @@ fn get_vsock_port(p: &str) -> Result<i32> {
 //
 // Note: Logrus names are used for compatability with the previous
 // golang-based agent.
+#[instrument]
 fn logrus_to_slog_level(logrus_level: &str) -> Result<slog::Level> {
    let level = match logrus_level {
        // Note: different semantics to logrus: log, but don't panic.
@@ -181,47 +224,71 @@ fn logrus_to_slog_level(logrus_level: &str) -> Result<slog::Level> {
        "trace" => slog::Level::Trace,

        _ => {
-            return Err(anyhow!("invalid log level"));
+            return Err(anyhow!(ERR_INVALID_LOG_LEVEL));
        }
    };

    Ok(level)
 }

+#[instrument]
 fn get_log_level(param: &str) -> Result<slog::Level> {
    let fields: Vec<&str> = param.split('=').collect();

    if fields.len() != 2 {
-        return Err(anyhow!("invalid log level parameter"));
+        return Err(anyhow!(ERR_INVALID_LOG_LEVEL_PARAM));
    }

    if fields[0] != LOG_LEVEL_OPTION {
-        Err(anyhow!("invalid log level key name"))
+        Err(anyhow!(ERR_INVALID_LOG_LEVEL_KEY))
    } else {
        Ok(logrus_to_slog_level(fields[1])?)
    }
 }

+// Parse an "agent.trace[=<type>]" parameter into a trace type.
+//
+// A bare key (no '=') selects isolated tracing; otherwise the text between
+// the first and second '=' is parsed as the trace type.
+#[instrument]
+fn get_trace_type(param: &str) -> Result<tracer::TraceType> {
+    if param.is_empty() {
+        return Err(anyhow!("invalid trace type parameter"));
+    }
+
+    let mut pieces = param.split('=');
+    if pieces.next() != Some(TRACE_MODE_OPTION) {
+        return Err(anyhow!("invalid trace type key name"));
+    }
+
+    match pieces.next() {
+        None => Ok(tracer::TraceType::Isolated),
+        Some(kind) => Ok(kind.parse::<tracer::TraceType>()?),
+    }
+}
+
+#[instrument]
 fn get_hotplug_timeout(param: &str) -> Result<time::Duration> {
    let fields: Vec<&str> = param.split('=').collect();

    if fields.len() != 2 {
-        return Err(anyhow!("invalid hotplug timeout parameter"));
+        return Err(anyhow!(ERR_INVALID_HOTPLUG_TIMEOUT));
    }

    let key = fields[0];
    if key != HOTPLUG_TIMOUT_OPTION {
-        return Err(anyhow!("invalid hotplug timeout key name"));
+        return Err(anyhow!(ERR_INVALID_HOTPLUG_TIMEOUT_KEY));
    }

    let value = fields[1].parse::<u64>();
    if value.is_err() {
-        return Err(anyhow!("unable to parse hotplug timeout"));
+        return Err(anyhow!(ERR_INVALID_HOTPLUG_TIMEOUT_PARAM));
    }

    Ok(time::Duration::from_secs(value.unwrap()))
 }

+#[instrument]
 fn get_bool_value(param: &str) -> Result<bool> {
    let fields: Vec<&str> = param.split('=').collect();

@@ -238,26 +305,56 @@ fn get_bool_value(param: &str) -> Result<bool> {
    })
 }

+// Return the value from a "name=value" string.
+//
+// Note:
+//
+// - Both a name *and* a value are required; an empty one is an error.
+// - A value can contain any number of equal signs.
+// - We could/should maybe check if the name is pure whitespace
+//   since this is considered to be invalid.
+#[instrument]
+fn get_string_value(param: &str) -> Result<String> {
+    let fields: Vec<&str> = param.split('=').collect();
+
+    if fields.len() < 2 {
+        return Err(anyhow!(ERR_INVALID_GET_VALUE_PARAM));
+    }
+
+    // A name is required (the value may be whitespace, but not empty)
+    if fields[0].is_empty() {
+        return Err(anyhow!(ERR_INVALID_GET_VALUE_NO_NAME));
+    }
+
+    let value = fields[1..].join("="); // re-join so '=' characters inside the value are preserved
+    if value.is_empty() {
+        return Err(anyhow!(ERR_INVALID_GET_VALUE_NO_VALUE));
+    }
+
+    Ok(value)
+}
+
+#[instrument]
 fn get_container_pipe_size(param: &str) -> Result<i32> {
    let fields: Vec<&str> = param.split('=').collect();

    if fields.len() != 2 {
-        return Err(anyhow!("invalid container pipe size parameter"));
+        return Err(anyhow!(ERR_INVALID_CONTAINER_PIPE_SIZE));
    }

    let key = fields[0];
    if key != CONTAINER_PIPE_SIZE_OPTION {
-        return Err(anyhow!("invalid container pipe size key name"));
+        return Err(anyhow!(ERR_INVALID_CONTAINER_PIPE_SIZE_KEY));
    }

    let res = fields[1].parse::<i32>();
    if res.is_err() {
-        return Err(anyhow!("unable to parse container pipe size"));
+        return Err(anyhow!(ERR_INVALID_CONTAINER_PIPE_SIZE_PARAM));
    }

    let value = res.unwrap();
    if value < 0 {
-        return Err(anyhow!("container pipe size should not be negative"));
+        return Err(anyhow!(ERR_INVALID_CONTAINER_PIPE_NEGATIVE));
    }

    Ok(value)
@@ -272,18 +369,9 @@ mod tests {
    use std::time;
    use tempfile::tempdir;

-    const ERR_INVALID_LOG_LEVEL: &str = "invalid log level";
-    const ERR_INVALID_LOG_LEVEL_PARAM: &str = "invalid log level parameter";
-    const ERR_INVALID_LOG_LEVEL_KEY: &str = "invalid log level key name";
-
-    const ERR_INVALID_HOTPLUG_TIMEOUT: &str = "invalid hotplug timeout parameter";
-    const ERR_INVALID_HOTPLUG_TIMEOUT_PARAM: &str = "unable to parse hotplug timeout";
-    const ERR_INVALID_HOTPLUG_TIMEOUT_KEY: &str = "invalid hotplug timeout key name";
-
-    const ERR_INVALID_CONTAINER_PIPE_SIZE: &str = "invalid container pipe size parameter";
-    const ERR_INVALID_CONTAINER_PIPE_SIZE_PARAM: &str = "unable to parse container pipe size";
-    const ERR_INVALID_CONTAINER_PIPE_SIZE_KEY: &str = "invalid container pipe size key name";
-    const ERR_INVALID_CONTAINER_PIPE_NEGATIVE: &str = "container pipe size should not be negative";
+    const ERR_INVALID_TRACE_TYPE_PARAM: &str = "invalid trace type parameter";
+    const ERR_INVALID_TRACE_TYPE: &str = "invalid trace type";
+    const ERR_INVALID_TRACE_TYPE_KEY: &str = "invalid trace type key name";

    // helper function to make errors less crazy-long
    fn make_err(desc: &str) -> Error {
@@ -300,22 +388,25 @@ mod tests {
            if $expected_result.is_ok() {
                let expected_level = $expected_result.as_ref().unwrap();
                let actual_level = $actual_result.unwrap();
-                assert!(*expected_level == actual_level, $msg);
+                assert!(*expected_level == actual_level, "{}", $msg);
            } else {
                let expected_error = $expected_result.as_ref().unwrap_err();
-                let actual_error = $actual_result.unwrap_err();
-
                let expected_error_msg = format!("{:?}", expected_error);
-                let actual_error_msg = format!("{:?}", actual_error);

-                assert!(expected_error_msg == actual_error_msg, $msg);
+                if let Err(actual_error) = $actual_result {
+                    let actual_error_msg = format!("{:?}", actual_error);
+
+                    assert!(expected_error_msg == actual_error_msg, "{}", $msg);
+                } else {
+                    assert!(expected_error_msg == "expected error, got OK", "{}", $msg);
+                }
            }
        };
    }

    #[test]
    fn test_new() {
-        let config = agentConfig::new();
+        let config = AgentConfig::new();
        assert_eq!(config.debug_console, false);
        assert_eq!(config.dev_mode, false);
        assert_eq!(config.log_level, DEFAULT_LOG_LEVEL);
@@ -337,6 +428,7 @@ mod tests {
            container_pipe_size: i32,
            server_addr: &'a str,
            unified_cgroup_hierarchy: bool,
+            tracing: tracer::TraceType,
        }

        let tests = &[
@@ -350,6 +442,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.debug_console agent.devmodex",
@@ -361,6 +454,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.logx=debug",
@@ -372,6 +466,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.log=debug",
@@ -383,6 +478,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.log=debug",
@@ -394,6 +490,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "",
@@ -405,6 +502,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "foo",
@@ -416,6 +514,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "foo bar",
@@ -427,6 +526,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "foo bar",
@@ -438,6 +538,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "foo agent bar",
@@ -449,6 +550,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "foo debug_console agent bar devmode",
@@ -460,6 +562,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.debug_console",
@@ -471,6 +574,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "   agent.debug_console ",
@@ -482,6 +586,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.debug_console foo",
@@ -493,6 +598,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: " agent.debug_console foo",
@@ -504,6 +610,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "foo agent.debug_console bar",
@@ -515,6 +622,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "foo agent.debug_console",
@@ -526,6 +634,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "foo agent.debug_console ",
@@ -537,6 +646,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.devmode",
@@ -548,6 +658,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "   agent.devmode ",
@@ -559,6 +670,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.devmode foo",
@@ -570,6 +682,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: " agent.devmode foo",
@@ -581,6 +694,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "foo agent.devmode bar",
@@ -592,6 +706,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "foo agent.devmode",
@@ -603,6 +718,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "foo agent.devmode ",
@@ -614,6 +730,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.devmode agent.debug_console",
@@ -625,6 +742,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.devmode agent.debug_console agent.hotplug_timeout=100 agent.unified_cgroup_hierarchy=a",
@@ -636,6 +754,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.devmode agent.debug_console agent.hotplug_timeout=0 agent.unified_cgroup_hierarchy=11",
@@ -647,6 +766,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: true,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.devmode agent.debug_console agent.container_pipe_size=2097152 agent.unified_cgroup_hierarchy=false",
@@ -658,6 +778,7 @@ mod tests {
                container_pipe_size: 2097152,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.devmode agent.debug_console agent.container_pipe_size=100 agent.unified_cgroup_hierarchy=true",
@@ -669,6 +790,7 @@ mod tests {
                container_pipe_size: 100,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: true,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.devmode agent.debug_console agent.container_pipe_size=0 agent.unified_cgroup_hierarchy=0",
@@ -680,6 +802,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "agent.devmode agent.debug_console agent.container_pip_siz=100 agent.unified_cgroup_hierarchy=1",
@@ -691,6 +814,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: true,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "",
@@ -702,6 +826,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "",
@@ -713,6 +838,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: "foo",
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "",
@@ -724,6 +850,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: "=",
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "",
@@ -735,6 +862,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: "=foo",
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "",
@@ -746,6 +874,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: "foo=bar=baz=",
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "",
@@ -757,6 +886,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: "unix:///tmp/foo.socket",
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "",
@@ -768,6 +898,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: "unix://@/tmp/foo.socket",
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "",
@@ -779,6 +910,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "",
@@ -790,6 +922,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "",
@@ -801,6 +934,7 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
            TestData {
                contents: "",
@@ -812,6 +946,175 @@ mod tests {
                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
+            },
+            TestData {
+                contents: "server_addr=unix:///tmp/foo.socket",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: TEST_SERVER_ADDR,
+                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
+            },
+            TestData {
+                contents: "agent.server_address=unix:///tmp/foo.socket",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: TEST_SERVER_ADDR,
+                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
+            },
+            TestData {
+                contents: "agent.server_addr=unix:///tmp/foo.socket",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: "unix:///tmp/foo.socket",
+                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
+            },
+            TestData {
+                contents: " agent.server_addr=unix:///tmp/foo.socket",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: "unix:///tmp/foo.socket",
+                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
+            },
+            TestData {
+                contents: " agent.server_addr=unix:///tmp/foo.socket a",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: "unix:///tmp/foo.socket",
+                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
+            },
+            TestData {
+                contents: "trace",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: TEST_SERVER_ADDR,
+                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
+            },
+            TestData {
+                contents: ".trace",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: TEST_SERVER_ADDR,
+                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
+            },
+            TestData {
+                contents: "agent.tracer",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: TEST_SERVER_ADDR,
+                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
+            },
+            TestData {
+                contents: "agent.trac",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: TEST_SERVER_ADDR,
+                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
+            },
+            TestData {
+                contents: "agent.trace",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: TEST_SERVER_ADDR,
+                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Isolated,
+            },
+            TestData {
+                contents: "agent.trace=isolated",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: TEST_SERVER_ADDR,
+                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Isolated,
+            },
+            TestData {
+                contents: "agent.trace=disabled",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: TEST_SERVER_ADDR,
+                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
+            },
+            TestData {
+                contents: "",
+                env_vars: vec!["KATA_AGENT_TRACE_TYPE=isolated"],
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: TEST_SERVER_ADDR,
+                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Isolated,
+            },
+            TestData {
+                contents: "",
+                env_vars: vec!["KATA_AGENT_TRACE_TYPE=disabled"],
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: TEST_SERVER_ADDR,
+                unified_cgroup_hierarchy: false,
+                tracing: tracer::TraceType::Disabled,
            },
        ];

@@ -822,7 +1125,7 @@ mod tests {

        let filename = file_path.to_str().expect("failed to create filename");

-        let mut config = agentConfig::new();
+        let mut config = AgentConfig::new();
        let result = config.parse_cmdline(&filename.to_owned());
        assert!(result.is_err());

@@ -854,7 +1157,7 @@ mod tests {
                vars_to_unset.push(name);
            }

-            let mut config = agentConfig::new();
+            let mut config = AgentConfig::new();
            assert_eq!(config.debug_console, false, "{}", msg);
            assert_eq!(config.dev_mode, false, "{}", msg);
            assert_eq!(config.unified_cgroup_hierarchy, false, "{}", msg);
@@ -866,6 +1169,7 @@ mod tests {
            );
            assert_eq!(config.container_pipe_size, 0, "{}", msg);
            assert_eq!(config.server_addr, TEST_SERVER_ADDR, "{}", msg);
+            assert_eq!(config.tracing, tracer::TraceType::Disabled, "{}", msg);

            let result = config.parse_cmdline(filename);
            assert!(result.is_ok(), "{}", msg);
@@ -881,6 +1185,7 @@ mod tests {
            assert_eq!(d.hotplug_timeout, config.hotplug_timeout, "{}", msg);
            assert_eq!(d.container_pipe_size, config.container_pipe_size, "{}", msg);
            assert_eq!(d.server_addr, config.server_addr, "{}", msg);
+            assert_eq!(d.tracing, config.tracing, "{}", msg);

            for v in vars_to_unset {
                env::remove_var(v);
@@ -1199,4 +1504,140 @@ mod tests {
            assert_result!(d.result, result, msg);
        }
    }
+
+    #[test]
+    fn test_get_string_value() { // Table-driven checks of get_string_value(): each case pairs an input with its expected Result.
+        #[derive(Debug)]
+        struct TestData<'a> {
+            param: &'a str,
+            result: Result<String>,
+        }
+
+        let tests = &[
+            TestData {
+                param: "",
+                result: Err(make_err(ERR_INVALID_GET_VALUE_PARAM)),
+            },
+            TestData {
+                param: "=",
+                result: Err(make_err(ERR_INVALID_GET_VALUE_NO_NAME)),
+            },
+            TestData {
+                param: "==",
+                result: Err(make_err(ERR_INVALID_GET_VALUE_NO_NAME)),
+            },
+            TestData {
+                param: "x=",
+                result: Err(make_err(ERR_INVALID_GET_VALUE_NO_VALUE)),
+            },
+            TestData {
+                param: "x==",
+                result: Ok("=".into()),
+            },
+            TestData {
+                param: "x===",
+                result: Ok("==".into()),
+            },
+            TestData {
+                param: "x==x",
+                result: Ok("=x".into()),
+            },
+            TestData {
+                param: "x=x",
+                result: Ok("x".into()),
+            },
+            TestData {
+                param: "x=x=",
+                result: Ok("x=".into()),
+            },
+            TestData {
+                param: "x=x=x",
+                result: Ok("x=x".into()),
+            },
+            TestData {
+                param: "foo=bar",
+                result: Ok("bar".into()),
+            },
+            TestData {
+                param: "x= =",
+                result: Ok(" =".into()),
+            },
+            TestData {
+                param: "x= =", // NOTE(review): appears identical to the previous case - confirm whether a different input was intended
+                result: Ok(" =".into()),
+            },
+            TestData {
+                param: "x= = ",
+                result: Ok(" = ".into()),
+            },
+        ];
+
+        for (i, d) in tests.iter().enumerate() {
+            let msg = format!("test[{}]: {:?}", i, d);
+
+            let result = get_string_value(d.param);
+
+            let msg = format!("{}: result: {:?}", msg, result);
+
+            assert_result!(d.result, result, msg);
+        }
+    }
+
+    #[test]
+    fn test_get_trace_type() {
+        #[derive(Debug)]
+        struct TestData<'a> {
+            param: &'a str,
+            result: Result<tracer::TraceType>,
+        }
+
+        let tests = &[
+            TestData {
+                param: "",
+                result: Err(make_err(ERR_INVALID_TRACE_TYPE_PARAM)),
+            },
+            TestData {
+                param: "agent.tracer",
+                result: Err(make_err(ERR_INVALID_TRACE_TYPE_KEY)),
+            },
+            TestData {
+                param: "agent.trac",
+                result: Err(make_err(ERR_INVALID_TRACE_TYPE_KEY)),
+            },
+            TestData {
+                param: "agent.trace=",
+                result: Err(make_err(ERR_INVALID_TRACE_TYPE)),
+            },
+            TestData {
+                param: "agent.trace==",
+                result: Err(make_err(ERR_INVALID_TRACE_TYPE)),
+            },
+            TestData {
+                param: "agent.trace=foo",
+                result: Err(make_err(ERR_INVALID_TRACE_TYPE)),
+            },
+            TestData {
+                param: "agent.trace",
+                result: Ok(tracer::TraceType::Isolated),
+            },
+            TestData {
+                param: "agent.trace=isolated",
+                result: Ok(tracer::TraceType::Isolated),
+            },
+            TestData {
+                param: "agent.trace=disabled",
+                result: Ok(tracer::TraceType::Disabled),
+            },
+        ];
+
+        for (i, d) in tests.iter().enumerate() {
+            let msg = format!("test[{}]: {:?}", i, d);
+
+            let result = get_trace_type(d.param);
+
+            let msg = format!("{}: result: {:?}", msg, result);
+
+            assert_result!(d.result, result, msg);
+        }
+    }
 }
--- a/src/agent/src/console.rs
+++ b/src/agent/src/console.rs
@@ -0,0 +1,295 @@
+// Copyright (c) 2021 Ant Group
+// Copyright (c) 2021 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use crate::util;
+use anyhow::{anyhow, Result};
+use nix::fcntl::{self, FcntlArg, FdFlag, OFlag};
+use nix::libc::{STDERR_FILENO, STDIN_FILENO, STDOUT_FILENO};
+use nix::pty::{openpty, OpenptyResult};
+use nix::sys::socket::{self, AddressFamily, SockAddr, SockFlag, SockType};
+use nix::sys::stat::Mode;
+use nix::sys::wait;
+use nix::unistd::{self, close, dup2, fork, setsid, ForkResult, Pid};
+use rustjail::pipestream::PipeStream;
+use slog::Logger;
+use std::ffi::CString;
+use std::os::unix::io::{FromRawFd, RawFd};
+use std::path::PathBuf;
+use std::process::Stdio;
+use std::sync::Arc;
+use std::sync::Mutex as SyncMutex;
+
+use futures::StreamExt;
+use tokio::io::{AsyncRead, AsyncWrite};
+use tokio::select;
+use tokio::sync::watch::Receiver;
+
+const CONSOLE_PATH: &str = "/dev/console";
+
+// Candidate shells for the debug console, in the order they are probed by
+// debug_console_handler(). The list starts out empty when compiled for tests
+// (`cfg!(test)`), so the unit tests control its contents themselves.
+lazy_static! {
+    static ref SHELLS: Arc<SyncMutex<Vec<String>>> = {
+        let mut v = Vec::new();
+
+        if !cfg!(test) {
+            v.push("/bin/bash".to_string());
+            v.push("/bin/sh".to_string());
+        }
+
+        Arc::new(SyncMutex::new(v))
+    };
+}
+
+/// Forces evaluation of the lazily-initialized `SHELLS` list.
+pub fn initialize() {
+    lazy_static::initialize(&SHELLS);
+}
+
+/// Serves the agent debug console until a shutdown notification arrives.
+///
+/// The first entry of `SHELLS` that exists on disk is used as the shell; an
+/// error is returned if none of them exists.
+///
+/// * `port > 0`: listen on a vsock stream socket bound to
+///   `VMADDR_CID_ANY:port` and spawn one task per accepted connection.
+/// * `port == 0`: open `CONSOLE_PATH` and run a single shell attached to it.
+///
+/// Socket setup and console open failures are propagated to the caller.
+/// A failing serial session is logged; the result of a vsock session is
+/// discarded.
+pub async fn debug_console_handler(
+    logger: Logger,
+    port: u32,
+    mut shutdown: Receiver<bool>,
+) -> Result<()> {
+    let logger = logger.new(o!("subsystem" => "debug-console"));
+
+    // Snapshot the list so the lock is not held while probing the filesystem.
+    let shells = SHELLS.lock().unwrap().to_vec();
+
+    let shell = shells
+        .into_iter()
+        .find(|sh| PathBuf::from(sh).exists())
+        .ok_or_else(|| anyhow!("no shell found to launch debug console"))?;
+
+    if port > 0 {
+        let listenfd = socket::socket(
+            AddressFamily::Vsock,
+            SockType::Stream,
+            SockFlag::SOCK_CLOEXEC,
+            None,
+        )?;
+        let addr = SockAddr::new_vsock(libc::VMADDR_CID_ANY, port);
+        socket::bind(listenfd, &addr)?;
+        socket::listen(listenfd, 1)?;
+
+        let mut incoming = util::get_vsock_incoming(listenfd);
+
+        loop {
+            select! {
+                _ = shutdown.changed() => {
+                    info!(logger, "debug console got shutdown request");
+                    break;
+                }
+
+                conn = incoming.next() => {
+                    if let Some(conn) = conn {
+                        // Accept a new connection
+                        match conn {
+                            Ok(stream) => {
+                                let logger = logger.clone();
+                                let shell = shell.clone();
+                                // Do not block(await) here, or we'll never receive the shutdown signal
+                                tokio::spawn(async move {
+                                    let _ = run_debug_console_vsock(logger, shell, stream).await;
+                                });
+                            }
+                            Err(e) => {
+                                error!(logger, "{:?}", e);
+                            }
+                        }
+                    } else {
+                        // The incoming stream ended: stop serving.
+                        break;
+                    }
+                }
+            }
+        }
+    } else {
+        let mut flags = OFlag::empty();
+        flags.insert(OFlag::O_RDWR);
+        flags.insert(OFlag::O_CLOEXEC);
+
+        let fd = fcntl::open(CONSOLE_PATH, flags, Mode::empty())?;
+
+        // Whichever completes first wins: a shutdown request or the end of
+        // the serial shell session.
+        select! {
+            _ = shutdown.changed() => {
+                info!(logger, "debug console got shutdown request");
+            }
+
+            result = run_debug_console_serial(shell.clone(), fd) => {
+               match result {
+                   Ok(_) => {
+                       info!(logger, "run_debug_console_shell session finished");
+                   }
+                   Err(err) => {
+                       error!(logger, "run_debug_console_shell failed: {:?}", err);
+                   }
+               }
+            }
+        }
+    };
+
+    Ok(())
+}
+
+/// Runs in the forked child: makes the pty slave the standard streams and
+/// controlling terminal of a new session, then replaces the process image
+/// with `shell`.
+///
+/// Returns an error if `setsid` or any `dup2` fails. If `execvp` itself
+/// fails, the child exits with the errno as its status (or -2 for a
+/// non-errno error).
+fn run_in_child(slave_fd: libc::c_int, shell: String) -> Result<()> {
+    // create new session with child as session leader
+    setsid()?;
+
+    // dup stdin, stdout, stderr to let child act as a terminal
+    dup2(slave_fd, STDIN_FILENO)?;
+    dup2(slave_fd, STDOUT_FILENO)?;
+    dup2(slave_fd, STDERR_FILENO)?;
+
+    // Make stdin the controlling terminal of the new session. ioctl() is
+    // variadic and TIOCSCTTY reads an int argument, so pass it explicitly:
+    // 0 means "do not steal the terminal from another session".
+    unsafe {
+        libc::ioctl(STDIN_FILENO, libc::TIOCSCTTY, 0);
+    }
+
+    let cmd = CString::new(shell).unwrap();
+    // By convention argv[0] is the program name; an empty argv leaves the
+    // shell without it.
+    let args: Vec<CString> = vec![cmd.clone()];
+
+    // run shell
+    let _ = unistd::execvp(cmd.as_c_str(), &args).map_err(|e| match e {
+        nix::Error::Sys(errno) => {
+            std::process::exit(errno as i32);
+        }
+        _ => std::process::exit(-2),
+    });
+
+    Ok(())
+}
+
+/// Runs in the parent after the fork: relays data between `stream` and the
+/// pty master until either direction finishes, then reaps the shell process.
+///
+/// * `stream`    - the client connection to bridge to the shell.
+/// * `pseudo`    - the pty pair; the slave end is closed here, the master
+///                 end is wrapped in a `PipeStream`.
+/// * `child_pid` - pid of the forked shell process.
+///
+/// Always returns `Ok(())`; copy and wait results are only logged.
+async fn run_in_parent<T: AsyncRead + AsyncWrite>(
+    logger: Logger,
+    stream: T,
+    pseudo: OpenptyResult,
+    child_pid: Pid,
+) -> Result<()> {
+    info!(logger, "get debug shell pid {:?}", child_pid);
+
+    let master_fd = pseudo.master;
+    // Only the child uses the slave end.
+    let _ = close(pseudo.slave);
+
+    let (mut socket_reader, mut socket_writer) = tokio::io::split(stream);
+    let (mut master_reader, mut master_writer) = tokio::io::split(PipeStream::from_fd(master_fd));
+
+    // Finish as soon as one of the two copy directions completes.
+    select! {
+        res = tokio::io::copy(&mut master_reader, &mut socket_writer) => {
+            debug!(
+                logger,
+                "master closed: {:?}", res
+            );
+        }
+        res = tokio::io::copy(&mut socket_reader, &mut master_writer) => {
+            info!(
+                logger,
+                "socket closed: {:?}", res
+            );
+        }
+    }
+
+    // NOTE(review): waitpid() is a synchronous call made from async code and
+    // the pty master is still open at this point -- confirm this cannot stall
+    // the executor thread when the client disconnects before the shell exits.
+    let wait_status = wait::waitpid(child_pid, None);
+    info!(logger, "debug console process exit code: {:?}", wait_status);
+
+    Ok(())
+}
+
+/// Serves one debug console connection: allocates a pty, forks, runs `shell`
+/// on the pty slave in the child and relays `stream` to the pty master in
+/// the parent.
+///
+/// Returns an error if the pty cannot be opened or the fork fails; otherwise
+/// returns whatever the parent/child branch returns.
+async fn run_debug_console_vsock<T: AsyncRead + AsyncWrite>(
+    logger: Logger,
+    shell: String,
+    stream: T,
+) -> Result<()> {
+    let logger = logger.new(o!("subsystem" => "debug-console-shell"));
+
+    let pseudo = openpty(None, None)?;
+    // Best effort: mark both pty ends close-on-exec (failures are ignored).
+    let _ = fcntl::fcntl(pseudo.master, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC));
+    let _ = fcntl::fcntl(pseudo.slave, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC));
+
+    let slave_fd = pseudo.slave;
+
+    match unsafe { fork() } {
+        Ok(ForkResult::Child) => run_in_child(slave_fd, shell),
+        Ok(ForkResult::Parent { child: child_pid }) => {
+            run_in_parent(logger.clone(), stream, pseudo, child_pid).await
+        }
+        Err(err) => Err(anyhow!("fork error: {:?}", err)),
+    }
+}
+
+/// Runs an interactive `shell` with its standard streams attached to the
+/// console descriptor `fd`, and waits for it to exit.
+///
+/// Takes ownership of `fd`: it is closed once the command has been spawned
+/// (or on any error). Returns an error if a descriptor cannot be duplicated,
+/// the shell cannot be spawned, or waiting for it fails.
+async fn run_debug_console_serial(shell: String, fd: RawFd) -> Result<()> {
+    // Every `Stdio` owns, and eventually closes, the descriptor it wraps.
+    // Wrapping the same raw fd three times would close it three times, so
+    // stdout and stderr get their own close-on-exec duplicates (numbered at
+    // or above 3 so they never land on a standard stream number).
+    let stdin = unsafe { Stdio::from_raw_fd(fd) };
+
+    let stdout_fd = fcntl::fcntl(fd, FcntlArg::F_DUPFD_CLOEXEC(3))?;
+    let stdout = unsafe { Stdio::from_raw_fd(stdout_fd) };
+
+    let stderr_fd = fcntl::fcntl(fd, FcntlArg::F_DUPFD_CLOEXEC(3))?;
+    let stderr = unsafe { Stdio::from_raw_fd(stderr_fd) };
+
+    let mut child = tokio::process::Command::new(shell)
+        .arg("-i")
+        .kill_on_drop(true)
+        .stdin(stdin)
+        .stdout(stdout)
+        .stderr(stderr)
+        .spawn()
+        // Keep the underlying cause instead of discarding it.
+        .map_err(|e| anyhow!("failed to spawn shell: {}", e))?;
+
+    child.wait().await?;
+
+    Ok(())
+}
+
+// Unit tests for debug_console_handler() shell selection. Both tests expect
+// the "no shell found" error, so they remain valid whichever one runs first
+// against the shared SHELLS list.
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::tempdir;
+    use tokio::sync::watch;
+
+    // With an empty shell list the handler must fail before doing any I/O.
+    #[tokio::test]
+    async fn test_setup_debug_console_no_shells() {
+        {
+            // Guarantee no shells have been added
+            // (required to avoid racing with
+            // test_setup_debug_console_invalid_shell()).
+            let shells_ref = SHELLS.clone();
+            let mut shells = shells_ref.lock().unwrap();
+            shells.clear();
+        }
+
+        let logger = slog_scope::logger();
+
+        let (_, rx) = watch::channel(true);
+        let result = debug_console_handler(logger, 0, rx).await;
+
+        assert!(result.is_err());
+        assert_eq!(
+            result.unwrap_err().to_string(),
+            "no shell found to launch debug console"
+        );
+    }
+
+    // A shell path that does not exist on disk must be skipped, which again
+    // leaves no usable shell.
+    #[tokio::test]
+    async fn test_setup_debug_console_invalid_shell() {
+        {
+            let shells_ref = SHELLS.clone();
+            let mut shells = shells_ref.lock().unwrap();
+
+            let dir = tempdir().expect("failed to create tmpdir");
+
+            // Add an invalid shell
+            let shell = dir
+                .path()
+                .join("enoent")
+                .to_str()
+                .expect("failed to construct shell path")
+                .to_string();
+
+            shells.push(shell);
+        }
+
+        let logger = slog_scope::logger();
+
+        let (_, rx) = watch::channel(true);
+        let result = debug_console_handler(logger, 0, rx).await;
+
+        assert!(result.is_err());
+        assert_eq!(
+            result.unwrap_err().to_string(),
+            "no shell found to launch debug console"
+        );
+    }
+}
--- a/src/agent/src/device.rs
+++ b/src/agent/src/device.rs
@@ -5,19 +5,24 @@

 use libc::{c_uint, major, minor};
 use nix::sys::stat;
+use regex::Regex;
 use std::collections::HashMap;
 use std::fs;
 use std::os::unix::fs::MetadataExt;
 use std::path::Path;
-use std::sync::{mpsc, Arc, Mutex};
+use std::str::FromStr;
+use std::sync::Arc;
+use tokio::sync::Mutex;

 use crate::linux_abi::*;
-use crate::mount::{DRIVERBLKTYPE, DRIVERMMIOBLKTYPE, DRIVERNVDIMMTYPE, DRIVERSCSITYPE};
+use crate::mount::{DRIVER_BLK_TYPE, DRIVER_MMIO_BLK_TYPE, DRIVER_NVDIMM_TYPE, DRIVER_SCSI_TYPE};
+use crate::pci;
 use crate::sandbox::Sandbox;
-use crate::{AGENT_CONFIG, GLOBAL_DEVICE_WATCHER};
+use crate::uevent::{wait_for_uevent, Uevent, UeventMatcher};
 use anyhow::{anyhow, Result};
 use oci::{LinuxDeviceCgroup, LinuxResources, Spec};
 use protocols::agent::Device;
+use tracing::instrument;

 // Convenience macro to obtain the scope logger
 macro_rules! sl {
@@ -28,142 +33,184 @@ macro_rules! sl {

 const VM_ROOTFS: &str = "/";

+#[derive(Debug)]
 struct DevIndexEntry {
    idx: usize,
    residx: Vec<usize>,
 }

+#[derive(Debug)]
 struct DevIndex(HashMap<String, DevIndexEntry>);

-// DeviceHandler is the type of callback to be defined to handle every type of device driver.
-type DeviceHandler = fn(&Device, &mut Spec, &Arc<Mutex<Sandbox>>, &DevIndex) -> Result<()>;
-
-// DEVICEHANDLERLIST lists the supported drivers.
-#[rustfmt::skip]
-lazy_static! {
-    static ref DEVICEHANDLERLIST: HashMap<&'static str, DeviceHandler> = {
-        let mut m: HashMap<&'static str, DeviceHandler> = HashMap::new();
-        m.insert(DRIVERBLKTYPE, virtio_blk_device_handler);
-        m.insert(DRIVERMMIOBLKTYPE, virtiommio_blk_device_handler);
-        m.insert(DRIVERNVDIMMTYPE, virtio_nvdimm_device_handler);
-        m.insert(DRIVERSCSITYPE, virtio_scsi_device_handler);
-        m
-    };
-}
-
+#[instrument]
 pub fn rescan_pci_bus() -> Result<()> {
    online_device(SYSFS_PCI_BUS_RESCAN_FILE)
 }

+#[instrument]
 pub fn online_device(path: &str) -> Result<()> {
    fs::write(path, "1")?;
    Ok(())
 }

-// get_pci_device_address fetches the complete PCI address in sysfs, based on the PCI
-// identifier provided. This should be in the format: "bridgeAddr/deviceAddr".
-// Here, bridgeAddr is the address at which the bridge is attached on the root bus,
-// while deviceAddr is the address at which the device is attached on the bridge.
-fn get_pci_device_address(pci_id: &str) -> Result<String> {
-    let tokens: Vec<&str> = pci_id.split('/').collect();
+// pcipath_to_sysfs fetches the sysfs path for a PCI path, relative to
+// the sysfs path for the PCI host bridge, based on the PCI path
+// provided.
+#[instrument]
+fn pcipath_to_sysfs(root_bus_sysfs: &str, pcipath: &pci::Path) -> Result<String> {
+    let mut bus = "0000:00".to_string();
+    let mut relpath = String::new();

-    if tokens.len() != 2 {
-        return Err(anyhow!(
-            "PCI Identifier for device should be of format [bridgeAddr/deviceAddr], got {}",
-            pci_id
-        ));
-    }
+    for i in 0..pcipath.len() {
+        let bdf = format!("{}:{}.0", bus, pcipath[i]);

-    let bridge_id = tokens[0];
-    let device_id = tokens[1];
+        relpath = format!("{}/{}", relpath, bdf);

-    // Deduce the complete bridge address based on the bridge address identifier passed
-    // and the fact that bridges are attached on the main bus with function 0.
-    let pci_bridge_addr = format!("0000:00:{}.0", bridge_id);
-
-    // Find out the bus exposed by bridge
-    let bridge_bus_path = format!("{}/{}/pci_bus/", SYSFS_PCI_BUS_PREFIX, pci_bridge_addr);
-
-    let files_slice: Vec<_> = fs::read_dir(&bridge_bus_path)
-        .unwrap()
-        .map(|res| res.unwrap().path())
-        .collect();
-    let bus_num = files_slice.len();
-
-    if bus_num != 1 {
-        return Err(anyhow!(
-            "Expected an entry for bus in {}, got {} entries instead",
-            bridge_bus_path,
-            bus_num
-        ));
-    }
-
-    let bus = files_slice[0].file_name().unwrap().to_str().unwrap();
-
-    // Device address is based on the bus of the bridge to which it is attached.
-    // We do not pass devices as multifunction, hence the trailing 0 in the address.
-    let pci_device_addr = format!("{}:{}.0", bus, device_id);
-
-    let bridge_device_pci_addr = format!("{}/{}", pci_bridge_addr, pci_device_addr);
-
-    info!(
-        sl!(),
-        "Fetched PCI address for device PCIAddr:{}\n", bridge_device_pci_addr
-    );
-
-    Ok(bridge_device_pci_addr)
-}
-
-fn get_device_name(sandbox: &Arc<Mutex<Sandbox>>, dev_addr: &str) -> Result<String> {
-    // Keep the same lock order as uevent::handle_block_add_event(), otherwise it may cause deadlock.
-    let mut w = GLOBAL_DEVICE_WATCHER.lock().unwrap();
-    let sb = sandbox.lock().unwrap();
-    for (key, value) in sb.pci_device_map.iter() {
-        if key.contains(dev_addr) {
-            info!(sl!(), "Device {} found in pci device map", dev_addr);
-            return Ok(format!("{}/{}", SYSTEM_DEV_PATH, value));
+        if i == pcipath.len() - 1 {
+            // Final device need not be a bridge
+            break;
        }
+
+        // Find out the bus exposed by bridge
+        let bridgebuspath = format!("{}{}/pci_bus", root_bus_sysfs, relpath);
+        let mut files: Vec<_> = fs::read_dir(&bridgebuspath)?.collect();
+
+        if files.len() != 1 {
+            return Err(anyhow!(
+                "Expected exactly one PCI bus in {}, got {} instead",
+                bridgebuspath,
+                files.len()
+            ));
+        }
+
+        // unwrap is safe, because of the length test above
+        let busfile = files.pop().unwrap()?;
+        bus = busfile
+            .file_name()
+            .into_string()
+            .map_err(|e| anyhow!("Bad filename under {}: {:?}", &bridgebuspath, e))?;
    }
-    drop(sb);

-    // If device is not found in the device map, hotplug event has not
-    // been received yet, create and add channel to the watchers map.
-    // The key of the watchers map is the device we are interested in.
-    // Note this is done inside the lock, not to miss any events from the
-    // global udev listener.
-    let (tx, rx) = mpsc::channel::<String>();
-    w.insert(dev_addr.to_string(), tx);
-    drop(w);
-
-    info!(sl!(), "Waiting on channel for device notification\n");
-    let hotplug_timeout = AGENT_CONFIG.read().unwrap().hotplug_timeout;
-    let dev_name = rx.recv_timeout(hotplug_timeout).map_err(|_| {
-        GLOBAL_DEVICE_WATCHER.lock().unwrap().remove_entry(dev_addr);
-        anyhow!(
-            "Timeout reached after {:?} waiting for device {}",
-            hotplug_timeout,
-            dev_addr
-        )
-    })?;
-
-    Ok(format!("{}/{}", SYSTEM_DEV_PATH, &dev_name))
+    Ok(relpath)
 }

-pub fn get_scsi_device_name(sandbox: &Arc<Mutex<Sandbox>>, scsi_addr: &str) -> Result<String> {
-    let dev_sub_path = format!("{}{}/{}", SCSI_HOST_CHANNEL, scsi_addr, SCSI_BLOCK_SUFFIX);
+// FIXME: This matcher is only correct if the guest has at most one
+// SCSI host.
+//
+// Matches "block" uevents whose devpath contains "/0:0:<scsi_addr>/block/"
+// and which carry a device name.
+#[derive(Debug)]
+struct ScsiBlockMatcher {
+    search: String,
+}
+
+impl ScsiBlockMatcher {
+    // Builds the devpath fragment to look for from the given SCSI address.
+    fn new(scsi_addr: &str) -> Self {
+        Self {
+            search: format!(r"/0:0:{}/block/", scsi_addr),
+        }
+    }
+}
+
+impl UeventMatcher for ScsiBlockMatcher {
+    fn is_match(&self, uev: &Uevent) -> bool {
+        if uev.subsystem != "block" {
+            return false;
+        }
+
+        let in_path = uev.devpath.contains(&self.search);
+        in_path && !uev.devname.is_empty()
+    }
+}
+
+#[instrument]
+pub async fn get_scsi_device_name(
+    sandbox: &Arc<Mutex<Sandbox>>,
+    scsi_addr: &str,
+) -> Result<String> {
+    let matcher = ScsiBlockMatcher::new(scsi_addr);

    scan_scsi_bus(scsi_addr)?;
-    get_device_name(sandbox, &dev_sub_path)
+    let uev = wait_for_uevent(sandbox, matcher).await?;
+    Ok(format!("{}/{}", SYSTEM_DEV_PATH, &uev.devname))
 }

-pub fn get_pci_device_name(sandbox: &Arc<Mutex<Sandbox>>, pci_id: &str) -> Result<String> {
-    let pci_addr = get_pci_device_address(pci_id)?;
+// Matches "block" uevents for a virtio-blk device located at a given sysfs
+// path below the PCI root bus.
+#[derive(Debug)]
+struct VirtioBlkPciMatcher {
+    rex: Regex,
+}
+
+impl VirtioBlkPciMatcher {
+    // `relpath` is the device's sysfs path relative to the PCI root bus
+    // path returned by create_pci_root_bus_path().
+    fn new(relpath: &str) -> VirtioBlkPciMatcher {
+        let root_bus = create_pci_root_bus_path();
+        // Both path components are literal text, so escape them: they
+        // contain regex metacharacters (the '.' in PCI addresses such as
+        // "0000:00:02.0" would otherwise match any character).
+        let re = format!(
+            r"^{}{}/virtio[0-9]+/block/",
+            regex::escape(&root_bus),
+            regex::escape(relpath)
+        );
+        VirtioBlkPciMatcher {
+            // Cannot fail: the only unescaped parts are the fixed pieces above.
+            rex: Regex::new(&re).unwrap(),
+        }
+    }
+}
+
+impl UeventMatcher for VirtioBlkPciMatcher {
+    fn is_match(&self, uev: &Uevent) -> bool {
+        uev.subsystem == "block" && self.rex.is_match(&uev.devpath) && !uev.devname.is_empty()
+    }
+}
+
+#[instrument]
+pub async fn get_virtio_blk_pci_device_name(
+    sandbox: &Arc<Mutex<Sandbox>>,
+    pcipath: &pci::Path,
+) -> Result<String> {
+    let root_bus_sysfs = format!("{}{}", SYSFS_DIR, create_pci_root_bus_path());
+    let sysfs_rel_path = pcipath_to_sysfs(&root_bus_sysfs, pcipath)?;
+    let matcher = VirtioBlkPciMatcher::new(&sysfs_rel_path);

    rescan_pci_bus()?;
-    get_device_name(sandbox, &pci_addr)
+
+    let uev = wait_for_uevent(sandbox, matcher).await?;
+    Ok(format!("{}/{}", SYSTEM_DEV_PATH, &uev.devname))
+}
+
+// Matches "block" uevents whose devpath starts with ACPI_DEV_PATH and ends
+// with "/block/<devname>", and which carry a device name.
+#[derive(Debug)]
+struct PmemBlockMatcher {
+    suffix: String,
+}
+
+impl PmemBlockMatcher {
+    // Builds the devpath suffix to look for from the device name.
+    fn new(devname: &str) -> Self {
+        Self {
+            suffix: format!(r"/block/{}", devname),
+        }
+    }
+}
+
+impl UeventMatcher for PmemBlockMatcher {
+    fn is_match(&self, uev: &Uevent) -> bool {
+        if uev.subsystem != "block" {
+            return false;
+        }
+
+        let path = &uev.devpath;
+        let path_matches = path.starts_with(ACPI_DEV_PATH) && path.ends_with(&self.suffix);
+
+        path_matches && !uev.devname.is_empty()
+    }
+}
+
+// Waits for the block uevent announcing the pmem device named by `devpath`
+// (which must start with "/dev/") and checks that the reported device name
+// is the expected one.
+#[instrument]
+pub async fn wait_for_pmem_device(sandbox: &Arc<Mutex<Sandbox>>, devpath: &str) -> Result<()> {
+    let devname = devpath
+        .strip_prefix("/dev/")
+        .ok_or_else(|| anyhow!("Storage source '{}' must start with /dev/", devpath))?;
+
+    let uev = wait_for_uevent(sandbox, PmemBlockMatcher::new(devname)).await?;
+
+    if uev.devname == devname {
+        Ok(())
+    } else {
+        Err(anyhow!(
+            "Unexpected device name {} for pmem device (expected {})",
+            uev.devname,
+            devname
+        ))
+    }
+}

 /// Scan SCSI bus for the given SCSI address(SCSI-Id and LUN)
+#[instrument]
 fn scan_scsi_bus(scsi_addr: &str) -> Result<()> {
    let tokens: Vec<&str> = scsi_addr.split(':').collect();
    if tokens.len() != 2 {
@@ -198,13 +245,14 @@ fn scan_scsi_bus(scsi_addr: &str) -> Result<()> {
 // the same device in the list of devices provided through the OCI spec.
 // This is needed to update information about minor/major numbers that cannot
 // be predicted from the caller.
+#[instrument]
 fn update_spec_device_list(device: &Device, spec: &mut Spec, devidx: &DevIndex) -> Result<()> {
    let major_id: c_uint;
    let minor_id: c_uint;

    // If no container_path is provided, we won't be able to match and
    // update the device in the OCI spec device list. This is an error.
-    if device.container_path == "" {
+    if device.container_path.is_empty() {
        return Err(anyhow!(
            "container_path cannot empty for device {:?}",
            device
@@ -274,58 +322,57 @@ fn update_spec_device_list(device: &Device, spec: &mut Spec, devidx: &DevIndex)

 // device.Id should be the predicted device name (vda, vdb, ...)
 // device.VmPath already provides a way to send it in
-fn virtiommio_blk_device_handler(
+#[instrument]
+async fn virtiommio_blk_device_handler(
    device: &Device,
    spec: &mut Spec,
    _sandbox: &Arc<Mutex<Sandbox>>,
    devidx: &DevIndex,
 ) -> Result<()> {
-    if device.vm_path == "" {
+    if device.vm_path.is_empty() {
        return Err(anyhow!("Invalid path for virtio mmio blk device"));
    }

    update_spec_device_list(device, spec, devidx)
 }

-// device.Id should be the PCI address in the format  "bridgeAddr/deviceAddr".
-// Here, bridgeAddr is the address at which the brige is attached on the root bus,
-// while deviceAddr is the address at which the device is attached on the bridge.
-fn virtio_blk_device_handler(
+// device.Id should be a PCI path string
+#[instrument]
+async fn virtio_blk_device_handler(
    device: &Device,
    spec: &mut Spec,
    sandbox: &Arc<Mutex<Sandbox>>,
    devidx: &DevIndex,
 ) -> Result<()> {
    let mut dev = device.clone();
+    let pcipath = pci::Path::from_str(&device.id)?;

-    // When "Id (PCIAddr)" is not set, we allow to use the predicted "VmPath" passed from kata-runtime
-    // Note this is a special code path for cloud-hypervisor when BDF information is not available
-    if device.id != "" {
-        dev.vm_path = get_pci_device_name(sandbox, &device.id)?;
-    }
+    dev.vm_path = get_virtio_blk_pci_device_name(sandbox, &pcipath).await?;

    update_spec_device_list(&dev, spec, devidx)
 }

 // device.Id should be the SCSI address of the disk in the format "scsiID:lunID"
-fn virtio_scsi_device_handler(
+#[instrument]
+async fn virtio_scsi_device_handler(
    device: &Device,
    spec: &mut Spec,
    sandbox: &Arc<Mutex<Sandbox>>,
    devidx: &DevIndex,
 ) -> Result<()> {
    let mut dev = device.clone();
-    dev.vm_path = get_scsi_device_name(sandbox, &device.id)?;
+    dev.vm_path = get_scsi_device_name(sandbox, &device.id).await?;
    update_spec_device_list(&dev, spec, devidx)
 }

-fn virtio_nvdimm_device_handler(
+#[instrument]
+async fn virtio_nvdimm_device_handler(
    device: &Device,
    spec: &mut Spec,
    _sandbox: &Arc<Mutex<Sandbox>>,
    devidx: &DevIndex,
 ) -> Result<()> {
-    if device.vm_path == "" {
+    if device.vm_path.is_empty() {
        return Err(anyhow!("Invalid path for nvdimm device"));
    }

@@ -357,7 +404,8 @@ impl DevIndex {
    }
 }

-pub fn add_devices(
+#[instrument]
+pub async fn add_devices(
    devices: &[Device],
    spec: &mut Spec,
    sandbox: &Arc<Mutex<Sandbox>>,
@@ -365,13 +413,14 @@ pub fn add_devices(
    let devidx = DevIndex::new(spec);

    for device in devices.iter() {
-        add_device(device, spec, sandbox, &devidx)?;
+        add_device(device, spec, sandbox, &devidx).await?;
    }

    Ok(())
 }

-fn add_device(
+#[instrument]
+async fn add_device(
    device: &Device,
    spec: &mut Spec,
    sandbox: &Arc<Mutex<Sandbox>>,
@@ -381,27 +430,31 @@ fn add_device(
    info!(sl!(), "device-id: {}, device-type: {}, device-vm-path: {}, device-container-path: {}, device-options: {:?}",
          device.id, device.field_type, device.vm_path, device.container_path, device.options);

-    if device.field_type == "" {
+    if device.field_type.is_empty() {
        return Err(anyhow!("invalid type for device {:?}", device));
    }

-    if device.id == "" && device.vm_path == "" {
+    if device.id.is_empty() && device.vm_path.is_empty() {
        return Err(anyhow!("invalid ID and VM path for device {:?}", device));
    }

-    if device.container_path == "" {
+    if device.container_path.is_empty() {
        return Err(anyhow!("invalid container path for device {:?}", device));
    }

-    match DEVICEHANDLERLIST.get(device.field_type.as_str()) {
-        None => Err(anyhow!("Unknown device type {}", device.field_type)),
-        Some(dev_handler) => dev_handler(device, spec, sandbox, devidx),
+    match device.field_type.as_str() {
+        DRIVER_BLK_TYPE => virtio_blk_device_handler(device, spec, sandbox, devidx).await,
+        DRIVER_MMIO_BLK_TYPE => virtiommio_blk_device_handler(device, spec, sandbox, devidx).await,
+        DRIVER_NVDIMM_TYPE => virtio_nvdimm_device_handler(device, spec, sandbox, devidx).await,
+        DRIVER_SCSI_TYPE => virtio_scsi_device_handler(device, spec, sandbox, devidx).await,
+        _ => Err(anyhow!("Unknown device type {}", device.field_type)),
    }
 }

 // update_device_cgroup update the device cgroup for container
 // to not allow access to the guest root partition. This prevents
 // the container from being able to access the VM rootfs.
+#[instrument]
 pub fn update_device_cgroup(spec: &mut Spec) -> Result<()> {
    let meta = fs::metadata(VM_ROOTFS)?;
    let rdev = meta.dev();
@@ -432,13 +485,16 @@ pub fn update_device_cgroup(spec: &mut Spec) -> Result<()> {
 #[cfg(test)]
 mod tests {
    use super::*;
+    use crate::uevent::spawn_test_watcher;
    use oci::Linux;
+    use tempfile::tempdir;

    #[test]
    fn test_update_device_cgroup() {
-        let mut spec = Spec::default();
-
-        spec.linux = Some(Linux::default());
+        let mut spec = Spec {
+            linux: Some(Linux::default()),
+            ..Default::default()
+        };

        update_device_cgroup(&mut spec).unwrap();

@@ -712,4 +768,171 @@ mod tests {
        assert_eq!(Some(host_major), specresources.devices[1].major);
        assert_eq!(Some(host_minor), specresources.devices[1].minor);
    }
+
+    #[test]
+    fn test_pcipath_to_sysfs() {
+        let testdir = tempdir().expect("failed to create tmpdir");
+        let rootbuspath = testdir.path().to_str().unwrap();
+
+        let path2 = pci::Path::from_str("02").unwrap();
+        let path23 = pci::Path::from_str("02/03").unwrap();
+        let path234 = pci::Path::from_str("02/03/04").unwrap();
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path2);
+        assert_eq!(relpath.unwrap(), "/0000:00:02.0");
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path23);
+        assert!(relpath.is_err());
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path234);
+        assert!(relpath.is_err());
+
+        // Create mock sysfs files for the device at 0000:00:02.0
+        let bridge2path = format!("{}{}", rootbuspath, "/0000:00:02.0");
+
+        fs::create_dir_all(&bridge2path).unwrap();
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path2);
+        assert_eq!(relpath.unwrap(), "/0000:00:02.0");
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path23);
+        assert!(relpath.is_err());
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path234);
+        assert!(relpath.is_err());
+
+        // Create mock sysfs files to indicate that 0000:00:02.0 is a bridge to bus 01
+        let bridge2bus = "0000:01";
+        let bus2path = format!("{}/pci_bus/{}", bridge2path, bridge2bus);
+
+        fs::create_dir_all(bus2path).unwrap();
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path2);
+        assert_eq!(relpath.unwrap(), "/0000:00:02.0");
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path23);
+        assert_eq!(relpath.unwrap(), "/0000:00:02.0/0000:01:03.0");
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path234);
+        assert!(relpath.is_err());
+
+        // Create mock sysfs files for a bridge at 0000:01:03.0 to bus 02
+        let bridge3path = format!("{}/0000:01:03.0", bridge2path);
+        let bridge3bus = "0000:02";
+        let bus3path = format!("{}/pci_bus/{}", bridge3path, bridge3bus);
+
+        fs::create_dir_all(bus3path).unwrap();
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path2);
+        assert_eq!(relpath.unwrap(), "/0000:00:02.0");
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path23);
+        assert_eq!(relpath.unwrap(), "/0000:00:02.0/0000:01:03.0");
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path234);
+        assert_eq!(relpath.unwrap(), "/0000:00:02.0/0000:01:03.0/0000:02:04.0");
+    }
+
+    // We use device specific variants of this for real cases, but
+    // they have some complications that make them troublesome to unit
+    // test
+    async fn example_get_device_name(
+        sandbox: &Arc<Mutex<Sandbox>>,
+        relpath: &str,
+    ) -> Result<String> {
+        let matcher = VirtioBlkPciMatcher::new(relpath);
+
+        let uev = wait_for_uevent(sandbox, matcher).await?;
+
+        Ok(uev.devname)
+    }
+
+    #[tokio::test]
+    async fn test_get_device_name() {
+        let devname = "vda";
+        let root_bus = create_pci_root_bus_path();
+        let relpath = "/0000:00:0a.0/0000:03:0b.0";
+        let devpath = format!("{}{}/virtio4/block/{}", root_bus, relpath, devname);
+
+        let mut uev = crate::uevent::Uevent::default();
+        uev.action = crate::linux_abi::U_EVENT_ACTION_ADD.to_string();
+        uev.subsystem = "block".to_string();
+        uev.devpath = devpath.clone();
+        uev.devname = devname.to_string();
+
+        let logger = slog::Logger::root(slog::Discard, o!());
+        let sandbox = Arc::new(Mutex::new(Sandbox::new(&logger).unwrap()));
+
+        let mut sb = sandbox.lock().await;
+        sb.uevent_map.insert(devpath.clone(), uev);
+        drop(sb); // unlock
+
+        let name = example_get_device_name(&sandbox, relpath).await;
+        assert!(name.is_ok(), "{}", name.unwrap_err());
+        assert_eq!(name.unwrap(), devname);
+
+        let mut sb = sandbox.lock().await;
+        let uev = sb.uevent_map.remove(&devpath).unwrap();
+        drop(sb); // unlock
+
+        spawn_test_watcher(sandbox.clone(), uev);
+
+        let name = example_get_device_name(&sandbox, relpath).await;
+        assert!(name.is_ok(), "{}", name.unwrap_err());
+        assert_eq!(name.unwrap(), devname);
+    }
+
+    #[tokio::test]
+    async fn test_virtio_blk_matcher() {
+        let root_bus = create_pci_root_bus_path();
+        let devname = "vda";
+
+        let mut uev_a = crate::uevent::Uevent::default();
+        let relpath_a = "/0000:00:0a.0";
+        uev_a.action = crate::linux_abi::U_EVENT_ACTION_ADD.to_string();
+        uev_a.subsystem = "block".to_string();
+        uev_a.devname = devname.to_string();
+        uev_a.devpath = format!("{}{}/virtio4/block/{}", root_bus, relpath_a, devname);
+        let matcher_a = VirtioBlkPciMatcher::new(&relpath_a);
+
+        let mut uev_b = uev_a.clone();
+        let relpath_b = "/0000:00:0a.0/0000:00:0b.0";
+        uev_b.devpath = format!("{}{}/virtio0/block/{}", root_bus, relpath_b, devname);
+        let matcher_b = VirtioBlkPciMatcher::new(&relpath_b);
+
+        assert!(matcher_a.is_match(&uev_a));
+        assert!(matcher_b.is_match(&uev_b));
+        assert!(!matcher_b.is_match(&uev_a));
+        assert!(!matcher_a.is_match(&uev_b));
+    }
+
+    // Two SCSI block uevents at different SCSI addresses: each matcher must
+    // accept its own event and reject the other one.
+    #[tokio::test]
+    async fn test_scsi_block_matcher() {
+        let root_bus = create_pci_root_bus_path();
+        let devname_a = "sda";
+        let devname_b = "sdb";
+
+        let mut uev_a = crate::uevent::Uevent::default();
+        let addr_a = "0:0";
+        uev_a.action = crate::linux_abi::U_EVENT_ACTION_ADD.to_string();
+        uev_a.subsystem = "block".to_string();
+        uev_a.devname = devname_a.to_string();
+        uev_a.devpath = format!(
+            "{}/0000:00:00.0/virtio0/host0/target0:0:0/0:0:{}/block/{}",
+            root_bus, addr_a, devname_a
+        );
+        let matcher_a = ScsiBlockMatcher::new(addr_a);
+
+        let mut uev_b = uev_a.clone();
+        let addr_b = "2:0";
+        // Keep the device name consistent with the devpath of the second
+        // event (the clone would otherwise still say "sda").
+        uev_b.devname = devname_b.to_string();
+        uev_b.devpath = format!(
+            "{}/0000:00:00.0/virtio0/host0/target0:0:2/0:0:{}/block/{}",
+            root_bus, addr_b, devname_b
+        );
+        let matcher_b = ScsiBlockMatcher::new(addr_b);
+
+        assert!(matcher_a.is_match(&uev_a));
+        assert!(matcher_b.is_match(&uev_b));
+        assert!(!matcher_b.is_match(&uev_a));
+        assert!(!matcher_a.is_match(&uev_b));
+    }
 }
--- a/src/agent/src/linux_abi.rs
+++ b/src/agent/src/linux_abi.rs
@@ -9,7 +9,6 @@
 use std::fs;

 pub const SYSFS_DIR: &str = "/sys";
-pub const SYSFS_PCI_BUS_PREFIX: &str = "/sys/bus/pci/devices";
 pub const SYSFS_PCI_BUS_RESCAN_FILE: &str = "/sys/bus/pci/rescan";
 #[cfg(any(
    target_arch = "powerpc64",
@@ -25,10 +24,18 @@ pub fn create_pci_root_bus_path() -> String {
 pub fn create_pci_root_bus_path() -> String {
    let ret = String::from("/devices/platform/4010000000.pcie/pci0000:00");

+    let acpi_root_bus_path = String::from("/devices/pci0000:00");
+    let mut acpi_sysfs_dir = String::from(SYSFS_DIR);
    let mut sysfs_dir = String::from(SYSFS_DIR);
    let mut start_root_bus_path = String::from("/devices/platform/");
    let end_root_bus_path = String::from("/pci0000:00");

+    // check if there is pci bus path for acpi
+    acpi_sysfs_dir.push_str(&acpi_root_bus_path);
+    if let Ok(_) = fs::metadata(&acpi_sysfs_dir) {
+        return acpi_root_bus_path;
+    }
+
    sysfs_dir.push_str(&start_root_bus_path);
    let entries = match fs::read_dir(sysfs_dir) {
        Ok(e) => e,
@@ -58,17 +65,19 @@ pub fn create_pci_root_bus_path() -> String {
    ret
 }

+// From https://www.kernel.org/doc/Documentation/acpi/namespace.txt
+// The Linux kernel's core ACPI subsystem creates struct acpi_device
+// objects for ACPI namespace objects representing devices, power resources
+// processors, thermal zones. Those objects are exported to user space via
+// sysfs as directories in the subtree under /sys/devices/LNXSYSTM:00
+pub const ACPI_DEV_PATH: &str = "/devices/LNXSYSTM";
+
 pub const SYSFS_CPU_ONLINE_PATH: &str = "/sys/devices/system/cpu";

 pub const SYSFS_MEMORY_BLOCK_SIZE_PATH: &str = "/sys/devices/system/memory/block_size_bytes";
 pub const SYSFS_MEMORY_HOTPLUG_PROBE_PATH: &str = "/sys/devices/system/memory/probe";
 pub const SYSFS_MEMORY_ONLINE_PATH: &str = "/sys/devices/system/memory";

-// Here in "0:0", the first number is the SCSI host number because
-// only one SCSI controller has been plugged, while the second number
-// is always 0.
-pub const SCSI_HOST_CHANNEL: &str = "0:0:";
-pub const SCSI_BLOCK_SUFFIX: &str = "block";
 pub const SYSFS_SCSI_HOST_PATH: &str = "/sys/class/scsi_host";

 pub const SYSFS_CGROUPPATH: &str = "/sys/fs/cgroup";
--- a/src/agent/src/main.rs
+++ b/src/agent/src/main.rs
@@ -3,91 +3,86 @@
 // SPDX-License-Identifier: Apache-2.0
 //

-#![allow(non_camel_case_types)]
-#![allow(unused_parens)]
-#![allow(unused_unsafe)]
-#![allow(dead_code)]
-#![allow(non_snake_case)]
 #[macro_use]
 extern crate lazy_static;
+extern crate capctl;
 extern crate oci;
-extern crate prctl;
 extern crate prometheus;
 extern crate protocols;
 extern crate regex;
-extern crate rustjail;
 extern crate scan_fmt;
 extern crate serde_json;
-extern crate signal_hook;

 #[macro_use]
 extern crate scopeguard;

 #[macro_use]
 extern crate slog;
-extern crate netlink;

-use crate::netlink::{RtnlHandle, NETLINK_ROUTE};
 use anyhow::{anyhow, Context, Result};
-use nix::fcntl::{self, OFlag};
-use nix::fcntl::{FcntlArg, FdFlag};
-use nix::libc::{STDERR_FILENO, STDIN_FILENO, STDOUT_FILENO};
-use nix::pty;
-use nix::sys::select::{select, FdSet};
+use nix::fcntl::OFlag;
 use nix::sys::socket::{self, AddressFamily, SockAddr, SockFlag, SockType};
-use nix::sys::wait::{self, WaitStatus};
-use nix::unistd::{self, close, dup, dup2, fork, setsid, ForkResult};
-use prctl::set_child_subreaper;
-use signal_hook::{iterator::Signals, SIGCHLD};
-use std::collections::HashMap;
+use nix::unistd::{self, dup, Pid};
 use std::env;
-use std::ffi::{CStr, CString, OsStr};
+use std::ffi::OsStr;
 use std::fs::{self, File};
-use std::io::{Read, Write};
 use std::os::unix::ffi::OsStrExt;
 use std::os::unix::fs as unixfs;
 use std::os::unix::io::AsRawFd;
 use std::path::Path;
-use std::sync::mpsc::{self, Sender};
-use std::sync::{Arc, Mutex, RwLock};
-use std::{io, thread, thread::JoinHandle};
-use unistd::Pid;
+use std::process::exit;
+use std::sync::Arc;
+use tracing::{instrument, span};

 mod config;
+mod console;
 mod device;
 mod linux_abi;
 mod metrics;
 mod mount;
 mod namespace;
+mod netlink;
 mod network;
+mod pci;
 pub mod random;
 mod sandbox;
+mod signal;
 #[cfg(test)]
 mod test_utils;
 mod uevent;
+mod util;
 mod version;

 use mount::{cgroups_mount, general_mount};
 use sandbox::Sandbox;
-use slog::Logger;
+use signal::setup_signal_handler;
+use slog::{error, info, o, warn, Logger};
 use uevent::watch_uevents;

+use futures::future::join_all;
+use rustjail::pipestream::PipeStream;
+use tokio::{
+    io::AsyncWrite,
+    sync::{
+        watch::{channel, Receiver},
+        Mutex, RwLock,
+    },
+    task::JoinHandle,
+};
+
 mod rpc;
+mod tracer;

 const NAME: &str = "kata-agent";
 const KERNEL_CMDLINE_FILE: &str = "/proc/cmdline";
-const CONSOLE_PATH: &str = "/dev/console";
-
-const DEFAULT_BUF_SIZE: usize = 8 * 1024;

 lazy_static! {
-    static ref GLOBAL_DEVICE_WATCHER: Arc<Mutex<HashMap<String, Sender<String>>>> =
-        Arc::new(Mutex::new(HashMap::new()));
-    static ref AGENT_CONFIG: Arc<RwLock<agentConfig>> =
-        Arc::new(RwLock::new(config::agentConfig::new()));
+    static ref AGENT_CONFIG: Arc<RwLock<AgentConfig>> =
+        Arc::new(RwLock::new(config::AgentConfig::new()));
 }

-fn announce(logger: &Logger, config: &agentConfig) {
+#[instrument]
+fn announce(logger: &Logger, config: &AgentConfig) {
    info!(logger, "announce";
    "agent-commit" => version::VERSION_COMMIT,

@@ -100,7 +95,162 @@ fn announce(logger: &Logger, config: &agentConfig) {
    );
 }

-fn main() -> Result<()> {
+// Async task that copies everything read from the logger pipe `rfd` to a
+// vsock connection on `vsock_port` (when non-zero), or to stdout otherwise.
+async fn create_logger_task(rfd: RawFd, vsock_port: u32, shutdown: Receiver<bool>) -> Result<()> {
+    let mut reader = PipeStream::from_fd(rfd);
+
+    let mut writer: Box<dyn AsyncWrite + Unpin + Send> = if vsock_port > 0 {
+        let listenfd = socket::socket(
+            AddressFamily::Vsock,
+            SockType::Stream,
+            SockFlag::SOCK_CLOEXEC,
+            None,
+        )?;
+
+        // Propagate bind/listen errors to the caller rather than panicking.
+        let addr = SockAddr::new_vsock(libc::VMADDR_CID_ANY, vsock_port);
+        socket::bind(listenfd, &addr)?;
+        socket::listen(listenfd, 1)?;
+
+        Box::new(util::get_vsock_stream(listenfd).await.unwrap())
+    } else {
+        Box::new(tokio::io::stdout())
+    };
+
+    let _ = util::interruptable_io_copier(&mut reader, &mut writer, shutdown).await;
+
+    Ok(())
+}
+
+async fn real_main() -> std::result::Result<(), Box<dyn std::error::Error>> {
+    env::set_var("RUST_BACKTRACE", "full");
+
+    // Handles of the spawned tasks that are awaited during shutdown
+    let mut tasks: Vec<JoinHandle<Result<()>>> = vec![];
+
+    console::initialize();
+
+    lazy_static::initialize(&AGENT_CONFIG);
+
+    // Pipe feeding the logger task: loggers write to wfd, the task reads rfd
+    let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC)?;
+
+    let (shutdown_tx, shutdown_rx) = channel(true);
+
+    let agent_config = AGENT_CONFIG.clone();
+
+    let init_mode = unistd::getpid() == Pid::from_raw(1);
+    if init_mode {
+        // Dup a new file descriptor for this temporary logger's writer,
+        // since the logger is dropped, and its writer closed, at the end
+        // of this code block.
+        let newwfd = dup(wfd)?;
+        let writer = unsafe { File::from_raw_fd(newwfd) };
+
+        // Create a temporary logger for use while running as the init process:
+        // until the base mounts are done, "/proc/cmdline" cannot be read
+        // to get the customized log level.
+        let (logger, logger_async_guard) =
+            logging::create_logger(NAME, "agent", slog::Level::Debug, writer);
+
+        // Must mount proc fs before parsing kernel command line
+        general_mount(&logger).map_err(|e| {
+            error!(logger, "fail general mount: {}", e);
+            e
+        })?;
+
+        let mut config = agent_config.write().await;
+        config.parse_cmdline(KERNEL_CMDLINE_FILE)?;
+
+        init_agent_as_init(&logger, config.unified_cgroup_hierarchy)?;
+        drop(logger_async_guard);
+    } else {
+        // Parse the cmdline into the config inside this block so that the
+        // write lock is released as soon as possible, in case another task
+        // wants to take a read lock on it.
+        let mut config = agent_config.write().await;
+        config.parse_cmdline(KERNEL_CMDLINE_FILE)?;
+    }
+    let config = agent_config.read().await;
+
+    let log_vport = config.log_vport as u32;
+
+    let log_handle = tokio::spawn(create_logger_task(rfd, log_vport, shutdown_rx.clone()));
+
+    tasks.push(log_handle);
+
+    let writer = unsafe { File::from_raw_fd(wfd) };
+
+    // Recreate the logger with the log level obtained from "/proc/cmdline".
+    let (logger, logger_async_guard) =
+        logging::create_logger(NAME, "agent", config.log_level, writer);
+
+    announce(&logger, &config);
+
+    // This variable is required as it enables the global (and crucially static) logger,
+    // which is required to satisfy the lifetime constraints of the auto-generated gRPC code.
+    let global_logger = slog_scope::set_global_logger(logger.new(o!("subsystem" => "rpc")));
+
+    // Allow the global logger to be modified later (for shutdown)
+    global_logger.cancel_reset();
+
+    let mut ttrpc_log_guard: Result<(), log::SetLoggerError> = Ok(());
+
+    if config.log_level == slog::Level::Trace {
+        // Redirect ttrpc log calls to slog if and only if trace level is requested
+        ttrpc_log_guard = Ok(slog_stdlog::init().map_err(|e| e)?);
+    }
+
+    if config.tracing != tracer::TraceType::Disabled {
+        let _ = tracer::setup_tracing(NAME, &logger, &config)?;
+    }
+
+    let root = span!(tracing::Level::TRACE, "root-span", work_units = 2);
+
+    // XXX: Start the root trace transaction.
+    //
+    // XXX: Note that *ALL* spans need to start after this point!!
+    let _enter = root.enter();
+
+    // Start the sandbox and wait for its ttRPC server to end
+    start_sandbox(&logger, &config, init_mode, &mut tasks, shutdown_rx.clone()).await?;
+
+    // Install a NOP logger for the remainder of the shutdown sequence
+    // to ensure any log calls made by local crates using the scope logger
+    // don't fail.
+    let global_logger_guard2 =
+        slog_scope::set_global_logger(slog::Logger::root(slog::Discard, o!()));
+    global_logger_guard2.cancel_reset();
+
+    drop(logger_async_guard);
+
+    drop(ttrpc_log_guard);
+
+    // Trigger a controlled shutdown
+    shutdown_tx
+        .send(true)
+        .map_err(|e| anyhow!(e).context("failed to request shutdown"))?;
+
+    // Wait for all tasks to finish
+    let results = join_all(tasks).await;
+
+    for result in results {
+        if let Err(e) = result {
+            return Err(anyhow!(e).into());
+        }
+    }
+
+    if config.tracing != tracer::TraceType::Disabled {
+        tracer::end_tracing();
+    }
+
+    eprintln!("{} shutdown complete", NAME);
+
+    Ok(())
+}
+
+fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let args: Vec<String> = env::args().collect();

    if args.len() == 2 && args[1] == "--version" {
@@ -116,244 +266,68 @@ fn main() -> Result<()> {
    }

    if args.len() == 2 && args[1] == "init" {
+        reset_sigpipe();
        rustjail::container::init_child();
        exit(0);
    }

-    env::set_var("RUST_BACKTRACE", "full");
+    let rt = tokio::runtime::Builder::new_multi_thread()
+        .enable_all()
+        .build()?;

-    lazy_static::initialize(&SHELLS);
-
-    lazy_static::initialize(&AGENT_CONFIG);
-
-    // support vsock log
-    let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC)?;
-
-    let agentConfig = AGENT_CONFIG.clone();
-
-    let init_mode = unistd::getpid() == Pid::from_raw(1);
-    if init_mode {
-        // dup a new file descriptor for this temporary logger writer,
-        // since this logger would be dropped and it's writer would
-        // be closed out of this code block.
-        let newwfd = dup(wfd)?;
-        let writer = unsafe { File::from_raw_fd(newwfd) };
-
-        // Init a temporary logger used by init agent as init process
-        // since before do the base mount, it wouldn't access "/proc/cmdline"
-        // to get the customzied debug level.
-        let logger = logging::create_logger(NAME, "agent", slog::Level::Debug, writer);
-
-        // Must mount proc fs before parsing kernel command line
-        general_mount(&logger).map_err(|e| {
-            error!(logger, "fail general mount: {}", e);
-            e
-        })?;
-
-        let mut config = agentConfig.write().unwrap();
-        config.parse_cmdline(KERNEL_CMDLINE_FILE)?;
-
-        init_agent_as_init(&logger, config.unified_cgroup_hierarchy)?;
-    } else {
-        // once parsed cmdline and set the config, release the write lock
-        // as soon as possible in case other thread would get read lock on
-        // it.
-        let mut config = agentConfig.write().unwrap();
-        config.parse_cmdline(KERNEL_CMDLINE_FILE)?;
-    }
-    let config = agentConfig.read().unwrap();
-
-    let log_vport = config.log_vport as u32;
-    let log_handle = thread::spawn(move || -> Result<()> {
-        let mut reader = unsafe { File::from_raw_fd(rfd) };
-        if log_vport > 0 {
-            let listenfd = socket::socket(
-                AddressFamily::Vsock,
-                SockType::Stream,
-                SockFlag::SOCK_CLOEXEC,
-                None,
-            )?;
-            let addr = SockAddr::new_vsock(libc::VMADDR_CID_ANY, log_vport);
-            socket::bind(listenfd, &addr)?;
-            socket::listen(listenfd, 1)?;
-            let datafd = socket::accept4(listenfd, SockFlag::SOCK_CLOEXEC)?;
-            let mut log_writer = unsafe { File::from_raw_fd(datafd) };
-            let _ = io::copy(&mut reader, &mut log_writer)?;
-            let _ = unistd::close(listenfd);
-            let _ = unistd::close(datafd);
-        }
-        // copy log to stdout
-        let mut stdout_writer = io::stdout();
-        let _ = io::copy(&mut reader, &mut stdout_writer)?;
-        Ok(())
-    });
-
-    let writer = unsafe { File::from_raw_fd(wfd) };
-    // Recreate a logger with the log level get from "/proc/cmdline".
-    let logger = logging::create_logger(NAME, "agent", config.log_level, writer);
-
-    announce(&logger, &config);
-
-    // This "unused" variable is required as it enables the global (and crucially static) logger,
-    // which is required to satisfy the the lifetime constraints of the auto-generated gRPC code.
-    let _guard = slog_scope::set_global_logger(logger.new(o!("subsystem" => "rpc")));
-
-    let mut _log_guard: Result<(), log::SetLoggerError> = Ok(());
-
-    if config.log_level == slog::Level::Trace {
-        // Redirect ttrpc log calls to slog iff full debug requested
-        _log_guard = Ok(slog_stdlog::init().map_err(|e| e)?);
-    }
-
-    start_sandbox(&logger, &config, init_mode)?;
-
-    let _ = log_handle.join();
-
-    Ok(())
+    rt.block_on(real_main())
 }

-fn start_sandbox(logger: &Logger, config: &agentConfig, init_mode: bool) -> Result<()> {
-    let shells = SHELLS.clone();
+#[instrument]
+async fn start_sandbox(
+    logger: &Logger,
+    config: &AgentConfig,
+    init_mode: bool,
+    tasks: &mut Vec<JoinHandle<Result<()>>>,
+    shutdown: Receiver<bool>,
+) -> Result<()> {
    let debug_console_vport = config.debug_console_vport as u32;

-    let mut shell_handle: Option<JoinHandle<()>> = None;
    if config.debug_console {
-        let thread_logger = logger.clone();
+        let debug_console_task = tokio::task::spawn(console::debug_console_handler(
+            logger.clone(),
+            debug_console_vport,
+            shutdown.clone(),
+        ));

-        let builder = thread::Builder::new();
-
-        let handle = builder.spawn(move || {
-            let shells = shells.lock().unwrap();
-            let result = setup_debug_console(&thread_logger, shells.to_vec(), debug_console_vport);
-            if result.is_err() {
-                // Report error, but don't fail
-                warn!(thread_logger, "failed to setup debug console";
-                    "error" => format!("{}", result.unwrap_err()));
-            }
-        })?;
-
-        shell_handle = Some(handle);
+        tasks.push(debug_console_task);
    }

    // Initialize unique sandbox structure.
-    let mut s = Sandbox::new(&logger).context("Failed to create sandbox")?;
-
+    let s = Sandbox::new(&logger).context("Failed to create sandbox")?;
    if init_mode {
-        let mut rtnl = RtnlHandle::new(NETLINK_ROUTE, 0).unwrap();
-        rtnl.handle_localhost()?;
-
-        s.rtnl = Some(rtnl);
+        s.rtnl.handle_localhost().await?;
    }

    let sandbox = Arc::new(Mutex::new(s));

-    setup_signal_handler(&logger, sandbox.clone()).unwrap();
-    watch_uevents(sandbox.clone());
+    let signal_handler_task = tokio::spawn(setup_signal_handler(
+        logger.clone(),
+        sandbox.clone(),
+        shutdown.clone(),
+    ));

-    let (tx, rx) = mpsc::channel::<i32>();
-    sandbox.lock().unwrap().sender = Some(tx);
+    tasks.push(signal_handler_task);
+
+    let uevents_handler_task = tokio::spawn(watch_uevents(sandbox.clone(), shutdown.clone()));
+
+    tasks.push(uevents_handler_task);
+
+    let (tx, rx) = tokio::sync::oneshot::channel();
+    sandbox.lock().await.sender = Some(tx);

    // vsock:///dev/vsock, port
-    let mut server = rpc::start(sandbox, config.server_addr.as_str());
+    let mut server = rpc::start(sandbox.clone(), config.server_addr.as_str());
+    server.start().await?;

-    let _ = server.start().unwrap();
+    rx.await?;
+    server.shutdown().await?;

-    let _ = rx.recv()?;
-
-    server.shutdown();
-
-    if let Some(handle) = shell_handle {
-        handle.join().map_err(|e| anyhow!("{:?}", e))?;
-    }
-
-    Ok(())
-}
-
-use nix::sys::wait::WaitPidFlag;
-
-fn setup_signal_handler(logger: &Logger, sandbox: Arc<Mutex<Sandbox>>) -> Result<()> {
-    let logger = logger.new(o!("subsystem" => "signals"));
-
-    set_child_subreaper(true)
-        .map_err(|err| anyhow!(err).context("failed to setup agent as a child subreaper"))?;
-
-    let signals = Signals::new(&[SIGCHLD])?;
-
-    thread::spawn(move || {
-        'outer: for sig in signals.forever() {
-            info!(logger, "received signal"; "signal" => sig);
-
-            // sevral signals can be combined together
-            // as one. So loop around to reap all
-            // exited children
-            'inner: loop {
-                let wait_status = match wait::waitpid(
-                    Some(Pid::from_raw(-1)),
-                    Some(WaitPidFlag::WNOHANG | WaitPidFlag::__WALL),
-                ) {
-                    Ok(s) => {
-                        if s == WaitStatus::StillAlive {
-                            continue 'outer;
-                        }
-                        s
-                    }
-                    Err(e) => {
-                        info!(
-                            logger,
-                            "waitpid reaper failed";
-                            "error" => e.as_errno().unwrap().desc()
-                        );
-                        continue 'outer;
-                    }
-                };
-                info!(logger, "wait_status"; "wait_status result" => format!("{:?}", wait_status));
-
-                let pid = wait_status.pid();
-                if let Some(pid) = pid {
-                    let raw_pid = pid.as_raw();
-                    let child_pid = format!("{}", raw_pid);
-
-                    let logger = logger.new(o!("child-pid" => child_pid));
-
-                    let mut sandbox = sandbox.lock().unwrap();
-                    let process = sandbox.find_process(raw_pid);
-                    if process.is_none() {
-                        info!(logger, "child exited unexpectedly");
-                        continue 'inner;
-                    }
-
-                    let mut p = process.unwrap();
-
-                    if p.exit_pipe_w.is_none() {
-                        error!(logger, "the process's exit_pipe_w isn't set");
-                        continue 'inner;
-                    }
-                    let pipe_write = p.exit_pipe_w.unwrap();
-                    let ret: i32;
-
-                    match wait_status {
-                        WaitStatus::Exited(_, c) => ret = c,
-                        WaitStatus::Signaled(_, sig, _) => ret = sig as i32,
-                        _ => {
-                            info!(logger, "got wrong status for process";
-                                  "child-status" => format!("{:?}", wait_status));
-                            continue 'inner;
-                        }
-                    }
-
-                    p.exit_code = ret;
-                    let _ = unistd::close(pipe_write);
-
-                    if let Some(ref poller) = p.epoller {
-                        info!(logger, "close epoller");
-                        // close the socket file to notify readStdio to close terminal specifically
-                        // in case this process's terminal has been inherited by its children.
-                        poller.close_wfd()
-                    }
-                }
-            }
-        }
-    });
    Ok(())
 }

@@ -374,7 +348,7 @@ fn init_agent_as_init(logger: &Logger, unified_cgroup_hierarchy: bool) -> Result
    unistd::setsid()?;

    unsafe {
-        libc::ioctl(io::stdin().as_raw_fd(), libc::TIOCSCTTY, 1);
+        libc::ioctl(std::io::stdin().as_raw_fd(), libc::TIOCSCTTY, 1);
    }

    env::set_var("PATH", "/bin:/sbin/:/usr/bin/:/usr/sbin/");
@@ -391,6 +365,7 @@ fn init_agent_as_init(logger: &Logger, unified_cgroup_hierarchy: bool) -> Result
    Ok(())
 }

+#[instrument]
 fn sethostname(hostname: &OsStr) -> Result<()> {
    let size = hostname.len() as usize;

@@ -404,295 +379,16 @@ fn sethostname(hostname: &OsStr) -> Result<()> {
    }
 }

-lazy_static! {
-    static ref SHELLS: Arc<Mutex<Vec<String>>> = {
-        let mut v = Vec::new();
-
-        if !cfg!(test) {
-            v.push("/bin/bash".to_string());
-            v.push("/bin/sh".to_string());
-        }
-
-        Arc::new(Mutex::new(v))
-    };
+// The Rust standard library suppresses the default SIGPIPE behavior,
+// see https://github.com/rust-lang/rust/pull/13158.
+// Because a child process inherits its parent's signal disposition, restore
+// the default SIGPIPE action (SIG_DFL) here as a workaround for
+// https://github.com/kata-containers/kata-containers/issues/1887.
+fn reset_sigpipe() {
+    unsafe {
+        libc::signal(libc::SIGPIPE, libc::SIG_DFL);
+    }
 }

-// pub static mut LOG_LEVEL: ;
-// pub static mut TRACE_MODE: ;
-
-use crate::config::agentConfig;
-use nix::sys::stat::Mode;
+use crate::config::AgentConfig;
 use std::os::unix::io::{FromRawFd, RawFd};
-use std::path::PathBuf;
-use std::process::exit;
-
-fn setup_debug_console(logger: &Logger, shells: Vec<String>, port: u32) -> Result<()> {
-    let mut shell: &str = "";
-    for sh in shells.iter() {
-        let binary = PathBuf::from(sh);
-        if binary.exists() {
-            shell = sh;
-            break;
-        }
-    }
-
-    if shell == "" {
-        return Err(anyhow!("no shell found to launch debug console"));
-    }
-
-    if port > 0 {
-        let listenfd = socket::socket(
-            AddressFamily::Vsock,
-            SockType::Stream,
-            SockFlag::SOCK_CLOEXEC,
-            None,
-        )?;
-        let addr = SockAddr::new_vsock(libc::VMADDR_CID_ANY, port);
-        socket::bind(listenfd, &addr)?;
-        socket::listen(listenfd, 1)?;
-        loop {
-            let f: RawFd = socket::accept4(listenfd, SockFlag::SOCK_CLOEXEC)?;
-            match run_debug_console_shell(logger, shell, f) {
-                Ok(_) => {
-                    info!(logger, "run_debug_console_shell session finished");
-                }
-                Err(err) => {
-                    error!(logger, "run_debug_console_shell failed: {:?}", err);
-                }
-            }
-        }
-    } else {
-        let mut flags = OFlag::empty();
-        flags.insert(OFlag::O_RDWR);
-        flags.insert(OFlag::O_CLOEXEC);
-        loop {
-            let f: RawFd = fcntl::open(CONSOLE_PATH, flags, Mode::empty())?;
-            match run_debug_console_shell(logger, shell, f) {
-                Ok(_) => {
-                    info!(logger, "run_debug_console_shell session finished");
-                }
-                Err(err) => {
-                    error!(logger, "run_debug_console_shell failed: {:?}", err);
-                }
-            }
-        }
-    };
-}
-
-fn io_copy<R: ?Sized, W: ?Sized>(reader: &mut R, writer: &mut W) -> io::Result<u64>
-where
-    R: Read,
-    W: Write,
-{
-    let mut buf = [0; DEFAULT_BUF_SIZE];
-    let buf_len;
-
-    match reader.read(&mut buf) {
-        Ok(0) => return Ok(0),
-        Ok(len) => buf_len = len,
-        Err(err) => return Err(err),
-    };
-
-    // write and return
-    match writer.write_all(&buf[..buf_len]) {
-        Ok(_) => Ok(buf_len as u64),
-        Err(err) => Err(err),
-    }
-}
-
-fn run_debug_console_shell(logger: &Logger, shell: &str, socket_fd: RawFd) -> Result<()> {
-    let pseduo = pty::openpty(None, None)?;
-    let _ = fcntl::fcntl(pseduo.master, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC));
-    let _ = fcntl::fcntl(pseduo.slave, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC));
-
-    let slave_fd = pseduo.slave;
-
-    match fork() {
-        Ok(ForkResult::Child) => {
-            // create new session with child as session leader
-            setsid()?;
-
-            // dup stdin, stdout, stderr to let child act as a terminal
-            dup2(slave_fd, STDIN_FILENO)?;
-            dup2(slave_fd, STDOUT_FILENO)?;
-            dup2(slave_fd, STDERR_FILENO)?;
-
-            // set tty
-            unsafe {
-                libc::ioctl(0, libc::TIOCSCTTY);
-            }
-
-            let cmd = CString::new(shell).unwrap();
-            let args: Vec<&CStr> = vec![];
-
-            // run shell
-            let _ = unistd::execvp(cmd.as_c_str(), args.as_slice()).map_err(|e| match e {
-                nix::Error::Sys(errno) => {
-                    std::process::exit(errno as i32);
-                }
-                _ => std::process::exit(-2),
-            });
-        }
-
-        Ok(ForkResult::Parent { child: child_pid }) => {
-            info!(logger, "get debug shell pid {:?}", child_pid);
-
-            let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC)?;
-            let master_fd = pseduo.master;
-            let debug_shell_logger = logger.clone();
-
-            // channel that used to sync between thread and main process
-            let (tx, rx) = mpsc::channel::<i32>();
-
-            // start a thread to do IO copy between socket and pseduo.master
-            thread::spawn(move || {
-                let mut master_reader = unsafe { File::from_raw_fd(master_fd) };
-                let mut master_writer = unsafe { File::from_raw_fd(master_fd) };
-                let mut socket_reader = unsafe { File::from_raw_fd(socket_fd) };
-                let mut socket_writer = unsafe { File::from_raw_fd(socket_fd) };
-
-                loop {
-                    let mut fd_set = FdSet::new();
-                    fd_set.insert(rfd);
-                    fd_set.insert(master_fd);
-                    fd_set.insert(socket_fd);
-
-                    match select(
-                        Some(fd_set.highest().unwrap() + 1),
-                        &mut fd_set,
-                        None,
-                        None,
-                        None,
-                    ) {
-                        Ok(_) => (),
-                        Err(e) => {
-                            if e == nix::Error::from(nix::errno::Errno::EINTR) {
-                                continue;
-                            } else {
-                                error!(debug_shell_logger, "select error {:?}", e);
-                                tx.send(1).unwrap();
-                                break;
-                            }
-                        }
-                    }
-
-                    if fd_set.contains(rfd) {
-                        info!(
-                            debug_shell_logger,
-                            "debug shell process {} exited", child_pid
-                        );
-                        tx.send(1).unwrap();
-                        break;
-                    }
-
-                    if fd_set.contains(master_fd) {
-                        match io_copy(&mut master_reader, &mut socket_writer) {
-                            Ok(0) => {
-                                debug!(debug_shell_logger, "master fd closed");
-                                tx.send(1).unwrap();
-                                break;
-                            }
-                            Ok(_) => {}
-                            Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
-                            Err(e) => {
-                                error!(debug_shell_logger, "read master fd error {:?}", e);
-                                tx.send(1).unwrap();
-                                break;
-                            }
-                        }
-                    }
-
-                    if fd_set.contains(socket_fd) {
-                        match io_copy(&mut socket_reader, &mut master_writer) {
-                            Ok(0) => {
-                                debug!(debug_shell_logger, "socket fd closed");
-                                tx.send(1).unwrap();
-                                break;
-                            }
-                            Ok(_) => {}
-                            Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
-                            Err(e) => {
-                                error!(debug_shell_logger, "read socket fd error {:?}", e);
-                                tx.send(1).unwrap();
-                                break;
-                            }
-                        }
-                    }
-                }
-            });
-
-            let wait_status = wait::waitpid(child_pid, None);
-            info!(logger, "debug console process exit code: {:?}", wait_status);
-
-            info!(logger, "notify debug monitor thread to exit");
-            // close pipe to exit select loop
-            let _ = close(wfd);
-
-            // wait for thread exit.
-            let _ = rx.recv().unwrap();
-            info!(logger, "debug monitor thread has exited");
-
-            // close files
-            let _ = close(rfd);
-            let _ = close(master_fd);
-            let _ = close(slave_fd);
-        }
-        Err(err) => {
-            return Err(anyhow!("fork error: {:?}", err));
-        }
-    }
-
-    Ok(())
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use tempfile::tempdir;
-
-    #[test]
-    fn test_setup_debug_console_no_shells() {
-        // Guarantee no shells have been added
-        // (required to avoid racing with
-        // test_setup_debug_console_invalid_shell()).
-        let shells_ref = SHELLS.clone();
-        let mut shells = shells_ref.lock().unwrap();
-        shells.clear();
-        let logger = slog_scope::logger();
-
-        let result = setup_debug_console(&logger, shells.to_vec(), 0);
-
-        assert!(result.is_err());
-        assert_eq!(
-            result.unwrap_err().to_string(),
-            "no shell found to launch debug console"
-        );
-    }
-
-    #[test]
-    fn test_setup_debug_console_invalid_shell() {
-        let shells_ref = SHELLS.clone();
-        let mut shells = shells_ref.lock().unwrap();
-
-        let dir = tempdir().expect("failed to create tmpdir");
-
-        // Add an invalid shell
-        let shell = dir
-            .path()
-            .join("enoent")
-            .to_str()
-            .expect("failed to construct shell path")
-            .to_string();
-
-        shells.push(shell);
-        let logger = slog_scope::logger();
-
-        let result = setup_debug_console(&logger, shells.to_vec(), 0);
-
-        assert!(result.is_err());
-        assert_eq!(
-            result.unwrap_err().to_string(),
-            "no shell found to launch debug console"
-        );
-    }
-}
--- a/src/agent/src/metrics.rs
+++ b/src/agent/src/metrics.rs
@@ -8,6 +8,7 @@ extern crate procfs;
 use prometheus::{Encoder, Gauge, GaugeVec, IntCounter, TextEncoder};

 use anyhow::Result;
+use tracing::instrument;

 const NAMESPACE_KATA_AGENT: &str = "kata_agent";
 const NAMESPACE_KATA_GUEST: &str = "kata_guest";
@@ -68,6 +69,7 @@ lazy_static! {
    prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"meminfo").as_ref() , "Statistics about memory usage in the system.", &["item"]).unwrap();
 }

+#[instrument]
 pub fn get_metrics(_: &protocols::agent::GetMetricsRequest) -> Result<String> {
    AGENT_SCRAPE_COUNT.inc();

@@ -87,6 +89,7 @@ pub fn get_metrics(_: &protocols::agent::GetMetricsRequest) -> Result<String> {
    Ok(String::from_utf8(buffer).unwrap())
 }

+#[instrument]
 fn update_agent_metrics() {
    let me = procfs::process::Process::myself();

@@ -136,6 +139,7 @@ fn update_agent_metrics() {
    }
 }

+#[instrument]
 fn update_guest_metrics() {
    // try get load and task info
    match procfs::LoadAverage::new() {
@@ -187,9 +191,9 @@ fn update_guest_metrics() {
            info!(sl!(), "failed to get guest KernelStats: {:?}", err);
        }
        Ok(kernel_stats) => {
-            set_gauge_vec_CPU_time(&GUEST_CPU_TIME, "total", &kernel_stats.total);
+            set_gauge_vec_cpu_time(&GUEST_CPU_TIME, "total", &kernel_stats.total);
            for (i, cpu_time) in kernel_stats.cpu_time.iter().enumerate() {
-                set_gauge_vec_CPU_time(&GUEST_CPU_TIME, format!("{}", i).as_str(), &cpu_time);
+                set_gauge_vec_cpu_time(&GUEST_CPU_TIME, format!("{}", i).as_str(), &cpu_time);
            }
        }
    }
@@ -218,6 +222,7 @@ fn update_guest_metrics() {
    }
 }

+#[instrument]
 fn set_gauge_vec_meminfo(gv: &prometheus::GaugeVec, meminfo: &procfs::Meminfo) {
    gv.with_label_values(&["mem_total"])
        .set(meminfo.mem_total as f64);
@@ -332,7 +337,8 @@ fn set_gauge_vec_meminfo(gv: &prometheus::GaugeVec, meminfo: &procfs::Meminfo) {
        .set(meminfo.k_reclaimable.unwrap_or(0) as f64);
 }

-fn set_gauge_vec_CPU_time(gv: &prometheus::GaugeVec, cpu: &str, cpu_time: &procfs::CpuTime) {
+#[instrument]
+fn set_gauge_vec_cpu_time(gv: &prometheus::GaugeVec, cpu: &str, cpu_time: &procfs::CpuTime) {
    gv.with_label_values(&[cpu, "user"])
        .set(cpu_time.user as f64);
    gv.with_label_values(&[cpu, "nice"])
@@ -355,6 +361,7 @@ fn set_gauge_vec_CPU_time(gv: &prometheus::GaugeVec, cpu: &str, cpu_time: &procf
        .set(cpu_time.guest_nice.unwrap_or(0.0) as f64);
 }

+#[instrument]
 fn set_gauge_vec_diskstat(gv: &prometheus::GaugeVec, diskstat: &procfs::DiskStat) {
    gv.with_label_values(&[diskstat.name.as_str(), "reads"])
        .set(diskstat.reads as f64);
@@ -393,6 +400,7 @@ fn set_gauge_vec_diskstat(gv: &prometheus::GaugeVec, diskstat: &procfs::DiskStat
 }

 // set_gauge_vec_netdev set gauge for NetDevLine
+#[instrument]
 fn set_gauge_vec_netdev(gv: &prometheus::GaugeVec, status: &procfs::net::DeviceStatus) {
    gv.with_label_values(&[status.name.as_str(), "recv_bytes"])
        .set(status.recv_bytes as f64);
@@ -429,6 +437,7 @@ fn set_gauge_vec_netdev(gv: &prometheus::GaugeVec, status: &procfs::net::DeviceS
 }

 // set_gauge_vec_proc_status set gauge for ProcStatus
+#[instrument]
 fn set_gauge_vec_proc_status(gv: &prometheus::GaugeVec, status: &procfs::process::Status) {
    gv.with_label_values(&["vmpeak"])
        .set(status.vmpeak.unwrap_or(0) as f64);
@@ -469,6 +478,7 @@ fn set_gauge_vec_proc_status(gv: &prometheus::GaugeVec, status: &procfs::process
 }

 // set_gauge_vec_proc_io set gauge for ProcIO
+#[instrument]
 fn set_gauge_vec_proc_io(gv: &prometheus::GaugeVec, io_stat: &procfs::process::Io) {
    gv.with_label_values(&["rchar"]).set(io_stat.rchar as f64);
    gv.with_label_values(&["wchar"]).set(io_stat.wchar as f64);
@@ -483,6 +493,7 @@ fn set_gauge_vec_proc_io(gv: &prometheus::GaugeVec, io_stat: &procfs::process::I
 }

 // set_gauge_vec_proc_stat set gauge for ProcStat
+#[instrument]
 fn set_gauge_vec_proc_stat(gv: &prometheus::GaugeVec, stat: &procfs::process::Stat) {
    gv.with_label_values(&["utime"]).set(stat.utime as f64);
    gv.with_label_values(&["stime"]).set(stat.stime as f64);
--- a/src/agent/src/mount.rs
+++ b/src/agent/src/mount.rs
@@ -7,37 +7,48 @@ use std::collections::HashMap;
 use std::ffi::CString;
 use std::fs;
 use std::io;
-use std::iter::FromIterator;
-use std::os::unix::fs::PermissionsExt;
+use std::os::unix::fs::{MetadataExt, PermissionsExt};

 use std::path::Path;
 use std::ptr::null;
-use std::sync::{Arc, Mutex};
+use std::str::FromStr;
+use std::sync::Arc;
+use tokio::sync::Mutex;

 use libc::{c_void, mount};
 use nix::mount::{self, MsFlags};
+use nix::unistd::Gid;

 use regex::Regex;
 use std::fs::File;
 use std::io::{BufRead, BufReader};

-use crate::device::{get_pci_device_name, get_scsi_device_name, online_device};
+use crate::device::{
+    get_scsi_device_name, get_virtio_blk_pci_device_name, online_device, wait_for_pmem_device,
+};
 use crate::linux_abi::*;
+use crate::pci;
 use crate::protocols::agent::Storage;
 use crate::Sandbox;
 use anyhow::{anyhow, Context, Result};
 use slog::Logger;
+use tracing::instrument;

-pub const DRIVER9PTYPE: &str = "9p";
-pub const DRIVERVIRTIOFSTYPE: &str = "virtio-fs";
-pub const DRIVERBLKTYPE: &str = "blk";
-pub const DRIVERMMIOBLKTYPE: &str = "mmioblk";
-pub const DRIVERSCSITYPE: &str = "scsi";
-pub const DRIVERNVDIMMTYPE: &str = "nvdimm";
-pub const DRIVEREPHEMERALTYPE: &str = "ephemeral";
-pub const DRIVERLOCALTYPE: &str = "local";
+pub const DRIVER_9P_TYPE: &str = "9p";
+pub const DRIVER_VIRTIOFS_TYPE: &str = "virtio-fs";
+pub const DRIVER_BLK_TYPE: &str = "blk";
+pub const DRIVER_MMIO_BLK_TYPE: &str = "mmioblk";
+pub const DRIVER_SCSI_TYPE: &str = "scsi";
+pub const DRIVER_NVDIMM_TYPE: &str = "nvdimm";
+pub const DRIVER_EPHEMERAL_TYPE: &str = "ephemeral";
+pub const DRIVER_LOCAL_TYPE: &str = "local";

-pub const TYPEROOTFS: &str = "rootfs";
+pub const TYPE_ROOTFS: &str = "rootfs";
+
+pub const MOUNT_GUEST_TAG: &str = "kataShared";
+
+// Storage option key carrying the GID (fsGroup) that should own the pod's volumes
+const FS_GID: &str = "fsgid";

 #[rustfmt::skip]
 lazy_static! {
@@ -81,7 +92,7 @@ lazy_static! {
 }

 #[derive(Debug, PartialEq)]
-pub struct INIT_MOUNT {
+pub struct InitMount {
    fstype: &'static str,
    src: &'static str,
    dest: &'static str,
@@ -111,42 +122,26 @@ lazy_static!{

 #[rustfmt::skip]
 lazy_static! {
-    pub static ref INIT_ROOTFS_MOUNTS: Vec<INIT_MOUNT> = vec![
-        INIT_MOUNT{fstype: "proc", src: "proc", dest: "/proc", options: vec!["nosuid", "nodev", "noexec"]},
-        INIT_MOUNT{fstype: "sysfs", src: "sysfs", dest: "/sys", options: vec!["nosuid", "nodev", "noexec"]},
-        INIT_MOUNT{fstype: "devtmpfs", src: "dev", dest: "/dev", options: vec!["nosuid"]},
-        INIT_MOUNT{fstype: "tmpfs", src: "tmpfs", dest: "/dev/shm", options: vec!["nosuid", "nodev"]},
-        INIT_MOUNT{fstype: "devpts", src: "devpts", dest: "/dev/pts", options: vec!["nosuid", "noexec"]},
-        INIT_MOUNT{fstype: "tmpfs", src: "tmpfs", dest: "/run", options: vec!["nosuid", "nodev"]},
+    pub static ref INIT_ROOTFS_MOUNTS: Vec<InitMount> = vec![
+        InitMount{fstype: "proc", src: "proc", dest: "/proc", options: vec!["nosuid", "nodev", "noexec"]},
+        InitMount{fstype: "sysfs", src: "sysfs", dest: "/sys", options: vec!["nosuid", "nodev", "noexec"]},
+        InitMount{fstype: "devtmpfs", src: "dev", dest: "/dev", options: vec!["nosuid"]},
+        InitMount{fstype: "tmpfs", src: "tmpfs", dest: "/dev/shm", options: vec!["nosuid", "nodev"]},
+        InitMount{fstype: "devpts", src: "devpts", dest: "/dev/pts", options: vec!["nosuid", "noexec"]},
+        InitMount{fstype: "tmpfs", src: "tmpfs", dest: "/run", options: vec!["nosuid", "nodev"]},
    ];
 }

-// StorageHandler is the type of callback to be defined to handle every
-// type of storage driver.
-type StorageHandler = fn(&Logger, &Storage, Arc<Mutex<Sandbox>>) -> Result<String>;
-
-// STORAGEHANDLERLIST lists the supported drivers.
-#[rustfmt::skip]
-lazy_static! {
-    pub static ref STORAGEHANDLERLIST: HashMap<&'static str, StorageHandler> = {
-    	let mut m = HashMap::new();
-    let blk: StorageHandler = virtio_blk_storage_handler;
-        m.insert(DRIVERBLKTYPE, blk);
-	let p9: StorageHandler= virtio9p_storage_handler;
-        m.insert(DRIVER9PTYPE, p9);
-	let virtiofs: StorageHandler = virtiofs_storage_handler;
-        m.insert(DRIVERVIRTIOFSTYPE, virtiofs);
-    let ephemeral: StorageHandler = ephemeral_storage_handler;
-        m.insert(DRIVEREPHEMERALTYPE, ephemeral);
-    let virtiommio: StorageHandler = virtiommio_blk_storage_handler;
-        m.insert(DRIVERMMIOBLKTYPE, virtiommio);
-    let local: StorageHandler = local_storage_handler;
-        m.insert(DRIVERLOCALTYPE, local);
-    let scsi: StorageHandler = virtio_scsi_storage_handler;
-        m.insert(DRIVERSCSITYPE, scsi);
-        m
-    };
-}
+pub const STORAGE_HANDLER_LIST: [&str; 8] = [
+    DRIVER_BLK_TYPE,
+    DRIVER_9P_TYPE,
+    DRIVER_VIRTIOFS_TYPE,
+    DRIVER_EPHEMERAL_TYPE,
+    DRIVER_MMIO_BLK_TYPE,
+    DRIVER_LOCAL_TYPE,
+    DRIVER_SCSI_TYPE,
+    DRIVER_NVDIMM_TYPE,
+];

 #[derive(Debug, Clone)]
 pub struct BareMount<'a> {
@@ -162,6 +157,7 @@ pub struct BareMount<'a> {
 // * evaluate all symlinks
 // * ensure the source exists
 impl<'a> BareMount<'a> {
+    #[instrument]
    pub fn new(
        s: &'a str,
        d: &'a str,
@@ -180,6 +176,7 @@ impl<'a> BareMount<'a> {
        }
    }

+    #[instrument]
    pub fn mount(&self) -> Result<()> {
        let source;
        let dest;
@@ -238,12 +235,13 @@ impl<'a> BareMount<'a> {
    }
 }

-fn ephemeral_storage_handler(
+#[instrument]
+async fn ephemeral_storage_handler(
    logger: &Logger,
    storage: &Storage,
    sandbox: Arc<Mutex<Sandbox>>,
 ) -> Result<String> {
-    let mut sb = sandbox.lock().unwrap();
+    let mut sb = sandbox.lock().await;
    let new_storage = sb.set_sandbox_storage(&storage.mount_point);

    if !new_storage {
@@ -251,17 +249,46 @@ fn ephemeral_storage_handler(
    }

    fs::create_dir_all(Path::new(&storage.mount_point))?;
-    common_storage_handler(logger, storage)?;
+
+    // For now only one option field is supported: "fsgid" (the fsGroup).
+    // It is not a valid mount option, so it must be removed before
+    // performing the mount.
+    if storage.options.len() > 0 {
+        // ephemeral storage doesn't support any mount option other than fsgid.
+        let mut new_storage = storage.clone();
+        new_storage.options = protobuf::RepeatedField::default();
+        common_storage_handler(logger, &new_storage)?;
+
+        let opts_vec: Vec<String> = storage.options.to_vec();
+
+        let opts = parse_options(opts_vec);
+
+        if let Some(fsgid) = opts.get(FS_GID) {
+            let gid = fsgid.parse::<u32>()?;
+
+            nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?;
+
+            let meta = fs::metadata(&storage.mount_point)?;
+            let mut permission = meta.permissions();
+
+            let o_mode = meta.mode() | 0o2000;
+            permission.set_mode(o_mode);
+            fs::set_permissions(&storage.mount_point, permission)?;
+        }
+    } else {
+        common_storage_handler(logger, &storage)?;
+    }

    Ok("".to_string())
 }

-fn local_storage_handler(
+#[instrument]
+async fn local_storage_handler(
    _logger: &Logger,
    storage: &Storage,
    sandbox: Arc<Mutex<Sandbox>>,
 ) -> Result<String> {
-    let mut sb = sandbox.lock().unwrap();
+    let mut sb = sandbox.lock().await;
    let new_storage = sb.set_sandbox_storage(&storage.mount_point);

    if !new_storage {
@@ -276,11 +303,24 @@ fn local_storage_handler(
    let opts_vec: Vec<String> = storage.options.to_vec();

    let opts = parse_options(opts_vec);
-    let mode = opts.get("mode");
-    if let Some(mode) = mode {
+
+    let mut need_set_fsgid = false;
+    if let Some(fsgid) = opts.get(FS_GID) {
+        let gid = fsgid.parse::<u32>()?;
+
+        nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?;
+        need_set_fsgid = true;
+    }
+
+    if let Some(mode) = opts.get("mode") {
        let mut permission = fs::metadata(&storage.mount_point)?.permissions();

-        let o_mode = u32::from_str_radix(mode, 8)?;
+        let mut o_mode = u32::from_str_radix(mode, 8)?;
+
+        if need_set_fsgid {
+            // set SetGid mode mask.
+            o_mode |= 0o2000;
+        }
        permission.set_mode(o_mode);

        fs::set_permissions(&storage.mount_point, permission)?;
@@ -289,7 +329,8 @@ fn local_storage_handler(
    Ok("".to_string())
 }

-fn virtio9p_storage_handler(
+#[instrument]
+async fn virtio9p_storage_handler(
    logger: &Logger,
    storage: &Storage,
    _sandbox: Arc<Mutex<Sandbox>>,
@@ -298,7 +339,8 @@ fn virtio9p_storage_handler(
 }

 // virtiommio_blk_storage_handler handles the storage for mmio blk driver.
-fn virtiommio_blk_storage_handler(
+#[instrument]
+async fn virtiommio_blk_storage_handler(
    logger: &Logger,
    storage: &Storage,
    _sandbox: Arc<Mutex<Sandbox>>,
@@ -308,7 +350,8 @@ fn virtiommio_blk_storage_handler(
 }

 // virtiofs_storage_handler handles the storage for virtio-fs.
-fn virtiofs_storage_handler(
+#[instrument]
+async fn virtiofs_storage_handler(
    logger: &Logger,
    storage: &Storage,
    _sandbox: Arc<Mutex<Sandbox>>,
@@ -317,14 +360,15 @@ fn virtiofs_storage_handler(
 }

 // virtio_blk_storage_handler handles the storage for blk driver.
-fn virtio_blk_storage_handler(
+#[instrument]
+async fn virtio_blk_storage_handler(
    logger: &Logger,
    storage: &Storage,
    sandbox: Arc<Mutex<Sandbox>>,
 ) -> Result<String> {
    let mut storage = storage.clone();
-    // If hot-plugged, get the device node path based on the PCI address else
-    // use the virt path provided in Storage Source
+    // If hot-plugged, get the device node path based on the PCI path
+    // otherwise use the virt path provided in Storage Source
    if storage.source.starts_with("/dev") {
        let metadata = fs::metadata(&storage.source)
            .context(format!("get metadata on file {:?}", &storage.source))?;
@@ -334,15 +378,17 @@ fn virtio_blk_storage_handler(
            return Err(anyhow!("Invalid device {}", &storage.source));
        }
    } else {
-        let dev_path = get_pci_device_name(&sandbox, &storage.source)?;
+        let pcipath = pci::Path::from_str(&storage.source)?;
+        let dev_path = get_virtio_blk_pci_device_name(&sandbox, &pcipath).await?;
        storage.source = dev_path;
    }

    common_storage_handler(logger, &storage)
 }

-// virtio_scsi_storage_handler handles the storage for scsi driver.
-fn virtio_scsi_storage_handler(
+// virtio_scsi_storage_handler handles the storage for scsi driver.
+#[instrument]
+async fn virtio_scsi_storage_handler(
    logger: &Logger,
    storage: &Storage,
    sandbox: Arc<Mutex<Sandbox>>,
@@ -350,12 +396,13 @@ fn virtio_scsi_storage_handler(
    let mut storage = storage.clone();

    // Retrieve the device path from SCSI address.
-    let dev_path = get_scsi_device_name(&sandbox, &storage.source)?;
+    let dev_path = get_scsi_device_name(&sandbox, &storage.source).await?;
    storage.source = dev_path;

    common_storage_handler(logger, &storage)
 }

+#[instrument]
 fn common_storage_handler(logger: &Logger, storage: &Storage) -> Result<String> {
    // Mount the storage device.
    let mount_point = storage.mount_point.to_string();
@@ -363,12 +410,39 @@ fn common_storage_handler(logger: &Logger, storage: &Storage) -> Result<String>
    mount_storage(logger, storage).and(Ok(mount_point))
 }

+// nvdimm_storage_handler handles the storage for NVDIMM driver.
+#[instrument]
+async fn nvdimm_storage_handler(
+    logger: &Logger,
+    storage: &Storage,
+    sandbox: Arc<Mutex<Sandbox>>,
+) -> Result<String> {
+    // `storage` is only read here, so it is borrowed rather than cloned.
+    // NOTE(review): presumably this waits until the pmem device named by
+    // `storage.source` has shown up; its result value is not used.
+    wait_for_pmem_device(&sandbox, &storage.source).await?;
+
+    common_storage_handler(logger, storage)
+}
+
 // mount_storage performs the mount described by the storage structure.
+#[instrument]
 fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
    let logger = logger.new(o!("subsystem" => "mount"));

+    // Check share before attempting to mount to see if the destination is already a mount point.
+    // If so, skip doing the mount. This facilitates mounting the sharedfs automatically
+    // in the guest before the agent service starts.
+    if storage.source == MOUNT_GUEST_TAG && is_mounted(&storage.mount_point)? {
+        warn!(
+            logger,
+            "{} already mounted on {}, ignoring...", MOUNT_GUEST_TAG, &storage.mount_point
+        );
+        return Ok(());
+    }
+
    match storage.fstype.as_str() {
-        DRIVER9PTYPE | DRIVERVIRTIOFSTYPE => {
+        DRIVER_9P_TYPE | DRIVER_VIRTIOFS_TYPE => {
            let dest_path = Path::new(storage.mount_point.as_str());
            if !dest_path.exists() {
                fs::create_dir_all(dest_path).context("Create mount destination failed")?;
@@ -380,7 +454,7 @@ fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
    }

    let options_vec = storage.options.to_vec();
-    let options_vec = Vec::from_iter(options_vec.iter().map(String::as_str));
+    let options_vec = options_vec.iter().map(String::as_str).collect();
    let (flags, options) = parse_mount_flags_and_options(options_vec);

    info!(logger, "mounting storage";
@@ -402,6 +476,26 @@ fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
    bare_mount.mount()
 }

+/// Looks for `mount_point` entry in the /proc/mounts.
+#[instrument]
+fn is_mounted(mount_point: &str) -> Result<bool> {
+    // Trailing slashes are dropped for the comparison, except that the
+    // root directory must stay "/" rather than collapse to "".
+    let mount_point = match mount_point.trim_end_matches('/') {
+        "" if mount_point.starts_with('/') => "/",
+        trimmed => trimmed,
+    };
+    // /proc/mounts is only read when the path exists (short-circuit).
+    let found = fs::metadata(mount_point).is_ok()
+        && fs::read_to_string("/proc/mounts")?
+            .lines()
+            // The 2nd column reveals the mount point.
+            .any(|line| line.split_whitespace().nth(1) == Some(mount_point));
+
+    Ok(found)
+}
+
+#[instrument]
 fn parse_mount_flags_and_options(options_vec: Vec<&str>) -> (MsFlags, String) {
    let mut flags = MsFlags::empty();
    let mut options: String = "".to_string();
@@ -430,7 +524,8 @@ fn parse_mount_flags_and_options(options_vec: Vec<&str>) -> (MsFlags, String) {
 // associated operations such as waiting for the device to show up, and mount
 // it to a specific location, according to the type of handler chosen, and for
 // each storage.
-pub fn add_storages(
+#[instrument]
+pub async fn add_storages(
    logger: Logger,
    storages: Vec<Storage>,
    sandbox: Arc<Mutex<Sandbox>>,
@@ -443,17 +538,33 @@ pub fn add_storages(
            "subsystem" => "storage",
            "storage-type" => handler_name.to_owned()));

-        let handler = STORAGEHANDLERLIST
-            .get(&handler_name.as_str())
-            .ok_or_else(|| {
-                anyhow!(
+        let res = match handler_name.as_str() {
+            DRIVER_BLK_TYPE => virtio_blk_storage_handler(&logger, &storage, sandbox.clone()).await,
+            DRIVER_9P_TYPE => virtio9p_storage_handler(&logger, &storage, sandbox.clone()).await,
+            DRIVER_VIRTIOFS_TYPE => {
+                virtiofs_storage_handler(&logger, &storage, sandbox.clone()).await
+            }
+            DRIVER_EPHEMERAL_TYPE => {
+                ephemeral_storage_handler(&logger, &storage, sandbox.clone()).await
+            }
+            DRIVER_MMIO_BLK_TYPE => {
+                virtiommio_blk_storage_handler(&logger, &storage, sandbox.clone()).await
+            }
+            DRIVER_LOCAL_TYPE => local_storage_handler(&logger, &storage, sandbox.clone()).await,
+            DRIVER_SCSI_TYPE => {
+                virtio_scsi_storage_handler(&logger, &storage, sandbox.clone()).await
+            }
+            DRIVER_NVDIMM_TYPE => nvdimm_storage_handler(&logger, &storage, sandbox.clone()).await,
+            _ => {
+                return Err(anyhow!(
                    "Failed to find the storage handler {}",
                    storage.driver.to_owned()
-                )
-            })?;
+                ));
+            }
+        };

        // Todo need to rollback the mounted storage if err met.
-        let mount_point = handler(&logger, &storage, sandbox.clone())?;
+        let mount_point = res?;

        if !mount_point.is_empty() {
            mount_list.push(mount_point);
@@ -463,7 +574,8 @@ pub fn add_storages(
    Ok(mount_list)
 }

-fn mount_to_rootfs(logger: &Logger, m: &INIT_MOUNT) -> Result<()> {
+#[instrument]
+fn mount_to_rootfs(logger: &Logger, m: &InitMount) -> Result<()> {
    let options_vec: Vec<&str> = m.options.clone();

    let (flags, options) = parse_mount_flags_and_options(options_vec);
@@ -488,6 +600,7 @@ fn mount_to_rootfs(logger: &Logger, m: &INIT_MOUNT) -> Result<()> {
    Ok(())
 }

+#[instrument]
 pub fn general_mount(logger: &Logger) -> Result<()> {
    let logger = logger.new(o!("subsystem" => "mount"));

@@ -505,8 +618,9 @@ pub fn get_mount_fs_type(mount_point: &str) -> Result<String> {

 // get_mount_fs_type_from_file returns the FS type corresponding to the passed mount point and
 // any error ecountered.
+#[instrument]
 pub fn get_mount_fs_type_from_file(mount_file: &str, mount_point: &str) -> Result<String> {
-    if mount_point == "" {
+    if mount_point.is_empty() {
        return Err(anyhow!("Invalid mount point {}", mount_point));
    }

@@ -535,15 +649,16 @@ pub fn get_mount_fs_type_from_file(mount_file: &str, mount_point: &str) -> Resul
    ))
 }

+#[instrument]
 pub fn get_cgroup_mounts(
    logger: &Logger,
    cg_path: &str,
    unified_cgroup_hierarchy: bool,
-) -> Result<Vec<INIT_MOUNT>> {
+) -> Result<Vec<InitMount>> {
    // cgroup v2
    // https://github.com/kata-containers/agent/blob/8c9bbadcd448c9a67690fbe11a860aaacc69813c/agent.go#L1249
    if unified_cgroup_hierarchy {
-        return Ok(vec![INIT_MOUNT {
+        return Ok(vec![InitMount {
            fstype: "cgroup2",
            src: "cgroup2",
            dest: "/sys/fs/cgroup",
@@ -555,7 +670,7 @@ pub fn get_cgroup_mounts(
    let reader = BufReader::new(file);

    let mut has_device_cgroup = false;
-    let mut cg_mounts: Vec<INIT_MOUNT> = vec![INIT_MOUNT {
+    let mut cg_mounts: Vec<InitMount> = vec![InitMount {
        fstype: "tmpfs",
        src: "tmpfs",
        dest: SYSFS_CGROUPPATH,
@@ -591,7 +706,7 @@ pub fn get_cgroup_mounts(
            }
        }

-        if fields[0] == "" {
+        if fields[0].is_empty() {
            continue;
        }

@@ -601,7 +716,7 @@ pub fn get_cgroup_mounts(

        if let Some(value) = CGROUPS.get(&fields[0]) {
            let key = CGROUPS.keys().find(|&&f| f == fields[0]).unwrap();
-            cg_mounts.push(INIT_MOUNT {
+            cg_mounts.push(InitMount {
                fstype: "cgroup",
                src: "cgroup",
                dest: *value,
@@ -615,7 +730,7 @@ pub fn get_cgroup_mounts(
        return Ok(Vec::new());
    }

-    cg_mounts.push(INIT_MOUNT {
+    cg_mounts.push(InitMount {
        fstype: "tmpfs",
        src: "tmpfs",
        dest: SYSFS_CGROUPPATH,
@@ -625,6 +740,7 @@ pub fn get_cgroup_mounts(
    Ok(cg_mounts)
 }

+#[instrument]
 pub fn cgroups_mount(logger: &Logger, unified_cgroup_hierarchy: bool) -> Result<()> {
    let logger = logger.new(o!("subsystem" => "mount"));

@@ -640,6 +756,7 @@ pub fn cgroups_mount(logger: &Logger, unified_cgroup_hierarchy: bool) -> Result<
    Ok(())
 }

+#[instrument]
 pub fn remove_mounts(mounts: &[String]) -> Result<()> {
    for m in mounts.iter() {
        mount::umount(m.as_str()).context(format!("failed to umount {:?}", m))?;
@@ -649,6 +766,7 @@ pub fn remove_mounts(mounts: &[String]) -> Result<()> {

 // ensure_destination_exists will recursively create a given mountpoint. If directories
 // are created, their permissions are initialized to mountPerm(0755)
+#[instrument]
 fn ensure_destination_exists(destination: &str, fs_type: &str) -> Result<()> {
    let d = Path::new(destination);
    if !d.exists() {
@@ -669,6 +787,7 @@ fn ensure_destination_exists(destination: &str, fs_type: &str) -> Result<()> {
    Ok(())
 }

+#[instrument]
 fn parse_options(option_list: Vec<String>) -> HashMap<String, String> {
    let mut options = HashMap::new();
    for opt in option_list.iter() {
@@ -798,7 +917,7 @@ mod tests {
            let src_filename: String;
            let dest_filename: String;

-            if d.src != "" {
+            if !d.src.is_empty() {
                src = dir.path().join(d.src.to_string());
                src_filename = src
                    .to_str()
@@ -808,7 +927,7 @@ mod tests {
                src_filename = "".to_owned();
            }

-            if d.dest != "" {
+            if !d.dest.is_empty() {
                dest = dir.path().join(d.dest.to_string());
                dest_filename = dest
                    .to_str()
@@ -820,7 +939,7 @@ mod tests {

            // Create the mount directories
            for d in [src_filename.clone(), dest_filename.clone()].iter() {
-                if d == "" {
+                if d.is_empty() {
                    continue;
                }

@@ -840,8 +959,8 @@ mod tests {

            let msg = format!("{}: result: {:?}", msg, result);

-            if d.error_contains == "" {
-                assert!(result.is_ok(), msg);
+            if d.error_contains.is_empty() {
+                assert!(result.is_ok(), "{}", msg);

                // Cleanup
                unsafe {
@@ -853,7 +972,7 @@ mod tests {

                    let msg = format!("{}: umount result: {:?}", msg, result);

-                    assert!(ret == 0, msg);
+                    assert!(ret == 0, "{}", msg);
                };

                continue;
@@ -861,10 +980,18 @@ mod tests {

            let err = result.unwrap_err();
            let error_msg = format!("{}", err);
-            assert!(error_msg.contains(d.error_contains), msg);
+            assert!(error_msg.contains(d.error_contains), "{}", msg);
        }
    }

+    #[test]
+    fn test_is_mounted() {
+        let cases = [("/proc", true), ("", false), ("!", false), ("/not_existing_path", false)];
+        for (path, expected) in cases.iter() {
+            assert_eq!(is_mounted(path).unwrap(), *expected, "path: {:?}", path);
+        }
+    }
+
    #[test]
    fn test_remove_mounts() {
        skip_if_not_root!();
@@ -958,14 +1085,14 @@ mod tests {

            let msg = format!("{}: result: {:?}", msg, result);

-            if d.error_contains == "" {
-                assert!(result.is_ok(), msg);
+            if d.error_contains.is_empty() {
+                assert!(result.is_ok(), "{}", msg);
                continue;
            }

            let error_msg = format!("{:#}", result.unwrap_err());

-            assert!(error_msg.contains(d.error_contains), msg);
+            assert!(error_msg.contains(d.error_contains), "{}", msg);
        }
    }

@@ -1041,6 +1168,7 @@ mod tests {

            assert!(
                format!("{}", err).contains("No such file or directory"),
+                "{}",
                msg
            );
        }
@@ -1066,16 +1194,16 @@ mod tests {
            // add more details if an assertion fails
            let msg = format!("{}: result: {:?}", msg, result);

-            if d.error_contains == "" {
+            if d.error_contains.is_empty() {
                let fs_type = result.unwrap();

-                assert!(d.fs_type == fs_type, msg);
+                assert!(d.fs_type == fs_type, "{}", msg);

                continue;
            }

            let error_msg = format!("{}", result.unwrap_err());
-            assert!(error_msg.contains(d.error_contains), msg);
+            assert!(error_msg.contains(d.error_contains), "{}", msg);
        }
    }

@@ -1113,21 +1241,21 @@ mod tests {
        let drain = slog::Discard;
        let logger = slog::Logger::root(drain, o!());

-        let first_mount = INIT_MOUNT {
+        let first_mount = InitMount {
            fstype: "tmpfs",
            src: "tmpfs",
            dest: SYSFS_CGROUPPATH,
            options: vec!["nosuid", "nodev", "noexec", "mode=755"],
        };

-        let last_mount = INIT_MOUNT {
+        let last_mount = InitMount {
            fstype: "tmpfs",
            src: "tmpfs",
            dest: SYSFS_CGROUPPATH,
            options: vec!["remount", "ro", "nosuid", "nodev", "noexec", "mode=755"],
        };

-        let cg_devices_mount = INIT_MOUNT {
+        let cg_devices_mount = InitMount {
            fstype: "cgroup",
            src: "cgroup",
            dest: "/sys/fs/cgroup/devices",
@@ -1223,35 +1351,35 @@ mod tests {
            let result = get_cgroup_mounts(&logger, filename, false);
            let msg = format!("{}: result: {:?}", msg, result);

-            if d.error_contains != "" {
-                assert!(result.is_err(), msg);
+            if !d.error_contains.is_empty() {
+                assert!(result.is_err(), "{}", msg);

                let error_msg = format!("{}", result.unwrap_err());
-                assert!(error_msg.contains(d.error_contains), msg);
+                assert!(error_msg.contains(d.error_contains), "{}", msg);
                continue;
            }

-            assert!(result.is_ok(), msg);
+            assert!(result.is_ok(), "{}", msg);

            let mounts = result.unwrap();
            let count = mounts.len();

            if !d.devices_cgroup {
-                assert!(count == 0, msg);
+                assert!(count == 0, "{}", msg);
                continue;
            }

            // get_cgroup_mounts() adds the device cgroup plus two other mounts.
-            assert!(count == (1 + 2), msg);
+            assert!(count == (1 + 2), "{}", msg);

            // First mount
-            assert!(mounts[0].eq(&first_mount), msg);
+            assert!(mounts[0].eq(&first_mount), "{}", msg);

            // Last mount
-            assert!(mounts[2].eq(&last_mount), msg);
+            assert!(mounts[2].eq(&last_mount), "{}", msg);

            // Devices cgroup
-            assert!(mounts[1].eq(&cg_devices_mount), msg);
+            assert!(mounts[1].eq(&cg_devices_mount), "{}", msg);
        }
    }
 }
--- a/src/agent/src/namespace.rs
+++ b/src/agent/src/namespace.rs
@@ -11,7 +11,7 @@ use std::fmt;
 use std::fs;
 use std::fs::File;
 use std::path::{Path, PathBuf};
-use std::thread::{self};
+use tracing::instrument;

 use crate::mount::{BareMount, FLAGS};
 use slog::Logger;
@@ -21,6 +21,7 @@ pub const NSTYPEIPC: &str = "ipc";
 pub const NSTYPEUTS: &str = "uts";
 pub const NSTYPEPID: &str = "pid";

+#[instrument]
 pub fn get_current_thread_ns_path(ns_type: &str) -> String {
    format!(
        "/proc/{}/task/{}/ns/{}",
@@ -41,34 +42,39 @@ pub struct Namespace {
 }

 impl Namespace {
+    #[instrument]
    pub fn new(logger: &Logger) -> Self {
        Namespace {
            logger: logger.clone(),
            path: String::from(""),
            persistent_ns_dir: String::from(PERSISTENT_NS_DIR),
-            ns_type: NamespaceType::IPC,
+            ns_type: NamespaceType::Ipc,
            hostname: None,
        }
    }

+    #[instrument]
    pub fn get_ipc(mut self) -> Self {
-        self.ns_type = NamespaceType::IPC;
+        self.ns_type = NamespaceType::Ipc;
        self
    }

+    #[instrument]
    pub fn get_uts(mut self, hostname: &str) -> Self {
-        self.ns_type = NamespaceType::UTS;
-        if hostname != "" {
+        self.ns_type = NamespaceType::Uts;
+        if !hostname.is_empty() {
            self.hostname = Some(String::from(hostname));
        }
        self
    }

+    #[instrument]
    pub fn get_pid(mut self) -> Self {
-        self.ns_type = NamespaceType::PID;
+        self.ns_type = NamespaceType::Pid;
        self
    }

+    #[allow(dead_code)]
    pub fn set_root_dir(mut self, dir: &str) -> Self {
        self.persistent_ns_dir = dir.to_string();
        self
@@ -76,12 +82,13 @@ impl Namespace {

    // setup creates persistent namespace without switching to it.
    // Note, pid namespaces cannot be persisted.
-    pub fn setup(mut self) -> Result<Self> {
+    #[instrument]
+    pub async fn setup(mut self) -> Result<Self> {
        fs::create_dir_all(&self.persistent_ns_dir)?;

        let ns_path = PathBuf::from(&self.persistent_ns_dir);
        let ns_type = self.ns_type;
-        if ns_type == NamespaceType::PID {
+        if ns_type == NamespaceType::Pid {
            return Err(anyhow!("Cannot persist namespace of PID type"));
        }
        let logger = self.logger.clone();
@@ -93,45 +100,51 @@ impl Namespace {
        self.path = new_ns_path.clone().into_os_string().into_string().unwrap();
        let hostname = self.hostname.clone();

-        let new_thread = thread::spawn(move || -> Result<()> {
-            let origin_ns_path = get_current_thread_ns_path(&ns_type.get());
+        let new_thread = tokio::spawn(async move {
+            if let Err(err) = || -> Result<()> {
+                let origin_ns_path = get_current_thread_ns_path(&ns_type.get());

-            File::open(Path::new(&origin_ns_path))?;
+                File::open(Path::new(&origin_ns_path))?;

-            // Create a new netns on the current thread.
-            let cf = ns_type.get_flags();
+                // Create a new namespace of the requested type (ns_type) on the current thread.
+                let cf = ns_type.get_flags();

-            unshare(cf)?;
+                unshare(cf)?;

-            if ns_type == NamespaceType::UTS && hostname.is_some() {
-                nix::unistd::sethostname(hostname.unwrap())?;
+                if ns_type == NamespaceType::Uts && hostname.is_some() {
+                    nix::unistd::sethostname(hostname.unwrap())?;
+                }
+                // Bind mount the new namespace from the current thread onto the mount point to persist it.
+                let source: &str = origin_ns_path.as_str();
+                let destination: &str = new_ns_path.as_path().to_str().unwrap_or("none");
+
+                let mut flags = MsFlags::empty();
+
+                if let Some(x) = FLAGS.get("rbind") {
+                    let (_, f) = *x;
+                    flags |= f;
+                };
+
+                let bare_mount = BareMount::new(source, destination, "none", flags, "", &logger);
+                bare_mount.mount().map_err(|e| {
+                    anyhow!(
+                        "Failed to mount {} to {} with err:{:?}",
+                        source,
+                        destination,
+                        e
+                    )
+                })?;
+
+                Ok(())
+            }() {
+                return Err(err);
            }
-            // Bind mount the new namespace from the current thread onto the mount point to persist it.
-            let source: &str = origin_ns_path.as_str();
-            let destination: &str = new_ns_path.as_path().to_str().unwrap_or("none");
-
-            let mut flags = MsFlags::empty();
-
-            if let Some(x) = FLAGS.get("rbind") {
-                let (_, f) = *x;
-                flags |= f;
-            };
-
-            let bare_mount = BareMount::new(source, destination, "none", flags, "", &logger);
-            bare_mount.mount().map_err(|e| {
-                anyhow!(
-                    "Failed to mount {} to {} with err:{:?}",
-                    source,
-                    destination,
-                    e
-                )
-            })?;

            Ok(())
        });

        new_thread
-            .join()
+            .await
            .map_err(|e| anyhow!("Failed to join thread {:?}!", e))??;

        Ok(self)
@@ -141,27 +154,27 @@ impl Namespace {
 /// Represents the Namespace type.
 #[derive(Clone, Copy, PartialEq)]
 enum NamespaceType {
-    IPC,
-    UTS,
-    PID,
+    Ipc,
+    Uts,
+    Pid,
 }

 impl NamespaceType {
    /// Get the string representation of the namespace type.
    pub fn get(&self) -> &str {
        match *self {
-            Self::IPC => "ipc",
-            Self::UTS => "uts",
-            Self::PID => "pid",
+            Self::Ipc => "ipc",
+            Self::Uts => "uts",
+            Self::Pid => "pid",
        }
    }

    /// Get the associate flags with the namespace type.
    pub fn get_flags(&self) -> CloneFlags {
        match *self {
-            Self::IPC => CloneFlags::CLONE_NEWIPC,
-            Self::UTS => CloneFlags::CLONE_NEWUTS,
-            Self::PID => CloneFlags::CLONE_NEWPID,
+            Self::Ipc => CloneFlags::CLONE_NEWIPC,
+            Self::Uts => CloneFlags::CLONE_NEWUTS,
+            Self::Pid => CloneFlags::CLONE_NEWPID,
        }
    }
 }
@@ -172,12 +185,6 @@ impl fmt::Debug for NamespaceType {
    }
 }

-impl Default for NamespaceType {
-    fn default() -> Self {
-        NamespaceType::IPC
-    }
-}
-
 #[cfg(test)]
 mod tests {
    use super::{Namespace, NamespaceType};
@@ -185,8 +192,8 @@ mod tests {
    use nix::sched::CloneFlags;
    use tempfile::Builder;

-    #[test]
-    fn test_setup_persistent_ns() {
+    #[tokio::test]
+    async fn test_setup_persistent_ns() {
        skip_if_not_root!();
        // Create dummy logger and temp folder.
        let logger = slog::Logger::root(slog::Discard, o!());
@@ -195,7 +202,8 @@ mod tests {
        let ns_ipc = Namespace::new(&logger)
            .get_ipc()
            .set_root_dir(tmpdir.path().to_str().unwrap())
-            .setup();
+            .setup()
+            .await;

        assert!(ns_ipc.is_ok());
        assert!(remove_mounts(&[ns_ipc.unwrap().path]).is_ok());
@@ -206,7 +214,8 @@ mod tests {
        let ns_uts = Namespace::new(&logger)
            .get_uts("test_hostname")
            .set_root_dir(tmpdir.path().to_str().unwrap())
-            .setup();
+            .setup()
+            .await;

        assert!(ns_uts.is_ok());
        assert!(remove_mounts(&[ns_uts.unwrap().path]).is_ok());
@@ -218,22 +227,23 @@ mod tests {
        let ns_pid = Namespace::new(&logger)
            .get_pid()
            .set_root_dir(tmpdir.path().to_str().unwrap())
-            .setup();
+            .setup()
+            .await;

        assert!(ns_pid.is_err());
    }

    #[test]
    fn test_namespace_type() {
-        let ipc = NamespaceType::IPC;
+        let ipc = NamespaceType::Ipc;
        assert_eq!("ipc", ipc.get());
        assert_eq!(CloneFlags::CLONE_NEWIPC, ipc.get_flags());

-        let uts = NamespaceType::UTS;
+        let uts = NamespaceType::Uts;
        assert_eq!("uts", uts.get());
        assert_eq!(CloneFlags::CLONE_NEWUTS, uts.get_flags());

-        let pid = NamespaceType::PID;
+        let pid = NamespaceType::Pid;
        assert_eq!("pid", pid.get());
        assert_eq!(CloneFlags::CLONE_NEWPID, pid.get_flags());
    }
--- a/src/agent/src/netlink.rs
+++ b/src/agent/src/netlink.rs
--- a/src/agent/src/network.rs
+++ b/src/agent/src/network.rs
@@ -139,10 +139,10 @@ mod tests {
        assert_eq!(true, content.is_ok());
        let content = content.unwrap();

-        let expected_DNS: Vec<&str> = content.split('\n').collect();
+        let expected_dns: Vec<&str> = content.split('\n').collect();

        // assert the data are the same as /run/kata-containers/sandbox/resolv.conf
-        assert_eq!(dns, expected_DNS);
+        assert_eq!(dns, expected_dns);

        // umount /etc/resolv.conf
        let _ = mount::umount(dst_filename);
--- a/src/agent/src/pci.rs
+++ b/src/agent/src/pci.rs
@@ -0,0 +1,168 @@
+// Copyright Red Hat.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+use std::convert::TryInto;
+use std::fmt;
+use std::ops::Deref;
+use std::str::FromStr;
+
+use anyhow::anyhow;
+
+// The PCI spec reserves 5 bits for the slot number (a.k.a. device
+// number), giving valid slots 0 through 31 (0x00 to 0x1f) inclusive
+const SLOT_BITS: u8 = 5;
+const SLOT_MAX: u8 = (1 << SLOT_BITS) - 1;
+
+// Represents a PCI function's slot number (a.k.a. device number),
+// giving its location on a single bus
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub struct Slot(u8);
+
+impl Slot {
+    pub fn new<T: TryInto<u8> + fmt::Display + Copy>(v: T) -> anyhow::Result<Self> {
+        if let Ok(v8) = v.try_into() {
+            if v8 <= SLOT_MAX {
+                return Ok(Slot(v8));
+            }
+        }
+        Err(anyhow!(
+            "PCI slot {} should be in range [0..{:#x}]",
+            v,
+            SLOT_MAX
+        ))
+    }
+}
+
+impl FromStr for Slot {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> anyhow::Result<Self> {
+        let v = isize::from_str_radix(s, 16)?;
+        Slot::new(v)
+    }
+}
+
+impl fmt::Display for Slot {
+    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+        write!(f, "{:02x}", self.0)
+    }
+}
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct Path(Vec<Slot>);
+
+impl Path {
+    pub fn new(slots: Vec<Slot>) -> anyhow::Result<Self> {
+        if slots.is_empty() {
+            return Err(anyhow!("PCI path must have at least one element"));
+        }
+        Ok(Path(slots))
+    }
+}
+
+// Let Path be treated as a slice of Slots
+impl Deref for Path {
+    type Target = [Slot];
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl fmt::Display for Path {
+    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+        let sslots: Vec<String> = self
+            .0
+            .iter()
+            .map(std::string::ToString::to_string)
+            .collect();
+        write!(f, "{}", sslots.join("/"))
+    }
+}
+
+impl FromStr for Path {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> anyhow::Result<Self> {
+        let rslots: anyhow::Result<Vec<Slot>> = s.split('/').map(Slot::from_str).collect();
+        Path::new(rslots?)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::pci::{Path, Slot};
+    use std::str::FromStr;
+
+    #[test]
+    fn test_slot() {
+        // Valid slots
+        let slot = Slot::new(0x00).unwrap();
+        assert_eq!(format!("{}", slot), "00");
+
+        let slot = Slot::from_str("00").unwrap();
+        assert_eq!(format!("{}", slot), "00");
+
+        let slot = Slot::new(31).unwrap();
+        let slot2 = Slot::from_str("1f").unwrap();
+        assert_eq!(slot, slot2);
+
+        // Bad slots
+        let slot = Slot::new(-1);
+        assert!(slot.is_err());
+
+        let slot = Slot::new(32);
+        assert!(slot.is_err());
+
+        let slot = Slot::from_str("20");
+        assert!(slot.is_err());
+
+        let slot = Slot::from_str("xy");
+        assert!(slot.is_err());
+
+        let slot = Slot::from_str("00/");
+        assert!(slot.is_err());
+
+        let slot = Slot::from_str("");
+        assert!(slot.is_err());
+    }
+
+    #[test]
+    fn test_path() {
+        let slot3 = Slot::new(0x03).unwrap();
+        let slot4 = Slot::new(0x04).unwrap();
+        let slot5 = Slot::new(0x05).unwrap();
+
+        // Valid paths
+        let pcipath = Path::new(vec![slot3]).unwrap();
+        assert_eq!(format!("{}", pcipath), "03");
+        let pcipath2 = Path::from_str("03").unwrap();
+        assert_eq!(pcipath, pcipath2);
+        assert_eq!(pcipath.len(), 1);
+        assert_eq!(pcipath[0], slot3);
+
+        let pcipath = Path::new(vec![slot3, slot4]).unwrap();
+        assert_eq!(format!("{}", pcipath), "03/04");
+        let pcipath2 = Path::from_str("03/04").unwrap();
+        assert_eq!(pcipath, pcipath2);
+        assert_eq!(pcipath.len(), 2);
+        assert_eq!(pcipath[0], slot3);
+        assert_eq!(pcipath[1], slot4);
+
+        let pcipath = Path::new(vec![slot3, slot4, slot5]).unwrap();
+        assert_eq!(format!("{}", pcipath), "03/04/05");
+        let pcipath2 = Path::from_str("03/04/05").unwrap();
+        assert_eq!(pcipath, pcipath2);
+        assert_eq!(pcipath.len(), 3);
+        assert_eq!(pcipath[0], slot3);
+        assert_eq!(pcipath[1], slot4);
+        assert_eq!(pcipath[2], slot5);
+
+        // Bad paths
+        assert!(Path::new(vec!()).is_err());
+        assert!(Path::from_str("20").is_err());
+        assert!(Path::from_str("//").is_err());
+        assert!(Path::from_str("xyz").is_err());
+    }
+}
--- a/src/agent/src/random.rs
+++ b/src/agent/src/random.rs
@@ -9,6 +9,7 @@ use nix::fcntl::{self, OFlag};
 use nix::sys::stat::Mode;
 use std::fs;
 use std::os::unix::io::{AsRawFd, FromRawFd};
+use tracing::instrument;

 pub const RNGDEV: &str = "/dev/random";
 pub const RNDADDTOENTCNT: libc::c_int = 0x40045201;
@@ -20,6 +21,7 @@ type IoctlRequestType = libc::c_int;
 #[cfg(target_env = "gnu")]
 type IoctlRequestType = libc::c_ulong;

+#[instrument]
 pub fn reseed_rng(data: &[u8]) -> Result<()> {
    let len = data.len() as libc::c_long;
    fs::write(RNGDEV, data)?;
@@ -37,10 +39,10 @@ pub fn reseed_rng(data: &[u8]) -> Result<()> {
            &len as *const libc::c_long,
        )
    };
-    let _ = Errno::result(ret).map(drop)?;
+    Errno::result(ret).map(drop)?;

    let ret = unsafe { libc::ioctl(f.as_raw_fd(), RNDRESEEDRNG as IoctlRequestType, 0) };
-    let _ = Errno::result(ret).map(drop)?;
+    Errno::result(ret).map(drop)?;

    Ok(())
 }
--- a/src/agent/src/rpc.rs
+++ b/src/agent/src/rpc.rs
--- a/src/agent/src/sandbox.rs
+++ b/src/agent/src/sandbox.rs
@@ -4,12 +4,13 @@
 //

 use crate::linux_abi::*;
-use crate::mount::{get_mount_fs_type, remove_mounts, TYPEROOTFS};
+use crate::mount::{get_mount_fs_type, remove_mounts, TYPE_ROOTFS};
 use crate::namespace::Namespace;
+use crate::netlink::Handle;
 use crate::network::Network;
+use crate::uevent::{Uevent, UeventMatcher};
 use anyhow::{anyhow, Context, Result};
 use libc::pid_t;
-use netlink::{RtnlHandle, NETLINK_ROUTE};
 use oci::{Hook, Hooks};
 use protocols::agent::OnlineCPUMemRequest;
 use regex::Regex;
@@ -22,9 +23,14 @@ use std::collections::HashMap;
 use std::fs;
 use std::os::unix::fs::PermissionsExt;
 use std::path::Path;
-use std::sync::mpsc::{self, Receiver, Sender};
-use std::sync::{Arc, Mutex};
+use std::sync::Arc;
 use std::{thread, time};
+use tokio::sync::mpsc::{channel, Receiver, Sender};
+use tokio::sync::oneshot;
+use tokio::sync::Mutex;
+use tracing::instrument;
+
+type UeventWatcher = (Box<dyn UeventMatcher>, oneshot::Sender<Uevent>);

 #[derive(Debug)]
 pub struct Sandbox {
@@ -35,25 +41,27 @@ pub struct Sandbox {
    pub network: Network,
    pub mounts: Vec<String>,
    pub container_mounts: HashMap<String, Vec<String>>,
-    pub pci_device_map: HashMap<String, String>,
+    pub uevent_map: HashMap<String, Uevent>,
+    pub uevent_watchers: Vec<Option<UeventWatcher>>,
    pub shared_utsns: Namespace,
    pub shared_ipcns: Namespace,
    pub sandbox_pidns: Option<Namespace>,
    pub storages: HashMap<String, u32>,
    pub running: bool,
    pub no_pivot_root: bool,
-    pub sender: Option<Sender<i32>>,
-    pub rtnl: Option<RtnlHandle>,
+    pub sender: Option<tokio::sync::oneshot::Sender<i32>>,
+    pub rtnl: Handle,
    pub hooks: Option<Hooks>,
    pub event_rx: Arc<Mutex<Receiver<String>>>,
-    pub event_tx: Sender<String>,
+    pub event_tx: Option<Sender<String>>,
 }

 impl Sandbox {
+    #[instrument]
    pub fn new(logger: &Logger) -> Result<Self> {
        let fs_type = get_mount_fs_type("/")?;
        let logger = logger.new(o!("subsystem" => "sandbox"));
-        let (tx, rx) = mpsc::channel::<String>();
+        let (tx, rx) = channel::<String>(100);
        let event_rx = Arc::new(Mutex::new(rx));

        Ok(Sandbox {
@@ -64,18 +72,19 @@ impl Sandbox {
            containers: HashMap::new(),
            mounts: Vec::new(),
            container_mounts: HashMap::new(),
-            pci_device_map: HashMap::new(),
+            uevent_map: HashMap::new(),
+            uevent_watchers: Vec::new(),
            shared_utsns: Namespace::new(&logger),
            shared_ipcns: Namespace::new(&logger),
            sandbox_pidns: None,
            storages: HashMap::new(),
            running: false,
-            no_pivot_root: fs_type.eq(TYPEROOTFS),
+            no_pivot_root: fs_type.eq(TYPE_ROOTFS),
            sender: None,
-            rtnl: Some(RtnlHandle::new(NETLINK_ROUTE, 0).unwrap()),
+            rtnl: Handle::new()?,
            hooks: None,
            event_rx,
-            event_tx: tx,
+            event_tx: Some(tx),
        })
    }

@@ -87,6 +96,7 @@ impl Sandbox {
    //
    // It's assumed that caller is calling this method after
    // acquiring a lock on sandbox.
+    #[instrument]
    pub fn set_sandbox_storage(&mut self, path: &str) -> bool {
        match self.storages.get_mut(path) {
            None => {
@@ -109,6 +119,7 @@ impl Sandbox {
    //
    // It's assumed that caller is calling this method after
    // acquiring a lock on sandbox.
+    #[instrument]
    pub fn unset_sandbox_storage(&mut self, path: &str) -> Result<bool> {
        match self.storages.get_mut(path) {
            None => Err(anyhow!("Sandbox storage with path {} not found", path)),
@@ -128,6 +139,7 @@ impl Sandbox {
    //
    // It's assumed that caller is calling this method after
    // acquiring a lock on sandbox.
+    #[instrument]
    pub fn remove_sandbox_storage(&self, path: &str) -> Result<()> {
        let mounts = vec![path.to_string()];
        remove_mounts(&mounts)?;
@@ -141,6 +153,7 @@ impl Sandbox {
    //
    // It's assumed that caller is calling this method after
    // acquiring a lock on sandbox.
+    #[instrument]
    pub fn unset_and_remove_sandbox_storage(&mut self, path: &str) -> Result<()> {
        if self.unset_sandbox_storage(path)? {
            return self.remove_sandbox_storage(path);
@@ -149,34 +162,31 @@ impl Sandbox {
        Ok(())
    }

-    pub fn is_running(&self) -> bool {
-        self.running
-    }
-
-    pub fn set_hostname(&mut self, hostname: String) {
-        self.hostname = hostname;
-    }
-
-    pub fn setup_shared_namespaces(&mut self) -> Result<bool> {
+    #[instrument]
+    pub async fn setup_shared_namespaces(&mut self) -> Result<bool> {
        // Set up shared IPC namespace
        self.shared_ipcns = Namespace::new(&self.logger)
            .get_ipc()
            .setup()
+            .await
            .context("Failed to setup persistent IPC namespace")?;

        // // Set up shared UTS namespace
        self.shared_utsns = Namespace::new(&self.logger)
            .get_uts(self.hostname.as_str())
            .setup()
+            .await
            .context("Failed to setup persistent UTS namespace")?;

        Ok(true)
    }

+    #[instrument]
    pub fn add_container(&mut self, c: LinuxContainer) {
        self.containers.insert(c.id.clone(), c);
    }

+    #[instrument]
    pub fn update_shared_pidns(&mut self, c: &LinuxContainer) -> Result<()> {
        // Populate the shared pid path only if this is an infra container and
        // sandbox_pidns has not been passed in the create_sandbox request.
@@ -200,10 +210,12 @@ impl Sandbox {
        Ok(())
    }

+    #[instrument]
    pub fn get_container(&mut self, id: &str) -> Option<&mut LinuxContainer> {
        self.containers.get_mut(id)
    }

+    #[instrument]
    pub fn find_process(&mut self, pid: pid_t) -> Option<&mut Process> {
        for (_, c) in self.containers.iter_mut() {
            if c.processes.get(&pid).is_some() {
@@ -214,13 +226,15 @@ impl Sandbox {
        None
    }

-    pub fn destroy(&mut self) -> Result<()> {
+    #[instrument]
+    pub async fn destroy(&mut self) -> Result<()> {
        for ctr in self.containers.values_mut() {
-            ctr.destroy()?;
+            ctr.destroy().await?;
        }
        Ok(())
    }

+    #[instrument]
    pub fn online_cpu_memory(&self, req: &OnlineCPUMemRequest) -> Result<()> {
        if req.nb_cpus > 0 {
            // online cpus
@@ -264,6 +278,7 @@ impl Sandbox {
        Ok(())
    }

+    #[instrument]
    pub fn add_hooks(&mut self, dir: &str) -> Result<()> {
        let mut hooks = Hooks::default();
        if let Ok(hook) = self.find_hooks(dir, "prestart") {
@@ -279,6 +294,7 @@ impl Sandbox {
        Ok(())
    }

+    #[instrument]
    fn find_hooks(&self, hook_path: &str, hook_type: &str) -> Result<Vec<Hook>> {
        let mut hooks = Vec::new();
        for entry in fs::read_dir(Path::new(hook_path).join(hook_type))? {
@@ -315,21 +331,40 @@ impl Sandbox {
        Ok(hooks)
    }

-    pub fn run_oom_event_monitor(&self, rx: Receiver<String>, container_id: String) {
-        let tx = self.event_tx.clone();
+    #[instrument]
+    pub async fn run_oom_event_monitor(&self, mut rx: Receiver<String>, container_id: String) {
        let logger = self.logger.clone();

-        thread::spawn(move || {
-            for event in rx {
+        if self.event_tx.is_none() {
+            error!(
+                logger,
+                "sandbox.event_tx not found in run_oom_event_monitor"
+            );
+            return;
+        }
+
+        let tx = self.event_tx.as_ref().unwrap().clone();
+
+        tokio::spawn(async move {
+            loop {
+                let event = rx.recv().await;
+                // None means the container has exited and the
+                // sender held by the OOM notifier has been dropped.
+                if event.is_none() {
+                    return;
+                }
                info!(logger, "got an OOM event {:?}", event);
+
                let _ = tx
                    .send(container_id.clone())
+                    .await
                    .map_err(|e| error!(logger, "failed to send message: {:?}", e));
            }
        });
    }
 }

+#[instrument]
 fn online_resources(logger: &Logger, path: &str, pattern: &str, num: i32) -> Result<i32> {
    let mut count = 0;
    let re = Regex::new(pattern)?;
@@ -375,6 +410,7 @@ fn online_resources(logger: &Logger, path: &str, pattern: &str, num: i32) -> Res
 const ONLINE_CPUMEM_WATI_MILLIS: u64 = 50;
 const ONLINE_CPUMEM_MAX_RETRIES: u32 = 100;

+#[instrument]
 fn online_cpus(logger: &Logger, num: i32) -> Result<i32> {
    let mut onlined_count: i32 = 0;

@@ -383,7 +419,7 @@ fn online_cpus(logger: &Logger, num: i32) -> Result<i32> {
            logger,
            SYSFS_CPU_ONLINE_PATH,
            r"cpu[0-9]+",
-            (num - onlined_count),
+            num - onlined_count,
        );
        if r.is_err() {
            return r;
@@ -404,6 +440,7 @@ fn online_cpus(logger: &Logger, num: i32) -> Result<i32> {
    ))
 }

+#[instrument]
 fn online_memory(logger: &Logger) -> Result<()> {
    online_resources(logger, SYSFS_MEMORY_ONLINE_PATH, r"memory[0-9]+", -1)?;
    Ok(())
@@ -428,8 +465,8 @@ mod tests {
        baremount.mount()
    }

-    #[test]
-    fn set_sandbox_storage() {
+    #[tokio::test]
+    async fn set_sandbox_storage() {
        let logger = slog::Logger::root(slog::Discard, o!());
        let mut s = Sandbox::new(&logger).unwrap();

@@ -462,8 +499,8 @@ mod tests {
        );
    }

-    #[test]
-    fn remove_sandbox_storage() {
+    #[tokio::test]
+    async fn remove_sandbox_storage() {
        skip_if_not_root!();

        let logger = slog::Logger::root(slog::Discard, o!());
@@ -518,9 +555,9 @@ mod tests {
        assert!(s.remove_sandbox_storage(destdir_path).is_ok());
    }

-    #[test]
+    #[tokio::test]
    #[allow(unused_assignments)]
-    fn unset_and_remove_sandbox_storage() {
+    async fn unset_and_remove_sandbox_storage() {
        skip_if_not_root!();

        let logger = slog::Logger::root(slog::Discard, o!());
@@ -570,8 +607,8 @@ mod tests {
        assert!(s.unset_and_remove_sandbox_storage(&other_dir_str).is_err());
    }

-    #[test]
-    fn unset_sandbox_storage() {
+    #[tokio::test]
+    async fn unset_sandbox_storage() {
        let logger = slog::Logger::root(slog::Discard, o!());
        let mut s = Sandbox::new(&logger).unwrap();

@@ -624,13 +661,16 @@ mod tests {
    }

    fn create_dummy_opts() -> CreateOpts {
-        let mut root = Root::default();
-        root.path = String::from("/");
+        let root = Root {
+            path: String::from("/"),
+            ..Default::default()
+        };

-        let linux = Linux::default();
-        let mut spec = Spec::default();
-        spec.root = Some(root);
-        spec.linux = Some(linux);
+        let spec = Spec {
+            linux: Some(Linux::default()),
+            root: Some(root),
+            ..Default::default()
+        };

        CreateOpts {
            cgroup_name: "".to_string(),
@@ -653,8 +693,8 @@ mod tests {
        .unwrap()
    }

-    #[test]
-    fn get_container_entry_exist() {
+    #[tokio::test]
+    async fn get_container_entry_exist() {
        skip_if_not_root!();
        let logger = slog::Logger::root(slog::Discard, o!());
        let mut s = Sandbox::new(&logger).unwrap();
@@ -666,8 +706,8 @@ mod tests {
        assert!(cnt.is_some());
    }

-    #[test]
-    fn get_container_no_entry() {
+    #[tokio::test]
+    async fn get_container_no_entry() {
        let logger = slog::Logger::root(slog::Discard, o!());
        let mut s = Sandbox::new(&logger).unwrap();

@@ -675,8 +715,8 @@ mod tests {
        assert!(cnt.is_none());
    }

-    #[test]
-    fn add_and_get_container() {
+    #[tokio::test]
+    async fn add_and_get_container() {
        skip_if_not_root!();
        let logger = slog::Logger::root(slog::Discard, o!());
        let mut s = Sandbox::new(&logger).unwrap();
@@ -685,8 +725,9 @@ mod tests {
        s.add_container(linux_container);
        assert!(s.get_container("some_id").is_some());
    }
-    #[test]
-    fn update_shared_pidns() {
+
+    #[tokio::test]
+    async fn update_shared_pidns() {
        skip_if_not_root!();
        let logger = slog::Logger::root(slog::Discard, o!());
        let mut s = Sandbox::new(&logger).unwrap();
@@ -702,8 +743,9 @@ mod tests {
        let ns_path = format!("/proc/{}/ns/pid", test_pid);
        assert_eq!(s.sandbox_pidns.unwrap().path, ns_path);
    }
-    #[test]
-    fn add_guest_hooks() {
+
+    #[tokio::test]
+    async fn add_guest_hooks() {
        let logger = slog::Logger::root(slog::Discard, o!());
        let mut s = Sandbox::new(&logger).unwrap();
        let tmpdir = Builder::new().tempdir().unwrap();
@@ -725,30 +767,11 @@ mod tests {
        assert!(s.hooks.as_ref().unwrap().poststop.is_empty());
    }

-    #[test]
-    pub fn test_sandbox_is_running() {
+    #[tokio::test]
+    async fn test_sandbox_set_destroy() {
        let logger = slog::Logger::root(slog::Discard, o!());
        let mut s = Sandbox::new(&logger).unwrap();
-        s.running = true;
-        assert!(s.is_running());
-        s.running = false;
-        assert!(!s.is_running());
-    }
-
-    #[test]
-    fn test_sandbox_set_hostname() {
-        let logger = slog::Logger::root(slog::Discard, o!());
-        let mut s = Sandbox::new(&logger).unwrap();
-        let hostname = "abc123";
-        s.set_hostname(hostname.to_string());
-        assert_eq!(s.hostname, hostname);
-    }
-
-    #[test]
-    fn test_sandbox_set_destroy() {
-        let logger = slog::Logger::root(slog::Discard, o!());
-        let mut s = Sandbox::new(&logger).unwrap();
-        let ret = s.destroy();
+        let ret = s.destroy().await;
        assert!(ret.is_ok());
    }
 }
--- a/src/agent/src/signal.rs
+++ b/src/agent/src/signal.rs
@@ -0,0 +1,155 @@
+// Copyright (c) 2019-2020 Ant Financial
+// Copyright (c) 2020 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use crate::sandbox::Sandbox;
+use anyhow::{anyhow, Result};
+use capctl::prctl::set_subreaper;
+use nix::sys::wait::WaitPidFlag;
+use nix::sys::wait::{self, WaitStatus};
+use nix::unistd;
+use slog::{error, info, o, Logger};
+use std::sync::Arc;
+use tokio::select;
+use tokio::signal::unix::{signal, SignalKind};
+use tokio::sync::watch::Receiver;
+use tokio::sync::Mutex;
+use unistd::Pid;
+
+/// Reaps exited children in a loop, recording each exit status on the
+/// process that `sandbox.find_process` returns for the child's pid.
+///
+/// Returns `Ok(())` once `waitpid` reports `StillAlive` (nothing left to
+/// reap), or an error carrying the "waitpid reaper failed" context when
+/// `waitpid` itself fails.
+async fn handle_sigchild(logger: Logger, sandbox: Arc<Mutex<Sandbox>>) -> Result<()> {
+    info!(logger, "handling signal"; "signal" => "SIGCHLD");
+
+    loop {
+        // Avoid reaping the undesirable child's signal, e.g., execute_hook's.
+        // The lock result is not bound to a name, so it is released immediately.
+        rustjail::container::WAIT_PID_LOCKER.lock().await;
+        let reaped = wait::waitpid(
+            Some(Pid::from_raw(-1)),
+            Some(WaitPidFlag::WNOHANG | WaitPidFlag::__WALL),
+        );
+
+        let wait_status = match reaped {
+            Ok(WaitStatus::StillAlive) => return Ok(()),
+            Ok(status) => status,
+            Err(e) => return Err(anyhow!(e).context("waitpid reaper failed")),
+        };
+
+        info!(logger, "wait_status"; "wait_status result" => format!("{:?}", wait_status));
+
+        // No pid attached to this status: nothing to record, poll again.
+        let raw_pid = match wait_status.pid() {
+            Some(pid) => pid.as_raw(),
+            None => continue,
+        };
+        let child_logger = logger.new(o!("child-pid" => format!("{}", raw_pid)));
+
+        // The sandbox lock is held until the end of this loop iteration.
+        let mut guard = sandbox.lock().await;
+        let mut p = match guard.find_process(raw_pid) {
+            Some(process) => process,
+            None => {
+                info!(child_logger, "child exited unexpectedly");
+                continue;
+            }
+        };
+
+        let ret: i32 = match wait_status {
+            WaitStatus::Exited(_, code) => code,
+            WaitStatus::Signaled(_, sig, _) => sig as i32,
+            _ => {
+                info!(child_logger, "got wrong status for process";
+                              "child-status" => format!("{:?}", wait_status));
+                continue;
+            }
+        };
+
+        p.exit_code = ret;
+        let _ = p.exit_tx.take();
+
+        info!(child_logger, "notify term to close");
+        // close the socket file to notify readStdio to close terminal specifically
+        // in case this process's terminal has been inherited by its children.
+        p.notify_term_close();
+    }
+}
+
+/// Marks the agent as a child subreaper, then services SIGCHLD until
+/// `shutdown.changed()` resolves.
+///
+/// Failures reported by `handle_sigchild` are logged and otherwise ignored;
+/// only the subreaper and signal-stream setup can make this return an error.
+pub async fn setup_signal_handler(
+    logger: Logger,
+    sandbox: Arc<Mutex<Sandbox>>,
+    mut shutdown: Receiver<bool>,
+) -> Result<()> {
+    let logger = logger.new(o!("subsystem" => "signals"));
+
+    set_subreaper(true)
+        .map_err(|e| anyhow!(e).context("failed to setup agent as a child subreaper"))?;
+
+    let mut sigchld = signal(SignalKind::child())?;
+
+    loop {
+        select! {
+            _ = shutdown.changed() => {
+                info!(logger, "got shutdown request");
+                break;
+            }
+
+            _ = sigchld.recv() => {
+                // Log errors, but don't abort - just wait for more signals!
+                if let Err(e) = handle_sigchild(logger.clone(), sandbox.clone()).await {
+                    error!(logger, "failed to handle signal"; "error" => format!("{:?}", e));
+                }
+            }
+        }
+    }
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tokio::pin;
+    use tokio::sync::watch::channel;
+    use tokio::time::Duration;
+
+    /// Checks that the signal handler task stops after a shutdown request,
+    /// failing if a one-second timer fires first.
+    #[tokio::test]
+    async fn test_setup_signal_handler() {
+        let logger = slog::Logger::root(slog::Discard, o!());
+        let sandbox = Arc::new(Mutex::new(Sandbox::new(&logger).unwrap()));
+
+        let (shutdown_tx, shutdown_rx) = channel(true);
+        let handler_task = tokio::spawn(setup_signal_handler(logger, sandbox, shutdown_rx));
+
+        // Upper bound on how long the handler may take to stop.
+        let deadline = tokio::time::sleep(Duration::from_secs(1));
+        pin!(deadline);
+
+        // Ask the handler to stop.
+        shutdown_tx.send(true).expect("failed to request shutdown");
+
+        // Both arms are terminal, so a single `select!` is enough: either the
+        // task finishes first or the deadline fires and the test fails.
+        select! {
+            _ = handler_task => {
+                println!("INFO: task completed");
+            },
+            _ = &mut deadline => {
+                panic!("signal thread failed to stop");
+            }
+        }
+    }
+}
--- a/Show More
+++ b/Show More