Merge pull request #2007 from fidencio/2.1.1-branch-bump

# Kata Containers 2.1.1
release: Kata Containers 2.1.1
2026-03-05 12:22:10 +00:00 · 2021-06-11 17:09:11 +02:00 · 2021-06-11 09:48:55 +02:00 · 2021-06-10 10:19:52 +02:00 · 2021-06-08 10:37:42 +02:00 · 2021-06-08 10:02:21 +02:00
922 changed files with 73444 additions and 38556 deletions
--- a/.github/workflows/kata-deploy-test.yaml
+++ b/.github/workflows/kata-deploy-test.yaml
@@ -1,7 +1,12 @@
-on: issue_comment
+on:
+  issue_comment:
+    types: [created, edited]
+
 name: test-kata-deploy
+
 jobs:
  check_comments:
+    if: ${{ github.event.issue.pull_request }}
    runs-on: ubuntu-latest
    steps:
      - name: Check for Command
@@ -9,7 +14,7 @@ jobs:
        uses: kata-containers/slash-command-action@v1
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
-          command: "test-kata-deploy"
+          command: "test_kata_deploy"
          reaction: "true"
          reaction-type: "eyes"
          allow-edits: "false"
@@ -17,6 +22,7 @@ jobs:
      - name: verify command arg is kata-deploy
        run: |
           echo "The command was '${{ steps.command.outputs.command-name }}' with arguments '${{ steps.command.outputs.command-arguments }}'"
+
  create-and-test-container:
    needs: check_comments
    runs-on: ubuntu-latest
@@ -27,22 +33,26 @@ jobs:
            ref=$(cat $GITHUB_EVENT_PATH | jq -r '.issue.pull_request.url' | sed  's#^.*\/pulls#refs\/pull#' | sed 's#$#\/merge#')
            echo "reference for PR: " ${ref}
            echo "##[set-output name=pr-ref;]${ref}"
-      - uses: actions/checkout@v2-beta
+
+      - name: check out
+        uses: actions/checkout@v2
        with:
-          ref: ${{ steps.get-PR-ref.outputs.pr-ref }}
+           ref: ${{ steps.get-PR-ref.outputs.pr-ref }}
+
      - name: build-container-image
        id: build-container-image
        run: |
            PR_SHA=$(git log --format=format:%H -n1)
-            VERSION=$(curl https://raw.githubusercontent.com/kata-containers/kata-containers/2.0-dev/VERSION)
+            VERSION="2.0.0"
            ARTIFACT_URL="https://github.com/kata-containers/kata-containers/releases/download/${VERSION}/kata-static-${VERSION}-x86_64.tar.xz"
-            wget "${ARTIFACT_URL}" -O ./kata-deploy/kata-static.tar.xz
-            docker build --build-arg KATA_ARTIFACTS=kata-static.tar.xz -t katadocker/kata-deploy-ci:${PR_SHA} ./kata-deploy
+            wget "${ARTIFACT_URL}" -O tools/packaging/kata-deploy/kata-static.tar.xz
+            docker build --build-arg KATA_ARTIFACTS=kata-static.tar.xz -t katadocker/kata-deploy-ci:${PR_SHA} ./tools/packaging/kata-deploy
            docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }}
            docker push katadocker/kata-deploy-ci:$PR_SHA
            echo "##[set-output name=pr-sha;]${PR_SHA}"
+
      - name: test-kata-deploy-ci-in-aks
-        uses: ./kata-deploy/action
+        uses: ./tools/packaging/kata-deploy/action
        with:
          packaging-sha: ${{ steps.build-container-image.outputs.pr-sha }}
        env:
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -103,59 +103,6 @@ jobs:
          name: kata-artifacts
          path: kata-static-qemu.tar.gz

-  build-nemu:
-    runs-on: ubuntu-16.04
-    needs: get-artifact-list
-    env:
-      buildstr: "install_nemu"
-    steps:
-      - uses: actions/checkout@v1
-      - name: get-artifact-list
-        uses: actions/download-artifact@master
-        with:
-          name: artifact-list
-      - name: build-nemu
-        run: |
-         if grep -q $buildstr ./artifact-list/artifact-list.txt; then
-           $GITHUB_WORKSPACE/.github/workflows/generate-artifact-tarball.sh $buildstr
-           echo "artifact-built=true" >> $GITHUB_ENV
-         else
-           echo "artifact-built=false" >> $GITHUB_ENV
-         fi
-      - name: store-artifacts
-        if: ${{ env.artifact-built }} == 'true'
-        uses: actions/upload-artifact@master
-        with:
-          name: kata-artifacts
-          path: kata-static-nemu.tar.gz
-
-  # Job for building the QEMU binaries with virtiofs support
-  build-qemu-virtiofsd:
-    runs-on: ubuntu-16.04
-    needs: get-artifact-list
-    env:
-      buildstr: "install_qemu_virtiofsd"
-    steps:
-      - uses: actions/checkout@v1
-      - name: get-artifact-list
-        uses: actions/download-artifact@master
-        with:
-          name: artifact-list
-      - name: build-qemu-virtiofsd
-        run: |
-         if grep -q $buildstr ./artifact-list/artifact-list.txt; then
-           $GITHUB_WORKSPACE/.github/workflows/generate-artifact-tarball.sh $buildstr
-           echo "artifact-built=true" >> $GITHUB_ENV
-         else
-           echo "artifact-built=false" >> $GITHUB_ENV
-         fi
-      - name: store-artifacts
-        if: ${{ env.artifact-built }} == 'true'
-        uses: actions/upload-artifact@master
-        with:
-          name: kata-artifacts
-          path: kata-static-qemu-virtiofsd.tar.gz
-
  # Job for building the image
  build-image:
    runs-on: ubuntu-16.04
@@ -266,7 +213,7 @@ jobs:

  gather-artifacts:
    runs-on: ubuntu-16.04
-    needs: [build-experimental-kernel, build-kernel, build-qemu, build-qemu-virtiofsd, build-image, build-firecracker, build-kata-components, build-nemu, build-clh]
+    needs: [build-experimental-kernel, build-kernel, build-qemu, build-image, build-firecracker, build-kata-components, build-clh]
    steps:
      - uses: actions/checkout@v1
      - name: get-artifacts
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -104,32 +104,6 @@ jobs:
          name: kata-artifacts
          path: kata-static-qemu.tar.gz

-  build-qemu-virtiofsd:
-    runs-on: ubuntu-16.04
-    needs: get-artifact-list
-    env:
-      buildstr: "install_qemu_virtiofsd"
-    steps:
-      - uses: actions/checkout@v2
-      - name: get-artifact-list
-        uses: actions/download-artifact@v2
-        with:
-          name: artifact-list
-      - name: build-qemu-virtiofsd
-        run: |
-         if grep -q $buildstr artifact-list.txt; then
-           $GITHUB_WORKSPACE/.github/workflows/generate-local-artifact-tarball.sh $buildstr
-           echo "artifact-built=true" >> $GITHUB_ENV
-         else
-           echo "artifact-built=false" >> $GITHUB_ENV
-         fi
-      - name: store-artifacts
-        if: ${{ env.artifact-built }} == 'true'
-        uses: actions/upload-artifact@v2
-        with:
-          name: kata-artifacts
-          path: kata-static-qemu-virtiofsd.tar.gz
-
  build-image:
    runs-on: ubuntu-16.04
    needs: get-artifact-list
@@ -237,7 +211,7 @@ jobs:

  gather-artifacts:
    runs-on: ubuntu-16.04
-    needs: [build-experimental-kernel, build-kernel, build-qemu, build-qemu-virtiofsd, build-image, build-firecracker, build-kata-components, build-clh]
+    needs: [build-experimental-kernel, build-kernel, build-qemu, build-image, build-firecracker, build-kata-components, build-clh]
    steps:
      - uses: actions/checkout@v2
      - name: get-artifacts
--- a/.github/workflows/require-pr-porting-labels.yaml
+++ b/.github/workflows/require-pr-porting-labels.yaml
@@ -6,6 +6,9 @@
 name: Ensure PR has required porting labels

 on:
+  pull_request:
+    branches:
+      - main
  pull_request_target:
    types:
      - opened
--- a/.github/workflows/snap-release.yaml
+++ b/.github/workflows/snap-release.yaml
@@ -21,8 +21,8 @@ jobs:
          kata_url="https://github.com/kata-containers/kata-containers"
          latest_version=$(git ls-remote --tags ${kata_url}  | egrep -o "refs.*" | egrep -v "\-alpha|\-rc|{}" | egrep -o "[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+" | sort -V -r | head -1)
          current_version="$(echo ${GITHUB_REF} | cut -d/ -f3)"
-          # Check if the current tag is the latest tag
-          if echo -e "$latest_version\n$current_version" | sort -C -V; then
+          # Check semantic versioning format (x.y.z) and if the current tag is the latest tag
+          if echo "${current_version}" | grep -q "^[[:digit:]]\+\.[[:digit:]]\+\.[[:digit:]]\+$" && echo -e "$latest_version\n$current_version" | sort -C -V; then
            # Current version is the latest version, build it
            snapcraft -d snap --destructive-mode
          fi
@@ -33,5 +33,5 @@ jobs:
          snap_file="kata-containers_${snap_version}_amd64.snap"
          # Upload the snap if it exists
          if [ -f ${snap_file} ]; then
-            snapcraft upload --release=candidate ${snap_file}
+            snapcraft upload --release=stable ${snap_file}
          fi
--- a/.github/workflows/snap.yaml
+++ b/.github/workflows/snap.yaml
@@ -1,15 +1,5 @@
 name: snap CI
-on:
-  pull_request:
-    paths:
-      - "**/Makefile"
-      - "**/*.go"
-      - "**/*.mk"
-      - "**/*.rs"
-      - "**/*.sh"
-      - "**/*.toml"
-      - "**/*.yaml"
-      - "**/*.yml"
+on: ["pull_request"]
 jobs:
  test:
    runs-on: ubuntu-20.04
--- a/.github/workflows/static-checks.yaml
+++ b/.github/workflows/static-checks.yaml
@@ -5,16 +5,14 @@ jobs:
    strategy:
      matrix:
        go-version: [1.13.x, 1.14.x, 1.15.x]
-        os: [ubuntu-18.04]
+        os: [ubuntu-20.04]
    runs-on: ${{ matrix.os }}
    env:
-      GO111MODULE: off
      TRAVIS: "true"
      TRAVIS_BRANCH: ${{ github.base_ref }}
      TRAVIS_PULL_REQUEST_BRANCH: ${{ github.head_ref }}
      TRAVIS_PULL_REQUEST_SHA : ${{ github.event.pull_request.head.sha }}
      RUST_BACKTRACE: "1"
-      RUST_AGENT: "yes"
      target_branch: ${TRAVIS_BRANCH}
    steps:
    - name: Install Go
@@ -25,9 +23,6 @@ jobs:
        GOPATH: ${{ runner.workspace }}/kata-containers
    - name: Setup GOPATH
      run: |
-        gopath_org=$(go env GOPATH)/src/github.com/kata-containers/
-        mkdir -p ${gopath_org}
-        ln -s ${PWD} ${gopath_org}
        echo "TRAVIS_BRANCH: ${TRAVIS_BRANCH}"
        echo "TRAVIS_PULL_REQUEST_BRANCH: ${TRAVIS_PULL_REQUEST_BRANCH}"
        echo "TRAVIS_PULL_REQUEST_SHA: ${TRAVIS_PULL_REQUEST_SHA}"
@@ -43,26 +38,29 @@ jobs:
        path: ./src/github.com/${{ github.repository }}
    - name: Setup travis references
      run: |
-        echo "TRAVIS_BRANCH=${TRAVIS_BRANCH:-$(echo $GITHUB_REF | awk 'BEGIN { FS = \"/\" } ; { print $3 }')}" 
+        echo "TRAVIS_BRANCH=${TRAVIS_BRANCH:-$(echo $GITHUB_REF | awk 'BEGIN { FS = \"/\" } ; { print $3 }')}"
        target_branch=${TRAVIS_BRANCH}
    - name: Setup
      run: |
-        cd ${GOPATH}/src/github.com/kata-containers/kata-containers && ./ci/setup.sh
+        cd ${GOPATH}/src/github.com/${{ github.repository }} && ./ci/setup.sh
      env:
        GOPATH: ${{ runner.workspace }}/kata-containers
    - name: Building rust
      run: |
-        cd ${GOPATH}/src/github.com/kata-containers/kata-containers && ./ci/install_rust.sh
+        cd ${GOPATH}/src/github.com/${{ github.repository }} && ./ci/install_rust.sh
        PATH=$PATH:"$HOME/.cargo/bin"
-    - name: Make clippy
+        rustup target add x86_64-unknown-linux-musl
+        rustup component add rustfmt clippy
+    # Must build before static checks as we depend on some generated code in runtime and agent
+    - name: Build
      run: |
-        cd ${GOPATH}/src/github.com/kata-containers/kata-containers/src/agent && rustup target add x86_64-unknown-linux-musl && rustup component add rustfmt && rustup component add clippy && make clippy
-    - name: Static checks
+        cd ${GOPATH}/src/github.com/${{ github.repository }} && make
+    - name: Static Checks
      run: |
-        cd ${GOPATH}/src/github.com/kata-containers/kata-containers && ./ci/static-checks.sh
-    - name: Build agent
+        cd ${GOPATH}/src/github.com/${{ github.repository }} && ./ci/static-checks.sh
+    - name: Run Compiler Checks
      run: |
-        cd ${GOPATH}/src/github.com/kata-containers/kata-containers/src/agent && make
-    - name: Run agent unit tests
+        cd ${GOPATH}/src/github.com/${{ github.repository }} && make check
+    - name: Run Unit Tests
      run: |
-        cd ${GOPATH}/src/github.com/kata-containers/kata-containers/src/agent && make check
+        cd ${GOPATH}/src/github.com/${{ github.repository }} && make test
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 **/*.bk
+**/*~
 **/*.orig
 **/*.rej
 **/target
--- a/README.md
+++ b/README.md
@@ -126,9 +126,9 @@ The following repositories are used by both the current and first generation Kat

 | Component | Description | Current | First generation | Notes |
 |-|-|-|-|-|
-| CI | Continuous Integration configuration files and scripts. | [Kata 2.x](https://github.com/kata-containers/ci/tree/2.0-dev) | [Kata 1.x](https://github.com/kata-containers/ci/tree/master) | |
+| CI | Continuous Integration configuration files and scripts. | [Kata 2.x](https://github.com/kata-containers/ci/tree/main) | [Kata 1.x](https://github.com/kata-containers/ci/tree/master) | |
 | kernel | The Linux kernel used by the hypervisor to boot the guest image. | [Kata 2.x][kernel] | [Kata 1.x][kernel] | Patches are stored in the packaging component. |
-| tests | Test code. | [Kata 2.x](https://github.com/kata-containers/tests/tree/2.0-dev) | [Kata 1.x](https://github.com/kata-containers/tests/tree/master) | Excludes unit tests which live with the main code. |
+| tests | Test code. | [Kata 2.x](https://github.com/kata-containers/tests/tree/main) | [Kata 1.x](https://github.com/kata-containers/tests/tree/master) | Excludes unit tests which live with the main code. |
 | www.katacontainers.io | Contains the source for the [main web site](https://www.katacontainers.io). | [Kata 2.x][github-katacontainers.io] | [Kata 1.x][github-katacontainers.io] | | |

 ### Packaging and releases
--- a/2
+++ b/2
@@ -1 +1 @@
-2.1-alpha0
+2.1.1
--- a/ci/install_musl.sh
+++ b/ci/install_musl.sh
@@ -12,10 +12,11 @@ install_aarch64_musl() {
 		local musl_tar="${arch}-linux-musl-native.tgz"
 		local musl_dir="${arch}-linux-musl-native"
 		pushd /tmp
-		curl -sLO https://musl.cc/${musl_tar}
-		tar -zxf ${musl_tar}
-		mkdir -p /usr/local/musl/
-		cp -r ${musl_dir}/* /usr/local/musl/
+		if curl -sLO --fail https://musl.cc/${musl_tar}; then
+			tar -zxf ${musl_tar}
+			mkdir -p /usr/local/musl/
+			cp -r ${musl_dir}/* /usr/local/musl/
+		fi
 		popd
 	fi
 }
--- a/ci/install_yq.sh
+++ b/ci/install_yq.sh
@@ -18,7 +18,9 @@ function install_yq() {
 	GOPATH=${GOPATH:-${HOME}/go}
 	local yq_path="${GOPATH}/bin/yq"
 	local yq_pkg="github.com/mikefarah/yq"
-	[ -x  "${GOPATH}/bin/yq" ] && return
+	local yq_version=3.4.1
+
+	[ -x  "${GOPATH}/bin/yq" ] && [ "`${GOPATH}/bin/yq --version`"X == "yq version ${yq_version}"X ] && return

 	read -r -a sysInfo <<< "$(uname -sm)"

@@ -56,8 +58,6 @@ function install_yq() {
 		die "Please install curl"
 	fi

-	local yq_version=3.1.0
-
 	## NOTE: ${var,,} => gives lowercase value of var
 	local yq_url="https://${yq_pkg}/releases/download/${yq_version}/yq_${goos,,}_${goarch}"
 	curl -o "${yq_path}" -LSsf "${yq_url}"
--- a/ci/lib.sh
+++ b/ci/lib.sh
@@ -5,18 +5,27 @@

 export tests_repo="${tests_repo:-github.com/kata-containers/tests}"
 export tests_repo_dir="$GOPATH/src/$tests_repo"
-export branch="${branch:-2.0-dev}"
+export branch="${branch:-main}"

+# Clones the tests repository and checkout to the branch pointed out by
+# the global $branch variable.
+# If the clone exists and `CI` is exported then it does nothing. Otherwise
+# it will clone the repository or `git pull` the latest code.
+#
 clone_tests_repo()
 {
-	if [ -d "$tests_repo_dir" -a -n "$CI" ]
-	then
-		return
+	if [ -d "$tests_repo_dir" ]; then
+		[ -n "$CI" ] && return
+		pushd "${tests_repo_dir}"
+		git checkout "${branch}"
+		git pull
+		popd
+	else
+		git clone -q "https://${tests_repo}" "$tests_repo_dir"
+		pushd "${tests_repo_dir}"
+		git checkout "${branch}"
+		popd
 	fi
-
-	go get -d -u "$tests_repo" || true
-
-	pushd "${tests_repo_dir}" && git checkout "${branch}" && popd
 }

 run_static_checks()
--- a/ci/openshift-ci/images/Dockerfile.buildroot
+++ b/ci/openshift-ci/images/Dockerfile.buildroot
@@ -0,0 +1,9 @@
+# Copyright (c) 2021 Red Hat, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# This is the build root image for Kata Containers on OpenShift CI.
+#
+FROM centos:8
+
+RUN yum -y update && yum -y install git sudo wget
--- a/docs/Developer-Guide.md
+++ b/docs/Developer-Guide.md
@@ -1,55 +1,54 @@
-* [Warning](#warning)
-* [Assumptions](#assumptions)
-* [Initial setup](#initial-setup)
-* [Requirements to build individual components](#requirements-to-build-individual-components)
-* [Build and install the Kata Containers runtime](#build-and-install-the-kata-containers-runtime)
-* [Check hardware requirements](#check-hardware-requirements)
-    * [Configure to use initrd or rootfs image](#configure-to-use-initrd-or-rootfs-image)
-    * [Enable full debug](#enable-full-debug)
-        * [debug logs and shimv2](#debug-logs-and-shimv2)
-            * [Enabling full `containerd` debug](#enabling-full-containerd-debug)
-            * [Enabling just `containerd shim` debug](#enabling-just-containerd-shim-debug)
-            * [Enabling `CRI-O` and `shimv2` debug](#enabling-cri-o-and-shimv2-debug)
-        * [journald rate limiting](#journald-rate-limiting)
-            * [`systemd-journald` suppressing messages](#systemd-journald-suppressing-messages)
-            * [Disabling `systemd-journald` rate limiting](#disabling-systemd-journald-rate-limiting)
-* [Create and install rootfs and initrd image](#create-and-install-rootfs-and-initrd-image)
-    * [Build a custom Kata agent - OPTIONAL](#build-a-custom-kata-agent---optional)
-    * [Get the osbuilder](#get-the-osbuilder)
-    * [Create a rootfs image](#create-a-rootfs-image)
-        * [Create a local rootfs](#create-a-local-rootfs)
-        * [Add a custom agent to the image - OPTIONAL](#add-a-custom-agent-to-the-image---optional)
-        * [Build a rootfs image](#build-a-rootfs-image)
-        * [Install the rootfs image](#install-the-rootfs-image)
-    * [Create an initrd image - OPTIONAL](#create-an-initrd-image---optional)
-        * [Create a local rootfs for initrd image](#create-a-local-rootfs-for-initrd-image)
-        * [Build an initrd image](#build-an-initrd-image)
-        * [Install the initrd image](#install-the-initrd-image)
-* [Install guest kernel images](#install-guest-kernel-images)
-* [Install a hypervisor](#install-a-hypervisor)
-    * [Build a custom QEMU](#build-a-custom-qemu)
-        * [Build a custom QEMU for aarch64/arm64 - REQUIRED](#build-a-custom-qemu-for-aarch64arm64---required)
-* [Run Kata Containers with Containerd](#run-kata-containers-with-containerd)
-* [Run Kata Containers with Kubernetes](#run-kata-containers-with-kubernetes)
-* [Troubleshoot Kata Containers](#troubleshoot-kata-containers)
-* [Appendices](#appendices)
-    * [Checking Docker default runtime](#checking-docker-default-runtime)
-    * [Set up a debug console](#set-up-a-debug-console)
-      * [Simple debug console setup](#simple-debug-console-setup)
-          * [Enable agent debug console](#enable-agent-debug-console)
-          * [Start `kata-monitor`](#start-kata-monitor)
-          * [Connect to debug console](#connect-to-debug-console)
-      * [Traditional debug console setup](#traditional-debug-console-setup)
-          * [Create a custom image containing a shell](#create-a-custom-image-containing-a-shell)
-          * [Build the debug image](#build-the-debug-image)
-          * [Configure runtime for custom debug image](#configure-runtime-for-custom-debug-image)
-          * [Connect to the virtual machine using the debug console](#connect-to-the-virtual-machine-using-the-debug-console)
-              * [Enabling debug console for QEMU](#enabling-debug-console-for-qemu)
-              * [Enabling debug console for cloud-hypervisor / firecracker](#enabling-debug-console-for-cloud-hypervisor--firecracker)
-          * [Create a container](#create-a-container)
-          * [Connect to the virtual machine using the debug console](#connect-to-the-virtual-machine-using-the-debug-console)
-    * [Obtain details of the image](#obtain-details-of-the-image)
-    * [Capturing kernel boot logs](#capturing-kernel-boot-logs)
+- [Warning](#warning)
+- [Assumptions](#assumptions)
+- [Initial setup](#initial-setup)
+- [Requirements to build individual components](#requirements-to-build-individual-components)
+- [Build and install the Kata Containers runtime](#build-and-install-the-kata-containers-runtime)
+- [Check hardware requirements](#check-hardware-requirements)
+  - [Configure to use initrd or rootfs image](#configure-to-use-initrd-or-rootfs-image)
+  - [Enable full debug](#enable-full-debug)
+    - [debug logs and shimv2](#debug-logs-and-shimv2)
+      - [Enabling full `containerd` debug](#enabling-full-containerd-debug)
+      - [Enabling just `containerd shim` debug](#enabling-just-containerd-shim-debug)
+      - [Enabling `CRI-O` and `shimv2` debug](#enabling-cri-o-and-shimv2-debug)
+    - [journald rate limiting](#journald-rate-limiting)
+      - [`systemd-journald` suppressing messages](#systemd-journald-suppressing-messages)
+      - [Disabling `systemd-journald` rate limiting](#disabling-systemd-journald-rate-limiting)
+- [Create and install rootfs and initrd image](#create-and-install-rootfs-and-initrd-image)
+  - [Build a custom Kata agent - OPTIONAL](#build-a-custom-kata-agent---optional)
+  - [Get the osbuilder](#get-the-osbuilder)
+  - [Create a rootfs image](#create-a-rootfs-image)
+    - [Create a local rootfs](#create-a-local-rootfs)
+    - [Add a custom agent to the image - OPTIONAL](#add-a-custom-agent-to-the-image---optional)
+    - [Build a rootfs image](#build-a-rootfs-image)
+    - [Install the rootfs image](#install-the-rootfs-image)
+  - [Create an initrd image - OPTIONAL](#create-an-initrd-image---optional)
+    - [Create a local rootfs for initrd image](#create-a-local-rootfs-for-initrd-image)
+    - [Build an initrd image](#build-an-initrd-image)
+    - [Install the initrd image](#install-the-initrd-image)
+- [Install guest kernel images](#install-guest-kernel-images)
+- [Install a hypervisor](#install-a-hypervisor)
+  - [Build a custom QEMU](#build-a-custom-qemu)
+    - [Build a custom QEMU for aarch64/arm64 - REQUIRED](#build-a-custom-qemu-for-aarch64arm64---required)
+- [Run Kata Containers with Containerd](#run-kata-containers-with-containerd)
+- [Run Kata Containers with Kubernetes](#run-kata-containers-with-kubernetes)
+- [Troubleshoot Kata Containers](#troubleshoot-kata-containers)
+- [Appendices](#appendices)
+  - [Checking Docker default runtime](#checking-docker-default-runtime)
+  - [Set up a debug console](#set-up-a-debug-console)
+    - [Simple debug console setup](#simple-debug-console-setup)
+      - [Enable agent debug console](#enable-agent-debug-console)
+      - [Connect to debug console](#connect-to-debug-console)
+    - [Traditional debug console setup](#traditional-debug-console-setup)
+      - [Create a custom image containing a shell](#create-a-custom-image-containing-a-shell)
+      - [Build the debug image](#build-the-debug-image)
+      - [Configure runtime for custom debug image](#configure-runtime-for-custom-debug-image)
+      - [Create a container](#create-a-container)
+      - [Connect to the virtual machine using the debug console](#connect-to-the-virtual-machine-using-the-debug-console)
+        - [Enabling debug console for QEMU](#enabling-debug-console-for-qemu)
+        - [Enabling debug console for cloud-hypervisor / firecracker](#enabling-debug-console-for-cloud-hypervisor--firecracker)
+        - [Connecting to the debug console](#connecting-to-the-debug-console)
+  - [Obtain details of the image](#obtain-details-of-the-image)
+  - [Capturing kernel boot logs](#capturing-kernel-boot-logs)

 # Warning

@@ -385,31 +384,30 @@ You can build and install the guest kernel image as shown [here](../tools/packag

 # Install a hypervisor

-When setting up Kata using a [packaged installation method](install/README.md#installing-on-a-linux-system), the `qemu-lite` hypervisor is installed automatically. For other installation methods, you will need to manually install a suitable hypervisor.
+When setting up Kata using a [packaged installation method](install/README.md#installing-on-a-linux-system), the
+`QEMU` VMM is installed automatically. Cloud-Hypervisor and Firecracker VMMs are available from the [release tarballs](https://github.com/kata-containers/kata-containers/releases), as well as through [`kata-deploy`](../tools/packaging/kata-deploy/README.md).
+You may choose to manually build your VMM/hypervisor.

 ## Build a custom QEMU

-Your QEMU directory need to be prepared with source code. Alternatively, you can use the [Kata containers QEMU](https://github.com/kata-containers/qemu/tree/master) and checkout the recommended branch:
+Kata Containers makes use of an upstream QEMU branch. The exact version
+and repository used can be found in the [versions file](../versions.yaml).

-```
-$ go get -d github.com/kata-containers/qemu
-$ qemu_branch=$(grep qemu-lite- ${GOPATH}/src/github.com/kata-containers/kata-containers/versions.yaml | cut -d '"' -f2)
-$ cd ${GOPATH}/src/github.com/kata-containers/qemu
-$ git checkout -b $qemu_branch remotes/origin/$qemu_branch
-$ your_qemu_directory=${GOPATH}/src/github.com/kata-containers/qemu
-```
-
-To build a version of QEMU using the same options as the default `qemu-lite` version , you could use the `configure-hypervisor.sh` script:
+Kata often utilizes patches for not-yet-upstream fixes for components,
+including QEMU. These can be found in the [packaging/QEMU directory](../tools/packaging/qemu/patches).

+To build utilizing the same options as Kata, you should make use of the `configure-hypervisor.sh` script. For example:
 ```
 $ go get -d github.com/kata-containers/kata-containers/tools/packaging
 $ cd $your_qemu_directory
-$ ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/scripts/configure-hypervisor.sh qemu > kata.cfg
+$ ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/scripts/configure-hypervisor.sh kata-qemu > kata.cfg
 $ eval ./configure "$(cat kata.cfg)"
 $ make -j $(nproc)
 $ sudo -E make install
 ```

+See the [static-build script for QEMU](../tools/packaging/static-build/qemu/build-static-qemu.sh) for a reference on how to get, setup, configure and build QEMU for Kata.
+
 ### Build a custom QEMU for aarch64/arm64 - REQUIRED
 > **Note:**
 >
@@ -477,17 +475,6 @@ debug_console_enabled = true

 This will pass `agent.debug_console agent.debug_console_vport=1026` to agent as kernel parameters, and sandboxes created using this parameters will start a shell in guest if new connection is accept from VSOCK.

-#### Start `kata-monitor`
-
-The `kata-runtime exec` command needs `kata-monitor` to get the sandbox's `vsock` address to connect to, first start `kata-monitor`.
-
-```
-$ sudo kata-monitor
-```
-
-`kata-monitor` will serve at `localhost:8090` by default.
-
-
 #### Connect to debug console

 Command `kata-runtime exec` is used to connect to the debug console.
@@ -502,6 +489,10 @@ bash-4.2# exit
 exit
 ```

+`kata-runtime exec` has a command-line option `runtime-namespace`, which is used to specify under which [runtime namespace](https://github.com/containerd/containerd/blob/master/docs/namespaces.md) the particular pod was created. By default, it is set to `k8s.io` and works for containerd when configured
+with Kubernetes. For CRI-O, the namespace should be set to `default` explicitly. This should not be confused with [Kubernetes namespaces](https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/).
+For other CRI-runtimes and configurations, you may need to set the namespace utilizing the `runtime-namespace` option.
+
 If you want to access guest OS through a traditional way, see [Traditional debug console setup)](#traditional-debug-console-setup).

 ### Traditional debug console setup
@@ -621,8 +612,11 @@ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.debug_cons
 > **Note** Ports 1024 and 1025 are reserved for communication with the agent
 > and gathering of agent logs respectively. 

-Next, connect to the debug console. The VSOCKS paths vary slightly between
-cloud-hypervisor and firecracker.
+##### Connecting to the debug console
+
+Next, connect to the debug console. The VSOCKS paths vary slightly between
+VMM solutions.
+
 In case of cloud-hypervisor, connect to the `vsock` as shown:
 ```
 $ sudo su -c 'cd /var/run/vc/vm/{sandbox_id}/root/ && socat stdin unix-connect:clh.sock'
@@ -639,6 +633,12 @@ CONNECT 1026

 **Note**: You need to press the `RETURN` key to see the shell prompt.

+
+For QEMU, connect to the `vsock` as shown:
+```
+$ sudo su -c 'cd /var/run/vc/vm/{sandbox_id} && socat "stdin,raw,echo=0,escape=0x11" "unix-connect:console.sock"'
+```
+
 To disconnect from the virtual machine, type `CONTROL+q` (hold down the
 `CONTROL` key and press `q`).

--- a/docs/Documentation-Requirements.md
+++ b/docs/Documentation-Requirements.md
@@ -25,7 +25,7 @@ All documents must:
 - Have a `.md` file extension.
 - Include a TOC (table of contents) at the top of the document with links to
  all heading sections. We recommend using the
-  [`check-markdown`](https://github.com/kata-containers/tests/tree/master/cmd/check-markdown)
+  [`kata-check-markdown`](https://github.com/kata-containers/tests/tree/master/cmd/check-markdown)
  tool to generate the TOC.
 - Be linked to from another document in the same repository.

--- a/docs/Licensing-strategy.md
+++ b/docs/Licensing-strategy.md
@@ -22,4 +22,4 @@ licensing and allows automated tooling to check the license of individual
 files.

 This SPDX licence identifier requirement is enforced by the
-[CI (Continuous Integration) system](https://github.com/kata-containers/tests/blob/master/.ci/static-checks.sh).
+[CI (Continuous Integration) system](https://github.com/kata-containers/tests/blob/main/.ci/static-checks.sh).
--- a/docs/Limitations.md
+++ b/docs/Limitations.md
@@ -19,6 +19,8 @@
        * [Support for joining an existing VM network](#support-for-joining-an-existing-vm-network)
        * [docker --net=host](#docker---nethost)
        * [docker run --link](#docker-run---link)
+    * [Storage limitations](#storage-limitations)
+        * [Kubernetes `volumeMounts.subPaths`](#kubernetes-volumemountssubpaths)
    * [Host resource sharing](#host-resource-sharing)
        * [docker run --privileged](#docker-run---privileged)
 * [Miscellaneous](#miscellaneous)
@@ -26,7 +28,7 @@
 * [Appendices](#appendices)
    * [The constraints challenge](#the-constraints-challenge)

---
+***

 # Overview

@@ -92,7 +94,9 @@ This section lists items that might be possible to fix.
 ### checkpoint and restore

 The runtime does not provide `checkpoint` and `restore` commands. There
-are discussions about using VM save and restore to give [`criu`](https://github.com/checkpoint-restore/criu)-like functionality, which might provide a solution.
+are discussions about using VM save and restore to give us a
+[`criu`](https://github.com/checkpoint-restore/criu)-like functionality,
+which might provide a solution.

 Note that the OCI standard does not specify `checkpoint` and `restore`
 commands.
@@ -216,6 +220,17 @@ Equivalent functionality can be achieved with the newer docker networking comman
 See more documentation at
 [docs.docker.com](https://docs.docker.com/engine/userguide/networking/default_network/dockerlinks/).

+## Storage limitations
+
+### Kubernetes `volumeMounts.subPaths`
+
+Kubernetes `volumeMounts.subPath` is not supported by Kata Containers at the
+moment.
+
+See [this issue](https://github.com/kata-containers/runtime/issues/2812) for more details.
+[Another issue](https://github.com/kata-containers/kata-containers/issues/1728) focuses on the case of `emptyDir`.
+
+
 ## Host resource sharing

 ### docker run --privileged
@@ -224,7 +239,7 @@ Privileged support in Kata is essentially different from `runc` containers.
 Kata does support `docker run --privileged` command, but in this case full access
 to the guest VM is provided in addition to some host access.

-The container runs with elevated capabilities within the guest and is granted 
+The container runs with elevated capabilities within the guest and is granted
 access to guest devices instead of the host devices.
 This is also true with using `securityContext privileged=true` with Kubernetes.

--- a/docs/README.md
+++ b/docs/README.md
@@ -49,6 +49,7 @@ Documents that help to understand and contribute to Kata Containers.
 ### Design and Implementations

 * [Kata Containers Architecture](design/architecture.md): Architectural overview of Kata Containers
+* [Kata Containers E2E Flow](design/end-to-end-flow.md): The entire end-to-end flow of Kata Containers
 * [Kata Containers design](./design/README.md): More Kata Containers design documents

 ### How to Contribute
--- a/docs/Release-Process.md
+++ b/docs/Release-Process.md
@@ -18,8 +18,7 @@
 ## Requirements

 - [hub](https://github.com/github/hub)
-
- OBS account with permissions on [`/home:katacontainers`](https://build.opensuse.org/project/subprojects/home:katacontainers)
+  * Using an [application token](https://github.com/settings/tokens) is required for hub.

 - GitHub permissions to push tags and create releases in Kata repositories.

@@ -32,14 +31,9 @@

 ### Bump all Kata repositories

-  - We have set up a Jenkins job to bump the version in the `VERSION` file in all Kata repositories. Go to the [Jenkins bump-job page](http://jenkins.katacontainers.io/job/release/build) to trigger a new job.
-  - Start a new job with variables for the job passed as:
-     - `BRANCH=<the-branch-you-want-to-bump>`
-     - `NEW_VERSION=<the-new-kata-version>`
-
-     For example, in the case where you want to make a patch release `1.10.2`, the variable `NEW_VERSION` should be `1.10.2` and `BRANCH` should point to  `stable-1.10`. In case of an alpha or release candidate release, `BRANCH` should point to `master` branch.
-
-  Alternatively, you can also bump the repositories using a script in the Kata packaging repo
+  Bump the repositories using a script in the Kata packaging repo, where:
+  - `BRANCH=<the-branch-you-want-to-bump>`
+  - `NEW_VERSION=<the-new-kata-version>`
  ```
  $ cd ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/release
  $ export NEW_VERSION=<the-new-kata-version>
@@ -66,7 +60,7 @@

 ### Check Git-hub Actions

-  We make use of [GitHub actions](https://github.com/features/actions) in this [file](https://github.com/kata-containers/kata-containers/blob/master/.github/workflows/main.yaml) in the `kata-containers/kata-containers` repository to build and upload release artifacts. This action is auto triggered with the above step when a new tag is pushed to the `kata-containers/kata-conatiners` repository.
+  We make use of [GitHub actions](https://github.com/features/actions) in this [file](https://github.com/kata-containers/kata-containers/blob/main/.github/workflows/main.yaml) in the `kata-containers/kata-containers` repository to build and upload release artifacts. This action is auto triggered with the above step when a new tag is pushed to the `kata-containers/kata-containers` repository.

  Check the [actions status page](https://github.com/kata-containers/kata-containers/actions) to verify all steps in the actions workflow have completed successfully. On success, a static tarball containing Kata release artifacts will be uploaded to the [Release page](https://github.com/kata-containers/kata-containers/releases).

@@ -79,9 +73,9 @@
  ```
  $ cd ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/release
  # Note: OLD_VERSION is where the script should start to get changes.
-  $ ./runtime-release-notes.sh ${OLD_VERSION} ${NEW_VERSION} > notes.md
+  $ ./release-notes.sh ${OLD_VERSION} ${NEW_VERSION} > notes.md
  # Edit the `notes.md` file to review and make any changes to the release notes.
-  # Add the release notes in GitHub runtime.
+  # Add the release notes in the project's GitHub.
  $ hub release edit -F notes.md "${NEW_VERSION}"
  ```

--- a/docs/design/arch-images/katacontainers-e2e-with-bg.jpg
+++ b/docs/design/arch-images/katacontainers-e2e-with-bg.jpg
--- a/docs/design/arch-images/katacontainers-e2e.svg
+++ b/docs/design/arch-images/katacontainers-e2e.svg
--- a/docs/design/architecture.md
+++ b/docs/design/architecture.md
@@ -137,7 +137,7 @@ The runtime uses a TOML format configuration file called `configuration.toml`. B

 The actual configuration file paths can be determined by running:
 ```
-$ kata-runtime --kata-show-default-config-paths
+$ kata-runtime --show-default-config-paths
 ```
 Most users will not need to modify the configuration file.

--- a/docs/design/end-to-end-flow.md
+++ b/docs/design/end-to-end-flow.md
@@ -0,0 +1,4 @@
+# Kata Containers E2E Flow
+
+
+![Kata containers e2e flow](arch-images/katacontainers-e2e-with-bg.jpg)
--- a/docs/how-to/how-to-import-kata-logs-with-fluentd.md
+++ b/docs/how-to/how-to-import-kata-logs-with-fluentd.md
@@ -185,7 +185,7 @@ in Kibana:
 ![Kata tags in EFK](./images/efk_syslog_entry_detail.png).

 We can however further sub-parse the Kata entries using the
-[Fluentd plugins](https://docs.fluentbit.io/manual/parser/logfmt) that will parse
+[Fluentd plugins](https://docs.fluentbit.io/manual/pipeline/parsers/logfmt) that will parse
 `logfmt` formatted data. We can utilise these to parse the sub-fields using a Fluentd filter
 section. At the same time, we will prefix the new fields with `kata_` to make it clear where
 they have come from:
@@ -222,7 +222,7 @@ test to check the parsing works. The resulting output from Fluentd is:
  "_COMM":"kata-runtime",
  "_EXE":"/opt/kata/bin/kata-runtime",
  "SYSLOG_TIMESTAMP":"Feb 21 10:31:27 ",
-  "_CMDLINE":"/opt/kata/bin/kata-runtime --kata-config /opt/kata/share/defaults/kata-containers/configuration-qemu.toml --root /run/runc state 7cdd31660d8705facdadeb8598d2c0bd008e8142c54e3b3069abd392c8d58997",
+  "_CMDLINE":"/opt/kata/bin/kata-runtime --config /opt/kata/share/defaults/kata-containers/configuration-qemu.toml --root /run/runc state 7cdd31660d8705facdadeb8598d2c0bd008e8142c54e3b3069abd392c8d58997",
  "SYSLOG_PID":"14314",
  "_PID":"14314",
  "MESSAGE":"time=\"2020-02-21T10:31:27.810781647Z\" level=info msg=\"release sandbox\" arch=amd64 command=state container=7cdd31660d8705facdadeb8598d2c0bd008e8142c54e3b3069abd392c8d58997 name=kata-runtime pid=14314 sandbox=1c3e77cad66aa2b6d8cc846f818370f79cb0104c0b840f67d0f502fd6562b68c source=virtcontainers subsystem=sandbox",
@@ -281,7 +281,7 @@ own file (rather than into the system journal).

 ```bash
 #!/bin/bash
-/opt/kata/bin/kata-runtime --kata-config "/opt/kata/share/defaults/kata-containers/configuration-qemu.toml" --log-format=json --log=/var/log/kata-runtime.log $@
+/opt/kata/bin/kata-runtime --config "/opt/kata/share/defaults/kata-containers/configuration-qemu.toml" --log-format=json --log=/var/log/kata-runtime.log $@
 ```

 And then we'll add the Fluentd config section to parse that file. Note, we inform the parser that Kata is
--- a/docs/how-to/how-to-set-prometheus-in-k8s.md
+++ b/docs/how-to/how-to-set-prometheus-in-k8s.md
@@ -34,7 +34,7 @@ Also you should ensure that `kubectl` working correctly.
 Start Prometheus by utilizing our sample manifest:

 ```
-$ kubectl apply -f https://raw.githubusercontent.com/kata-containers/kata-containers/2.0-dev/docs/how-to/data/prometheus.yml
+$ kubectl apply -f https://raw.githubusercontent.com/kata-containers/kata-containers/main/docs/how-to/data/prometheus.yml
 ```

 This will create a new namespace, `prometheus`, and create the following resources:
@@ -60,7 +60,7 @@ go_gc_duration_seconds{quantile="0.75"} 0.000229911
 `kata-monitor` can be started on the cluster as follows:

 ```
-$ kubectl apply -f https://raw.githubusercontent.com/kata-containers/kata-containers/2.0-dev/docs/how-to/data/kata-monitor-daemonset.yml
+$ kubectl apply -f https://raw.githubusercontent.com/kata-containers/kata-containers/main/docs/how-to/data/kata-monitor-daemonset.yml
 ```

 This will create a new namespace `kata-system` and a `daemonset` in it.
@@ -73,7 +73,7 @@ Once the `daemonset` is running, Prometheus should discover `kata-monitor` as a
 Run this command to run Grafana in Kubernetes:

 ```
-$ kubectl apply -f https://raw.githubusercontent.com/kata-containers/kata-containers/2.0-dev/docs/how-to/data/grafana.yml
+$ kubectl apply -f https://raw.githubusercontent.com/kata-containers/kata-containers/main/docs/how-to/data/grafana.yml
 ```

 This will create deployment and service for Grafana under namespace `prometheus`.
@@ -99,7 +99,7 @@ You can import this dashboard using Grafana UI, or using `curl` command in conso
 $ curl -XPOST -i localhost:3000/api/dashboards/import \
    -u admin:admin \
    -H "Content-Type: application/json" \
-	-d "{\"dashboard\":$(curl -sL https://raw.githubusercontent.com/kata-containers/kata-containers/2.0-dev/docs/how-to/data/dashboard.json )}"
+	-d "{\"dashboard\":$(curl -sL https://raw.githubusercontent.com/kata-containers/kata-containers/main/docs/how-to/data/dashboard.json )}"
 ```

 ## References
--- a/docs/how-to/how-to-set-sandbox-config-kata.md
+++ b/docs/how-to/how-to-set-sandbox-config-kata.md
@@ -26,6 +26,7 @@ There are several kinds of Kata configurations and they are listed below.
 | `io.katacontainers.config.runtime.disable_new_netns` | `boolean` | determines if a new netns is created for the hypervisor process |
 | `io.katacontainers.config.runtime.internetworking_model` | string| determines how the VM should be connected to the container network interface. Valid values are `macvtap`, `tcfilter` and `none` |
 | `io.katacontainers.config.runtime.sandbox_cgroup_only`| `boolean` | determines if Kata processes are managed only in sandbox cgroup |
+| `io.katacontainers.config.runtime.enable_pprof` | `boolean` | enables Golang `pprof` for `containerd-shim-kata-v2` process |

 ## Agent Options
 | Key | Value Type | Comments |
@@ -60,7 +61,7 @@ There are several kinds of Kata configurations and they are listed below.
 | `io.katacontainers.config.hypervisor.enable_swap` | `boolean` | enable swap of VM memory |
 | `io.katacontainers.config.hypervisor.enable_vhost_user_store` | `boolean` | enable vhost-user storage device (QEMU) |
 | `io.katacontainers.config.hypervisor.enable_virtio_mem` | `boolean` | enable virtio-mem (QEMU) |
-| `io.katacontainers.config.hypervisor.entropy_source` | string| the path to a host source of entropy (`/dev/random`, `/dev/urandom` or real hardware RNG device) |
+| `io.katacontainers.config.hypervisor.entropy_source` (R) | string| the path to a host source of entropy (`/dev/random`, `/dev/urandom` or real hardware RNG device) |
 | `io.katacontainers.config.hypervisor.file_mem_backend` (R) | string | file based memory backend root directory |
 | `io.katacontainers.config.hypervisor.firmware_hash` | string | container firmware SHA-512 hash value |
 | `io.katacontainers.config.hypervisor.firmware` | string | the guest firmware that will run the container VM |
@@ -95,6 +96,8 @@ There are several kinds of Kata configurations and they are listed below.

 In case of CRI-O, all annotations specified in the pod spec are passed down to Kata.

+# containerd Configuration
+
 For containerd, annotations specified in the pod spec are passed down to Kata
 starting with version `1.3.0` of containerd. Additionally, extra configuration is
 needed for containerd, by providing a `pod_annotations` field in the containerd config
@@ -107,11 +110,9 @@ for passing annotations to Kata from containerd:
 $ cat /etc/containerd/config
 ....

-[plugins.cri.containerd.runtimes.kata]
-           runtime_type = "io.containerd.runc.v1"
+         [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.kata]
+           runtime_type = "io.containerd.kata.v2"
           pod_annotations = ["io.katacontainers.*"]
-           [plugins.cri.containerd.runtimes.kata.options]
-             BinaryName = "/usr/bin/kata-runtime"
 ....

 ```
@@ -197,6 +198,7 @@ the configuration entry:
 | Key | Config file entry | Comments |
 |-------| ----- | ----- |
 | `ctlpath`  | `valid_ctlpaths` | Valid paths for `acrnctl` binary |
+| `entropy_source` | `valid_entropy_sources` | Valid entropy sources, e.g. `/dev/random` |
 | `file_mem_backend`  | `valid_file_mem_backends` | Valid locations for the file-based memory backend root directory |
 | `jailer_path`  | `valid_jailer_paths`| Valid paths for the jailer constraining the container VM (Firecracker) |
 | `path`  | `valid_hypervisor_paths` | Valid hypervisors to run the container VM |
--- a/docs/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md
+++ b/docs/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md
@@ -7,9 +7,10 @@
    * [Configure Kubelet to use containerd](#configure-kubelet-to-use-containerd)
    * [Configure HTTP proxy - OPTIONAL](#configure-http-proxy---optional)
 * [Start Kubernetes](#start-kubernetes)
-* [Install a Pod Network](#install-a-pod-network)
+* [Configure Pod Network](#configure-pod-network)
 * [Allow pods to run in the master node](#allow-pods-to-run-in-the-master-node)
-* [Create an untrusted pod using Kata Containers](#create-an-untrusted-pod-using-kata-containers)
+* [Create runtime class for Kata Containers](#create-runtime-class-for-kata-containers)
+* [Run pod in Kata Containers](#run-pod-in-kata-containers)
 * [Delete created pod](#delete-created-pod)

 This document describes how to set up a single-machine Kubernetes (k8s) cluster.
@@ -18,9 +19,6 @@ The Kubernetes cluster will use the
 [CRI containerd plugin](https://github.com/containerd/cri) and
 [Kata Containers](https://katacontainers.io) to launch untrusted workloads.

-For Kata Containers 1.5.0-rc2 and above, we will use `containerd-shim-kata-v2` (short as `shimv2` in this documentation)
-to launch Kata Containers. For the previous version of Kata Containers, the Pods are launched with `kata-runtime`.
-
 ## Requirements

 - Kubernetes, Kubelet, `kubeadm`
@@ -125,43 +123,33 @@ $ sudo systemctl daemon-reload
  $ sudo -E kubectl get pods
  ```

-## Install a Pod Network
+## Configure Pod Network

 A pod network plugin is needed to allow pods to communicate with each other.
+You can find more information about CNI plugins in the [Creating a cluster with `kubeadm`](https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/#instructions) guide.

- Install the `flannel` plugin by following the
-  [Using `kubeadm` to Create a Cluster](https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/#instructions)
-  guide, starting from the **Installing a pod network** section.
-
- Create a pod network using flannel
-
-  > **Note:** There is no known way to determine programmatically the best version (commit) to use.
-  > See https://github.com/coreos/flannel/issues/995.
+By default, the CNI plugin binaries are installed under `/opt/cni/bin` (in package `kubernetes-cni`), so you only need to create a configuration file for the CNI plugin.

  ```bash
-  $ sudo -E kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
-  ```
+  $ sudo -E mkdir -p /etc/cni/net.d

- Wait for the pod network to become available
-
-  ```bash
-  # number of seconds to wait for pod network to become available
-  $ timeout_dns=420
-
-  $ while [ "$timeout_dns" -gt 0 ]; do
-      if sudo -E kubectl get pods --all-namespaces | grep dns | grep Running; then
-          break
-      fi
-
-      sleep 1s
-      ((timeout_dns--))
-   done
-  ```
-
- Check the pod network is running
-
-  ```bash
-  $ sudo -E kubectl get pods --all-namespaces | grep dns | grep Running && echo "OK" || ( echo "FAIL" && false )
+  $ cat <<EOF | sudo -E tee /etc/cni/net.d/10-mynet.conf
+  {
+    "cniVersion": "0.2.0",
+    "name": "mynet",
+    "type": "bridge",
+    "bridge": "cni0",
+    "isGateway": true,
+    "ipMasq": true,
+    "ipam": {
+      "type": "host-local",
+      "subnet": "172.19.0.0/24",
+      "routes": [
+        { "dst": "0.0.0.0/0" }
+      ]
+    }
+  }
+  EOF
  ```

 ## Allow pods to run in the master node
@@ -172,24 +160,38 @@ By default, the cluster will not schedule pods in the master node. To enable mas
 $ sudo -E kubectl taint nodes --all node-role.kubernetes.io/master-
 ```

-## Create an untrusted pod using Kata Containers
+## Create runtime class for Kata Containers

 By default, all pods are created with the default runtime configured in CRI containerd plugin.
+From Kubernetes v1.12, users can use [`RuntimeClass`](https://kubernetes.io/docs/concepts/containers/runtime-class/#runtime-class) to specify a different runtime for Pods.

-If a pod has the `io.kubernetes.cri.untrusted-workload` annotation set to `"true"`, the CRI plugin runs the pod with the
+```bash
+$ cat > runtime.yaml <<EOF
+apiVersion: node.k8s.io/v1beta1
+kind: RuntimeClass
+metadata:
+  name: kata
+handler: kata
+EOF
+
+$ sudo -E kubectl apply -f runtime.yaml
+```
+
+## Run pod in Kata Containers
+
+If a pod has the `runtimeClassName` set to `kata`, the CRI plugin runs the pod with the
 [Kata Containers runtime](../../src/runtime/README.md).

- Create an untrusted pod configuration
+- Create a pod configuration that uses the Kata Containers runtime

  ```bash
-  $ cat << EOT | tee nginx-untrusted.yaml
+  $ cat << EOT | tee nginx-kata.yaml
  apiVersion: v1
  kind: Pod
  metadata:
-    name: nginx-untrusted
-    annotations:
-      io.kubernetes.cri.untrusted-workload: "true"
+    name: nginx-kata
  spec:
+    runtimeClassName: kata
    containers:
    - name: nginx
      image: nginx
@@ -197,9 +199,9 @@ If a pod has the `io.kubernetes.cri.untrusted-workload` annotation set to `"true
  EOT
  ```

- Create an untrusted pod
+- Create the pod
  ```bash
-  $ sudo -E kubectl apply -f nginx-untrusted.yaml
+  $ sudo -E kubectl apply -f nginx-kata.yaml
  ```

 - Check pod is running
@@ -216,5 +218,5 @@ If a pod has the `io.kubernetes.cri.untrusted-workload` annotation set to `"true
 ## Delete created pod

 ```bash
-$ sudo -E kubectl delete -f nginx-untrusted.yaml
+$ sudo -E kubectl delete -f nginx-kata.yaml
 ```
--- a/docs/how-to/how-to-use-kata-containers-with-acrn.md
+++ b/docs/how-to/how-to-use-kata-containers-with-acrn.md
@@ -91,7 +91,7 @@ To configure Kata Containers with ACRN, copy the generated `configuration-acrn.t
 The following command shows full paths to the `configuration.toml` files that the runtime loads. It will use the first path that exists. (Please make sure the kernel and image paths are set correctly in the `configuration.toml` file)

 ```bash
-$ sudo kata-runtime --kata-show-default-config-paths
+$ sudo kata-runtime --show-default-config-paths
 ```

 >**Warning:** Please offline CPUs using [this](offline_cpu.sh) script, else VM launches will fail.
--- a/docs/how-to/how-to-use-virtio-fs-with-kata.md
+++ b/docs/how-to/how-to-use-virtio-fs-with-kata.md
@@ -1,61 +1,12 @@
 # Kata Containers with virtio-fs

- [Introduction](#introduction)
- [Pre-requisites](#pre-requisites)
- [Install Kata Containers with virtio-fs support](#install-kata-containers-with-virtio-fs-support)
- [Run a Kata Container utilizing virtio-fs](#run-a-kata-container-utilizing-virtio-fs)
+- [Kata Containers with virtio-fs](#kata-containers-with-virtio-fs)
+  - [Introduction](#introduction)

 ## Introduction

 Container deployments utilize explicit or implicit file sharing between host filesystem and containers. From a trust perspective, avoiding a shared file-system between the trusted host and untrusted container is recommended. This is not always feasible. In Kata Containers, block-based volumes are preferred as they allow usage of either device pass through or `virtio-blk` for access within the virtual machine.

-As of the 1.7 release of Kata Containers, [9pfs](https://www.kernel.org/doc/Documentation/filesystems/9p.txt) is the default filesystem sharing mechanism. While this does allow for workload compatibility, it does so with degraded performance and potential for POSIX compliance limitations.
+As of the 2.0 release of Kata Containers, [virtio-fs](https://virtio-fs.gitlab.io/) is the default filesystem sharing mechanism.

-To help address these limitations, [virtio-fs](https://virtio-fs.gitlab.io/) has been developed. virtio-fs is a shared file system that lets virtual machines access a directory tree on the host. In Kata Containers, virtio-fs can be used to share container volumes, secrets, config-maps, configuration files (hostname, hosts, `resolv.conf`) and the container rootfs on the host with the guest.  virtio-fs provides significant performance and POSIX compliance improvements compared to 9pfs.
-
-Enabling of virtio-fs requires changes in the guest kernel as well as the VMM. For Kata Containers, experimental virtio-fs support is enabled through `qemu` and `cloud-hypervisor` VMMs.
-
-**Note: virtio-fs support is experimental in the 1.7 release of Kata Containers. Work is underway to improve stability, performance and upstream integration. This is available for early preview - use at your own risk**
-
-This document describes how to get Kata Containers to work with virtio-fs.
-
-## Pre-requisites
-
-Before Kata 1.8 this feature required the host to have hugepages support enabled. Enable this with the `sysctl vm.nr_hugepages=1024` command on the host.In later versions of Kata, virtio-fs leverages `/dev/shm` as the shared memory backend. The default size of `/dev/shm` on a system is typically half of the total system memory. This can pose a physical limit to the maximum number of pods that can be launched with virtio-fs. This can be overcome by increasing the size of `/dev/shm` as shown below:
-
-```bash
-$ mount -o remount,size=${desired_shm_size} /dev/shm
-```
- 
-## Install Kata Containers with virtio-fs support
-
-The Kata Containers `qemu` configuration with virtio-fs and the `virtiofs` daemon are available in the [Kata Container release](https://github.com/kata-containers/runtime/releases) artifacts starting with the 1.9 release. Installation is available through [distribution packages](https://github.com/kata-containers/documentation/blob/master/install/README.md#supported-distributions) as well through [`kata-deploy`](https://github.com/kata-containers/packaging/tree/master/kata-deploy).
-
-**Note: Support for virtio-fs was first introduced in `NEMU` hypervisor in Kata 1.8 release. This hypervisor has been deprecated.**
-
-Install the latest release of Kata with `kata-deploy` as follows:
-```
-docker run --runtime=runc -v /opt/kata:/opt/kata -v /var/run/dbus:/var/run/dbus -v /run/systemd:/run/systemd -v /etc/docker:/etc/docker -it katadocker/kata-deploy kata-deploy-docker install
-```
-
-This will place the Kata release artifacts in `/opt/kata`, and update Docker's configuration to include a runtime target, `kata-qemu-virtiofs`. Learn more about `kata-deploy` and how to use `kata-deploy` in Kubernetes [here](https://github.com/kata-containers/packaging/tree/master/kata-deploy#kubernetes-quick-start).
-
-## Run a Kata Container utilizing virtio-fs
-
-Once installed, start a new container, utilizing `qemu` + `virtiofs`:
-```bash
-$ docker run --runtime=kata-qemu-virtiofs -it busybox
-```
-
-Verify the new container is running with the `qemu` hypervisor as well as using `virtiofsd`. To do this look for the hypervisor path and the `virtiofs` daemon process on the host:
-```bash
-$ ps -aux | grep virtiofs
-root ... /home/foo/build-x86_64_virt/x86_64_virt-softmmu/qemu-system-x86_64_virt
-...  -machine virt,accel=kvm,kernel_irqchip,nvdimm ...
-root ... /home/foo/build-x86_64_virt/virtiofsd-x86_64 ...
-```
-
-You can also try out virtio-fs using `cloud-hypervisor` VMM:
-```bash
-$ docker run --runtime=kata-clh -it busybox
-```
+virtio-fs support works out of the box for `cloud-hypervisor` and `qemu`, when Kata Containers is deployed using `kata-deploy`. Learn more about `kata-deploy` and how to use `kata-deploy` in Kubernetes [here](https://github.com/kata-containers/packaging/tree/master/kata-deploy#kubernetes-quick-start).
--- a/docs/how-to/how-to-use-virtio-mem-with-kata.md
+++ b/docs/how-to/how-to-use-virtio-mem-with-kata.md
@@ -13,26 +13,23 @@ Kata Containers with `virtio-mem` supports memory resize.

 ## Requisites

-Kata Containers with `virtio-mem` requires Linux and the QEMU that support `virtio-mem`.
-The Linux kernel and QEMU upstream version still not support `virtio-mem`.  @davidhildenbrand is working on them.
-Please use following unofficial version of the Linux kernel and QEMU that support `virtio-mem` with Kata Containers.
+Kata Containers supports `virtio-mem` only with QEMU.
+Install and setup Kata Containers as shown [here](../install/README.md).

-The Linux kernel is at https://github.com/davidhildenbrand/linux/tree/virtio-mem-rfc-v4.
-The Linux kernel config that can work with Kata Containers is at https://gist.github.com/teawater/016194ee84748c768745a163d08b0fb9.
-
-The QEMU is at https://github.com/teawater/qemu/tree/kata-virtio-mem. (The original source is at https://github.com/davidhildenbrand/qemu/tree/virtio-mem.  Its base version of QEMU cannot work with Kata Containers.  So merge the commit of `virtio-mem` to upstream QEMU.)
-
-Set Linux and the QEMU that support `virtio-mem` with following line in the Kata Containers QEMU configuration `configuration-qemu.toml`:
-```toml
-[hypervisor.qemu]
-path = "qemu-dir"
-kernel = "vmlinux-dir"
+### With x86_64
+The `virtio-mem` config option is enabled in the x86_64 Kata Linux kernel.
+Enable `virtio-mem` as follows:
+```
+$ sudo sed -i -e 's/^#enable_virtio_mem.*$/enable_virtio_mem = true/g' /etc/kata-containers/configuration.toml
 ```

-Enable `virtio-mem` with following line in the Kata Containers configuration:
-```toml
-enable_virtio_mem = true
+### With other architectures
+The `virtio-mem` config option is not enabled in the Kata Linux kernel for other architectures.
+You can enable it by setting the following kernel config option:
 ```
+CONFIG_VIRTIO_MEM=y
+```
+Then you can build and install the guest kernel image as shown [here](../../tools/packaging/kernel/README.md#build-kata-containers-kernel).

 ## Run a Kata Container utilizing `virtio-mem`

@@ -41,13 +38,35 @@ Use following command to enable memory overcommitment of a Linux kernel.  Becaus
 $ echo 1 | sudo tee /proc/sys/vm/overcommit_memory
 ```

-Use following command start a Kata Container.
+Use the following commands to start a Kata Container.
 ```
-$ docker run --rm -it --runtime=kata --name test busybox
+$ pod_yaml=pod.yaml
+$ container_yaml=container.yaml
+$ image="quay.io/prometheus/busybox:latest"
+$ cat << EOF > "${pod_yaml}"
+metadata:
+  name: busybox-sandbox1
+EOF
+$ cat << EOF > "${container_yaml}"
+metadata:
+  name: busybox-killed-vmm
+image:
+  image: "$image"
+command:
+- top
+EOF
+$ sudo crictl pull $image
+$ podid=$(sudo crictl runp $pod_yaml)
+$ cid=$(sudo crictl create $podid $container_yaml $pod_yaml)
+$ sudo crictl start $cid
 ```

-Use following command set the memory size of test to default_memory + 512m.
+Use the following command to set the container memory limit to 2g and the memory size of the VM to its default_memory + 2g.
 ```
-$ docker update -m 512m --memory-swap -1 test
+$ sudo crictl update --memory $((2*1024*1024*1024)) $cid
 ```

+Use the following command to set the container memory limit to 1g and the memory size of the VM to its default_memory + 1g.
+```
+$ sudo crictl update --memory $((1*1024*1024*1024)) $cid
+```
--- a/docs/install/README.md
+++ b/docs/install/README.md
@@ -50,9 +50,7 @@ Kata packages are provided by official distribution repositories for:
 | Distribution (link to installation guide)                | Minimum versions                                                               |
 |----------------------------------------------------------|--------------------------------------------------------------------------------|
 | [CentOS](centos-installation-guide.md)                   | 8                                                                              |
-| [Fedora](fedora-installation-guide.md)                   | 32, Rawhide                                                                    |
-| [openSUSE](opensuse-installation-guide.md)               | [Leap 15.1](opensuse-leap-15.1-installation-guide.md)<br>Leap 15.2, Tumbleweed |
-| [SUSE Linux Enterprise (SLE)](sle-installation-guide.md) | SLE 15 SP1, 15 SP2                                                             |
+| [Fedora](fedora-installation-guide.md)                   | 34                                                                             |

 > **Note::**
 >
--- a/docs/install/centos-installation-guide.md
+++ b/docs/install/centos-installation-guide.md
@@ -3,15 +3,9 @@
 1. Install the Kata Containers components with the following commands:

   ```bash
+   $ sudo -E dnf install -y centos-release-advanced-virtualization
+   $ sudo -E dnf module disable -y virt:rhel
   $ source /etc/os-release
-   $ cat <<EOF | sudo -E tee /etc/yum.repos.d/advanced-virt.repo
-     [advanced-virt]
-     name=Advanced Virtualization
-     baseurl=http://mirror.centos.org/\$contentdir/\$releasever/virt/\$basearch/advanced-virtualization
-     enabled=1
-     gpgcheck=1
-     skip_if_unavailable=1
-     EOF
   $ cat <<EOF | sudo -E tee /etc/yum.repos.d/kata-containers.repo
     [kata-containers]
     name=Kata Containers
@@ -20,8 +14,7 @@
     gpgcheck=1
     skip_if_unavailable=1
     EOF
-   $ sudo -E dnf module disable -y virt:rhel
-   $ sudo -E dnf install -y kata-runtime
+   $ sudo -E dnf install -y kata-containers
   ```

 2. Decide which container manager to use and select the corresponding link that follows:
--- a/docs/install/fedora-installation-guide.md
+++ b/docs/install/fedora-installation-guide.md
@@ -3,7 +3,7 @@
 1. Install the Kata Containers components with the following commands:

   ```bash
-   $ sudo -E dnf -y install kata-runtime
+   $ sudo -E dnf -y install kata-containers
   ```

 2. Decide which container manager to use and select the corresponding link that follows:
--- a/docs/install/opensuse-installation-guide.md
+++ b/docs/install/opensuse-installation-guide.md
@@ -1,10 +0,0 @@
-# Install Kata Containers on openSUSE
-
-1. Install the Kata Containers components with the following commands:
-
-   ```bash
-   $ sudo -E zypper -n install katacontainers
-   ```
-
-2. Decide which container manager to use and select the corresponding link that follows:
-   - [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)
--- a/docs/install/opensuse-leap-15.1-installation-guide.md
+++ b/docs/install/opensuse-leap-15.1-installation-guide.md
@@ -1,11 +0,0 @@
-# Install Kata Containers on openSUSE Leap 15.1
-
-1. Install the Kata Containers components with the following commands:
-
-   ```bash
-   $ sudo -E zypper addrepo --refresh "https://download.opensuse.org/repositories/devel:/kubic/openSUSE_Leap_15.1/devel:kubic.repo"
-   $ sudo -E zypper -n --gpg-auto-import-keys install katacontainers
-   ```
-
-2. Decide which container manager to use and select the corresponding link that follows:
-   - [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)
--- a/docs/install/sle-installation-guide.md
+++ b/docs/install/sle-installation-guide.md
@@ -1,13 +0,0 @@
-# Install Kata Containers on SLE
-
-1. Install the Kata Containers components with the following commands:
-
-   ```bash
-   $ source /etc/os-release
-   $ DISTRO_VERSION=$(sed "s/-/_/g" <<< "$VERSION")
-   $ sudo -E zypper addrepo --refresh "https://download.opensuse.org/repositories/devel:/kubic/SLE_${DISTRO_VERSION}_Backports/devel:kubic.repo"
-   $ sudo -E zypper -n --gpg-auto-import-keys install katacontainers
-   ```
-
-2. Decide which container manager to use and select the corresponding link that follows:
-   - [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)
--- a/docs/install/snap-installation-guide.md
+++ b/docs/install/snap-installation-guide.md
@@ -2,9 +2,6 @@

 * [Install Kata Containers](#install-kata-containers)
 * [Configure Kata Containers](#configure-kata-containers)
-* [Integration with non-compatible shim v2 Container Engines](#integration-with-non-compatible-shim-v2-container-engines)
-    * [Integration with Docker](#integration-with-docker)
-    * [Integration with Podman](#integration-with-podman)
 * [Integration with shim v2 Container Engines](#integration-with-shim-v2-container-engines)
 * [Remove Kata Containers snap package](#remove-kata-containers-snap-package)

@@ -14,23 +11,10 @@
 Kata Containers can be installed in any Linux distribution that supports
 [snapd](https://docs.snapcraft.io/installing-snapd).

-> NOTE: From Kata Containers 2.x, only the [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2)
-> is supported, note that some container engines (`docker`, `podman`, etc) may not
-> be able to run Kata Containers 2.x.
-
-Kata Containers 1.x is released through the *stable* channel while Kata Containers
-2.x is available in the *candidate* channel.
-
-Run the following command to install **Kata Containers 1.x**:
+Run the following command to install **Kata Containers**:

 ```sh
-$ sudo snap install kata-containers --classic
-```
-
-Run the following command to install **Kata Containers 2.x**:
-
-```sh
-$ sudo snap install kata-containers --candidate --classic
+$ sudo snap install kata-containers --stable --classic
 ```

 ## Configure Kata Containers
@@ -46,55 +30,6 @@ $ sudo cp /snap/kata-containers/current/usr/share/defaults/kata-containers/confi
 $ $EDITOR /etc/kata-containers/configuration.toml
 ```

-## Integration with non-compatible shim v2 Container Engines
-
-At the time of writing this document, `docker` and `podman` **do not support Kata
-Containers 2.x, therefore Kata Containers 1.x must be used instead.**
-
-The path to the runtime provided by the Kata Containers 1.x snap package is
-`/snap/bin/kata-containers.runtime`, it should be used to run Kata Containers 1.x.
-
-### Integration with Docker
-
-`/etc/docker/daemon.json` is the configuration file for `docker`, use the
-following configuration to add a new runtime (`kata`) to `docker`.
-
-```json
-{
-  "runtimes": {
-    "kata": {
-      "path": "/snap/bin/kata-containers.runtime"
-    }
-  }
-}
-```
-
-Once the above configuration has been applied, use the
-following commands to restart `docker` and run Kata Containers 1.x.
-
-```sh
-$ sudo systemctl restart docker
-$ docker run -ti --runtime kata busybox sh
-```
-
-### Integration with Podman
-
-`/usr/share/containers/containers.conf` is the configuration file for `podman`,
-add the following configuration in the `[engine.runtimes]` section.
-
-```toml
-kata = [
-   "/snap/bin/kata-containers.runtime"
-]
-```
-
-Once the above configuration has been applied, use the following command to run
-Kata Containers 1.x with `podman`
-
-```sh
-$ sudo podman run -ti --runtime kata docker.io/library/busybox sh
-```
-
 ## Integration with shim v2 Container Engines

 The Container engine daemon (`cri-o`, `containerd`, etc) needs to be able to find the
@@ -110,8 +45,8 @@ can be used as runtime.

 Read the following documents to know how to run Kata Containers 2.x with `containerd`.

-* [How to use Kata Containers and Containerd](https://github.com/kata-containers/kata-containers/blob/2.0-dev/docs/how-to/containerd-kata.md)
-* [Install Kata Containers with containerd](https://github.com/kata-containers/kata-containers/blob/2.0-dev/docs/install/container-manager/containerd/containerd-install.md)
+* [How to use Kata Containers and Containerd](https://github.com/kata-containers/kata-containers/blob/main/docs/how-to/containerd-kata.md)
+* [Install Kata Containers with containerd](https://github.com/kata-containers/kata-containers/blob/main/docs/install/container-manager/containerd/containerd-install.md)


 ## Remove Kata Containers snap package
--- a/docs/install/ubuntu-installation-guide.md
+++ b/docs/install/ubuntu-installation-guide.md
@@ -1,15 +0,0 @@
-# Install Kata Containers on Ubuntu
-
-1. Install the Kata Containers components with the following commands:
-
-   ```bash
-   $ ARCH=$(arch)
-   $ BRANCH="${BRANCH:-master}"
-   $ sudo sh -c "echo 'deb http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/xUbuntu_$(lsb_release -rs)/ /' > /etc/apt/sources.list.d/kata-containers.list"
-   $ curl -sL  http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/xUbuntu_$(lsb_release -rs)/Release.key | sudo apt-key add -
-   $ sudo -E apt-get update
-   $ sudo -E apt-get -y install kata-runtime kata-proxy kata-shim
-   ```
-
-2. Decide which container manager to use and select the corresponding link that follows:
-   - [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)
--- a/docs/use-cases/using-Intel-QAT-and-kata.md
+++ b/docs/use-cases/using-Intel-QAT-and-kata.md
@@ -1,56 +1,62 @@
 # Table of Contents

-* [Table of Contents](#table-of-contents)
-* [Introduction](#introduction)
-    * [Helpful Links before starting](#helpful-links-before-starting)
-    * [Steps to enable Intel QAT in Kata Containers](#steps-to-enable-intel-qat-in-kata-containers)
-    * [Script variables](#script-variables)
-        * [Set environment variables (Every Reboot)](#set-environment-variables-every-reboot)
-    * [Prepare the Clear Linux Host](#prepare-the-clear-linux-host)
-        * [Identify which PCI Bus the Intel QAT card is on](#identify-which-pci-bus-the-intel-qat-card-is-on)
-        * [Install necessary bundles for Clear Linux](#install-necessary-bundles-for-clear-linux)
-        * [Download Intel QAT drivers](#download-intel-qat-drivers)
-        * [Copy Intel QAT configuration files and enable Virtual Functions](#copy-intel-qat-configuration-files-and-enable-virtual-functions)
-        * [Expose and Bind Intel QAT virtual functions to VFIO-PCI (Every reboot)](#expose-and-bind-intel-qat-virtual-functions-to-vfio-pci-every-reboot)
-        * [Check Intel QAT virtual functions are enabled](#check-intel-qat-virtual-functions-are-enabled)
-    * [Prepare Kata Containers](#prepare-kata-containers)
-        * [Download Kata kernel Source](#download-kata-kernel-source)
-        * [Build Kata kernel](#build-kata-kernel)
-        * [Copy Kata kernel](#copy-kata-kernel)
-        * [Prepare Kata root filesystem](#prepare-kata-root-filesystem)
-        * [Compile Intel QAT drivers for Kata Containers kernel and add to Kata Containers rootfs](#compile-intel-qat-drivers-for-kata-containers-kernel-and-add-to-kata-containers-rootfs)
-        * [Copy Kata rootfs](#copy-kata-rootfs)
-        * [Update Kata configuration to point to custom kernel and rootfs](#update-kata-configuration-to-point-to-custom-kernel-and-rootfs)
-    * [Verify Intel QAT works in a Docker Kata Containers container](#verify-intel-qat-works-in-a-docker-kata-containers-container)
-    * [Build OpenSSL Intel QAT engine container](#build-openssl-intel-qat-engine-container)
-        * [Test Intel QAT in Docker](#test-intel-qat-in-docker)
-        * [Troubleshooting](#troubleshooting)
-    * [Optional Scripts](#optional-scripts)
-        * [Verify Intel QAT card counters are incremented](#verify-intel-qat-card-counters-are-incremented)
+- [Table of Contents](#table-of-contents)
+- [Introduction](#introduction)
+  - [Helpful Links before starting](#helpful-links-before-starting)
+  - [Steps to enable Intel® QAT in Kata Containers](#steps-to-enable-intel-qat-in-kata-containers)
+  - [Script variables](#script-variables)
+    - [Set environment variables (Every Reboot)](#set-environment-variables-every-reboot)
+  - [Prepare the Ubuntu Host](#prepare-the-ubuntu-host)
+    - [Identify which PCI Bus the Intel® QAT card is on](#identify-which-pci-bus-the-intel-qat-card-is-on)
+    - [Install necessary packages for Ubuntu](#install-necessary-packages-for-ubuntu)
+    - [Download Intel® QAT drivers](#download-intel-qat-drivers)
+    - [Copy Intel® QAT configuration files and enable virtual functions](#copy-intel-qat-configuration-files-and-enable-virtual-functions)
+    - [Expose and Bind Intel® QAT virtual functions to VFIO-PCI (Every reboot)](#expose-and-bind-intel-qat-virtual-functions-to-vfio-pci-every-reboot)
+    - [Check Intel® QAT virtual functions are enabled](#check-intel-qat-virtual-functions-are-enabled)
+  - [Prepare Kata Containers](#prepare-kata-containers)
+    - [Download Kata kernel Source](#download-kata-kernel-source)
+    - [Build Kata kernel](#build-kata-kernel)
+    - [Copy Kata kernel](#copy-kata-kernel)
+    - [Prepare Kata root filesystem](#prepare-kata-root-filesystem)
+    - [Compile Intel® QAT drivers for Kata Containers kernel and add to Kata Containers rootfs](#compile-intel-qat-drivers-for-kata-containers-kernel-and-add-to-kata-containers-rootfs)
+    - [Copy Kata rootfs](#copy-kata-rootfs)
+  - [Verify Intel® QAT works in a container](#verify-intel-qat-works-in-a-container)
+    - [Build OpenSSL Intel® QAT engine container](#build-openssl-intel-qat-engine-container)
+    - [Test Intel® QAT with the ctr tool](#test-intel-qat-with-the-ctr-tool)
+    - [Test Intel® QAT in Kubernetes](#test-intel-qat-in-kubernetes)
+    - [Troubleshooting](#troubleshooting)
+  - [Optional Scripts](#optional-scripts)
+    - [Verify Intel® QAT card counters are incremented](#verify-intel-qat-card-counters-are-incremented)

 # Introduction

-Intel QuickAssist Technology (Intel QAT) provides hardware acceleration 
+Intel® QuickAssist Technology (QAT) provides hardware acceleration 
 for security (cryptography) and compression. These instructions cover the 
-steps for [Clear Linux](https://clearlinux.org) but can be adapted to any 
-Linux distribution. Your distribution may already have the Intel QAT 
-drivers, but it is likely they do not contain the necessary user space 
-components. These instructions guide the user on how to download the kernel 
-sources, compile kernel driver modules against those sources, and load them 
-onto the host as well as preparing a specially built Kata Containers kernel 
-and custom Kata Containers rootfs.  
+steps for the latest [Ubuntu LTS release](https://ubuntu.com/download/desktop) 
+which already includes the QAT host driver. These instructions can be adapted to 
+any Linux distribution. These instructions guide the user on how to download 
+the kernel sources, compile kernel driver modules against those sources, and 
+load them onto the host as well as preparing a specially built Kata Containers 
+kernel and custom Kata Containers rootfs.
+
+* Download kernel sources
+* Compile Kata kernel
+* Compile kernel driver modules against those sources
+* Download rootfs
+* Add driver modules to rootfs
+* Build rootfs image 

 ## Helpful Links before starting

-[Intel QAT Engine](https://github.com/intel/QAT_Engine)
+[Intel® QuickAssist Technology at `01.org`](https://01.org/intel-quickassist-technology)

-[Intel QuickAssist Technology at `01.org`](https://01.org/intel-quickassist-technology)
+[Intel® QuickAssist Technology Engine for OpenSSL](https://github.com/intel/QAT_Engine)

 [Intel Device Plugin for Kubernetes](https://github.com/intel/intel-device-plugins-for-kubernetes)

-[Intel QuickAssist Crypto Poll Mode Driver](https://dpdk-docs.readthedocs.io/en/latest/cryptodevs/qat.html)
+[Intel® QuickAssist Technology for Crypto Poll Mode Driver](https://dpdk-docs.readthedocs.io/en/latest/cryptodevs/qat.html)

-## Steps to enable Intel QAT in Kata Containers
+## Steps to enable Intel® QAT in Kata Containers

 There are some steps to complete only once, some steps to complete with every
 reboot, and some steps to complete when the host kernel changes.
@@ -67,91 +73,95 @@ needed to point to updated drivers or different install locations.
 Make sure to check [`01.org`](https://01.org/intel-quickassist-technology) for 
 the latest driver.

-```sh
-$ export QAT_DRIVER_VER=qat1.7.l.4.8.0-00005.tar.gz 
-$ export QAT_DRIVER_URL=https://01.org/sites/default/files/downloads/${QAT_DRIVER_VER}
+```bash
+$ export QAT_DRIVER_VER=qat1.7.l.4.12.0-00011.tar.gz
+$ export QAT_DRIVER_URL=https://downloadmirror.intel.com/30178/eng/${QAT_DRIVER_VER}
 $ export QAT_CONF_LOCATION=~/QAT_conf
 $ export QAT_DOCKERFILE=https://raw.githubusercontent.com/intel/intel-device-plugins-for-kubernetes/master/demo/openssl-qat-engine/Dockerfile
 $ export QAT_SRC=~/src/QAT
 $ export GOPATH=~/src/go
-$ export OSBUILDER=~/src/osbuilder
 $ export KATA_KERNEL_LOCATION=~/kata
 $ export KATA_ROOTFS_LOCATION=~/kata
 ```

-## Prepare the Clear Linux Host
+## Prepare the Ubuntu Host

 The host could be a bare metal instance or a virtual machine. If using a 
 virtual machine, make sure that KVM nesting is enabled. The following 
-instructions reference an Intel QAT. Some of the instructions must be 
-modified if using a different Intel QAT device. You can identify the Intel QAT
-chipset by executing the following.
+instructions reference an Intel® C62X chipset. Some of the instructions must be 
+modified if using a different Intel® QAT device. The Intel® QAT chipset can be
+identified by executing the following.

-### Identify which PCI Bus the Intel QAT card is on
+### Identify which PCI Bus the Intel® QAT card is on

-```sh
+```bash
 $ for i in 0434 0435 37c8 1f18 1f19; do lspci -d 8086:$i; done
 ```

-### Install necessary bundles for Clear Linux
+### Install necessary packages for Ubuntu

-Clear Linux version 30780 (Released August 13, 2019) includes a 
-`linux-firmware-qat` bundle that has the necessary QAT firmware along with a
-functional QAT host driver that works with Kata Containers. 
+These packages are necessary to compile the Kata kernel, Intel® QAT driver, and to
+prepare the rootfs for Kata. [Docker](https://docs.docker.com/engine/install/ubuntu/)
+also needs to be installed to be able to build the rootfs. To test that 
+everything works, a Kubernetes pod is started requesting Intel® QAT resources. For the
+passthrough of the virtual functions, the kernel boot parameters need to include
+`intel_iommu=on`.

-```sh
-$ sudo swupd bundle-add network-basic linux-firmware-qat make c-basic go-basic containers-virt dev-utils devpkg-elfutils devpkg-systemd devpkg-ssl
-$ sudo clr-boot-manager update
-$ sudo systemctl enable --now docker
+```bash
+$ sudo apt update
+$ sudo apt install -y golang-go build-essential python pkg-config zlib1g-dev libudev-dev bison libelf-dev flex libtool automake autotools-dev autoconf bc libpixman-1-dev coreutils libssl-dev
+$ sudo sed -i 's/GRUB_CMDLINE_LINUX_DEFAULT=""/GRUB_CMDLINE_LINUX_DEFAULT="intel_iommu=on"/' /etc/default/grub
+$ sudo update-grub
 $ sudo reboot
 ```

-### Download Intel QAT drivers
+### Download Intel® QAT drivers

-This will download the Intel QAT drivers from [`01.org`](https://01.org/intel-quickassist-technology). 
+This will download the [Intel® QAT drivers](https://01.org/intel-quickassist-technology). 
 Make sure to check the website for the latest version.

-```sh
+```bash
 $ mkdir -p $QAT_SRC
 $ cd $QAT_SRC
 $ curl -L $QAT_DRIVER_URL | tar zx
 ```

-### Copy Intel QAT configuration files and enable Virtual Functions
+### Copy Intel® QAT configuration files and enable virtual functions

-Modify the instructions below as necessary if using a different QAT hardware 
+Modify the instructions below as necessary if using a different Intel® QAT hardware 
 platform. You can learn more about customizing configuration files at the 
-[Intel QAT Engine repository](https://github.com/intel/QAT_Engine/#copy-the-correct-intel-quickassist-technology-driver-config-files)
+[Intel® QAT Engine repository](https://github.com/intel/QAT_Engine/#copy-the-correct-intel-quickassist-technology-driver-config-files)
 This section starts from a base config file and changes the `SSL` section to 
 `SHIM` to support the OpenSSL engine. There are more tweaks that you can make
-depending on the use case and how many Intel QAT engines should be run. You
+depending on the use case and how many Intel® QAT engines should be run. You
 can find more information about how to customize in the 
 [Intel® QuickAssist Technology Software for Linux* - Programmer's Guide.](https://01.org/sites/default/files/downloads/336210qatswprogrammersguiderev006.pdf) 

-> **Note: This section assumes that a QAT `c6xx` platform is used.**
+> **Note: This section assumes that an Intel® QAT `c6xx` platform is used.**

-```sh
+```bash
 $ mkdir -p $QAT_CONF_LOCATION
 $ cp $QAT_SRC/quickassist/utilities/adf_ctl/conf_files/c6xxvf_dev0.conf.vm $QAT_CONF_LOCATION/c6xxvf_dev0.conf
 $ sed -i 's/\[SSL\]/\[SHIM\]/g' $QAT_CONF_LOCATION/c6xxvf_dev0.conf
 ```

-### Expose and Bind Intel QAT virtual functions to VFIO-PCI (Every reboot)
+### Expose and Bind Intel® QAT virtual functions to VFIO-PCI (Every reboot)

 To enable virtual functions, the host OS should have IOMMU groups enabled. In 
-the UEFI Firmware Intel Virtualization Technology for Directed I/O 
-(Intel VT-d) must be enabled. Also, the kernel boot parameter should be 
-`intel_iommu=on` or `intel_iommu=ifgx_off`. The default in Clear Linux currently 
-is `intel_iommu=igfx_off` which should work with the Intel QAT device. The 
-following commands assume you installed an Intel QAT card, IOMMU is on, and
+the UEFI Firmware Intel® Virtualization Technology for Directed I/O 
+(Intel® VT-d) must be enabled. Also, the kernel boot parameter should be 
+`intel_iommu=on` or `intel_iommu=igfx_off`. This should have been set from
+the instructions above. Check the output of `/proc/cmdline` to confirm. The 
+following commands assume you installed an Intel® QAT card, IOMMU is on, and
 VT-d is enabled. The vendor and device ID add to the `VFIO-PCI` driver so that
 each exposed virtual function can be bound to the `VFIO-PCI` driver. Once
 complete, each virtual function passes into a Kata Containers container using
-the PCIe device passthrough feature. For Kubernetes, the Intel device plugin
-for Kubernetes handles the binding of the driver but the VF’s still must be
+the PCIe device passthrough feature. For Kubernetes, the 
+[Intel device plugin](https://github.com/intel/intel-device-plugins-for-kubernetes)
+for Kubernetes handles the binding of the driver, but the VF’s still must be
 enabled.

-```sh
+```bash
 $ sudo modprobe vfio-pci
 $ QAT_PCI_BUS_PF_NUMBERS=$((lspci -d :435 && lspci -d :37c8 && lspci -d :19e2 && lspci -d :6f54) | cut -d ' ' -f 1)
 $ QAT_PCI_BUS_PF_1=$(echo $QAT_PCI_BUS_PF_NUMBERS | cut -d ' ' -f 1)
@@ -160,8 +170,10 @@ $ QAT_PCI_ID_VF=$(cat /sys/bus/pci/devices/0000:${QAT_PCI_BUS_PF_1}/virtfn0/ueve
 $ QAT_VENDOR_AND_ID_VF=$(echo ${QAT_PCI_ID_VF/PCI_ID=} | sed 's/:/ /')
 $ echo $QAT_VENDOR_AND_ID_VF | sudo tee --append /sys/bus/pci/drivers/vfio-pci/new_id
 ```
+
 Loop through all the virtual functions and bind to the VFIO driver
-```sh
+
+```bash
 $ for f in /sys/bus/pci/devices/0000:$QAT_PCI_BUS_PF_1/virtfn*
  do QAT_PCI_BUS_VF=$(basename $(readlink $f))
   echo $QAT_PCI_BUS_VF | sudo tee --append /sys/bus/pci/drivers/c6xxvf/unbind
@@ -169,22 +181,23 @@ $ for f in /sys/bus/pci/devices/0000:$QAT_PCI_BUS_PF_1/virtfn*
  done
 ```

-### Check Intel QAT virtual functions are enabled
+### Check Intel® QAT virtual functions are enabled

 If the following command returns empty, then the virtual functions are not 
 properly enabled. This command checks the enumerated device IDs for just the 
-virtual functions. Using the Intel QAT as an example, the physical device ID 
+virtual functions. Using the Intel® QAT as an example, the physical device ID 
 is `37c8` and virtual function device ID is `37c9`. The following command checks 
-if VF's are enabled for any of the currently known Intel QAT device ID's. The
+if VF's are enabled for any of the currently known Intel® QAT device ID's. The
 following `ls` command should show the 16 VF's bound to `VFIO-PCI`.

-```sh
+```bash
 $ for i in 0442 0443 37c9 19e3; do lspci -d 8086:$i; done
 ```

 Another way to check is to see what PCI devices that `VFIO-PCI` is mapped to.
 It should match the device ID's of the VF's.
-```sh
+
+```bash
 $ ls -la /sys/bus/pci/drivers/vfio-pci
 ```

@@ -201,16 +214,16 @@ There are some patches that must be installed as well, which the
 `build-kernel.sh` script should automatically apply. If you are using a
 different kernel version, then you might need to manually apply them. Since
 the Kata Containers kernel has a minimal set of kernel flags set, you must
-create a QAT kernel fragment with the necessary `CONFIG_CRYPTO_*` options set.
+create an Intel® QAT kernel fragment with the necessary `CONFIG_CRYPTO_*` options set.
 Update the config to set some of the `CRYPTO` flags to enabled. This might
-change with different kernel versions. We tested the following instructions
-with kernel `v4.19.28-41`.
+change with different kernel versions. The following instructions were tested
+with kernel `v5.4.0-64-generic`.

-```sh
+```bash
 $ mkdir -p $GOPATH
 $ cd $GOPATH
-$ go get -v github.com/kata-containers/packaging
-$ cat << EOF > $GOPATH/src/github.com/kata-containers/packaging/kernel/configs/fragments/common/qat.conf
+$ go get -v github.com/kata-containers/kata-containers
+$ cat << EOF > $GOPATH/src/github.com/kata-containers/kata-containers/tools/packaging/kernel/configs/fragments/common/qat.conf
 CONFIG_PCIEAER=y
 CONFIG_UIO=y
 CONFIG_CRYPTO_HW=y
@@ -221,61 +234,70 @@ CONFIG_MODULE_SIG=y
 CONFIG_CRYPTO_AUTHENC=y
 CONFIG_CRYPTO_DH=y
 EOF
-$ $GOPATH/src/github.com/kata-containers/packaging/kernel/build-kernel.sh setup
+$ $GOPATH/src/github.com/kata-containers/kata-containers/tools/packaging/kernel/build-kernel.sh setup
 ```

 ### Build Kata kernel

-```sh
-$ export LINUX_VER=$(ls -d kata*)
+```bash
+$ cd $GOPATH
+$ export LINUX_VER=$(ls -d kata-linux-*)
 $ sed -i 's/EXTRAVERSION =/EXTRAVERSION = .qat.container/' $LINUX_VER/Makefile
-$ $GOPATH/src/github.com/kata-containers/packaging/kernel/build-kernel.sh build
+$ $GOPATH/src/github.com/kata-containers/kata-containers/tools/packaging/kernel/build-kernel.sh build
 ```

-
 ### Copy Kata kernel

-```sh
+```bash
+$ export KATA_KERNEL_NAME=vmlinux-${LINUX_VER}_qat
 $ mkdir -p $KATA_KERNEL_LOCATION
-$ cp $LINUX_VER/arch/x86/boot/bzImage $KATA_KERNEL_LOCATION/vmlinuz-${LINUX_VER}_qat
+$ cp ${GOPATH}/${LINUX_VER}/vmlinux ${KATA_KERNEL_LOCATION}/${KATA_KERNEL_NAME}
 ```

 ### Prepare Kata root filesystem

 These instructions build upon the OS builder instructions located in the 
-[Developer Guide](../Developer-Guide.md). The following instructions use Clear
-Linux (Kata Containers default) as the root filesystem with systemd as the 
-init and will add in the `kmod` binary, which is not a standard binary in a 
-Kata rootfs image. The `kmod` binary is necessary to load the QAT kernel 
-modules when the virtual machine rootfs boots. You should install Docker on
-your system before running the following commands. If you need to use a custom 
-`kata-agent`, then refer to the previous link on how to add it in.
+[Developer Guide](../Developer-Guide.md). At this point it is recommended that
+[Docker](https://docs.docker.com/engine/install/ubuntu/) is installed first, and
+then [Kata-deploy](https://github.com/kata-containers/kata-containers/tree/main/tools/packaging/kata-deploy)
+is used to install Kata. This will make sure that the correct `agent` version 
+is installed into the rootfs in the steps below.

-```sh
-$ mkdir -p $OSBUILDER
-$ cd $OSBUILDER
-$ git clone https://github.com/kata-containers/osbuilder.git
-$ export ROOTFS_DIR=${OSBUILDER}/osbuilder/rootfs-builder/rootfs
+The following instructions use Debian as the root filesystem with systemd as 
+the init and will add in the `kmod` binary, which is not a standard binary in 
+a Kata rootfs image. The `kmod` binary is necessary to load the Intel® QAT 
+kernel modules when the virtual machine rootfs boots. 
+
+```bash
+$ export OSBUILDER=$GOPATH/src/github.com/kata-containers/kata-containers/tools/osbuilder
+$ export ROOTFS_DIR=${OSBUILDER}/rootfs-builder/rootfs
 $ export EXTRA_PKGS='kmod'
 ```
+
 Make sure that the `kata-agent` version matches the installed `kata-runtime`
-version.
-```sh
+version. Also make sure the `kata-runtime` install location is in your `PATH` 
+variable. The following `AGENT_VERSION` can be set manually to match
+the `kata-runtime` version if the following commands don't work.
+
+```bash
+$ export PATH=$PATH:/opt/kata/bin
+$ cd $GOPATH
 $ export AGENT_VERSION=$(kata-runtime version | head -n 1 | grep -o "[0-9.]\+")
-$ cd ${OSBUILDER}/osbuilder/rootfs-builder
+$ cd ${OSBUILDER}/rootfs-builder
 $ sudo rm -rf ${ROOTFS_DIR}
-$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true SECCOMP=no ./rootfs.sh clearlinux'
+$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true SECCOMP=no ./rootfs.sh debian'
 ```

-### Compile Intel QAT drivers for Kata Containers kernel and add to Kata Containers rootfs
+### Compile Intel® QAT drivers for Kata Containers kernel and add to Kata Containers rootfs

 After the Kata Containers kernel builds with the proper configuration flags, 
-you must build the Intel QAT drivers against that Kata Containers kernel
+you must build the Intel® QAT drivers against that Kata Containers kernel
 version in a similar way they were previously built for the host OS. You must 
 set the `KERNEL_SOURCE_ROOT` variable to the Kata Containers kernel source 
-directory and build the Intel QAT drivers again.
+directory and build the Intel® QAT drivers again. The `make` command will
+install the Intel® QAT modules into the Kata rootfs.

-```sh
+```bash
 $ cd $GOPATH
 $ export LINUX_VER=$(ls -d kata*)
 $ export KERNEL_MAJOR_VERSION=$(awk '/^VERSION =/{print $NF}' $GOPATH/$LINUX_VER/Makefile)
@@ -284,16 +306,18 @@ $ export KERNEL_SUBLEVEL=$(awk '/^SUBLEVEL =/{print $NF}' $GOPATH/$LINUX_VER/Mak
 $ export KERNEL_EXTRAVERSION=$(awk '/^EXTRAVERSION =/{print $NF}' $GOPATH/$LINUX_VER/Makefile)
 $ export KERNEL_ROOTFS_DIR=${KERNEL_MAJOR_VERSION}.${KERNEL_PATHLEVEL}.${KERNEL_SUBLEVEL}${KERNEL_EXTRAVERSION}
 $ cd $QAT_SRC
-$ KERNEL_SOURCE_ROOT=$GOPATH/$LINUX_VER ./configure --disable-qat-lkcf --enable-icp-sriov=guest
+$ KERNEL_SOURCE_ROOT=$GOPATH/$LINUX_VER ./configure --enable-icp-sriov=guest
 $ sudo -E make all -j$(nproc)
 $ sudo -E make INSTALL_MOD_PATH=$ROOTFS_DIR qat-driver-install -j$(nproc)
 ```
+
 The `usdm_drv` module also needs to be copied into the rootfs modules path and
 `depmod` should be run. 
-```sh
-$ sudo cp $QAT_SRC/build/usdm_drv.ko $ROOTFS_DIR/usr/lib/modules/${KERNEL_ROOTFS_DIR}/updates/drivers  
+
+```bash
+$ sudo cp $QAT_SRC/build/usdm_drv.ko $ROOTFS_DIR/lib/modules/${KERNEL_ROOTFS_DIR}/updates/drivers  
 $ sudo depmod -a -b ${ROOTFS_DIR} ${KERNEL_ROOTFS_DIR}
-$ cd ${OSBUILDER}/osbuilder/image-builder
+$ cd ${OSBUILDER}/image-builder
 $ script -fec 'sudo -E USE_DOCKER=true ./image_builder.sh ${ROOTFS_DIR}'
 ```

@@ -302,84 +326,225 @@ $ script -fec 'sudo -E USE_DOCKER=true ./image_builder.sh ${ROOTFS_DIR}'

 ### Copy Kata rootfs

-```sh
+```bash
 $ mkdir -p $KATA_ROOTFS_LOCATION
-$ cp ${OSBUILDER}/osbuilder/image-builder/kata-containers.img $KATA_ROOTFS_LOCATION
+$ cp ${OSBUILDER}/image-builder/kata-containers.img $KATA_ROOTFS_LOCATION
 ```

-### Update Kata configuration to point to custom kernel and rootfs
+## Verify Intel® QAT works in a container

-You must update the `configuration.toml` for Kata Containers to point to the 
-custom kernel, custom rootfs, and to specify which modules to load when the 
-virtual machine is booted when a container is run. The following example
-assumes you installed an Intel QAT, and you need to load those modules.
-
-```sh
-$ sudo mkdir -p /etc/kata-containers
-$ sudo cp /usr/share/defaults/kata-containers/configuration-qemu.toml /etc/kata-containers/configuration.toml
-$ sudo sed -i "s|kernel_params = \"\"|kernel_params = \"modules-load=usdm_drv,qat_c62xvf\"|g" /etc/kata-containers/configuration.toml
-$ sudo sed -i "s|\/usr\/share\/kata-containers\/kata-containers.img|${KATA_KERNEL_LOCATION}\/kata-containers.img|g" /etc/kata-containers/configuration.toml
-$ sudo sed -i "s|\/usr\/share\/kata-containers\/vmlinuz.container|${KATA_ROOTFS_LOCATION}\/vmlinuz-${LINUX_VER}_qat|g" /etc/kata-containers/configuration.toml
-```
-
-## Verify Intel QAT works in a Docker Kata Containers container
-
-The following instructions leverage an OpenSSL Dockerfile that builds the 
-Intel QAT engine to allow OpenSSL to offload crypto functions. It is a 
-convenient way to test that VFIO device passthrough for the Intel QAT VF’s are
+The following instructions use an OpenSSL Dockerfile that builds the 
+Intel® QAT engine to allow OpenSSL to offload crypto functions. It is a 
+convenient way to test that VFIO device passthrough for the Intel® QAT VFs is
 working properly with the Kata Containers VM.

-## Build OpenSSL Intel QAT engine container
+### Build OpenSSL Intel® QAT engine container

-Use the OpenSSL Intel QAT [Dockerfile](https://github.com/intel/intel-device-plugins-for-kubernetes/tree/master/demo/openssl-qat-engine) 
+Use the OpenSSL Intel® QAT [Dockerfile](https://github.com/intel/intel-device-plugins-for-kubernetes/tree/master/demo/openssl-qat-engine) 
 to build a container image with an optimized OpenSSL engine for 
-Intel QAT. Using `docker build` with the Kata Containers runtime can sometimes
-have issues. Therefore, we recommended you change the default runtime to
-`runc` before doing a build. Instructions for this are below.
+Intel® QAT. Using `docker build` with the Kata Containers runtime can sometimes
+have issues. Therefore, make sure that `runc` is the default Docker container 
+runtime.

-```sh
+```bash
 $ cd $QAT_SRC
 $ curl -O $QAT_DOCKERFILE
-$ sudo sed -i 's/kata-runtime/runc/g' /etc/systemd/system/docker.service.d/50-runtime.conf
-$ sudo systemctl daemon-reload && sudo systemctl restart docker
 $ sudo docker build -t openssl-qat-engine .
 ```

-> **Note: The Intel QAT driver version in this container might not match the 
-> Intel QAT driver compiled and loaded on the host when compiling.**
+> **Note: The Intel® QAT driver version in this container might not match the 
+> Intel® QAT driver compiled and loaded on the host when compiling.**

-### Test Intel QAT in Docker
+### Test Intel® QAT with the ctr tool

-The host should already be setup with 16 virtual functions of the Intel QAT 
-card bound to `VFIO-PCI`. Verify this by looking in `/dev/vfio` for a listing
-of devices. Replace the number 90 with one of the VF’s exposed in `/dev/vfio`.
-It might require you to add an `IPC_LOCK` capability to your Docker runtime
-depending on which rootfs you use.
+The `ctr` tool can be used to interact with the containerd daemon. It may be 
+more convenient to use this tool to verify the kernel and image instead of
+setting up a Kubernetes cluster. The correct Kata runtimes need to be added
+to the containerd `config.toml`. Below is a sample snippet that can be added
+to allow QEMU and Cloud Hypervisor (CLH) to work with `ctr`.

-```sh
-$ sudo docker run -it --runtime=kata-runtime --cap-add=IPC_LOCK --cap-add=SYS_ADMIN --device=/dev/vfio/90 -v /dev:/dev -v ${QAT_CONF_LOCATION}:/etc openssl-qat-engine bash
+```
+[plugins.cri.containerd.runtimes.kata-qemu]
+  runtime_type = "io.containerd.kata-qemu.v2"
+  privileged_without_host_devices = true
+  pod_annotations = ["io.katacontainers.*"]
+  [plugins.cri.containerd.runtimes.kata-qemu.options]
+    ConfigPath = "/opt/kata/share/defaults/kata-containers/configuration-qemu.toml"
+[plugins.cri.containerd.runtimes.kata-clh]
+  runtime_type = "io.containerd.kata-clh.v2"
+  privileged_without_host_devices = true
+  pod_annotations = ["io.katacontainers.*"]
+  [plugins.cri.containerd.runtimes.kata-clh.options]
+    ConfigPath = "/opt/kata/share/defaults/kata-containers/configuration-clh.toml"
 ```

-Below are some commands to run in the container image to verify Intel QAT is 
+In addition, containerd expects the shim binaries to be in `/usr/local/bin`, so
+add these small wrapper scripts, which redirect to the Kata shim using either
+the QEMU or the Cloud Hypervisor configuration.
+
+```bash
+$ echo '#!/bin/bash' | sudo tee /usr/local/bin/containerd-shim-kata-qemu-v2
+$ echo 'KATA_CONF_FILE=/opt/kata/share/defaults/kata-containers/configuration-qemu.toml /opt/kata/bin/containerd-shim-kata-v2 $@' | sudo tee -a /usr/local/bin/containerd-shim-kata-qemu-v2
+$ sudo chmod +x /usr/local/bin/containerd-shim-kata-qemu-v2
+$ echo '#!/bin/bash' | sudo tee /usr/local/bin/containerd-shim-kata-clh-v2
+$ echo 'KATA_CONF_FILE=/opt/kata/share/defaults/kata-containers/configuration-clh.toml /opt/kata/bin/containerd-shim-kata-v2 $@' | sudo tee -a /usr/local/bin/containerd-shim-kata-clh-v2
+$ sudo chmod +x /usr/local/bin/containerd-shim-kata-clh-v2
+```
+
+After the OpenSSL image is built and imported into containerd, an Intel® QAT 
+virtual function exposed in the step above can be added to the `ctr` command. 
+Make sure to change the `/dev/vfio` number to one that actually exists on the 
+host system. When using the `ctr` tool, the `configuration.toml` for Kata needs 
+to point to the custom Kata kernel and rootfs built above and the Intel® QAT 
+modules in the Kata rootfs need to load at boot. The following steps assume that 
+`kata-deploy` was used to install Kata and QEMU is being tested. If using a 
+different hypervisor, different install method for Kata, or a different 
+Intel® QAT chipset then the command will need to be modified. 
+
+> **Note: The following was tested with 
+[containerd v1.3.9](https://github.com/containerd/containerd/releases/tag/v1.3.9).**
+
+```bash
+$ config_file="/opt/kata/share/defaults/kata-containers/configuration-qemu.toml"
+$ sudo sed -i "/kernel =/c kernel = "\"${KATA_KERNEL_LOCATION}/${KATA_KERNEL_NAME}\""" $config_file
+$ sudo sed -i "/image =/c image = "\"${KATA_ROOTFS_LOCATION}/kata-containers.img\""" $config_file
+$ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 modules-load=usdm_drv,qat_c62xvf"/g' $config_file 
+$ sudo docker save -o openssl-qat-engine.tar openssl-qat-engine:latest
+$ sudo ctr images import openssl-qat-engine.tar
+$ sudo ctr run --runtime io.containerd.run.kata-qemu.v2 --privileged -t --rm --device=/dev/vfio/180 --mount type=bind,src=/dev,dst=/dev,options=rbind:rw --mount type=bind,src=${QAT_CONF_LOCATION}/c6xxvf_dev0.conf,dst=/etc/c6xxvf_dev0.conf,options=rbind:rw  docker.io/library/openssl-qat-engine:latest bash
+```
+
+Below are some commands to run in the container image to verify Intel® QAT is 
 working

 ```sh
-bash-5.0# cat /proc/modules
-bash-5.0# adf_ctl restart
-bash-5.0# adf_ctl status
-bash-5.0# openssl engine -c -t qat
+root@67561dc2757a/ # cat /proc/modules
+qat_c62xvf 16384 - - Live 0xffffffffc00d9000 (OE)
+usdm_drv 86016 - - Live 0xffffffffc00e8000 (OE)
+intel_qat 249856 - - Live 0xffffffffc009b000 (OE)
+
+root@67561dc2757a/ # adf_ctl restart
+Restarting all devices.
+Processing /etc/c6xxvf_dev0.conf
+
+root@67561dc2757a/ # adf_ctl status
+Checking status of all devices.
+There is 1 QAT acceleration device(s) in the system:
+ qat_dev0 - type: c6xxvf,  inst_id: 0,  node_id: 0,  bsf: 0000:01:01.0,  #accel: 1 #engines: 1 state: up
+
+root@67561dc2757a/ # openssl engine -c -t qat-hw
+(qat-hw) Reference implementation of QAT crypto engine v0.6.1
+ [RSA, DSA, DH, AES-128-CBC-HMAC-SHA1, AES-128-CBC-HMAC-SHA256, AES-256-CBC-HMAC-SHA1, AES-256-CBC-HMAC-SHA256, TLS1-PRF, HKDF, X25519, X448]
+     [ available ]
 ```

-Test with Intel QAT card acceleration
+### Test Intel® QAT in Kubernetes

-```sh
-bash-5.0# openssl speed -engine qat -elapsed -async_jobs 72 rsa2048 
+Start a Kubernetes cluster with containerd as the CRI. The host should 
+already be set up with 16 virtual functions of the Intel® QAT card bound to 
+`VFIO-PCI`. Verify this by looking in `/dev/vfio` for a listing of devices. 
+You might need to disable Docker before initializing Kubernetes. Be aware 
+that the OpenSSL container image built above will need to be exported from
+Docker and imported into containerd.
+
+If Kata is installed through [`kata-deploy`](https://github.com/kata-containers/kata-containers/blob/stable-2.0/tools/packaging/kata-deploy/README.md)
+there will be multiple `configuration.toml` files associated with different 
+hypervisors. Rather than add in the custom Kata kernel, Kata rootfs, and 
+kernel modules to each `configuration.toml` as the default, instead use
+[annotations](https://github.com/kata-containers/kata-containers/blob/stable-2.0/docs/how-to/how-to-load-kernel-modules-with-kata.md)
+in the Kubernetes YAML file to tell Kata which kernel and rootfs to use. The 
+easy way to do this is to use `kata-deploy` which will install the Kata binaries
+to `/opt` and properly configure the `/etc/containerd/config.toml` with annotation 
+support. However, the `configuration.toml` needs to enable support for
+annotations as well. The following configures both QEMU and Cloud Hypervisor
+`configuration.toml` files that are currently available with Kata Containers 
+versions 2.0 and higher.
+
+```bash
+$ sudo sed -i 's/enable_annotations\s=\s\[\]/enable_annotations = [".*"]/' /opt/kata/share/defaults/kata-containers/configuration-qemu.toml
+$ sudo sed -i 's/enable_annotations\s=\s\[\]/enable_annotations = [".*"]/' /opt/kata/share/defaults/kata-containers/configuration-clh.toml
 ```

-Test with CPU acceleration
+Export the OpenSSL image from Docker and import into containerd.
+
+```bash
+$ sudo docker save -o openssl-qat-engine.tar openssl-qat-engine:latest
+$ sudo ctr -n=k8s.io images import openssl-qat-engine.tar
+```
+
+The [Intel® QAT Plugin](https://github.com/intel/intel-device-plugins-for-kubernetes/blob/master/cmd/qat_plugin/README.md)
+needs to be started so that the virtual functions can be discovered and
+used by Kubernetes. 
+
+The following YAML file can be used to start a Kata container with Intel® QAT
+support. If Kata is installed with `kata-deploy`, then the containerd 
+`config.toml` should have all of the Kata runtime classes already 
+populated and annotations supported. To use an Intel® QAT virtual function, the 
+Intel® QAT plugin needs to be started after the VFs are bound to `VFIO-PCI` as 
+described [above](#expose-and-bind-intel-qat-virtual-functions-to-vfio-pci-every-reboot). 
+Edit the following to point to the correct Kata kernel and rootfs location 
+built with Intel® QAT support.
+
+```bash
+$ cat << EOF > kata-openssl-qat.yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  name: kata-openssl-qat
+  labels:
+    app: kata-openssl-qat
+  annotations:
+    io.katacontainers.config.hypervisor.kernel: "$KATA_KERNEL_LOCATION/$KATA_KERNEL_NAME"
+    io.katacontainers.config.hypervisor.image: "$KATA_ROOTFS_LOCATION/kata-containers.img"
+    io.katacontainers.config.hypervisor.kernel_params: "modules-load=usdm_drv,qat_c62xvf"
+spec:
+  runtimeClassName: kata-qemu
+  containers:
+  - name: kata-openssl-qat
+    image: docker.io/library/openssl-qat-engine:latest
+    imagePullPolicy: IfNotPresent
+    resources:
+      limits:
+        qat.intel.com/generic: 1
+        cpu: 1
+    securityContext:
+      capabilities:
+        add: ["IPC_LOCK", "SYS_ADMIN"]
+    volumeMounts:
+      - mountPath: /etc/c6xxvf_dev0.conf
+        name: etc-mount
+      - mountPath: /dev
+        name: dev-mount
+  volumes:
+    - name: dev-mount
+      hostPath:
+        path: /dev
+    - name: etc-mount
+      hostPath:
+        path: $QAT_CONF_LOCATION/c6xxvf_dev0.conf
+EOF
+```
+
+Use `kubectl` to start the pod. Verify that Intel® QAT card acceleration is 
+working with the Intel® QAT engine.
+```bash
+$ kubectl apply -f kata-openssl-qat.yaml
+```

 ```sh
-bash-5.0# openssl speed -elapsed rsa2048
+$ kubectl exec -it kata-openssl-qat -- adf_ctl restart
+Restarting all devices.
+Processing /etc/c6xxvf_dev0.conf
+
+$ kubectl exec -it kata-openssl-qat -- adf_ctl status
+Checking status of all devices.
+There is 1 QAT acceleration device(s) in the system:
+ qat_dev0 - type: c6xxvf,  inst_id: 0,  node_id: 0,  bsf: 0000:01:01.0,  #accel: 1 #engines: 1 state: up
+
+$ kubectl exec -it kata-openssl-qat -- openssl engine -c -t qat-hw
+(qat-hw) Reference implementation of QAT crypto engine v0.6.1
+ [RSA, DSA, DH, AES-128-CBC-HMAC-SHA1, AES-128-CBC-HMAC-SHA256, AES-256-CBC-HMAC-SHA1, AES-256-CBC-HMAC-SHA256, TLS1-PRF, HKDF, X25519, X448]
+     [ available ]
 ```

 ### Troubleshooting
@@ -412,9 +577,9 @@ c6xxvf_dev10.conf  c6xxvf_dev13.conf  c6xxvf_dev2.conf   c6xxvf_dev5.conf c6xxvf
 ```

 * Check `dmesg` inside the container to see if there are any issues with the 
-Intel QAT driver.
+Intel® QAT driver.

-* If there are issues building the OpenSSL Intel QAT container image, then 
+* If there are issues building the OpenSSL Intel® QAT container image, then 
 check to make sure that runc is the default runtime for building container.

 ```sh
@@ -425,17 +590,18 @@ Environment="DOCKER_DEFAULT_RUNTIME=--default-runtime runc"

 ## Optional Scripts

-### Verify Intel QAT card counters are incremented
+### Verify Intel® QAT card counters are incremented

-Use the `lspci` command to figure out which PCI bus the Intel QAT accelerators
-are on. The counters will increase when the accelerator is actively being
-used. To verify QAT is actively accelerating the containerized application,
-use the following instructions to check if any of the counters are
-incrementing. You will have to change the PCI device ID to match your system.
+To check the built-in firmware counters, the Intel® QAT driver has to be compiled 
+and installed on the host; you cannot rely on the built-in host driver. The 
+counters will increase when the accelerator is actively being used. To verify 
+Intel® QAT is actively accelerating the containerized application, use the 
+following instructions to check if any of the counters increment. Make 
+sure to change the PCI Device ID to match what's in the system.

-```sh
+```bash
 $ for i in 0434 0435 37c8 1f18 1f19; do lspci -d 8086:$i; done
 $ sudo watch cat /sys/kernel/debug/qat_c6xx_0000\:b1\:00.0/fw_counters
 $ sudo watch cat /sys/kernel/debug/qat_c6xx_0000\:b3\:00.0/fw_counters
 $ sudo watch cat /sys/kernel/debug/qat_c6xx_0000\:b5\:00.0/fw_counters
-```
+```
--- a/docs/use-cases/zun_kata.md
+++ b/docs/use-cases/zun_kata.md
@@ -10,9 +10,6 @@ Currently, the instructions are based on the following links:

 - https://docs.openstack.org/zun/latest/admin/clear-containers.html

- ../install/ubuntu-installation-guide.md
-
-
 ## Install Git to use with DevStack

 ```sh
@@ -54,7 +51,7 @@ $ zun delete test

 ## Install Kata Containers

-Follow [these instructions](../install/ubuntu-installation-guide.md)
+Follow [these instructions](../install/README.md)
 to install the Kata Containers components.

 ## Update Docker with new Kata Containers runtime
--- a/pkg/logging/src/lib.rs
+++ b/pkg/logging/src/lib.rs
@@ -21,7 +21,12 @@ const LOG_LEVELS: &[(&str, slog::Level)] = &[
 ];

 // XXX: 'writer' param used to make testing possible.
-pub fn create_logger<W>(name: &str, source: &str, level: slog::Level, writer: W) -> slog::Logger
+pub fn create_logger<W>(
+    name: &str,
+    source: &str,
+    level: slog::Level,
+    writer: W,
+) -> (slog::Logger, slog_async::AsyncGuard)
 where
    W: Write + Send + Sync + 'static,
 {
@@ -37,17 +42,21 @@ where
    let filter_drain = RuntimeLevelFilter::new(unique_drain, level).fuse();

    // Ensure the logger is thread-safe
-    let async_drain = slog_async::Async::new(filter_drain).build().fuse();
+    let (async_drain, guard) = slog_async::Async::new(filter_drain)
+        .thread_name("slog-async-logger".into())
+        .build_with_guard();

    // Add some "standard" fields
-    slog::Logger::root(
+    let logger = slog::Logger::root(
        async_drain.fuse(),
        o!("version" => env!("CARGO_PKG_VERSION"),
            "subsystem" => "root",
            "pid" => process::id().to_string(),
            "name" => name.to_string(),
            "source" => source.to_string()),
-    )
+    );
+
+    (logger, guard)
 }

 pub fn get_log_levels() -> Vec<&'static str> {
--- a/snap/snapcraft.yaml
+++ b/snap/snapcraft.yaml
@@ -69,7 +69,7 @@ parts:
      tar -xf ${tarfile} --strip-components=1

  image:
-    after: [godeps]
+    after: [godeps, qemu, kernel]
    plugin: nil
    build-packages:
      - docker.io
@@ -89,6 +89,8 @@ parts:
      export GOROOT=${SNAPCRAFT_STAGE}
      export PATH="${GOROOT}/bin:${PATH}"

+      http_proxy=${http_proxy:-""}
+      https_proxy=${https_proxy:-""}
      if [ -n "$http_proxy" ]; then
        echo "Setting proxy $http_proxy"
        sudo -E systemctl set-environment http_proxy=$http_proxy || true
@@ -169,7 +171,7 @@ parts:
      fi

  kernel:
-    after: [godeps, image]
+    after: [godeps]
    plugin: nil
    build-packages:
      - libelf-dev
@@ -183,8 +185,8 @@ parts:

      cd ${kata_dir}/tools/packaging/kernel

-      # Say 'no' to everithing, fix issues with incomplete .config files
-      yes "n" | ./build-kernel.sh setup
+      # Setup and build kernel
+      ./build-kernel.sh -d setup
      kernel_dir_prefix="kata-linux-"
      cd ${kernel_dir_prefix}*
      version=$(basename ${PWD} | sed 's|'"${kernel_dir_prefix}"'||' | cut -d- -f1)
@@ -206,7 +208,7 @@ parts:

  qemu:
    plugin: make
-    after: [godeps, runtime]
+    after: [godeps]
    build-packages:
      - gcc
      - python3
@@ -226,6 +228,7 @@ parts:
      - libffi-dev
      - libmount-dev
      - libselinux1-dev
+      - ninja-build
    override-build: |
      yq=${SNAPCRAFT_STAGE}/yq
      export GOPATH=${SNAPCRAFT_STAGE}/gopath
@@ -242,10 +245,11 @@ parts:
        ;;

        *)
-          branch="$(${yq} r ${versions_file} assets.hypervisor.qemu.tag)"
+          branch="$(${yq} r ${versions_file} assets.hypervisor.qemu.version)"
          url="$(${yq} r ${versions_file} assets.hypervisor.qemu.url)"
          commit=""
          patches_dir="${kata_dir}/tools/packaging/qemu/patches/$(echo ${branch} | sed -e 's/.[[:digit:]]*$//' -e 's/^v//').x"
+          patches_version_dir="${kata_dir}/tools/packaging/qemu/patches/tag_patches/${branch}"
        ;;
      esac

@@ -258,31 +262,23 @@ parts:
      [ -n "$(ls -A ui/keycodemapdb)" ] || git clone https://github.com/qemu/keycodemapdb ui/keycodemapdb/
      [ -n "$(ls -A capstone)" ] || git clone https://github.com/qemu/capstone capstone

-      # Apply patches
-      for patch in ${patches_dir}/*.patch; do
-        echo "Applying $(basename "$patch") ..."
-        patch \
-          --batch \
-          --forward \
-          --strip 1 \
-          --input "$patch"
-      done
+      # Apply branch patches
+      ${kata_dir}/tools/packaging/scripts/apply_patches.sh "${patches_dir}"
+      ${kata_dir}/tools/packaging/scripts/apply_patches.sh "${patches_version_dir}"

      # Only x86_64 supports libpmem
      [ "$(uname -m)" = "x86_64" ] && sudo apt-get --no-install-recommends install -y apt-utils ca-certificates libpmem-dev libseccomp-dev

      configure_hypervisor=${kata_dir}/tools/packaging/scripts/configure-hypervisor.sh
      chmod +x ${configure_hypervisor}
-      # static build
-      echo "$(${configure_hypervisor} -s qemu) \
-        --disable-rbd
-        --prefix=/snap/${SNAPCRAFT_PROJECT_NAME}/current/usr \
-        --datadir=/snap/${SNAPCRAFT_PROJECT_NAME}/current/usr/share \
-        --libexecdir=/snap/${SNAPCRAFT_PROJECT_NAME}/current/usr/libexec/qemu" \
+      # static build. The --prefix, --libdir, --libexecdir, --datadir arguments are
+      # based on PREFIX and set by configure-hypervisor.sh
+      echo "$(PREFIX=/snap/${SNAPCRAFT_PROJECT_NAME}/current/usr ${configure_hypervisor} -s kata-qemu) \
+        --disable-rbd " \
        | xargs ./configure

      # Copy QEMU configurations (Kconfigs)
-      cp -a ${kata_dir}/tools/packaging/qemu/default-configs/* default-configs/
+      cp -a ${kata_dir}/tools/packaging/qemu/default-configs/* default-configs/devices/

      # build and install
      make -j $(($(nproc)-1))
@@ -293,7 +289,6 @@ parts:
      - -usr/bin/qemu-pr-helper
      - -usr/bin/virtfs-proxy-helper
      - -usr/include/
-      - -usr/libexec/
      - -usr/share/applications/
      - -usr/share/icons/
      - -usr/var/
--- a/src/agent/.gitignore
+++ b/src/agent/.gitignore
@@ -0,0 +1 @@
+tarpaulin-report.html
--- a/src/agent/.rustfmt.toml
+++ b/src/agent/.rustfmt.toml
@@ -0,0 +1 @@
+edition = "2018"
--- a/src/agent/Cargo.lock
+++ b/src/agent/Cargo.lock
--- a/src/agent/Cargo.toml
+++ b/src/agent/Cargo.toml
@@ -9,19 +9,29 @@ oci = { path = "oci" }
 logging = { path = "../../pkg/logging" }
 rustjail = { path = "rustjail" }
 protocols = { path = "protocols" }
-netlink = { path = "netlink", features = ["with-log", "with-agent-handler"] }
 lazy_static = "1.3.0"
-ttrpc = "0.3.0"
+ttrpc = { version = "0.5.0", features = ["async", "protobuf-codec"], default-features = false }
 protobuf = "=2.14.0"
 libc = "0.2.58"
 nix = "0.17.0"
 prctl = "1.0.0"
 serde_json = "1.0.39"
-signal-hook = "0.1.9"
 scan_fmt = "0.2.3"
 scopeguard = "1.0.0"
 regex = "1"

+async-trait = "0.1.42"
+tokio = { version = "1.2.0", features = ["rt", "rt-multi-thread", "sync", "macros", "io-util", "time", "signal", "io-std", "process", "fs"] }
+futures = "0.3.12"
+netlink-sys = { version = "0.6.0", features = ["tokio_socket",]}
+tokio-vsock = "0.3.1"
+# Because the author has no time to maintain the crate, we switched the dependency to GitHub.
+# Once a new version is released on crates.io, we will switch it back.
+# https://github.com/little-dude/netlink/issues/161
+rtnetlink = { git = "https://github.com/little-dude/netlink", rev = "a9367bc4700496ddebc088110c28f40962923326" }
+netlink-packet-utils = "0.4.0"
+ipnetwork = "0.17.0"
+
 # slog:
 # - Dynamic keys required to allow HashMap keys to be slog::Serialized.
 # - The 'max_*' features allow changing the log level at runtime
@@ -38,11 +48,10 @@ tempfile = "3.1.0"
 prometheus = { version = "0.9.0", features = ["process"] }
 procfs = "0.7.9"
 anyhow = "1.0.32"
-cgroups = { package = "cgroups-rs", version = "0.2.0" }
+cgroups = { package = "cgroups-rs", version = "0.2.5" }

 [workspace]
 members = [
-    "netlink",
    "oci",
    "protocols",
    "rustjail",
--- a/src/agent/Makefile
+++ b/src/agent/Makefile
@@ -3,6 +3,11 @@
 # SPDX-License-Identifier: Apache-2.0
 #

+# To show variables or targets help on `make help`
+# Use the following format:
+# '##VAR VARIABLE_NAME: help about variable'
+# '##TARGET TARGET_NAME: help about target'
+
 PROJECT_NAME = Kata Containers
 PROJECT_URL = https://github.com/kata-containers
 PROJECT_COMPONENT = kata-agent
@@ -16,16 +21,18 @@ SOURCES := \
 VERSION_FILE := ./VERSION
 VERSION := $(shell grep -v ^\# $(VERSION_FILE))
 COMMIT_NO := $(shell git rev-parse HEAD 2>/dev/null || true)
-COMMIT_NO_SHORT := $(shell git rev-parse --short HEAD 2>/dev/null || true)
 COMMIT := $(if $(shell git status --porcelain --untracked-files=no 2>/dev/null || true),${COMMIT_NO}-dirty,${COMMIT_NO})
 COMMIT_MSG = $(if $(COMMIT),$(COMMIT),unknown)

 # Exported to allow cargo to see it
 export VERSION_COMMIT := $(if $(COMMIT),$(VERSION)-$(COMMIT),$(VERSION))

+##VAR BUILD_TYPE=release|debug type of rust build
 BUILD_TYPE = release

+##VAR ARCH=arch target to build (format: uname -m)
 ARCH = $(shell uname -m)
+##VAR LIBC=musl|gnu
 LIBC ?= musl
 ifneq ($(LIBC),musl)
    ifeq ($(LIBC),gnu)
@@ -41,6 +48,11 @@ ifeq ($(ARCH), ppc64le)
    $(warning "WARNING: powerpc64le-unknown-linux-musl target is unavailable")
 endif

+ifeq ($(ARCH), s390x)
+    override LIBC = gnu
+    $(warning "WARNING: s390x-unknown-linux-musl target is unavailable")
+endif
+

 EXTRA_RUSTFLAGS :=
 ifeq ($(ARCH), aarch64)
@@ -52,10 +64,12 @@ TRIPLE = $(ARCH)-unknown-linux-$(LIBC)

 TARGET_PATH = target/$(TRIPLE)/$(BUILD_TYPE)/$(TARGET)

+##VAR DESTDIR=<path> is a directory prepended to each installed target file
 DESTDIR :=
+##VAR BINDIR=<path> is a directory for installing executable programs
 BINDIR := /usr/bin

-# Define if agent will be installed as init
+##VAR INIT=yes|no define if agent will be installed as init
 INIT := no

 # Path to systemd unit directory if installed as not init.
@@ -103,6 +117,7 @@ define INSTALL_FILE
 	install -D -m 644 $1 $(DESTDIR)$2/$1 || exit 1;
 endef

+##TARGET default: build code
 default: $(TARGET) show-header

 $(TARGET): $(GENERATED_CODE) $(TARGET_PATH)
@@ -110,42 +125,55 @@ $(TARGET): $(GENERATED_CODE) $(TARGET_PATH)
 $(TARGET_PATH): $(SOURCES) | show-summary
 	@RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE)

+$(GENERATED_FILES): %: %.in
+	@sed $(foreach r,$(GENERATED_REPLACEMENTS),-e 's|@$r@|$($r)|g') "$<" > "$@"
+
+##TARGET optimize: optimized  build
 optimize: $(SOURCES) | show-summary show-header
 	@RUSTFLAGS="-C link-arg=-s $(EXTRA_RUSTFLAGS) --deny-warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE)

-show-header:
-	@printf "%s - version %s (commit %s)\n\n" "$(TARGET)" "$(VERSION)" "$(COMMIT_MSG)"

+##TARGET clippy: run clippy linter
 clippy: $(GENERATED_CODE)
 	cargo clippy --all-targets --all-features --release \
 		-- \
 		-Aclippy::redundant_allocation \
 		-D warnings

-$(GENERATED_FILES): %: %.in
-	@sed $(foreach r,$(GENERATED_REPLACEMENTS),-e 's|@$r@|$($r)|g') "$<" > "$@"
+format:
+	cargo fmt -- --check

-install: build-service
+
+##TARGET install: install agent
+install: install-services
 	@install -D $(TARGET_PATH) $(DESTDIR)/$(BINDIR)/$(TARGET)

+##TARGET clean: clean build
 clean:
 	@cargo clean
 	@rm -f $(GENERATED_FILES)
+	@rm -f tarpaulin-report.html

+##TARGET test: run cargo tests
 test:
 	@cargo test --all --target $(TRIPLE)

-check: test
+##TARGET check: run clippy and format checks
+check: clippy format

+##TARGET run: build and run agent
 run:
 	@cargo run --target $(TRIPLE)

-build-service: $(GENERATED_FILES)
+install-services: $(GENERATED_FILES)
 ifeq ($(INIT),no)
 	@echo "Installing systemd unit files..."
 	$(foreach f,$(UNIT_FILES),$(call INSTALL_FILE,$f,$(UNIT_DIR)))
 endif

+show-header:
+	@printf "%s - version %s (commit %s)\n\n" "$(TARGET)" "$(VERSION)" "$(COMMIT_MSG)"
+
 show-summary: show-header
 	@printf "project:\n"
 	@printf "  name: $(PROJECT_NAME)\n"
@@ -161,7 +189,35 @@ show-summary: show-header
 	@printf "  %s\n" "$(call get_toolchain_version)"
 	@printf "\n"

-help: show-summary
+## help: Show help comments that start with `##VAR` and `##TARGET`
+help: Makefile show-summary
+	@echo "==========================Help============================="
+	@echo "Variables:"
+	@sed -n 's/^##VAR//p' $< | sort
+	@echo ""
+	@echo "Targets:"
+	@sed -n 's/^##TARGET//p' $< | sort
+
+TARPAULIN_ARGS:=-v --workspace
+install-tarpaulin:
+	cargo install cargo-tarpaulin
+
+# Check if cargo tarpaulin is installed
+HAS_TARPAULIN:= $(shell cargo --list | grep tarpaulin 2>/dev/null)
+check_tarpaulin:
+ifndef  HAS_TARPAULIN
+	$(error "tarpaulin is not available please: run make install-tarpaulin ")
+else
+	$(info OK: tarpaulin installed)
+endif
+
+##TARGET codecov: Generate code coverage report
+codecov: check_tarpaulin
+	cargo tarpaulin $(TARPAULIN_ARGS)
+
+##TARGET codecov-html: Generate code coverage html report
+codecov-html: check_tarpaulin
+	cargo tarpaulin $(TARPAULIN_ARGS) -o Html

 .PHONY: \
 	help \
@@ -169,5 +225,6 @@ help: show-summary
 	show-summary \
 	optimize

+##TARGET generate-protocols: generate/update grpc agent protocols
 generate-protocols:
 	protocols/hack/update-generated-proto.sh all
--- a/src/agent/README.md
+++ b/src/agent/README.md
@@ -49,6 +49,11 @@ $ rustup target add "${arch}-unknown-linux-musl"
 $ sudo ln -s /usr/bin/g++ /bin/musl-g++
 ```

+ppc64le-only: Manually install `protoc`, e.g.
+```bash
+$ sudo dnf install protobuf-compiler
+```
+
 Download the source files in the Kata containers repository and build the agent:
 ```bash
 $ GOPATH="${GOPATH:-$HOME/go}"
--- a/src/agent/VERSION
+++ b/src/agent/VERSION
@@ -1 +0,0 @@
-2.0.0
--- a/src/agent/VERSION
+++ b/src/agent/VERSION
@@ -0,0 +1 @@
+../../VERSION
--- a/src/agent/kata-agent.service.in
+++ b/src/agent/kata-agent.service.in
@@ -15,7 +15,7 @@ Wants=kata-containers.target
 StandardOutput=tty
 Type=simple
 ExecStart=@BINDIR@/@AGENT_NAME@
-LimitNOFILE=infinity
+LimitNOFILE=1048576
 # ExecStop is required for static agent tracing; in all other scenarios
 # the runtime handles shutting down the VM.
 ExecStop=/bin/sync ; /usr/bin/systemctl --force poweroff
--- a/src/agent/netlink/Cargo.toml
+++ b/src/agent/netlink/Cargo.toml
@@ -1,20 +0,0 @@
-[package]
-name = "netlink"
-version = "0.1.0"
-authors = ["The Kata Containers community <kata-dev@lists.katacontainers.io>"]
-edition = "2018"
-
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-
-[dependencies]
-libc = "0.2.58"
-nix = "0.17.0"
-
-protobuf = { version = "=2.14.0", optional = true }
-protocols = { path = "../protocols", optional = true }
-slog = { version = "2.5.2", features = ["dynamic-keys", "max_level_trace", "release_max_level_info"], optional = true }
-slog-scope = { version = "4.1.2", optional = true }
-
-[features]
-with-log = ["slog", "slog-scope"]
-with-agent-handler = ["protobuf", "protocols"]
--- a/src/agent/netlink/src/agent_handler.rs
+++ b/src/agent/netlink/src/agent_handler.rs
@@ -1,572 +0,0 @@
-// Copyright (c) 2020 Ant Financial
-// Copyright (C) 2020 Alibaba Cloud. All rights reserved.
-//
-// SPDX-License-Identifier: Apache-2.0
-//
-
-//! Dedicated Netlink interfaces for Kata agent protocol handler.
-
-use std::convert::TryFrom;
-
-use protobuf::RepeatedField;
-use protocols::types::{ARPNeighbor, IPAddress, IPFamily, Interface, Route};
-
-use super::*;
-
-#[cfg(feature = "with-log")]
-// Convenience macro to obtain the scope logger
-macro_rules! sl {
-    () => {
-        slog_scope::logger().new(o!("subsystem" => "netlink"))
-    };
-}
-
-impl super::RtnlHandle {
-    pub fn update_interface(&mut self, iface: &Interface) -> Result<Interface> {
-        // the reliable way to find link is using hardware address
-        // as filter. However, hardware filter might not be supported
-        // by netlink, we may have to dump link list and the find the
-        // target link. filter using name or family is supported, but
-        // we cannot use that to find target link.
-        // let's try if hardware address filter works. -_-
-
-        let ifinfo = self.find_link_by_hwaddr(iface.hwAddr.as_str())?;
-
-        // bring down interface if it is up
-        if ifinfo.ifi_flags & libc::IFF_UP as u32 != 0 {
-            self.set_link_status(&ifinfo, false)?;
-        }
-
-        // delete all addresses associated with the link
-        let del_addrs: Vec<RtIPAddr> = self.get_link_addresses(&ifinfo)?;
-        self.delete_all_addrs(&ifinfo, del_addrs.as_ref())?;
-
-        // add new ip addresses in request
-        for grpc_addr in &iface.IPAddresses {
-            let rtip = RtIPAddr::try_from(grpc_addr.clone())?;
-            self.add_one_address(&ifinfo, &rtip)?;
-        }
-
-        let mut v: Vec<u8> = vec![0; DEFAULT_NETLINK_BUF_SIZE];
-        // Safe because we have allocated enough buffer space.
-        let nlh = unsafe { &mut *(v.as_mut_ptr() as *mut nlmsghdr) };
-        let ifi = unsafe { &mut *(NLMSG_DATA!(nlh) as *mut ifinfomsg) };
-
-        // set name, set mtu, IFF_NOARP. in one rtnl_talk.
-        nlh.nlmsg_len = NLMSG_LENGTH!(mem::size_of::<ifinfomsg>() as u32) as __u32;
-        nlh.nlmsg_type = RTM_NEWLINK;
-        nlh.nlmsg_flags = NLM_F_REQUEST;
-        self.assign_seqnum(nlh);
-
-        ifi.ifi_family = ifinfo.ifi_family;
-        ifi.ifi_type = ifinfo.ifi_type;
-        ifi.ifi_index = ifinfo.ifi_index;
-        if iface.raw_flags & libc::IFF_NOARP as u32 != 0 {
-            ifi.ifi_change |= libc::IFF_NOARP as u32;
-            ifi.ifi_flags |= libc::IFF_NOARP as u32;
-        }
-
-        // Safe because we have allocated enough buffer space.
-        unsafe {
-            nlh.addattr32(IFLA_MTU, iface.mtu as u32);
-
-            // if str is null terminated, use addattr_var.
-            // otherwise, use addattr_str
-            nlh.addattr_var(IFLA_IFNAME, iface.name.as_ref());
-        }
-
-        self.rtnl_talk(v.as_mut_slice(), false)?;
-
-        // TODO: why the result is ignored here?
-        let _ = self.set_link_status(&ifinfo, true);
-
-        Ok(iface.clone())
-    }
-
-    /// Delete this interface/link per request
-    pub fn remove_interface(&mut self, iface: &Interface) -> Result<Interface> {
-        let ifinfo = self.find_link_by_hwaddr(iface.hwAddr.as_str())?;
-
-        self.set_link_status(&ifinfo, false)?;
-
-        let mut v: Vec<u8> = vec![0; DEFAULT_NETLINK_BUF_SIZE];
-        // Safe because we have allocated enough buffer space.
-        let nlh = unsafe { &mut *(v.as_mut_ptr() as *mut nlmsghdr) };
-        let ifi = unsafe { &mut *(NLMSG_DATA!(nlh) as *mut ifinfomsg) };
-
-        // No attributes needed?
-        nlh.nlmsg_len = NLMSG_LENGTH!(mem::size_of::<ifinfomsg>()) as __u32;
-        nlh.nlmsg_type = RTM_DELLINK;
-        nlh.nlmsg_flags = NLM_F_REQUEST;
-        self.assign_seqnum(nlh);
-
-        ifi.ifi_family = ifinfo.ifi_family;
-        ifi.ifi_index = ifinfo.ifi_index;
-        ifi.ifi_type = ifinfo.ifi_type;
-
-        self.rtnl_talk(v.as_mut_slice(), false)?;
-
-        Ok(iface.clone())
-    }
-
-    pub fn list_interfaces(&mut self) -> Result<Vec<Interface>> {
-        let mut ifaces: Vec<Interface> = Vec::new();
-        let (_slv, lv) = self.dump_all_links()?;
-        let (_sav, av) = self.dump_all_addresses(0)?;
-
-        for link in &lv {
-            // Safe because dump_all_links() returns valid pointers.
-            let nlh = unsafe { &**link };
-            if nlh.nlmsg_type != RTM_NEWLINK && nlh.nlmsg_type != RTM_DELLINK {
-                continue;
-            }
-
-            if nlh.nlmsg_len < NLMSG_SPACE!(mem::size_of::<ifinfomsg>()) {
-                info!(
-                    sl!(),
-                    "invalid nlmsg! nlmsg_len: {}, nlmsg_space: {}",
-                    nlh.nlmsg_len,
-                    NLMSG_SPACE!(mem::size_of::<ifinfomsg>())
-                );
-                break;
-            }
-
-            // Safe because we have just validated available buffer space above.
-            let ifi = unsafe { &*(NLMSG_DATA!(nlh) as *const ifinfomsg) };
-            let rta: *mut rtattr = IFLA_RTA!(ifi as *const ifinfomsg) as *mut rtattr;
-            let rtalen = IFLA_PAYLOAD!(nlh) as u32;
-            let attrs = unsafe { parse_attrs(rta, rtalen, (IFLA_MAX + 1) as usize)? };
-
-            // fill out some fields of Interface,
-            let mut iface: Interface = Interface::default();
-
-            // Safe because parse_attrs() returns valid pointers.
-            unsafe {
-                if !attrs[IFLA_IFNAME as usize].is_null() {
-                    let t = attrs[IFLA_IFNAME as usize];
-                    iface.name = String::from_utf8(getattr_var(t as *const rtattr))?;
-                }
-
-                if !attrs[IFLA_MTU as usize].is_null() {
-                    let t = attrs[IFLA_MTU as usize];
-                    iface.mtu = getattr32(t) as u64;
-                }
-
-                if !attrs[IFLA_ADDRESS as usize].is_null() {
-                    let alen = RTA_PAYLOAD!(attrs[IFLA_ADDRESS as usize]);
-                    let a: *const u8 = RTA_DATA!(attrs[IFLA_ADDRESS as usize]) as *const u8;
-                    iface.hwAddr = parser::format_address(a, alen as u32)?;
-                }
-            }
-
-            // get ip address info from av
-            let mut ads: Vec<IPAddress> = Vec::new();
-            for address in &av {
-                // Safe because dump_all_addresses() returns valid pointers.
-                let alh = unsafe { &**address };
-                if alh.nlmsg_type != RTM_NEWADDR {
-                    continue;
-                }
-
-                let tlen = NLMSG_SPACE!(mem::size_of::<ifaddrmsg>());
-                if alh.nlmsg_len < tlen {
-                    info!(
-                        sl!(),
-                        "invalid nlmsg! nlmsg_len: {}, nlmsg_space: {}", alh.nlmsg_len, tlen
-                    );
-                    break;
-                }
-
-                // Safe becahse we have checked avialable buffer space by NLMSG_SPACE above.
-                let ifa = unsafe { &*(NLMSG_DATA!(alh) as *const ifaddrmsg) };
-                let arta: *mut rtattr = IFA_RTA!(ifa) as *mut rtattr;
-                let artalen = IFA_PAYLOAD!(alh) as u32;
-
-                if ifa.ifa_index as u32 == ifi.ifi_index as u32 {
-                    // found target addresses, parse attributes and fill out Interface
-                    let addrs = unsafe { parse_attrs(arta, artalen, (IFA_MAX + 1) as usize)? };
-
-                    // fill address field of Interface
-                    let mut one: IPAddress = IPAddress::default();
-                    let tattr: *const rtattr = if !addrs[IFA_ADDRESS as usize].is_null() {
-                        addrs[IFA_ADDRESS as usize]
-                    } else {
-                        addrs[IFA_LOCAL as usize]
-                    };
-
-                    one.mask = format!("{}", ifa.ifa_prefixlen);
-                    one.family = IPFamily::v4;
-                    if ifa.ifa_family == libc::AF_INET6 as u8 {
-                        one.family = IPFamily::v6;
-                    }
-
-                    // Safe because parse_attrs() returns valid pointers.
-                    unsafe {
-                        let a: *const u8 = RTA_DATA!(tattr) as *const u8;
-                        let alen = RTA_PAYLOAD!(tattr);
-                        one.address = parser::format_address(a, alen as u32)?;
-                    }
-
-                    ads.push(one);
-                }
-            }
-
-            iface.IPAddresses = RepeatedField::from_vec(ads);
-            ifaces.push(iface);
-        }
-
-        Ok(ifaces)
-    }
-
-    pub fn update_routes(&mut self, rt: &[Route]) -> Result<Vec<Route>> {
-        let rs = self.get_all_routes()?;
-        self.delete_all_routes(&rs)?;
-
-        for grpcroute in rt {
-            if grpcroute.gateway.as_str() == "" {
-                let r = RtRoute::try_from(grpcroute.clone())?;
-                if r.index == -1 {
-                    continue;
-                }
-                self.add_one_route(&r)?;
-            }
-        }
-
-        for grpcroute in rt {
-            if grpcroute.gateway.as_str() != "" {
-                let r = RtRoute::try_from(grpcroute.clone())?;
-                if r.index == -1 {
-                    continue;
-                }
-                self.add_one_route(&r)?;
-            }
-        }
-
-        Ok(rt.to_owned())
-    }
-
-    pub fn list_routes(&mut self) -> Result<Vec<Route>> {
-        // currently, only dump routes from main table for ipv4
-        // ie, rtmsg.rtmsg_family = AF_INET, set RT_TABLE_MAIN
-        // attribute in dump request
-        // Fix Me: think about othe tables, ipv6..
-        let mut rs: Vec<Route> = Vec::new();
-        let (_srv, rv) = self.dump_all_routes()?;
-
-        // parse out routes and store in rs
-        for r in &rv {
-            // Safe because dump_all_routes() returns valid pointers.
-            let nlh = unsafe { &**r };
-            if nlh.nlmsg_type != RTM_NEWROUTE && nlh.nlmsg_type != RTM_DELROUTE {
-                info!(sl!(), "not route message!");
-                continue;
-            }
-            let tlen = NLMSG_SPACE!(mem::size_of::<rtmsg>());
-            if nlh.nlmsg_len < tlen {
-                info!(
-                    sl!(),
-                    "invalid nlmsg! nlmsg_len: {}, nlmsg_spae: {}", nlh.nlmsg_len, tlen
-                );
-                break;
-            }
-
-            // Safe because we have just validated available buffer space above.
-            let rtm = unsafe { &mut *(NLMSG_DATA!(nlh) as *mut rtmsg) };
-            if rtm.rtm_table != RT_TABLE_MAIN as u8 {
-                continue;
-            }
-            let rta: *mut rtattr = RTM_RTA!(rtm) as *mut rtattr;
-            let rtalen = RTM_PAYLOAD!(nlh) as u32;
-            let attrs = unsafe { parse_attrs(rta, rtalen, (RTA_MAX + 1) as usize)? };
-
-            let t = attrs[RTA_TABLE as usize];
-            if !t.is_null() {
-                // Safe because parse_attrs() returns valid pointers
-                let table = unsafe { getattr32(t) };
-                if table != RT_TABLE_MAIN {
-                    continue;
-                }
-            }
-
-            // find source, destination, gateway, scope, and and device name
-            let mut t = attrs[RTA_DST as usize];
-            let mut rte: Route = Route::default();
-
-            // Safe because parse_attrs() returns valid pointers
-            unsafe {
-                // destination
-                if !t.is_null() {
-                    let data: *const u8 = RTA_DATA!(t) as *const u8;
-                    let len = RTA_PAYLOAD!(t) as u32;
-                    rte.dest =
-                        format!("{}/{}", parser::format_address(data, len)?, rtm.rtm_dst_len);
-                }
-
-                // gateway
-                t = attrs[RTA_GATEWAY as usize];
-                if !t.is_null() {
-                    let data: *const u8 = RTA_DATA!(t) as *const u8;
-                    let len = RTA_PAYLOAD!(t) as u32;
-                    rte.gateway = parser::format_address(data, len)?;
-
-                    // for gateway, destination is 0.0.0.0
-                    rte.dest = "0.0.0.0".to_string();
-                }
-
-                // source
-                t = attrs[RTA_SRC as usize];
-                if t.is_null() {
-                    t = attrs[RTA_PREFSRC as usize];
-                }
-                if !t.is_null() {
-                    let data: *const u8 = RTA_DATA!(t) as *const u8;
-                    let len = RTA_PAYLOAD!(t) as u32;
-                    rte.source = parser::format_address(data, len)?;
-
-                    if rtm.rtm_src_len != 0 {
-                        rte.source = format!("{}/{}", rte.source.as_str(), rtm.rtm_src_len);
-                    }
-                }
-
-                // scope
-                rte.scope = rtm.rtm_scope as u32;
-
-                // oif
-                t = attrs[RTA_OIF as usize];
-                if !t.is_null() {
-                    let data = &*(RTA_DATA!(t) as *const i32);
-                    assert_eq!(RTA_PAYLOAD!(t), 4);
-
-                    rte.device = self
-                        .get_name_by_index(*data)
-                        .unwrap_or_else(|_| "unknown".to_string());
-                }
-            }
-
-            rs.push(rte);
-        }
-
-        Ok(rs)
-    }
-
-    pub fn add_arp_neighbors(&mut self, neighs: &[ARPNeighbor]) -> Result<()> {
-        for neigh in neighs {
-            self.add_one_arp_neighbor(&neigh)?;
-        }
-
-        Ok(())
-    }
-
-    pub fn add_one_arp_neighbor(&mut self, neigh: &ARPNeighbor) -> Result<()> {
-        let to_ip = match neigh.toIPAddress.as_ref() {
-            None => return nix_errno(Errno::EINVAL),
-            Some(v) => {
-                if v.address.is_empty() {
-                    return nix_errno(Errno::EINVAL);
-                }
-                v.address.as_ref()
-            }
-        };
-
-        let dev = self.find_link_by_name(&neigh.device)?;
-
-        let mut v: Vec<u8> = vec![0; DEFAULT_NETLINK_BUF_SIZE];
-        // Safe because we have allocated enough buffer space.
-        let nlh = unsafe { &mut *(v.as_mut_ptr() as *mut nlmsghdr) };
-        let ndm = unsafe { &mut *(NLMSG_DATA!(nlh) as *mut ndmsg) };
-
-        nlh.nlmsg_len = NLMSG_LENGTH!(std::mem::size_of::<ndmsg>()) as u32;
-        nlh.nlmsg_type = RTM_NEWNEIGH;
-        nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
-        self.assign_seqnum(nlh);
-
-        ndm.ndm_family = libc::AF_UNSPEC as __u8;
-        ndm.ndm_state = IFA_F_PERMANENT as __u16;
-        // process lladdr
-        if neigh.lladdr != "" {
-            let llabuf = parser::parse_mac_addr(&neigh.lladdr)?;
-
-            // Safe because we have allocated enough buffer space.
-            unsafe { nlh.addattr_var(NDA_LLADDR, llabuf.as_ref()) };
-        }
-
-        let (family, ip_data) = parser::parse_ip_addr_with_family(&to_ip)?;
-        ndm.ndm_family = family;
-        // Safe because we have allocated enough buffer space.
-        unsafe { nlh.addattr_var(NDA_DST, ip_data.as_ref()) };
-
-        // process state
-        if neigh.state != 0 {
-            ndm.ndm_state = neigh.state as __u16;
-        }
-
-        // process flags
-        ndm.ndm_flags = (*ndm).ndm_flags | neigh.flags as __u8;
-
-        // process dev
-        ndm.ndm_ifindex = dev.ifi_index;
-
-        // send
-        self.rtnl_talk(v.as_mut_slice(), false)?;
-
-        Ok(())
-    }
-}
-
-impl TryFrom<IPAddress> for RtIPAddr {
-    type Error = nix::Error;
-
-    fn try_from(ipi: IPAddress) -> std::result::Result<Self, Self::Error> {
-        let ip_family = if ipi.family == IPFamily::v4 {
-            libc::AF_INET
-        } else {
-            libc::AF_INET6
-        } as __u8;
-
-        let ip_mask = parser::parse_u8(ipi.mask.as_str(), 10)?;
-        let addr = parser::parse_ip_addr(ipi.address.as_ref())?;
-
-        Ok(Self {
-            ip_family,
-            ip_mask,
-            addr,
-        })
-    }
-}
-
-impl TryFrom<Route> for RtRoute {
-    type Error = nix::Error;
-
-    fn try_from(r: Route) -> std::result::Result<Self, Self::Error> {
-        // only handle ipv4
-
-        let index = {
-            let mut rh = RtnlHandle::new(NETLINK_ROUTE, 0)?;
-            match rh.find_link_by_name(r.device.as_str()) {
-                Ok(ifi) => ifi.ifi_index,
-                Err(_) => -1,
-            }
-        };
-
-        let (dest, dst_len) = if r.dest.is_empty() {
-            (Some(vec![0 as u8; 4]), 0)
-        } else {
-            let (dst, mask) = parser::parse_cidr(r.dest.as_str())?;
-            (Some(dst), mask)
-        };
-
-        let (source, src_len) = if r.source.is_empty() {
-            (None, 0)
-        } else {
-            let (src, mask) = parser::parse_cidr(r.source.as_str())?;
-            (Some(src), mask)
-        };
-
-        let gateway = if r.gateway.is_empty() {
-            None
-        } else {
-            Some(parser::parse_ip_addr(r.gateway.as_str())?)
-        };
-
-        Ok(Self {
-            dest,
-            source,
-            src_len,
-            dst_len,
-            index,
-            gateway,
-            scope: r.scope as u8,
-            protocol: RTPROTO_UNSPEC,
-        })
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::{RtnlHandle, NETLINK_ROUTE};
-    use protocols::types::IPAddress;
-    use std::process::Command;
-
-    fn clean_env_for_test_add_one_arp_neighbor(dummy_name: &str, ip: &str) {
-        // ip link delete dummy
-        Command::new("ip")
-            .args(&["link", "delete", dummy_name])
-            .output()
-            .expect("prepare: failed to delete dummy");
-
-        // ip neigh del dev dummy ip
-        Command::new("ip")
-            .args(&["neigh", "del", dummy_name, ip])
-            .output()
-            .expect("prepare: failed to delete neigh");
-    }
-
-    fn prepare_env_for_test_add_one_arp_neighbor(dummy_name: &str, ip: &str) {
-        clean_env_for_test_add_one_arp_neighbor(dummy_name, ip);
-        // modprobe dummy
-        Command::new("modprobe")
-            .arg("dummy")
-            .output()
-            .expect("failed to run modprobe dummy");
-
-        // ip link add dummy type dummy
-        Command::new("ip")
-            .args(&["link", "add", dummy_name, "type", "dummy"])
-            .output()
-            .expect("failed to add dummy interface");
-
-        // ip addr add 192.168.0.2/16 dev dummy
-        Command::new("ip")
-            .args(&["addr", "add", "192.168.0.2/16", "dev", dummy_name])
-            .output()
-            .expect("failed to add ip for dummy");
-
-        // ip link set dummy up;
-        Command::new("ip")
-            .args(&["link", "set", dummy_name, "up"])
-            .output()
-            .expect("failed to up dummy");
-    }
-
-    #[test]
-    fn test_add_one_arp_neighbor() {
-        // skip_if_not_root
-        if !nix::unistd::Uid::effective().is_root() {
-            println!("INFO: skipping {} which needs root", module_path!());
-            return;
-        }
-
-        let mac = "6a:92:3a:59:70:aa";
-        let to_ip = "169.254.1.1";
-        let dummy_name = "dummy_for_arp";
-
-        prepare_env_for_test_add_one_arp_neighbor(dummy_name, to_ip);
-
-        let mut ip_address = IPAddress::new();
-        ip_address.set_address(to_ip.to_string());
-
-        let mut neigh = ARPNeighbor::new();
-        neigh.set_toIPAddress(ip_address);
-        neigh.set_device(dummy_name.to_string());
-        neigh.set_lladdr(mac.to_string());
-        neigh.set_state(0x80);
-
-        let mut rtnl = RtnlHandle::new(NETLINK_ROUTE, 0).unwrap();
-
-        rtnl.add_one_arp_neighbor(&neigh).unwrap();
-
-        // ip neigh show dev dummy ip
-        let stdout = Command::new("ip")
-            .args(&["neigh", "show", "dev", dummy_name, to_ip])
-            .output()
-            .expect("failed to show neigh")
-            .stdout;
-
-        let stdout = std::str::from_utf8(&stdout).expect("failed to conveert stdout");
-
-        assert_eq!(stdout, format!("{} lladdr {} PERMANENT\n", to_ip, mac));
-
-        clean_env_for_test_add_one_arp_neighbor(dummy_name, to_ip);
-    }
-}
--- a/src/agent/netlink/src/lib.rs
+++ b/src/agent/netlink/src/lib.rs
--- a/src/agent/netlink/src/parser.rs
+++ b/src/agent/netlink/src/parser.rs
@@ -1,201 +0,0 @@
-// Copyright (c) 2019 Ant Financial
-//
-// SPDX-License-Identifier: Apache-2.0
-
-//! Parser for IPv4/IPv6/MAC addresses.
-
-use std::net::{Ipv4Addr, Ipv6Addr};
-use std::str::FromStr;
-
-use super::{Errno, Result, __u8, nix_errno};
-
-#[inline]
-pub(crate) fn parse_u8(s: &str, radix: u32) -> Result<u8> {
-    if radix >= 2 && radix <= 36 {
-        u8::from_str_radix(s, radix).map_err(|_| nix::Error::Sys(Errno::EINVAL))
-    } else {
-        u8::from_str(s).map_err(|_| nix::Error::Sys(Errno::EINVAL))
-    }
-}
-
-pub fn parse_ipv4_addr(s: &str) -> Result<Vec<u8>> {
-    match Ipv4Addr::from_str(s) {
-        Ok(v) => Ok(Vec::from(v.octets().as_ref())),
-        Err(_e) => nix_errno(Errno::EINVAL),
-    }
-}
-
-pub fn parse_ip_addr(s: &str) -> Result<Vec<u8>> {
-    if let Ok(v6) = Ipv6Addr::from_str(s) {
-        Ok(Vec::from(v6.octets().as_ref()))
-    } else {
-        parse_ipv4_addr(s)
-    }
-}
-
-pub fn parse_ip_addr_with_family(ip_address: &str) -> Result<(__u8, Vec<u8>)> {
-    if let Ok(v6) = Ipv6Addr::from_str(ip_address) {
-        Ok((libc::AF_INET6 as __u8, Vec::from(v6.octets().as_ref())))
-    } else {
-        parse_ipv4_addr(ip_address).map(|v| (libc::AF_INET as __u8, v))
-    }
-}
-
-pub fn parse_ipv4_cidr(s: &str) -> Result<(Vec<u8>, u8)> {
-    let fields: Vec<&str> = s.split('/').collect();
-
-    if fields.len() != 2 {
-        nix_errno(Errno::EINVAL)
-    } else {
-        Ok((parse_ipv4_addr(fields[0])?, parse_u8(fields[1], 10)?))
-    }
-}
-
-pub fn parse_cidr(s: &str) -> Result<(Vec<u8>, u8)> {
-    let fields: Vec<&str> = s.split('/').collect();
-
-    if fields.len() != 2 {
-        nix_errno(Errno::EINVAL)
-    } else {
-        Ok((parse_ip_addr(fields[0])?, parse_u8(fields[1], 10)?))
-    }
-}
-
-pub fn parse_mac_addr(hwaddr: &str) -> Result<Vec<u8>> {
-    let fields: Vec<&str> = hwaddr.split(':').collect();
-
-    if fields.len() != 6 {
-        nix_errno(Errno::EINVAL)
-    } else {
-        Ok(vec![
-            parse_u8(fields[0], 16)?,
-            parse_u8(fields[1], 16)?,
-            parse_u8(fields[2], 16)?,
-            parse_u8(fields[3], 16)?,
-            parse_u8(fields[4], 16)?,
-            parse_u8(fields[5], 16)?,
-        ])
-    }
-}
-
-/// Format an IPv4/IPv6/MAC address.
-///
-/// # Safety
-/// Caller needs to ensure that addr and len are valid.
-pub unsafe fn format_address(addr: *const u8, len: u32) -> Result<String> {
-    let mut a: String;
-    if len == 4 {
-        // ipv4
-        let mut i = 1;
-        let mut p = addr as i64;
-
-        a = format!("{}", *(p as *const u8));
-        while i < len {
-            p += 1;
-            i += 1;
-            a.push_str(format!(".{}", *(p as *const u8)).as_str());
-        }
-
-        return Ok(a);
-    }
-
-    if len == 6 {
-        // hwaddr
-        let mut i = 1;
-        let mut p = addr as i64;
-
-        a = format!("{:0>2X}", *(p as *const u8));
-        while i < len {
-            p += 1;
-            i += 1;
-            a.push_str(format!(":{:0>2X}", *(p as *const u8)).as_str());
-        }
-
-        return Ok(a);
-    }
-
-    if len == 16 {
-        // ipv6
-        let p = addr as *const u8 as *const libc::c_void;
-        let mut ar: [u8; 16] = [0; 16];
-        let mut v: Vec<u8> = vec![0; 16];
-        let dp: *mut libc::c_void = v.as_mut_ptr() as *mut libc::c_void;
-        libc::memcpy(dp, p, 16);
-
-        ar.copy_from_slice(v.as_slice());
-
-        return Ok(Ipv6Addr::from(ar).to_string());
-    }
-
-    nix_errno(Errno::EINVAL)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use libc;
-
-    #[test]
-    fn test_ip_addr() {
-        let ip = parse_ipv4_addr("1.2.3.4").unwrap();
-        assert_eq!(ip, vec![0x1u8, 0x2u8, 0x3u8, 0x4u8]);
-        parse_ipv4_addr("1.2.3.4.5").unwrap_err();
-        parse_ipv4_addr("1.2.3-4").unwrap_err();
-        parse_ipv4_addr("1.2.3.a").unwrap_err();
-        parse_ipv4_addr("1.2.3.x").unwrap_err();
-        parse_ipv4_addr("-1.2.3.4").unwrap_err();
-        parse_ipv4_addr("+1.2.3.4").unwrap_err();
-
-        let (family, _) = parse_ip_addr_with_family("192.168.1.1").unwrap();
-        assert_eq!(family, libc::AF_INET as __u8);
-
-        let (family, ip) =
-            parse_ip_addr_with_family("2001:0db8:85a3:0000:0000:8a2e:0370:7334").unwrap();
-        assert_eq!(family, libc::AF_INET6 as __u8);
-        assert_eq!(ip.len(), 16);
-        parse_ip_addr_with_family("2001:0db8:85a3:0000:0000:8a2e:0370:73345").unwrap_err();
-
-        let ip = parse_ip_addr("::1").unwrap();
-        assert_eq!(ip[0], 0x0);
-        assert_eq!(ip[15], 0x1);
-    }
-
-    #[test]
-    fn test_parse_cidr() {
-        let (_, mask) = parse_ipv4_cidr("1.2.3.4/31").unwrap();
-        assert_eq!(mask, 31);
-
-        parse_ipv4_cidr("1.2.3/4/31").unwrap_err();
-        parse_ipv4_cidr("1.2.3.4/f").unwrap_err();
-        parse_ipv4_cidr("1.2.3/8").unwrap_err();
-        parse_ipv4_cidr("1.2.3.4.8").unwrap_err();
-
-        let (ip, mask) = parse_cidr("2001:db8:a::123/64").unwrap();
-        assert_eq!(mask, 64);
-        assert_eq!(ip[0], 0x20);
-        assert_eq!(ip[15], 0x23);
-    }
-
-    #[test]
-    fn test_parse_mac_addr() {
-        let mac = parse_mac_addr("FF:FF:FF:FF:FF:FE").unwrap();
-        assert_eq!(mac.len(), 6);
-        assert_eq!(mac[0], 0xff);
-        assert_eq!(mac[5], 0xfe);
-
-        parse_mac_addr("FF:FF:FF:FF:FF:FE:A0").unwrap_err();
-        parse_mac_addr("FF:FF:FF:FF:FF:FX").unwrap_err();
-        parse_mac_addr("FF:FF:FF:FF:FF").unwrap_err();
-    }
-
-    #[test]
-    fn test_format_address() {
-        let buf = [1u8, 2u8, 3u8, 4u8];
-        let addr = unsafe { format_address(&buf as *const u8, 4).unwrap() };
-        assert_eq!(addr, "1.2.3.4");
-
-        let buf = [1u8, 2u8, 3u8, 4u8, 5u8, 6u8];
-        let addr = unsafe { format_address(&buf as *const u8, 6).unwrap() };
-        assert_eq!(addr, "01:02:03:04:05:06");
-    }
-}
--- a/src/agent/oci/src/lib.rs
+++ b/src/agent/oci/src/lib.rs
@@ -8,7 +8,7 @@ extern crate serde;
 extern crate serde_derive;
 extern crate serde_json;

-use libc::mode_t;
+use libc::{self, mode_t};
 use std::collections::HashMap;

 mod serialize;
@@ -27,6 +27,10 @@ where
    *d == T::default()
 }

+fn default_seccomp_errno() -> u32 {
+    libc::EPERM as u32
+}
+
 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
 pub struct Spec {
    #[serde(
@@ -54,7 +58,7 @@ pub struct Spec {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub windows: Option<Windows<String>>,
    #[serde(skip_serializing_if = "Option::is_none")]
-    pub vm: Option<VM>,
+    pub vm: Option<Vm>,
 }

 impl Spec {
@@ -67,7 +71,7 @@ impl Spec {
    }
 }

-pub type LinuxRlimit = POSIXRlimit;
+pub type LinuxRlimit = PosixRlimit;

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
 pub struct Process {
@@ -89,7 +93,7 @@ pub struct Process {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub capabilities: Option<LinuxCapabilities>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
-    pub rlimits: Vec<POSIXRlimit>,
+    pub rlimits: Vec<PosixRlimit>,
    #[serde(default, rename = "noNewPrivileges")]
    pub no_new_privileges: bool,
    #[serde(
@@ -195,9 +199,9 @@ pub struct Hooks {
 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
 pub struct Linux {
    #[serde(default, rename = "uidMappings", skip_serializing_if = "Vec::is_empty")]
-    pub uid_mappings: Vec<LinuxIDMapping>,
+    pub uid_mappings: Vec<LinuxIdMapping>,
    #[serde(default, rename = "gidMappings", skip_serializing_if = "Vec::is_empty")]
-    pub gid_mappings: Vec<LinuxIDMapping>,
+    pub gid_mappings: Vec<LinuxIdMapping>,
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub sysctl: HashMap<String, String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
@@ -257,7 +261,7 @@ pub const UTSNAMESPACE: &str = "uts";
 pub const CGROUPNAMESPACE: &str = "cgroup";

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct LinuxIDMapping {
+pub struct LinuxIdMapping {
    #[serde(default, rename = "containerID")]
    pub container_id: u32,
    #[serde(default, rename = "hostID")]
@@ -267,7 +271,7 @@ pub struct LinuxIDMapping {
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct POSIXRlimit {
+pub struct PosixRlimit {
    #[serde(default)]
    pub r#type: String,
    #[serde(default)]
@@ -293,7 +297,7 @@ pub struct LinuxInterfacePriority {
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct LinuxBlockIODevice {
+pub struct LinuxBlockIoDevice {
    #[serde(default)]
    pub major: i64,
    #[serde(default)]
@@ -303,7 +307,7 @@ pub struct LinuxBlockIODevice {
 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
 pub struct LinuxWeightDevice {
    #[serde(flatten)]
-    pub blk: LinuxBlockIODevice,
+    pub blk: LinuxBlockIoDevice,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub weight: Option<u16>,
    #[serde(
@@ -317,13 +321,13 @@ pub struct LinuxWeightDevice {
 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
 pub struct LinuxThrottleDevice {
    #[serde(flatten)]
-    pub blk: LinuxBlockIODevice,
+    pub blk: LinuxBlockIoDevice,
    #[serde(default)]
    pub rate: u64,
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct LinuxBlockIO {
+pub struct LinuxBlockIo {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub weight: Option<u16>,
    #[serde(
@@ -387,7 +391,7 @@ pub struct LinuxMemory {
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct LinuxCPU {
+pub struct LinuxCpu {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub shares: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
@@ -449,11 +453,11 @@ pub struct LinuxResources {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub memory: Option<LinuxMemory>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub cpu: Option<LinuxCPU>,
+    pub cpu: Option<LinuxCpu>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub pids: Option<LinuxPids>,
    #[serde(skip_serializing_if = "Option::is_none", rename = "blockIO")]
-    pub block_io: Option<LinuxBlockIO>,
+    pub block_io: Option<LinuxBlockIo>,
    #[serde(
        default,
        skip_serializing_if = "Vec::is_empty",
@@ -513,7 +517,7 @@ pub struct Solaris {
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub anet: Vec<SolarisAnet>,
    #[serde(default, skip_serializing_if = "Option::is_none", rename = "cappedCPU")]
-    pub capped_cpu: Option<SolarisCappedCPU>,
+    pub capped_cpu: Option<SolarisCappedCpu>,
    #[serde(
        default,
        skip_serializing_if = "Option::is_none",
@@ -523,7 +527,7 @@ pub struct Solaris {
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct SolarisCappedCPU {
+pub struct SolarisCappedCpu {
    #[serde(default, skip_serializing_if = "String::is_empty")]
    pub ncpus: String,
 }
@@ -601,7 +605,7 @@ pub struct WindowsResources {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub memory: Option<WindowsMemoryResources>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub cpu: Option<WindowsCPUResources>,
+    pub cpu: Option<WindowsCpuResources>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub storage: Option<WindowsStorageResources>,
 }
@@ -613,7 +617,7 @@ pub struct WindowsMemoryResources {
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct WindowsCPUResources {
+pub struct WindowsCpuResources {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub count: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
@@ -671,14 +675,14 @@ pub struct WindowsHyperV {
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct VM {
-    pub hypervisor: VMHypervisor,
-    pub kernel: VMKernel,
-    pub image: VMImage,
+pub struct Vm {
+    pub hypervisor: VmHypervisor,
+    pub kernel: VmKernel,
+    pub image: VmImage,
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct VMHypervisor {
+pub struct VmHypervisor {
    #[serde(default)]
    pub path: String,
    #[serde(default, skip_serializing_if = "String::is_empty")]
@@ -686,7 +690,7 @@ pub struct VMHypervisor {
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct VMKernel {
+pub struct VmKernel {
    #[serde(default)]
    pub path: String,
    #[serde(default, skip_serializing_if = "String::is_empty")]
@@ -696,7 +700,7 @@ pub struct VMKernel {
 }

 #[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
-pub struct VMImage {
+pub struct VmImage {
    #[serde(default)]
    pub path: String,
    #[serde(default)]
@@ -710,6 +714,8 @@ pub struct LinuxSeccomp {
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub architectures: Vec<Arch>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub flags: Vec<LinuxSeccompFlag>,
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub syscalls: Vec<LinuxSyscall>,
 }

@@ -733,14 +739,20 @@ pub const ARCHS390: &str = "SCMP_ARCH_S390";
 pub const ARCHS390X: &str = "SCMP_ARCH_S390X";
 pub const ARCHPARISC: &str = "SCMP_ARCH_PARISC";
 pub const ARCHPARISC64: &str = "SCMP_ARCH_PARISC64";
+pub const ARCHRISCV64: &str = "SCMP_ARCH_RISCV64";
+
+pub type LinuxSeccompFlag = String;

 pub type LinuxSeccompAction = String;

 pub const ACTKILL: &str = "SCMP_ACT_KILL";
+pub const ACTKILLPROCESS: &str = "SCMP_ACT_KILL_PROCESS";
+pub const ACTKILLTHREAD: &str = "SCMP_ACT_KILL_THREAD";
 pub const ACTTRAP: &str = "SCMP_ACT_TRAP";
 pub const ACTERRNO: &str = "SCMP_ACT_ERRNO";
 pub const ACTTRACE: &str = "SCMP_ACT_TRACE";
 pub const ACTALLOW: &str = "SCMP_ACT_ALLOW";
+pub const ACTLOG: &str = "SCMP_ACT_LOG";

 pub type LinuxSeccompOperator = String;

@@ -770,6 +782,8 @@ pub struct LinuxSyscall {
    pub names: Vec<String>,
    #[serde(default, skip_serializing_if = "String::is_empty")]
    pub action: LinuxSeccompAction,
+    #[serde(default = "default_seccomp_errno", rename = "errnoRet")]
+    pub errno_ret: u32,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub args: Vec<LinuxSeccompArg>,
 }
@@ -784,7 +798,17 @@ pub struct LinuxIntelRdt {
    pub l3_cache_schema: String,
 }

-#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
+#[derive(Debug, Serialize, Deserialize, Copy, Clone, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum ContainerState {
+    Creating,
+    Created,
+    Running,
+    Stopped,
+    Paused,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
 pub struct State {
    #[serde(
        default,
@@ -794,8 +818,7 @@ pub struct State {
    pub version: String,
    #[serde(default, skip_serializing_if = "String::is_empty")]
    pub id: String,
-    #[serde(default, skip_serializing_if = "String::is_empty")]
-    pub status: String,
+    pub status: ContainerState,
    #[serde(default)]
    pub pid: i32,
    #[serde(default, skip_serializing_if = "String::is_empty")]
@@ -806,6 +829,8 @@ pub struct State {

 #[cfg(test)]
 mod tests {
+    use super::*;
+
    #[test]
    fn test_deserialize_state() {
        let data = r#"{
@@ -818,10 +843,10 @@ mod tests {
                "myKey": "myValue"
            }
        }"#;
-        let expected = crate::State {
+        let expected = State {
            version: "0.2.0".to_string(),
            id: "oci-container1".to_string(),
-            status: "running".to_string(),
+            status: ContainerState::Running,
            pid: 4422,
            bundle: "/containers/redis".to_string(),
            annotations: [("myKey".to_string(), "myValue".to_string())]
@@ -1246,12 +1271,12 @@ mod tests {
                    ambient: vec!["CAP_NET_BIND_SERVICE".to_string()],
                }),
                rlimits: vec![
-                    crate::POSIXRlimit {
+                    crate::PosixRlimit {
                        r#type: "RLIMIT_CORE".to_string(),
                        hard: 1024,
                        soft: 1024,
                    },
-                    crate::POSIXRlimit {
+                    crate::PosixRlimit {
                        r#type: "RLIMIT_NOFILE".to_string(),
                        hard: 1024,
                        soft: 1024,
@@ -1383,12 +1408,12 @@ mod tests {
            .cloned()
            .collect(),
            linux: Some(crate::Linux {
-                uid_mappings: vec![crate::LinuxIDMapping {
+                uid_mappings: vec![crate::LinuxIdMapping {
                    container_id: 0,
                    host_id: 1000,
                    size: 32000,
                }],
-                gid_mappings: vec![crate::LinuxIDMapping {
+                gid_mappings: vec![crate::LinuxIdMapping {
                    container_id: 0,
                    host_id: 1000,
                    size: 32000,
@@ -1433,7 +1458,7 @@ mod tests {
                        swappiness: Some(0),
                        disable_oom_killer: Some(false),
                    }),
-                    cpu: Some(crate::LinuxCPU {
+                    cpu: Some(crate::LinuxCpu {
                        shares: Some(1024),
                        quota: Some(1000000),
                        period: Some(500000),
@@ -1443,17 +1468,17 @@ mod tests {
                        mems: "0-7".to_string(),
                    }),
                    pids: Some(crate::LinuxPids { limit: 32771 }),
-                    block_io: Some(crate::LinuxBlockIO {
+                    block_io: Some(crate::LinuxBlockIo {
                        weight: Some(10),
                        leaf_weight: Some(10),
                        weight_device: vec![
                            crate::LinuxWeightDevice {
-                                blk: crate::LinuxBlockIODevice { major: 8, minor: 0 },
+                                blk: crate::LinuxBlockIoDevice { major: 8, minor: 0 },
                                weight: Some(500),
                                leaf_weight: Some(300),
                            },
                            crate::LinuxWeightDevice {
-                                blk: crate::LinuxBlockIODevice {
+                                blk: crate::LinuxBlockIoDevice {
                                    major: 8,
                                    minor: 16,
                                },
@@ -1462,13 +1487,13 @@ mod tests {
                            },
                        ],
                        throttle_read_bps_device: vec![crate::LinuxThrottleDevice {
-                            blk: crate::LinuxBlockIODevice { major: 8, minor: 0 },
+                            blk: crate::LinuxBlockIoDevice { major: 8, minor: 0 },
                            rate: 600,
                        }],
                        throttle_write_bps_device: vec![],
                        throttle_read_iops_device: vec![],
                        throttle_write_iops_device: vec![crate::LinuxThrottleDevice {
-                            blk: crate::LinuxBlockIODevice {
+                            blk: crate::LinuxBlockIoDevice {
                                major: 8,
                                minor: 16,
                            },
@@ -1554,9 +1579,11 @@ mod tests {
                seccomp: Some(crate::LinuxSeccomp {
                    default_action: "SCMP_ACT_ALLOW".to_string(),
                    architectures: vec!["SCMP_ARCH_X86".to_string(), "SCMP_ARCH_X32".to_string()],
+                    flags: vec![],
                    syscalls: vec![crate::LinuxSyscall {
                        names: vec!["getcwd".to_string(), "chmod".to_string()],
                        action: "SCMP_ACT_ERRNO".to_string(),
+                        errno_ret: crate::default_seccomp_errno(),
                        args: vec![],
                    }],
                }),
--- a/src/agent/protocols/Cargo.toml
+++ b/src/agent/protocols/Cargo.toml
@@ -5,9 +5,9 @@ authors = ["The Kata Containers community <kata-dev@lists.katacontainers.io>"]
 edition = "2018"

 [dependencies]
-ttrpc = "0.3.0"
+ttrpc = { version = "0.5.0", features = ["async"] }
+async-trait = "0.1.42"
 protobuf = "=2.14.0"
-futures = "0.1.27"

 [build-dependencies]
-ttrpc-codegen = "0.1.2"
+ttrpc-codegen = "0.2.0"
--- a/src/agent/protocols/build.rs
+++ b/src/agent/protocols/build.rs
@@ -3,8 +3,8 @@
 // SPDX-License-Identifier: Apache-2.0
 //

-use std::fs::File;
-use std::io::{Read, Write};
+use std::fs;
+use ttrpc_codegen::{Codegen, Customize};

 fn main() {
    let protos = vec![
@@ -15,16 +15,15 @@ fn main() {
        "protos/oci.proto",
    ];

-    // Tell Cargo that if the .proto files changed, to rerun this build script.
-    protos
-        .iter()
-        .for_each(|p| println!("cargo:rerun-if-changed={}", &p));
-
-    ttrpc_codegen::Codegen::new()
+    Codegen::new()
        .out_dir("src")
        .inputs(&protos)
        .include("protos")
        .rust_protobuf()
+        .customize(Customize {
+            async_server: true,
+            ..Default::default()
+        })
        .run()
        .expect("Gen codes failed.");

@@ -40,16 +39,6 @@ fn main() {
 }

 fn replace_text_in_file(file_name: &str, from: &str, to: &str) -> Result<(), std::io::Error> {
-    let mut src = File::open(file_name)?;
-    let mut contents = String::new();
-    src.read_to_string(&mut contents).unwrap();
-    drop(src);
-
-    let new_contents = contents.replace(from, to);
-
-    let mut dst = File::create(&file_name)?;
-    dst.write_all(new_contents.as_bytes())?;
-
-    Ok(())
+    let new_contents = fs::read_to_string(file_name)?.replace(from, to);
+    fs::write(&file_name, new_contents.as_bytes())
 }
-
--- a/src/agent/protocols/hack/update-generated-proto.sh
+++ b/src/agent/protocols/hack/update-generated-proto.sh
@@ -65,7 +65,7 @@ $GOPATH/src/github.com/kata-containers/kata-containers/src/agent/protocols/proto
 }

 if [ "$(basename $(pwd))" != "agent" ]; then
-	die "Please go to directory of protocols before execute this shell"
+	die "Please go to root directory of agent before execute this shell"
 fi

 # Protocol buffer files required to generate golang/rust bindings.
--- a/src/agent/protocols/protos/agent.proto
+++ b/src/agent/protocols/protos/agent.proto
@@ -32,7 +32,6 @@ service AgentService {
 	rpc ExecProcess(ExecProcessRequest) returns (google.protobuf.Empty);
 	rpc SignalProcess(SignalProcessRequest) returns (google.protobuf.Empty);
 	rpc WaitProcess(WaitProcessRequest) returns (WaitProcessResponse); // wait & reap like waitpid(2)
-	rpc ListProcesses(ListProcessesRequest) returns (ListProcessesResponse);
 	rpc UpdateContainer(UpdateContainerRequest) returns (google.protobuf.Empty);
 	rpc StatsContainer(StatsContainerRequest) returns (StatsContainerResponse);
 	rpc PauseContainer(PauseContainerRequest) returns (google.protobuf.Empty);
@@ -126,18 +125,6 @@ message WaitProcessResponse {
 	int32 status = 1;
 }

-// ListProcessesRequest contains the options used to list running processes inside the container
-message ListProcessesRequest {
-	string container_id = 1;
-	string format = 2;
-	repeated string args = 3;
-}
-
-// ListProcessesResponse represents the list of running processes inside the container
-message ListProcessesResponse {
-	bytes process_list = 1;
-}
-
 message UpdateContainerRequest {
 	string container_id = 1;
 	LinuxResources resources = 2;
--- a/src/agent/protocols/protos/oci.proto
+++ b/src/agent/protocols/protos/oci.proto
@@ -441,7 +441,8 @@ message LinuxInterfacePriority {
 message LinuxSeccomp {
 	string DefaultAction = 1;
 	repeated string Architectures = 2;
-	repeated LinuxSyscall Syscalls = 3  [(gogoproto.nullable) = false];
+	repeated string Flags = 3;
+	repeated LinuxSyscall Syscalls = 4  [(gogoproto.nullable) = false];
 }

 message LinuxSeccompArg {
@@ -454,7 +455,10 @@ message LinuxSeccompArg {
 message LinuxSyscall {
 	repeated string Names = 1;
 	string Action = 2;
-	repeated LinuxSeccompArg Args = 3  [(gogoproto.nullable) = false];
+	oneof ErrnoRet {
+		uint32 errnoret = 3;
+	}
+	repeated LinuxSeccompArg Args = 4  [(gogoproto.nullable) = false];
 }

 message LinuxIntelRdt {
--- a/src/agent/protocols/protos/types.proto
+++ b/src/agent/protocols/protos/types.proto
@@ -29,10 +29,8 @@ message Interface {
 	uint64 mtu = 4;
 	string hwAddr = 5;

-	// pciAddr is the PCI address in the format  "bridgeAddr/deviceAddr".
-	// Here, bridgeAddr is the address at which the bridge is attached on the root bus,
-	// while deviceAddr is the address at which the network device is attached on the bridge.
-	string pciAddr = 6;
+	// PCI path for the device (see the pci::Path (Rust) or types.PciPath (Go) type for format details)
+	string pciPath = 6;

 	// Type defines the type of interface described by this structure.
 	// The expected values are the one that are defined by the netlink
--- a/src/agent/rustjail/Cargo.toml
+++ b/src/agent/rustjail/Cargo.toml
@@ -10,23 +10,27 @@ serde_json = "1.0.39"
 serde_derive = "1.0.91"
 oci = { path = "../oci" }
 protocols = { path ="../protocols" }
-caps = "0.3.0"
+caps = "0.5.0"
 nix = "0.17.0"
 scopeguard = "1.0.0"
 prctl = "1.0.0"
 lazy_static = "1.3.0"
 libc = "0.2.58"
-protobuf = "2.8.1"
+protobuf = "=2.14.0"
 slog = "2.5.2"
 slog-scope = "4.1.2"
 scan_fmt = "0.2"
 regex = "1.1"
 path-absolutize = "1.2.0"
-dirs = "3.0.1"
 anyhow = "1.0.32"
-cgroups = { package = "cgroups-rs", version = "0.2.0" }
+cgroups = { package = "cgroups-rs", version = "0.2.5" }
 tempfile = "3.1.0"
-epoll = "4.3.1"
+rlimit = "0.5.3"
+
+tokio = { version = "1.2.0", features = ["sync", "io-util", "process", "time", "macros"] }
+futures = "0.3"
+async-trait = "0.1.31"
+inotify = "0.9.2"

 [dev-dependencies]
 serial_test = "0.5.0"
--- a/src/agent/rustjail/src/capabilities.rs
+++ b/src/agent/rustjail/src/capabilities.rs
@@ -9,97 +9,44 @@
 use crate::log_child;
 use crate::sync::write_count;
 use anyhow::{anyhow, Result};
-use caps::{self, CapSet, Capability, CapsHashSet};
+use caps::{self, runtime, CapSet, Capability, CapsHashSet};
 use oci::LinuxCapabilities;
-use std::collections::HashMap;
 use std::os::unix::io::RawFd;
-
-lazy_static! {
-    pub static ref CAPSMAP: HashMap<String, Capability> = {
-        let mut m = HashMap::new();
-        m.insert("CAP_CHOWN".to_string(), Capability::CAP_CHOWN);
-        m.insert("CAP_DAC_OVERRIDE".to_string(), Capability::CAP_DAC_OVERRIDE);
-        m.insert(
-            "CAP_DAC_READ_SEARCH".to_string(),
-            Capability::CAP_DAC_READ_SEARCH,
-        );
-        m.insert("CAP_FOWNER".to_string(), Capability::CAP_FOWNER);
-        m.insert("CAP_FSETID".to_string(), Capability::CAP_FSETID);
-        m.insert("CAP_KILL".to_string(), Capability::CAP_KILL);
-        m.insert("CAP_SETGID".to_string(), Capability::CAP_SETGID);
-        m.insert("CAP_SETUID".to_string(), Capability::CAP_SETUID);
-        m.insert("CAP_SETPCAP".to_string(), Capability::CAP_SETPCAP);
-        m.insert(
-            "CAP_LINUX_IMMUTABLE".to_string(),
-            Capability::CAP_LINUX_IMMUTABLE,
-        );
-        m.insert(
-            "CAP_NET_BIND_SERVICE".to_string(),
-            Capability::CAP_NET_BIND_SERVICE,
-        );
-        m.insert(
-            "CAP_NET_BROADCAST".to_string(),
-            Capability::CAP_NET_BROADCAST,
-        );
-        m.insert("CAP_NET_ADMIN".to_string(), Capability::CAP_NET_ADMIN);
-        m.insert("CAP_NET_RAW".to_string(), Capability::CAP_NET_RAW);
-        m.insert("CAP_IPC_LOCK".to_string(), Capability::CAP_IPC_LOCK);
-        m.insert("CAP_IPC_OWNER".to_string(), Capability::CAP_IPC_OWNER);
-        m.insert("CAP_SYS_MODULE".to_string(), Capability::CAP_SYS_MODULE);
-        m.insert("CAP_SYS_RAWIO".to_string(), Capability::CAP_SYS_RAWIO);
-        m.insert("CAP_SYS_CHROOT".to_string(), Capability::CAP_SYS_CHROOT);
-        m.insert("CAP_SYS_PTRACE".to_string(), Capability::CAP_SYS_PTRACE);
-        m.insert("CAP_SYS_PACCT".to_string(), Capability::CAP_SYS_PACCT);
-        m.insert("CAP_SYS_ADMIN".to_string(), Capability::CAP_SYS_ADMIN);
-        m.insert("CAP_SYS_BOOT".to_string(), Capability::CAP_SYS_BOOT);
-        m.insert("CAP_SYS_NICE".to_string(), Capability::CAP_SYS_NICE);
-        m.insert("CAP_SYS_RESOURCE".to_string(), Capability::CAP_SYS_RESOURCE);
-        m.insert("CAP_SYS_TIME".to_string(), Capability::CAP_SYS_TIME);
-        m.insert(
-            "CAP_SYS_TTY_CONFIG".to_string(),
-            Capability::CAP_SYS_TTY_CONFIG,
-        );
-        m.insert("CAP_MKNOD".to_string(), Capability::CAP_MKNOD);
-        m.insert("CAP_LEASE".to_string(), Capability::CAP_LEASE);
-        m.insert("CAP_AUDIT_WRITE".to_string(), Capability::CAP_AUDIT_WRITE);
-        m.insert("CAP_AUDIT_CONTROL".to_string(), Capability::CAP_AUDIT_WRITE);
-        m.insert("CAP_SETFCAP".to_string(), Capability::CAP_SETFCAP);
-        m.insert("CAP_MAC_OVERRIDE".to_string(), Capability::CAP_MAC_OVERRIDE);
-        m.insert("CAP_SYSLOG".to_string(), Capability::CAP_SYSLOG);
-        m.insert("CAP_WAKE_ALARM".to_string(), Capability::CAP_WAKE_ALARM);
-        m.insert(
-            "CAP_BLOCK_SUSPEND".to_string(),
-            Capability::CAP_BLOCK_SUSPEND,
-        );
-        m.insert("CAP_AUDIT_READ".to_string(), Capability::CAP_AUDIT_READ);
-        m
-    };
-}
+use std::str::FromStr;

 fn to_capshashset(cfd_log: RawFd, caps: &[String]) -> CapsHashSet {
    let mut r = CapsHashSet::new();

    for cap in caps.iter() {
-        let c = CAPSMAP.get(cap);
-
-        if c.is_none() {
-            log_child!(cfd_log, "{} is not a cap", cap);
-            continue;
-        }
-
-        r.insert(*c.unwrap());
+        match Capability::from_str(cap) {
+            Err(_) => {
+                log_child!(cfd_log, "{} is not a cap", cap);
+                continue;
+            }
+            Ok(c) => r.insert(c),
+        };
    }

    r
 }

+pub fn get_all_caps() -> CapsHashSet {
+    let mut caps_set =
+        runtime::procfs_all_supported(None).unwrap_or_else(|_| runtime::thread_all_supported());
+    if caps_set.is_empty() {
+        caps_set = caps::all();
+    }
+    caps_set
+}
+
 pub fn reset_effective() -> Result<()> {
-    caps::set(None, CapSet::Effective, caps::all()).map_err(|e| anyhow!(e.to_string()))?;
+    let all = get_all_caps();
+    caps::set(None, CapSet::Effective, &all).map_err(|e| anyhow!(e.to_string()))?;
    Ok(())
 }

 pub fn drop_privileges(cfd_log: RawFd, caps: &LinuxCapabilities) -> Result<()> {
-    let all = caps::all();
+    let all = get_all_caps();

    for c in all.difference(&to_capshashset(cfd_log, caps.bounding.as_ref())) {
        caps::drop(None, CapSet::Bounding, *c).map_err(|e| anyhow!(e.to_string()))?;
@@ -108,26 +55,26 @@ pub fn drop_privileges(cfd_log: RawFd, caps: &LinuxCapabilities) -> Result<()> {
    caps::set(
        None,
        CapSet::Effective,
-        to_capshashset(cfd_log, caps.effective.as_ref()),
+        &to_capshashset(cfd_log, caps.effective.as_ref()),
    )
    .map_err(|e| anyhow!(e.to_string()))?;
    caps::set(
        None,
        CapSet::Permitted,
-        to_capshashset(cfd_log, caps.permitted.as_ref()),
+        &to_capshashset(cfd_log, caps.permitted.as_ref()),
    )
    .map_err(|e| anyhow!(e.to_string()))?;
    caps::set(
        None,
        CapSet::Inheritable,
-        to_capshashset(cfd_log, caps.inheritable.as_ref()),
+        &to_capshashset(cfd_log, caps.inheritable.as_ref()),
    )
    .map_err(|e| anyhow!(e.to_string()))?;

    let _ = caps::set(
        None,
        CapSet::Ambient,
-        to_capshashset(cfd_log, caps.ambient.as_ref()),
+        &to_capshashset(cfd_log, caps.ambient.as_ref()),
    )
    .map_err(|_| log_child!(cfd_log, "failed to set ambient capability"));

--- a/src/agent/rustjail/src/cgroups/fs/mod.rs
+++ b/src/agent/rustjail/src/cgroups/fs/mod.rs
@@ -24,7 +24,7 @@ use anyhow::{anyhow, Context, Result};
 use libc::{self, pid_t};
 use nix::errno::Errno;
 use oci::{
-    LinuxBlockIO, LinuxCPU, LinuxDevice, LinuxDeviceCgroup, LinuxHugepageLimit, LinuxMemory,
+    LinuxBlockIo, LinuxCpu, LinuxDevice, LinuxDeviceCgroup, LinuxHugepageLimit, LinuxMemory,
    LinuxNetwork, LinuxPids, LinuxResources,
 };

@@ -37,6 +37,8 @@ use std::collections::HashMap;
 use std::fs;
 use std::path::Path;

+const GUEST_CPUS_PATH: &str = "/sys/devices/system/cpu/online";
+
 // Convenience macro to obtain the scope logger
 macro_rules! sl {
    () => {
@@ -60,7 +62,6 @@ pub struct Manager {
    pub cpath: String,
    #[serde(skip)]
    cgroup: cgroups::Cgroup,
-    relative_paths: HashMap<String, String>,
 }

 // set_resource is used to set reources by cgroup controller.
@@ -104,21 +105,21 @@ impl CgroupManager for Manager {

        // set block_io resources
        if let Some(blkio) = &r.block_io {
-            set_block_io_resources(&self.cgroup, blkio, res)?;
+            set_block_io_resources(&self.cgroup, blkio, res);
        }

        // set hugepages resources
        if !r.hugepage_limits.is_empty() {
-            set_hugepages_resources(&self.cgroup, &r.hugepage_limits, res)?;
+            set_hugepages_resources(&self.cgroup, &r.hugepage_limits, res);
        }

        // set network resources
        if let Some(network) = &r.network {
-            set_network_resources(&self.cgroup, network, res)?;
+            set_network_resources(&self.cgroup, network, res);
        }

        // set devices resources
-        set_devices_resources(&self.cgroup, &r.devices, res)?;
+        set_devices_resources(&self.cgroup, &r.devices, res);
        info!(sl!(), "resources after processed {:?}", res);

        // apply resources
@@ -199,7 +200,7 @@ fn set_network_resources(
    _cg: &cgroups::Cgroup,
    network: &LinuxNetwork,
    res: &mut cgroups::Resources,
-) -> Result<()> {
+) {
    info!(sl!(), "cgroup manager set network");

    // set classid
@@ -220,14 +221,13 @@ fn set_network_resources(
    }

    res.network.priorities = priorities;
-    Ok(())
 }

 fn set_devices_resources(
    _cg: &cgroups::Cgroup,
    device_resources: &[LinuxDeviceCgroup],
    res: &mut cgroups::Resources,
-) -> Result<()> {
+) {
    info!(sl!(), "cgroup manager set devices");
    let mut devices = vec![];

@@ -250,15 +250,13 @@ fn set_devices_resources(
    }

    res.devices.devices = devices;
-
-    Ok(())
 }

 fn set_hugepages_resources(
    _cg: &cgroups::Cgroup,
    hugepage_limits: &[LinuxHugepageLimit],
    res: &mut cgroups::Resources,
-) -> Result<()> {
+) {
    info!(sl!(), "cgroup manager set hugepage");
    let mut limits = vec![];

@@ -270,41 +268,25 @@ fn set_hugepages_resources(
        limits.push(hr);
    }
    res.hugepages.limits = limits;
-
-    Ok(())
 }

 fn set_block_io_resources(
-    cg: &cgroups::Cgroup,
-    blkio: &LinuxBlockIO,
+    _cg: &cgroups::Cgroup,
+    blkio: &LinuxBlockIo,
    res: &mut cgroups::Resources,
-) -> Result<()> {
+) {
    info!(sl!(), "cgroup manager set block io");

-    if cg.v2() {
-        res.blkio.weight = convert_blk_io_to_v2_value(blkio.weight);
-        res.blkio.leaf_weight = convert_blk_io_to_v2_value(blkio.leaf_weight);
-    } else {
-        res.blkio.weight = blkio.weight;
-        res.blkio.leaf_weight = blkio.leaf_weight;
-    }
+    res.blkio.weight = blkio.weight;
+    res.blkio.leaf_weight = blkio.leaf_weight;

    let mut blk_device_resources = vec![];
    for d in blkio.weight_device.iter() {
-        let (w, lw) = if cg.v2() {
-            (
-                convert_blk_io_to_v2_value(blkio.weight),
-                convert_blk_io_to_v2_value(blkio.leaf_weight),
-            )
-        } else {
-            (blkio.weight, blkio.leaf_weight)
-        };
-
        let dr = BlkIoDeviceResource {
            major: d.blk.major as u64,
            minor: d.blk.minor as u64,
-            weight: w,
-            leaf_weight: lw,
+            weight: blkio.weight,
+            leaf_weight: blkio.leaf_weight,
        };
        blk_device_resources.push(dr);
    }
@@ -318,11 +300,9 @@ fn set_block_io_resources(
        build_blk_io_device_throttle_resource(&blkio.throttle_read_iops_device);
    res.blkio.throttle_write_iops_device =
        build_blk_io_device_throttle_resource(&blkio.throttle_write_iops_device);
-
-    Ok(())
 }

-fn set_cpu_resources(cg: &cgroups::Cgroup, cpu: &LinuxCPU) -> Result<()> {
+fn set_cpu_resources(cg: &cgroups::Cgroup, cpu: &LinuxCpu) -> Result<()> {
    info!(sl!(), "cgroup manager set cpu");

    let cpuset_controller: &CpuSetController = cg.controller_of().unwrap();
@@ -369,14 +349,34 @@ fn set_memory_resources(cg: &cgroups::Cgroup, memory: &LinuxMemory, update: bool
        mem_controller.set_kmem_limit(-1)?;
    }

-    set_resource!(mem_controller, set_limit, memory, limit);
-    set_resource!(mem_controller, set_soft_limit, memory, reservation);
-    set_resource!(mem_controller, set_kmem_limit, memory, kernel);
-    set_resource!(mem_controller, set_tcp_limit, memory, kernel_tcp);
+    // If the memory limit is being updated to -1 (unlimited), swap must
+    // also be set to -1.
+    let mut swap = memory.swap.unwrap_or(0);
+    if memory.limit == Some(-1) {
+        swap = -1;
+    }

-    if let Some(swap) = memory.swap {
-        // set memory swap
-        let swap = if cg.v2() {
+    if memory.limit.is_some() && swap != 0 {
+        let memstat = get_memory_stats(cg)
+            .into_option()
+            .ok_or_else(|| anyhow!("failed to get the cgroup memory stats"))?;
+        let memusage = memstat.get_usage();
+
+        // When updating the memory limit, the kernel checks the current memory limit
+        // against the new swap setting. If the current memory limit is larger than
+        // the new swap, set the limit first, otherwise the kernel would complain and
+        // refuse to set it; on the other hand, if the current memory limit is smaller
+        // than the new swap, set the swap first and then set the memory limit.
+        if swap == -1 || memusage.get_limit() < swap as u64 {
+            mem_controller.set_memswap_limit(swap)?;
+            set_resource!(mem_controller, set_limit, memory, limit);
+        } else {
+            set_resource!(mem_controller, set_limit, memory, limit);
+            mem_controller.set_memswap_limit(swap)?;
+        }
+    } else {
+        set_resource!(mem_controller, set_limit, memory, limit);
+        swap = if cg.v2() {
            convert_memory_swap_to_v2_value(swap, memory.limit.unwrap_or(0))?
        } else {
            swap
@@ -386,8 +386,12 @@ fn set_memory_resources(cg: &cgroups::Cgroup, memory: &LinuxMemory, update: bool
        }
    }

+    set_resource!(mem_controller, set_soft_limit, memory, reservation);
+    set_resource!(mem_controller, set_kmem_limit, memory, kernel);
+    set_resource!(mem_controller, set_tcp_limit, memory, kernel_tcp);
+
    if let Some(swappiness) = memory.swappiness {
-        if swappiness >= 0 && swappiness <= 100 {
+        if (0..=100).contains(&swappiness) {
            mem_controller.set_swappiness(swappiness as u64)?;
        } else {
            return Err(anyhow!(
@@ -509,63 +513,61 @@ lazy_static! {
    };

    pub static ref DEFAULT_ALLOWED_DEVICES: Vec<LinuxDeviceCgroup> = {
-        let mut v = Vec::new();
+        vec![
+            // all mknod to all char devices
+            LinuxDeviceCgroup {
+                allow: true,
+                r#type: "c".to_string(),
+                major: Some(WILDCARD),
+                minor: Some(WILDCARD),
+                access: "m".to_string(),
+            },

-        // all mknod to all char devices
-        v.push(LinuxDeviceCgroup {
-            allow: true,
-            r#type: "c".to_string(),
-            major: Some(WILDCARD),
-            minor: Some(WILDCARD),
-            access: "m".to_string(),
-        });
+            // all mknod to all block devices
+            LinuxDeviceCgroup {
+                allow: true,
+                r#type: "b".to_string(),
+                major: Some(WILDCARD),
+                minor: Some(WILDCARD),
+                access: "m".to_string(),
+            },

-        // all mknod to all block devices
-        v.push(LinuxDeviceCgroup {
-            allow: true,
-            r#type: "b".to_string(),
-            major: Some(WILDCARD),
-            minor: Some(WILDCARD),
-            access: "m".to_string(),
-        });
+            // all read/write/mknod to char device /dev/console
+            LinuxDeviceCgroup {
+                allow: true,
+                r#type: "c".to_string(),
+                major: Some(5),
+                minor: Some(1),
+                access: "rwm".to_string(),
+            },

-        // all read/write/mknod to char device /dev/console
-        v.push(LinuxDeviceCgroup {
-            allow: true,
-            r#type: "c".to_string(),
-            major: Some(5),
-            minor: Some(1),
-            access: "rwm".to_string(),
-        });
+            // all read/write/mknod to char device /dev/pts/<N>
+            LinuxDeviceCgroup {
+                allow: true,
+                r#type: "c".to_string(),
+                major: Some(136),
+                minor: Some(WILDCARD),
+                access: "rwm".to_string(),
+            },

-        // all read/write/mknod to char device /dev/pts/<N>
-        v.push(LinuxDeviceCgroup {
-            allow: true,
-            r#type: "c".to_string(),
-            major: Some(136),
-            minor: Some(WILDCARD),
-            access: "rwm".to_string(),
-        });
+            // all read/write/mknod to char device /dev/ptmx
+            LinuxDeviceCgroup {
+                allow: true,
+                r#type: "c".to_string(),
+                major: Some(5),
+                minor: Some(2),
+                access: "rwm".to_string(),
+            },

-        // all read/write/mknod to char device /dev/ptmx
-        v.push(LinuxDeviceCgroup {
-            allow: true,
-            r#type: "c".to_string(),
-            major: Some(5),
-            minor: Some(2),
-            access: "rwm".to_string(),
-        });
-
-        // all read/write/mknod to char device /dev/net/tun
-        v.push(LinuxDeviceCgroup {
-            allow: true,
-            r#type: "c".to_string(),
-            major: Some(10),
-            minor: Some(200),
-            access: "rwm".to_string(),
-        });
-
-        v
+            // all read/write/mknod to char device /dev/net/tun
+            LinuxDeviceCgroup {
+                allow: true,
+                r#type: "c".to_string(),
+                major: Some(10),
+                minor: Some(200),
+                access: "rwm".to_string(),
+            },
+        ]
    };
 }

@@ -946,38 +948,28 @@ pub fn get_mounts() -> Result<HashMap<String, String>> {
    Ok(m)
 }

-fn new_cgroup(
-    h: Box<dyn cgroups::Hierarchy>,
-    path: &str,
-    relative_paths: HashMap<String, String>,
-) -> Cgroup {
+fn new_cgroup(h: Box<dyn cgroups::Hierarchy>, path: &str) -> Cgroup {
    let valid_path = path.trim_start_matches('/').to_string();
-    cgroups::Cgroup::new_with_relative_paths(h, valid_path.as_str(), relative_paths)
+    cgroups::Cgroup::new(h, valid_path.as_str())
 }

 impl Manager {
    pub fn new(cpath: &str) -> Result<Self> {
        let mut m = HashMap::new();
-        let mut relative_paths = HashMap::new();

        let paths = get_paths()?;
        let mounts = get_mounts()?;

-        for (key, value) in &paths {
+        for key in paths.keys() {
            let mnt = mounts.get(key);

            if mnt.is_none() {
                continue;
            }

-            let p = if value == "/" {
-                format!("{}/{}", mnt.unwrap(), cpath)
-            } else {
-                format!("{}{}/{}", mnt.unwrap(), value, cpath)
-            };
+            let p = format!("{}/{}", mnt.unwrap(), cpath);

            m.insert(key.to_string(), p);
-            relative_paths.insert(key.to_string(), value.to_string());
        }

        Ok(Self {
@@ -985,13 +977,12 @@ impl Manager {
            mounts,
            // rels: paths,
            cpath: cpath.to_string(),
-            cgroup: new_cgroup(cgroups::hierarchies::auto(), cpath, relative_paths.clone()),
-            relative_paths,
+            cgroup: new_cgroup(cgroups::hierarchies::auto(), cpath),
        })
    }

    pub fn update_cpuset_path(&self, guest_cpuset: &str, container_cpuset: &str) -> Result<()> {
-        if guest_cpuset == "" {
+        if guest_cpuset.is_empty() {
            return Ok(());
        }
        info!(sl!(), "update_cpuset_path to: {}", guest_cpuset);
@@ -1031,11 +1022,7 @@ impl Manager {
                .unwrap()
                .trim_start_matches(root_path.to_str().unwrap());
            info!(sl!(), "updating cpuset for parent path {:?}", &r_path);
-            let cg = new_cgroup(
-                cgroups::hierarchies::auto(),
-                &r_path,
-                self.relative_paths.clone(),
-            );
+            let cg = new_cgroup(cgroups::hierarchies::auto(), &r_path);
            let cpuset_controller: &CpuSetController = cg.controller_of().unwrap();
            cpuset_controller.set_cpus(guest_cpuset)?;
        }
@@ -1064,23 +1051,10 @@ impl Manager {
    }
 }

+// get the guest's online cpus.
 pub fn get_guest_cpuset() -> Result<String> {
-    // for cgroup v2
-    if cgroups::hierarchies::is_cgroup2_unified_mode() {
-        let c = fs::read_to_string("/sys/fs/cgroup/cpuset.cpus.effective")?;
-        return Ok(c);
-    }
-
-    // for cgroup v1
-    let m = get_mounts()?;
-    if m.get("cpuset").is_none() {
-        warn!(sl!(), "no cpuset cgroup!");
-        return Err(nix::Error::Sys(Errno::ENOENT).into());
-    }
-
-    let p = format!("{}/cpuset.cpus", m.get("cpuset").unwrap());
-    let c = fs::read_to_string(p.as_str())?;
-    Ok(c)
+    let c = fs::read_to_string(GUEST_CPUS_PATH)?;
+    Ok(c.trim().to_string())
 }

 // Since the OCI spec is designed for cgroup v1, in some cases
@@ -1123,20 +1097,6 @@ fn convert_memory_swap_to_v2_value(memory_swap: i64, memory: i64) -> Result<i64>
    Ok(memory_swap - memory)
 }

-// Since the OCI spec is designed for cgroup v1, in some cases
-// there is need to convert from the cgroup v1 configuration to cgroup v2
-// the formula for BlkIOWeight is y = (1 + (x - 10) * 9999 / 990)
-// convert linearly from [10-1000] to [1-10000]
-// https://github.com/opencontainers/runc/blob/a5847db387ae28c0ca4ebe4beee1a76900c86414/libcontainer/cgroups/utils.go#L382
-fn convert_blk_io_to_v2_value(blk_io_weight: Option<u16>) -> Option<u16> {
-    let v = blk_io_weight.unwrap_or(0);
-    if v != 0 {
-        return None;
-    }
-
-    Some(1 + (v - 10) * 9999 / 990 as u16)
-}
-
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/src/agent/rustjail/src/cgroups/notifier.rs
+++ b/src/agent/rustjail/src/cgroups/notifier.rs
@@ -3,16 +3,18 @@
 // SPDX-License-Identifier: Apache-2.0
 //

-use anyhow::{anyhow, Result};
+use anyhow::{anyhow, Context, Result};
 use eventfd::{eventfd, EfdFlags};
 use nix::sys::eventfd;
-use nix::sys::inotify::{AddWatchFlags, InitFlags, Inotify};
 use std::fs::{self, File};
-use std::io::Read;
 use std::os::unix::io::{AsRawFd, FromRawFd};
-use std::path::{Path, PathBuf};
-use std::sync::mpsc::{self, Receiver};
-use std::thread;
+use std::path::Path;
+
+use crate::pipestream::PipeStream;
+use futures::StreamExt as _;
+use inotify::{Inotify, WatchMask};
+use tokio::io::AsyncReadExt;
+use tokio::sync::mpsc::{channel, Receiver};

 // Convenience macro to obtain the scope logger
 macro_rules! sl {
@@ -21,11 +23,11 @@ macro_rules! sl {
    };
 }

-pub fn notify_oom(cid: &str, cg_dir: String) -> Result<Receiver<String>> {
+pub async fn notify_oom(cid: &str, cg_dir: String) -> Result<Receiver<String>> {
    if cgroups::hierarchies::is_cgroup2_unified_mode() {
-        return notify_on_oom_v2(cid, cg_dir);
+        return notify_on_oom_v2(cid, cg_dir).await;
    }
-    notify_on_oom(cid, cg_dir)
+    notify_on_oom(cid, cg_dir).await
 }

 // get_value_from_cgroup parse cgroup file with `Flat keyed`
@@ -33,7 +35,7 @@ pub fn notify_oom(cid: &str, cg_dir: String) -> Result<Receiver<String>> {
 // Flat keyed file format:
 //   KEY0 VAL0\n
 //   KEY1 VAL1\n
-fn get_value_from_cgroup(path: &PathBuf, key: &str) -> Result<i64> {
+fn get_value_from_cgroup(path: &Path, key: &str) -> Result<i64> {
    let content = fs::read_to_string(path)?;
    info!(
        sl!(),
@@ -52,11 +54,11 @@ fn get_value_from_cgroup(path: &PathBuf, key: &str) -> Result<i64> {

 // notify_on_oom returns channel on which you can expect event about OOM,
 // if process died without OOM this channel will be closed.
-pub fn notify_on_oom_v2(containere_id: &str, cg_dir: String) -> Result<Receiver<String>> {
-    register_memory_event_v2(containere_id, cg_dir, "memory.events", "cgroup.events")
+pub async fn notify_on_oom_v2(containere_id: &str, cg_dir: String) -> Result<Receiver<String>> {
+    register_memory_event_v2(containere_id, cg_dir, "memory.events", "cgroup.events").await
 }

-fn register_memory_event_v2(
+async fn register_memory_event_v2(
    containere_id: &str,
    cg_dir: String,
    memory_event_name: &str,
@@ -73,49 +75,49 @@ fn register_memory_event_v2(
        "register_memory_event_v2 cgroup_event_control_path: {:?}", &cgroup_event_control_path
    );

-    let fd = Inotify::init(InitFlags::empty()).unwrap();
+    let mut inotify = Inotify::init().context("Failed to initialize inotify")?;

    // watching oom kill
-    let ev_fd = fd
-        .add_watch(&event_control_path, AddWatchFlags::IN_MODIFY)
-        .unwrap();
+    let ev_wd = inotify.add_watch(&event_control_path, WatchMask::MODIFY)?;
    // Because no `unix.IN_DELETE|unix.IN_DELETE_SELF` event for cgroup file system, so watching all process exited
-    let cg_fd = fd
-        .add_watch(&cgroup_event_control_path, AddWatchFlags::IN_MODIFY)
-        .unwrap();
-    info!(sl!(), "ev_fd: {:?}", ev_fd);
-    info!(sl!(), "cg_fd: {:?}", cg_fd);
+    let cg_wd = inotify.add_watch(&cgroup_event_control_path, WatchMask::MODIFY)?;

-    let (sender, receiver) = mpsc::channel();
+    info!(sl!(), "ev_wd: {:?}", ev_wd);
+    info!(sl!(), "cg_wd: {:?}", cg_wd);
+
+    let (sender, receiver) = channel(100);
    let containere_id = containere_id.to_string();

-    thread::spawn(move || {
-        loop {
-            let events = fd.read_events().unwrap();
+    tokio::spawn(async move {
+        let mut buffer = [0; 32];
+        let mut stream = inotify
+            .event_stream(&mut buffer)
+            .expect("create inotify event stream failed");
+
+        while let Some(event_or_error) = stream.next().await {
+            let event = event_or_error.unwrap();
            info!(
                sl!(),
-                "container[{}] get events for container: {:?}", &containere_id, &events
+                "container[{}] get event for container: {:?}", &containere_id, &event
            );
+            // info!("is1: {}", event.wd == wd1);
+            info!(sl!(), "event.wd: {:?}", event.wd);

-            for event in events {
-                if event.mask & AddWatchFlags::IN_MODIFY != AddWatchFlags::IN_MODIFY {
-                    continue;
+            if event.wd == ev_wd {
+                let oom = get_value_from_cgroup(&event_control_path, "oom_kill");
+                if oom.unwrap_or(0) > 0 {
+                    let _ = sender.send(containere_id.clone()).await.map_err(|e| {
+                        error!(sl!(), "send containere_id failed, error: {:?}", e);
+                    });
+                    return;
                }
-                info!(sl!(), "event.wd: {:?}", event.wd);
-
-                if event.wd == ev_fd {
-                    let oom = get_value_from_cgroup(&event_control_path, "oom_kill");
-                    if oom.unwrap_or(0) > 0 {
-                        sender.send(containere_id.clone()).unwrap();
-                        return;
-                    }
-                } else if event.wd == cg_fd {
-                    let pids = get_value_from_cgroup(&cgroup_event_control_path, "populated");
-                    if pids.unwrap_or(-1) == 0 {
-                        return;
-                    }
+            } else if event.wd == cg_wd {
+                let pids = get_value_from_cgroup(&cgroup_event_control_path, "populated");
+                if pids.unwrap_or(-1) == 0 {
+                    return;
                }
            }
+
            // When a cgroup is destroyed, an event is sent to eventfd.
            // So if the control path is gone, return instead of notifying.
            if !Path::new(&event_control_path).exists() {
@@ -129,17 +131,17 @@ fn register_memory_event_v2(

 // notify_on_oom returns channel on which you can expect event about OOM,
 // if process died without OOM this channel will be closed.
-fn notify_on_oom(cid: &str, dir: String) -> Result<Receiver<String>> {
-    if dir == "" {
+async fn notify_on_oom(cid: &str, dir: String) -> Result<Receiver<String>> {
+    if dir.is_empty() {
        return Err(anyhow!("memory controller missing"));
    }

-    register_memory_event(cid, dir, "memory.oom_control", "")
+    register_memory_event(cid, dir, "memory.oom_control", "").await
 }

 // level is one of "low", "medium", or "critical"
-fn notify_memory_pressure(cid: &str, dir: String, level: &str) -> Result<Receiver<String>> {
-    if dir == "" {
+async fn notify_memory_pressure(cid: &str, dir: String, level: &str) -> Result<Receiver<String>> {
+    if dir.is_empty() {
        return Err(anyhow!("memory controller missing"));
    }

@@ -147,10 +149,10 @@ fn notify_memory_pressure(cid: &str, dir: String, level: &str) -> Result<Receive
        return Err(anyhow!("invalid pressure level {}", level));
    }

-    register_memory_event(cid, dir, "memory.pressure_level", level)
+    register_memory_event(cid, dir, "memory.pressure_level", level).await
 }

-fn register_memory_event(
+async fn register_memory_event(
    cid: &str,
    cg_dir: String,
    event_name: &str,
@@ -163,7 +165,7 @@ fn register_memory_event(

    let event_control_path = Path::new(&cg_dir).join("cgroup.event_control");
    let data;
-    if arg == "" {
+    if arg.is_empty() {
        data = format!("{} {}", eventfd, event_file.as_raw_fd());
    } else {
        data = format!("{} {} {}", eventfd, event_file.as_raw_fd(), arg);
@@ -171,15 +173,16 @@ fn register_memory_event(

    fs::write(&event_control_path, data)?;

-    let mut eventfd_file = unsafe { File::from_raw_fd(eventfd) };
+    let mut eventfd_stream = unsafe { PipeStream::from_raw_fd(eventfd) };

-    let (sender, receiver) = mpsc::channel();
+    let (sender, receiver) = tokio::sync::mpsc::channel(100);
    let containere_id = cid.to_string();

-    thread::spawn(move || {
+    tokio::spawn(async move {
        loop {
-            let mut buf = [0; 8];
-            match eventfd_file.read(&mut buf) {
+            let sender = sender.clone();
+            let mut buf = [0u8; 8];
+            match eventfd_stream.read(&mut buf).await {
                Err(err) => {
                    warn!(sl!(), "failed to read from eventfd: {:?}", err);
                    return;
@@ -188,7 +191,10 @@ fn register_memory_event(
                    let content = fs::read_to_string(path.clone());
                    info!(
                        sl!(),
-                        "OOM event for container: {}, content: {:?}", &containere_id, content
+                        "cgroup event for container: {}, path: {:?}, content: {:?}",
+                        &containere_id,
+                        &path,
+                        content
                    );
                }
            }
@@ -198,7 +204,10 @@ fn register_memory_event(
            if !Path::new(&event_control_path).exists() {
                return;
            }
-            sender.send(containere_id.clone()).unwrap();
+
+            let _ = sender.send(containere_id.clone()).await.map_err(|e| {
+                error!(sl!(), "send containere_id failed, error: {:?}", e);
+            });
        }
    });

--- a/src/agent/rustjail/src/configs/mod.rs
+++ b/src/agent/rustjail/src/configs/mod.rs
@@ -54,6 +54,8 @@ pub struct Seccomp {
    #[serde(default)]
    architectures: Vec<String>,
    #[serde(default)]
+    flags: Vec<String>,
+    #[serde(default)]
    syscalls: Vec<Syscall>,
 }

@@ -74,9 +76,11 @@ pub struct Arg {
 #[derive(Serialize, Deserialize, Debug)]
 pub struct Syscall {
    #[serde(default, skip_serializing_if = "String::is_empty")]
-    name: String,
+    names: String,
    #[serde(default)]
    action: Action,
+    #[serde(default, rename = "errnoRet")]
+    errno_ret: u32,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    args: Vec<Arg>,
 }
--- a/src/agent/rustjail/src/container.rs
+++ b/src/agent/rustjail/src/container.rs
--- a/src/agent/rustjail/src/lib.rs
+++ b/src/agent/rustjail/src/lib.rs
@@ -40,12 +40,13 @@ pub mod capabilities;
 pub mod cgroups;
 pub mod container;
 pub mod mount;
+pub mod pipestream;
 pub mod process;
-pub mod reaper;
 pub mod specconv;
 pub mod sync;
+pub mod sync_with_async;
+pub mod utils;
 pub mod validator;
-
 // pub mod factory;
 //pub mod configs;
 // pub mod devices;
@@ -57,24 +58,16 @@ pub mod validator;
 // pub mod user;
 //pub mod intelrdt;

-// construtc ociSpec from grpcSpec, which is needed for hook
-// execution. since hooks read config.json
-
-use oci::{
-    Box as ociBox, Hooks as ociHooks, Linux as ociLinux, LinuxCapabilities as ociLinuxCapabilities,
-    Mount as ociMount, POSIXRlimit as ociPOSIXRlimit, Process as ociProcess, Root as ociRoot,
-    Spec as ociSpec, User as ociUser,
-};
-use protocols::oci::{
-    Hooks as grpcHooks, Linux as grpcLinux, Mount as grpcMount, Process as grpcProcess,
-    Root as grpcRoot, Spec as grpcSpec,
-};
 use std::collections::HashMap;

-pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess {
+use protocols::oci as grpc;
+
+// Construct an oci::Spec from a grpc::Spec. This is needed for hook
+// execution, since hooks read config.json.
+pub fn process_grpc_to_oci(p: &grpc::Process) -> oci::Process {
    let console_size = if p.ConsoleSize.is_some() {
        let c = p.ConsoleSize.as_ref().unwrap();
-        Some(ociBox {
+        Some(oci::Box {
            height: c.Height,
            width: c.Width,
        })
@@ -84,14 +77,14 @@ pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess {

    let user = if p.User.is_some() {
        let u = p.User.as_ref().unwrap();
-        ociUser {
+        oci::User {
            uid: u.UID,
            gid: u.GID,
            additional_gids: u.AdditionalGids.clone(),
            username: u.Username.clone(),
        }
    } else {
-        ociUser {
+        oci::User {
            uid: 0,
            gid: 0,
            additional_gids: vec![],
@@ -102,7 +95,7 @@ pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess {
    let capabilities = if p.Capabilities.is_some() {
        let cap = p.Capabilities.as_ref().unwrap();

-        Some(ociLinuxCapabilities {
+        Some(oci::LinuxCapabilities {
            bounding: cap.Bounding.clone().into_vec(),
            effective: cap.Effective.clone().into_vec(),
            inheritable: cap.Inheritable.clone().into_vec(),
@@ -116,7 +109,7 @@ pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess {
    let rlimits = {
        let mut r = Vec::new();
        for lm in p.Rlimits.iter() {
-            r.push(ociPOSIXRlimit {
+            r.push(oci::PosixRlimit {
                r#type: lm.Type.clone(),
                hard: lm.Hard,
                soft: lm.Soft,
@@ -125,7 +118,7 @@ pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess {
        r
    };

-    ociProcess {
+    oci::Process {
        terminal: p.Terminal,
        console_size,
        user,
@@ -141,15 +134,15 @@ pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess {
    }
 }

-fn root_grpc_to_oci(root: &grpcRoot) -> ociRoot {
-    ociRoot {
+fn root_grpc_to_oci(root: &grpc::Root) -> oci::Root {
+    oci::Root {
        path: root.Path.clone(),
        readonly: root.Readonly,
    }
 }

-fn mount_grpc_to_oci(m: &grpcMount) -> ociMount {
-    ociMount {
+fn mount_grpc_to_oci(m: &grpc::Mount) -> oci::Mount {
+    oci::Mount {
        destination: m.destination.clone(),
        r#type: m.field_type.clone(),
        source: m.source.clone(),
@@ -157,13 +150,12 @@ fn mount_grpc_to_oci(m: &grpcMount) -> ociMount {
    }
 }

-use oci::Hook as ociHook;
 use protocols::oci::Hook as grpcHook;

-fn hook_grpc_to_oci(h: &[grpcHook]) -> Vec<ociHook> {
+fn hook_grpc_to_oci(h: &[grpcHook]) -> Vec<oci::Hook> {
    let mut r = Vec::new();
    for e in h.iter() {
-        r.push(ociHook {
+        r.push(oci::Hook {
            path: e.Path.clone(),
            args: e.Args.clone().into_vec(),
            env: e.Env.clone().into_vec(),
@@ -173,39 +165,29 @@ fn hook_grpc_to_oci(h: &[grpcHook]) -> Vec<ociHook> {
    r
 }

-fn hooks_grpc_to_oci(h: &grpcHooks) -> ociHooks {
+fn hooks_grpc_to_oci(h: &grpc::Hooks) -> oci::Hooks {
    let prestart = hook_grpc_to_oci(h.Prestart.as_ref());

    let poststart = hook_grpc_to_oci(h.Poststart.as_ref());

    let poststop = hook_grpc_to_oci(h.Poststop.as_ref());

-    ociHooks {
+    oci::Hooks {
        prestart,
        poststart,
        poststop,
    }
 }

-use oci::{
-    LinuxDevice as ociLinuxDevice, LinuxIDMapping as ociLinuxIDMapping,
-    LinuxIntelRdt as ociLinuxIntelRdt, LinuxNamespace as ociLinuxNamespace,
-    LinuxResources as ociLinuxResources, LinuxSeccomp as ociLinuxSeccomp,
-};
-use protocols::oci::{
-    LinuxIDMapping as grpcLinuxIDMapping, LinuxResources as grpcLinuxResources,
-    LinuxSeccomp as grpcLinuxSeccomp,
-};
-
-fn idmap_grpc_to_oci(im: &grpcLinuxIDMapping) -> ociLinuxIDMapping {
-    ociLinuxIDMapping {
+fn idmap_grpc_to_oci(im: &grpc::LinuxIDMapping) -> oci::LinuxIdMapping {
+    oci::LinuxIdMapping {
        container_id: im.ContainerID,
        host_id: im.HostID,
        size: im.Size,
    }
 }

-fn idmaps_grpc_to_oci(ims: &[grpcLinuxIDMapping]) -> Vec<ociLinuxIDMapping> {
+fn idmaps_grpc_to_oci(ims: &[grpc::LinuxIDMapping]) -> Vec<oci::LinuxIdMapping> {
    let mut r = Vec::new();
    for im in ims.iter() {
        r.push(idmap_grpc_to_oci(im));
@@ -213,24 +195,13 @@ fn idmaps_grpc_to_oci(ims: &[grpcLinuxIDMapping]) -> Vec<ociLinuxIDMapping> {
    r
 }

-use oci::{
-    LinuxBlockIO as ociLinuxBlockIO, LinuxBlockIODevice as ociLinuxBlockIODevice,
-    LinuxCPU as ociLinuxCPU, LinuxDeviceCgroup as ociLinuxDeviceCgroup,
-    LinuxHugepageLimit as ociLinuxHugepageLimit,
-    LinuxInterfacePriority as ociLinuxInterfacePriority, LinuxMemory as ociLinuxMemory,
-    LinuxNetwork as ociLinuxNetwork, LinuxPids as ociLinuxPids,
-    LinuxThrottleDevice as ociLinuxThrottleDevice, LinuxWeightDevice as ociLinuxWeightDevice,
-};
-use protocols::oci::{
-    LinuxBlockIO as grpcLinuxBlockIO, LinuxThrottleDevice as grpcLinuxThrottleDevice,
-    LinuxWeightDevice as grpcLinuxWeightDevice,
-};
-
-fn throttle_devices_grpc_to_oci(tds: &[grpcLinuxThrottleDevice]) -> Vec<ociLinuxThrottleDevice> {
+fn throttle_devices_grpc_to_oci(
+    tds: &[grpc::LinuxThrottleDevice],
+) -> Vec<oci::LinuxThrottleDevice> {
    let mut r = Vec::new();
    for td in tds.iter() {
-        r.push(ociLinuxThrottleDevice {
-            blk: ociLinuxBlockIODevice {
+        r.push(oci::LinuxThrottleDevice {
+            blk: oci::LinuxBlockIoDevice {
                major: td.Major,
                minor: td.Minor,
            },
@@ -240,11 +211,11 @@ fn throttle_devices_grpc_to_oci(tds: &[grpcLinuxThrottleDevice]) -> Vec<ociLinux
    r
 }

-fn weight_devices_grpc_to_oci(wds: &[grpcLinuxWeightDevice]) -> Vec<ociLinuxWeightDevice> {
+fn weight_devices_grpc_to_oci(wds: &[grpc::LinuxWeightDevice]) -> Vec<oci::LinuxWeightDevice> {
    let mut r = Vec::new();
    for wd in wds.iter() {
-        r.push(ociLinuxWeightDevice {
-            blk: ociLinuxBlockIODevice {
+        r.push(oci::LinuxWeightDevice {
+            blk: oci::LinuxBlockIoDevice {
                major: wd.Major,
                minor: wd.Minor,
            },
@@ -255,7 +226,7 @@ fn weight_devices_grpc_to_oci(wds: &[grpcLinuxWeightDevice]) -> Vec<ociLinuxWeig
    r
 }

-fn blockio_grpc_to_oci(blk: &grpcLinuxBlockIO) -> ociLinuxBlockIO {
+fn blockio_grpc_to_oci(blk: &grpc::LinuxBlockIO) -> oci::LinuxBlockIo {
    let weight_device = weight_devices_grpc_to_oci(blk.WeightDevice.as_ref());
    let throttle_read_bps_device = throttle_devices_grpc_to_oci(blk.ThrottleReadBpsDevice.as_ref());
    let throttle_write_bps_device =
@@ -265,7 +236,7 @@ fn blockio_grpc_to_oci(blk: &grpcLinuxBlockIO) -> ociLinuxBlockIO {
    let throttle_write_iops_device =
        throttle_devices_grpc_to_oci(blk.ThrottleWriteIOPSDevice.as_ref());

-    ociLinuxBlockIO {
+    oci::LinuxBlockIo {
        weight: Some(blk.Weight as u16),
        leaf_weight: Some(blk.LeafWeight as u16),
        weight_device,
@@ -276,7 +247,7 @@ fn blockio_grpc_to_oci(blk: &grpcLinuxBlockIO) -> ociLinuxBlockIO {
    }
 }

-pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {
+pub fn resources_grpc_to_oci(res: &grpc::LinuxResources) -> oci::LinuxResources {
    let devices = {
        let mut d = Vec::new();
        for dev in res.Devices.iter() {
@@ -291,7 +262,7 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {
            } else {
                Some(dev.Minor)
            };
-            d.push(ociLinuxDeviceCgroup {
+            d.push(oci::LinuxDeviceCgroup {
                allow: dev.Allow,
                r#type: dev.Type.clone(),
                major,
@@ -304,7 +275,7 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {

    let memory = if res.Memory.is_some() {
        let mem = res.Memory.as_ref().unwrap();
-        Some(ociLinuxMemory {
+        Some(oci::LinuxMemory {
            limit: Some(mem.Limit),
            reservation: Some(mem.Reservation),
            swap: Some(mem.Swap),
@@ -319,7 +290,7 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {

    let cpu = if res.CPU.is_some() {
        let c = res.CPU.as_ref().unwrap();
-        Some(ociLinuxCPU {
+        Some(oci::LinuxCpu {
            shares: Some(c.Shares),
            quota: Some(c.Quota),
            period: Some(c.Period),
@@ -334,7 +305,7 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {

    let pids = if res.Pids.is_some() {
        let p = res.Pids.as_ref().unwrap();
-        Some(ociLinuxPids { limit: p.Limit })
+        Some(oci::LinuxPids { limit: p.Limit })
    } else {
        None
    };
@@ -350,7 +321,7 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {
    let hugepage_limits = {
        let mut r = Vec::new();
        for hl in res.HugepageLimits.iter() {
-            r.push(ociLinuxHugepageLimit {
+            r.push(oci::LinuxHugepageLimit {
                page_size: hl.Pagesize.clone(),
                limit: hl.Limit,
            });
@@ -363,14 +334,14 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {
        let priorities = {
            let mut r = Vec::new();
            for pr in net.Priorities.iter() {
-                r.push(ociLinuxInterfacePriority {
+                r.push(oci::LinuxInterfacePriority {
                    name: pr.Name.clone(),
                    priority: pr.Priority,
                });
            }
            r
        };
-        Some(ociLinuxNetwork {
+        Some(oci::LinuxNetwork {
            class_id: Some(net.ClassID),
            priorities,
        })
@@ -378,7 +349,7 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {
        None
    };

-    ociLinuxResources {
+    oci::LinuxResources {
        devices,
        memory,
        cpu,
@@ -390,17 +361,22 @@ pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources {
    }
 }

-use oci::{LinuxSeccompArg as ociLinuxSeccompArg, LinuxSyscall as ociLinuxSyscall};
-
-fn seccomp_grpc_to_oci(sec: &grpcLinuxSeccomp) -> ociLinuxSeccomp {
+fn seccomp_grpc_to_oci(sec: &grpc::LinuxSeccomp) -> oci::LinuxSeccomp {
    let syscalls = {
        let mut r = Vec::new();

        for sys in sec.Syscalls.iter() {
            let mut args = Vec::new();
+            let errno_ret: u32;
+
+            if sys.has_errnoret() {
+                errno_ret = sys.get_errnoret();
+            } else {
+                errno_ret = libc::EPERM as u32;
+            }

            for arg in sys.Args.iter() {
-                args.push(ociLinuxSeccompArg {
+                args.push(oci::LinuxSeccompArg {
                    index: arg.Index as u32,
                    value: arg.Value,
                    value_two: arg.ValueTwo,
@@ -408,23 +384,25 @@ fn seccomp_grpc_to_oci(sec: &grpcLinuxSeccomp) -> ociLinuxSeccomp {
                });
            }

-            r.push(ociLinuxSyscall {
+            r.push(oci::LinuxSyscall {
                names: sys.Names.clone().into_vec(),
                action: sys.Action.clone(),
+                errno_ret,
                args,
            });
        }
        r
    };

-    ociLinuxSeccomp {
+    oci::LinuxSeccomp {
        default_action: sec.DefaultAction.clone(),
        architectures: sec.Architectures.clone().into_vec(),
+        flags: sec.Flags.clone().into_vec(),
        syscalls,
    }
 }

-fn linux_grpc_to_oci(l: &grpcLinux) -> ociLinux {
+fn linux_grpc_to_oci(l: &grpc::Linux) -> oci::Linux {
    let uid_mappings = idmaps_grpc_to_oci(l.UIDMappings.as_ref());
    let gid_mappings = idmaps_grpc_to_oci(l.GIDMappings.as_ref());

@@ -444,7 +422,7 @@ fn linux_grpc_to_oci(l: &grpcLinux) -> ociLinux {
        let mut r = Vec::new();

        for ns in l.Namespaces.iter() {
-            r.push(ociLinuxNamespace {
+            r.push(oci::LinuxNamespace {
                r#type: ns.Type.clone(),
                path: ns.Path.clone(),
            });
@@ -456,7 +434,7 @@ fn linux_grpc_to_oci(l: &grpcLinux) -> ociLinux {
        let mut r = Vec::new();

        for d in l.Devices.iter() {
-            r.push(ociLinuxDevice {
+            r.push(oci::LinuxDevice {
                path: d.Path.clone(),
                r#type: d.Type.clone(),
                major: d.Major,
@@ -472,14 +450,14 @@ fn linux_grpc_to_oci(l: &grpcLinux) -> ociLinux {
    let intel_rdt = if l.IntelRdt.is_some() {
        let rdt = l.IntelRdt.as_ref().unwrap();

-        Some(ociLinuxIntelRdt {
+        Some(oci::LinuxIntelRdt {
            l3_cache_schema: rdt.L3CacheSchema.clone(),
        })
    } else {
        None
    };

-    ociLinux {
+    oci::Linux {
        uid_mappings,
        gid_mappings,
        sysctl: l.Sysctl.clone(),
@@ -496,11 +474,11 @@ fn linux_grpc_to_oci(l: &grpcLinux) -> ociLinux {
    }
 }

-fn linux_oci_to_grpc(_l: &ociLinux) -> grpcLinux {
-    grpcLinux::default()
+fn linux_oci_to_grpc(_l: &oci::Linux) -> grpc::Linux {
+    grpc::Linux::default()
 }

-pub fn grpc_to_oci(grpc: &grpcSpec) -> ociSpec {
+pub fn grpc_to_oci(grpc: &grpc::Spec) -> oci::Spec {
    // process
    let process = if grpc.Process.is_some() {
        Some(process_grpc_to_oci(grpc.Process.as_ref().unwrap()))
@@ -538,7 +516,7 @@ pub fn grpc_to_oci(grpc: &grpcSpec) -> ociSpec {
        None
    };

-    ociSpec {
+    oci::Spec {
        version: grpc.Version.clone(),
        process,
        root,
--- a/src/agent/rustjail/src/mount.rs
+++ b/src/agent/rustjail/src/mount.rs
@@ -52,10 +52,12 @@ const MOUNTINFOFORMAT: &str = "{d} {d} {d}:{d} {} {} {} {}";
 const PROC_PATH: &str = "/proc";

 // since libc didn't defined this const for musl, thus redefined it here.
-#[cfg(all(target_os = "linux", target_env = "gnu"))]
+#[cfg(all(target_os = "linux", target_env = "gnu", not(target_arch = "s390x")))]
 const PROC_SUPER_MAGIC: libc::c_long = 0x00009fa0;
 #[cfg(all(target_os = "linux", target_env = "musl"))]
 const PROC_SUPER_MAGIC: libc::c_ulong = 0x00009fa0;
+#[cfg(all(target_os = "linux", target_env = "gnu", target_arch = "s390x"))]
+const PROC_SUPER_MAGIC: libc::c_uint = 0x00009fa0;

 lazy_static! {
    static ref PROPAGATION: HashMap<&'static str, MsFlags> = {
@@ -66,6 +68,8 @@ lazy_static! {
        m.insert("rprivate", MsFlags::MS_PRIVATE | MsFlags::MS_REC);
        m.insert("slave", MsFlags::MS_SLAVE);
        m.insert("rslave", MsFlags::MS_SLAVE | MsFlags::MS_REC);
+        m.insert("unbindable", MsFlags::MS_UNBINDABLE);
+        m.insert("runbindable", MsFlags::MS_UNBINDABLE | MsFlags::MS_REC);
        m
    };
    static ref OPTIONS: HashMap<&'static str, (bool, MsFlags)> = {
@@ -91,17 +95,6 @@ lazy_static! {
        m.insert("nodiratime", (false, MsFlags::MS_NODIRATIME));
        m.insert("bind", (false, MsFlags::MS_BIND));
        m.insert("rbind", (false, MsFlags::MS_BIND | MsFlags::MS_REC));
-        m.insert("unbindable", (false, MsFlags::MS_UNBINDABLE));
-        m.insert(
-            "runbindable",
-            (false, MsFlags::MS_UNBINDABLE | MsFlags::MS_REC),
-        );
-        m.insert("private", (false, MsFlags::MS_PRIVATE));
-        m.insert("rprivate", (false, MsFlags::MS_PRIVATE | MsFlags::MS_REC));
-        m.insert("shared", (false, MsFlags::MS_SHARED));
-        m.insert("rshared", (false, MsFlags::MS_SHARED | MsFlags::MS_REC));
-        m.insert("slave", (false, MsFlags::MS_SLAVE));
-        m.insert("rslave", (false, MsFlags::MS_SLAVE | MsFlags::MS_REC));
        m.insert("relatime", (false, MsFlags::MS_RELATIME));
        m.insert("norelatime", (true, MsFlags::MS_RELATIME));
        m.insert("strictatime", (false, MsFlags::MS_STRICTATIME));
@@ -190,7 +183,7 @@ pub fn init_rootfs(

    let mut bind_mount_dev = false;
    for m in &spec.mounts {
-        let (mut flags, data) = parse_mount(&m);
+        let (mut flags, pgflags, data) = parse_mount(&m);
        if !m.destination.starts_with('/') || m.destination.contains("..") {
            return Err(anyhow!(
                "the mount destination {} is invalid",
@@ -232,13 +225,15 @@ pub fn init_rootfs(
            // effective.
            // first check that we have non-default options required before attempting a
            // remount
-            if m.r#type == "bind" {
-                for o in &m.options {
-                    if let Some(fl) = PROPAGATION.get(o.as_str()) {
-                        let dest = format!("{}{}", &rootfs, &m.destination);
-                        mount(None::<&str>, dest.as_str(), None::<&str>, *fl, None::<&str>)?;
-                    }
-                }
+            if m.r#type == "bind" && !pgflags.is_empty() {
+                let dest = secure_join(rootfs, &m.destination);
+                mount(
+                    None::<&str>,
+                    dest.as_str(),
+                    None::<&str>,
+                    pgflags,
+                    None::<&str>,
+                )?;
            }
        }
    }
@@ -655,26 +650,73 @@ pub fn ms_move_root(rootfs: &str) -> Result<bool> {
    Ok(true)
 }

-fn parse_mount(m: &Mount) -> (MsFlags, String) {
+fn parse_mount(m: &Mount) -> (MsFlags, MsFlags, String) {
    let mut flags = MsFlags::empty();
+    let mut pgflags = MsFlags::empty();
    let mut data = Vec::new();

    for o in &m.options {
-        match OPTIONS.get(o.as_str()) {
-            Some(v) => {
-                let (clear, fl) = *v;
-                if clear {
-                    flags &= !fl;
-                } else {
-                    flags |= fl;
-                }
+        if let Some(v) = OPTIONS.get(o.as_str()) {
+            let (clear, fl) = *v;
+            if clear {
+                flags &= !fl;
+            } else {
+                flags |= fl;
            }
-
-            None => data.push(o.clone()),
+        } else if let Some(fl) = PROPAGATION.get(o.as_str()) {
+            pgflags |= *fl;
+        } else {
+            data.push(o.clone());
        }
    }

-    (flags, data.join(","))
+    (flags, pgflags, data.join(","))
+}
+
+// Joins `unsafe_path` onto `rootfs` and returns the result as a string, resolving symlinks found
+// along the way so that the returned path stays below (in a directory under) `rootfs`.
+//
+// Parameters:
+//
+// - `rootfs` is the absolute path to the root of the container's root filesystem directory.
+// - `unsafe_path` is a path inside the container. It is unsafe since it may try to "escape" from
+//   the container's rootfs by using one or more "../" path elements, or because one of its
+//   elements is a symlink pointing elsewhere.
+//
+// Behaviour:
+//
+// - ".." elements of `unsafe_path` are dropped, not resolved ("a/../b" becomes "a/b").
+// - An absolute symlink target is re-rooted at `rootfs`; only its normal components are kept, so
+//   a target such as "/../../etc" cannot climb out of `rootfs`.
+// - A relative symlink target is applied element by element; whenever the intermediate path can
+//   be canonicalized it is, and it is reset to `rootfs` if it ended up outside of it. A ".." that
+//   cannot be resolved on disk is dropped.
+//
+// Panics if the resulting path is not valid UTF-8.
+fn secure_join(rootfs: &str, unsafe_path: &str) -> String {
+    let mut path = PathBuf::from(format!("{}/", rootfs));
+
+    for it in Path::new(unsafe_path).iter() {
+        // pushing an element that starts with "/" would replace `path` entirely, so ignore it
+        if Path::new(it).has_root() {
+            continue;
+        }
+
+        path.push(it);
+        if let Ok(target) = path.read_link() {
+            if target.is_absolute() {
+                // Re-root the target below rootfs. Root, "." and ".." components are skipped so
+                // the target cannot walk back out of rootfs.
+                path = PathBuf::from(format!("{}/", rootfs));
+                for component in target.components() {
+                    if let std::path::Component::Normal(name) = component {
+                        path.push(name);
+                    }
+                }
+            } else {
+                // replace the symlink element itself with the elements of its target
+                path.pop();
+                for part in target.iter() {
+                    path.push(part);
+                    if let Ok(real) = path.canonicalize() {
+                        // the path exists: use its real location, clamped to rootfs
+                        path = if real.starts_with(rootfs) {
+                            real
+                        } else {
+                            PathBuf::from(rootfs)
+                        };
+                    } else if path.ends_with("..") {
+                        // a ".." that cannot be resolved must not be carried along lexically
+                        path.pop();
+                    }
+                }
+            }
+        }
+        // skip any ".."
+        if path.ends_with("..") {
+            path.pop();
+        }
+    }
+
+    path.to_str().unwrap().to_string()
 }

 fn mount_from(
@@ -686,14 +728,14 @@ fn mount_from(
    _label: &str,
 ) -> Result<()> {
    let d = String::from(data);
-    let dest = format!("{}{}", rootfs, &m.destination);
+    let dest = secure_join(rootfs, &m.destination);

    let src = if m.r#type.as_str() == "bind" {
        let src = fs::canonicalize(m.source.as_str())?;
-        let dir = if src.is_file() {
-            Path::new(&dest).parent().unwrap()
-        } else {
+        let dir = if src.is_dir() {
            Path::new(&dest)
+        } else {
+            Path::new(&dest).parent().unwrap()
        };

        let _ = fs::create_dir_all(&dir).map_err(|e| {
@@ -706,7 +748,7 @@ fn mount_from(
        });

        // make sure file exists so we can bind over it
-        if src.is_file() {
+        if !src.is_dir() {
            let _ = OpenOptions::new().create(true).write(true).open(&dest);
        }
        src.to_str().unwrap().to_string()
@@ -874,7 +916,7 @@ pub fn finish_rootfs(cfd_log: RawFd, spec: &Spec) -> Result<()> {

    for m in spec.mounts.iter() {
        if m.destination == "/dev" {
-            let (flags, _) = parse_mount(m);
+            let (flags, _, _) = parse_mount(m);
            if flags.contains(MsFlags::MS_RDONLY) {
                mount(
                    Some("/dev"),
@@ -970,6 +1012,10 @@ fn readonly_path(path: &str) -> Result<()> {
 mod tests {
    use super::*;
    use crate::skip_if_not_root;
+    use std::fs::create_dir;
+    use std::fs::create_dir_all;
+    use std::fs::remove_dir_all;
+    use std::os::unix::fs;
    use std::os::unix::io::AsRawFd;
    use tempfile::tempdir;

@@ -999,7 +1045,7 @@ mod tests {
        );

        let rootfs = tempdir().unwrap();
-        let ret = fs::create_dir(rootfs.path().join("dev"));
+        let ret = create_dir(rootfs.path().join("dev"));
        assert!(ret.is_ok(), "Got: {:?}", ret);

        spec.root = Some(oci::Root {
@@ -1010,8 +1056,8 @@ mod tests {
        // there is no spec.mounts, but should pass
        let ret = init_rootfs(stdout_fd, &spec, &cpath, &mounts, true);
        assert!(ret.is_ok(), "Should pass. Got: {:?}", ret);
-        let _ = fs::remove_dir_all(rootfs.path().join("dev"));
-        let _ = fs::create_dir(rootfs.path().join("dev"));
+        let _ = remove_dir_all(rootfs.path().join("dev"));
+        let _ = create_dir(rootfs.path().join("dev"));

        // Adding bad mount point to spec.mounts
        spec.mounts.push(oci::Mount {
@@ -1029,8 +1075,8 @@ mod tests {
            ret
        );
        spec.mounts.pop();
-        let _ = fs::remove_dir_all(rootfs.path().join("dev"));
-        let _ = fs::create_dir(rootfs.path().join("dev"));
+        let _ = remove_dir_all(rootfs.path().join("dev"));
+        let _ = create_dir(rootfs.path().join("dev"));

        // mounting a cgroup
        spec.mounts.push(oci::Mount {
@@ -1043,8 +1089,8 @@ mod tests {
        let ret = init_rootfs(stdout_fd, &spec, &cpath, &mounts, true);
        assert!(ret.is_ok(), "Should pass. Got: {:?}", ret);
        spec.mounts.pop();
-        let _ = fs::remove_dir_all(rootfs.path().join("dev"));
-        let _ = fs::create_dir(rootfs.path().join("dev"));
+        let _ = remove_dir_all(rootfs.path().join("dev"));
+        let _ = create_dir(rootfs.path().join("dev"));

        // mounting /dev
        spec.mounts.push(oci::Mount {
@@ -1081,11 +1127,11 @@ mod tests {
        cgroup_mounts.insert("cpu".to_string(), "cpu".to_string());
        cgroup_mounts.insert("memory".to_string(), "memory".to_string());

-        let ret = fs::create_dir_all(tempdir.path().join("cgroups"));
+        let ret = create_dir_all(tempdir.path().join("cgroups"));
        assert!(ret.is_ok(), "Should pass. Got {:?}", ret);
-        let ret = fs::create_dir_all(tempdir.path().join("cpu"));
+        let ret = create_dir_all(tempdir.path().join("cpu"));
        assert!(ret.is_ok(), "Should pass. Got {:?}", ret);
-        let ret = fs::create_dir_all(tempdir.path().join("memory"));
+        let ret = create_dir_all(tempdir.path().join("memory"));
        assert!(ret.is_ok(), "Should pass. Got {:?}", ret);

        let ret = mount_cgroups(
@@ -1233,4 +1279,89 @@ mod tests {

        assert!(check_proc_mount(&mount).is_err());
    }
+
+    // Table-driven test for `secure_join`: each case gives a rootfs, an unsafe path and,
+    // optionally, a symlink target to create at `rootfs/unsafe_path` before the call.
+    #[test]
+    fn test_secure_join() {
+        #[derive(Debug)]
+        struct TestData<'a> {
+            name: &'a str,
+            rootfs: &'a str,
+            unsafe_path: &'a str,
+            symlink_path: &'a str,
+            result: &'a str,
+        }
+
+        // create a temporary directory to simulate a container rootfs containing symlinks
+        let rootfs_dir = tempdir().expect("failed to create tmpdir");
+        let rootfs_path = rootfs_dir.path().to_str().unwrap();
+
+        let tests = &[
+            TestData {
+                name: "rootfs_not_exist",
+                rootfs: "/home/rootfs",
+                unsafe_path: "a/b/c",
+                symlink_path: "",
+                result: "/home/rootfs/a/b/c",
+            },
+            TestData {
+                name: "relative_path",
+                rootfs: "/home/rootfs",
+                unsafe_path: "../../../a/b/c",
+                symlink_path: "",
+                result: "/home/rootfs/a/b/c",
+            },
+            TestData {
+                name: "skip any ..",
+                rootfs: "/home/rootfs",
+                unsafe_path: "../../../a/../../b/../../c",
+                symlink_path: "",
+                result: "/home/rootfs/a/b/c",
+            },
+            TestData {
+                name: "rootfs is null",
+                rootfs: "",
+                unsafe_path: "",
+                symlink_path: "",
+                result: "/",
+            },
+            TestData {
+                name: "relative softlink beyond container rootfs",
+                rootfs: rootfs_path,
+                unsafe_path: "1",
+                symlink_path: "../../../",
+                result: rootfs_path,
+            },
+            TestData {
+                name: "abs softlink points to the non-exist directory",
+                rootfs: rootfs_path,
+                unsafe_path: "2",
+                symlink_path: "/dddd",
+                result: &format!("{}/dddd", rootfs_path).as_str().to_owned(),
+            },
+            TestData {
+                name: "abs softlink points to the root",
+                rootfs: rootfs_path,
+                unsafe_path: "3",
+                symlink_path: "/",
+                result: &format!("{}/", rootfs_path).as_str().to_owned(),
+            },
+        ];
+
+        for (i, t) in tests.iter().enumerate() {
+            // Create a string containing details of the test
+            let msg = format!("test[{}]: {:?}", i, t);
+
+            // when the case specifies a symlink target, create the symlink inside the rootfs first
+            if t.symlink_path != "" {
+                fs::symlink(t.symlink_path, format!("{}/{}", t.rootfs, t.unsafe_path)).unwrap();
+            }
+            let result = secure_join(t.rootfs, t.unsafe_path);
+
+            // Update the test details string with the results of the call
+            let msg = format!("{}, result: {:?}", msg, result);
+
+            // Perform the checks
+            assert!(result == t.result, "{}", msg);
+        }
+    }
 }
--- a/src/agent/rustjail/src/pipestream.rs
+++ b/src/agent/rustjail/src/pipestream.rs
@@ -0,0 +1,203 @@
+// Copyright (c) 2020 Ant Group
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+//! Async support for pipe or something has file descriptor
+
+use nix::unistd;
+use std::{
+    fmt, io,
+    io::{Read, Result, Write},
+    mem,
+    os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, RawFd},
+    pin::Pin,
+    task::{Context, Poll},
+};
+
+use futures::ready;
+use tokio::io::{unix::AsyncFd, AsyncRead, AsyncWrite, ReadBuf};
+
+/// Puts `fd` into non-blocking mode.
+///
+/// `O_NONBLOCK` is OR-ed into the flags currently set on the descriptor, so other file status
+/// flags (such as `O_APPEND`) are preserved. This is best effort: failures of `fcntl` are not
+/// reported, and if the current flags cannot be read only `O_NONBLOCK` is set.
+fn set_nonblocking(fd: RawFd) {
+    unsafe {
+        let current = libc::fcntl(fd, libc::F_GETFL);
+        // F_GETFL returns -1 on error; fall back to an empty flag set in that case
+        let current = if current < 0 { 0 } else { current };
+        libc::fcntl(fd, libc::F_SETFL, current | libc::O_NONBLOCK);
+    }
+}
+
+/// Thin owner of a raw file descriptor; the descriptor is closed when the value is dropped.
+struct StreamFd(RawFd);
+
+impl io::Read for &StreamFd {
+    /// Reads from the wrapped descriptor with `unistd::read`, turning the errno reported on
+    /// failure into an `io::Error`.
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        unistd::read(self.0, buf).map_err(|err| err.as_errno().unwrap().into())
+    }
+}
+
+impl io::Write for &StreamFd {
+    /// Writes to the wrapped descriptor with `unistd::write`, turning the errno reported on
+    /// failure into an `io::Error`.
+    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+        unistd::write(self.0, buf).map_err(|err| err.as_errno().unwrap().into())
+    }
+
+    /// Nothing is buffered by this type, so flushing always succeeds.
+    fn flush(&mut self) -> io::Result<()> {
+        Ok(())
+    }
+}
+
+impl StreamFd {
+    /// Closes the wrapped descriptor, mapping the errno of a failed close to an `io::Error`.
+    fn close(&mut self) -> io::Result<()> {
+        unistd::close(self.0).map_err(|err| err.as_errno().unwrap().into())
+    }
+}
+
+impl Drop for StreamFd {
+    /// Closes the descriptor when the wrapper is dropped; a close error is deliberately ignored.
+    fn drop(&mut self) {
+        self.close().ok();
+    }
+}
+
+impl AsRawFd for StreamFd {
+    /// Returns the wrapped descriptor without giving up ownership of it.
+    fn as_raw_fd(&self) -> RawFd {
+        self.0
+    }
+}
+
+/// A file descriptor wrapped in tokio's `AsyncFd`, exposed through `AsyncRead`/`AsyncWrite`.
+pub struct PipeStream(AsyncFd<StreamFd>);
+
+impl PipeStream {
+    /// Takes ownership of `fd`, calls `set_nonblocking` on it and wraps it in an `AsyncFd`.
+    ///
+    /// Returns the error reported by `AsyncFd::new` if the wrapping fails.
+    pub fn new(fd: RawFd) -> Result<Self> {
+        set_nonblocking(fd);
+        Ok(Self(AsyncFd::new(StreamFd(fd))?))
+    }
+
+    /// Same as `new`, but goes through `FromRawFd::from_raw_fd`, which unwraps the result and
+    /// therefore panics if `new` fails.
+    pub fn from_fd(fd: RawFd) -> Self {
+        unsafe { Self::from_raw_fd(fd) }
+    }
+}
+
+impl AsRawFd for PipeStream {
+    /// Returns the underlying descriptor; ownership stays with the `PipeStream`.
+    fn as_raw_fd(&self) -> RawFd {
+        self.0.as_raw_fd()
+    }
+}
+
+impl IntoRawFd for PipeStream {
+    /// Releases ownership of the descriptor and returns it without closing it.
+    ///
+    /// The `AsyncFd` is unwrapped with `into_inner()` so that its registration is released
+    /// instead of leaked; only the inner `StreamFd` is forgotten, which is what keeps its
+    /// `Drop` from closing the descriptor.
+    fn into_raw_fd(self) -> RawFd {
+        let inner = self.0.into_inner();
+        let fd = inner.0;
+        // skip StreamFd::drop, which would close the fd we are handing out
+        mem::forget(inner);
+        fd
+    }
+}
+
+impl FromRawFd for PipeStream {
+    /// Builds a `PipeStream` that owns `fd`.
+    ///
+    /// Panics if `PipeStream::new` returns an error, because the result is unwrapped.
+    unsafe fn from_raw_fd(fd: RawFd) -> Self {
+        Self::new(fd).unwrap()
+    }
+}
+
+impl fmt::Debug for PipeStream {
+    /// Formats the stream as `PipeStream(<fd number>)`.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "PipeStream({})", self.as_raw_fd())
+    }
+}
+
+impl AsyncRead for PipeStream {
+    /// Reads from the descriptor into the unfilled part of `buf`.
+    ///
+    /// Waits for read readiness, then attempts the read. If the read would block, readiness is
+    /// polled again. On success `buf` is advanced by the number of bytes read; a read error is
+    /// returned as is.
+    fn poll_read(
+        self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+        buf: &mut ReadBuf<'_>,
+    ) -> Poll<Result<()>> {
+        loop {
+            let mut guard = ready!(self.0.poll_read_ready(cx))?;
+
+            // Hand out an initialised slice instead of casting uninitialised memory to `[u8]`.
+            let unfilled = buf.initialize_unfilled();
+            match guard.try_io(|inner| inner.get_ref().read(unfilled)) {
+                Ok(Ok(n)) => {
+                    buf.advance(n);
+                    return Poll::Ready(Ok(()));
+                }
+                Ok(Err(e)) => return Poll::Ready(Err(e)),
+                // the readiness event was stale; wait for the next one
+                Err(_would_block) => continue,
+            }
+        }
+    }
+}
+
+impl AsyncWrite for PipeStream {
+    /// Writes `buf` to the descriptor once it is write-ready, retrying the readiness poll when
+    /// the write attempt would block. Returns the result of the underlying write.
+    fn poll_write(
+        self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+        buf: &[u8],
+    ) -> Poll<io::Result<usize>> {
+        loop {
+            let mut guard = ready!(self.0.poll_write_ready(cx))?;
+
+            match guard.try_io(|inner| inner.get_ref().write(buf)) {
+                Ok(result) => return Poll::Ready(result),
+                Err(_would_block) => continue,
+            }
+        }
+    }
+
+    /// Nothing is buffered, so a flush completes immediately.
+    fn poll_flush(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<io::Result<()>> {
+        Poll::Ready(Ok(()))
+    }
+
+    fn poll_shutdown(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<io::Result<()>> {
+        // Doing nothing on shutdown is very important.
+        // The only right way to shut down a pipe is to drop it;
+        // otherwise a PipeStream would conflict with its twins,
+        // because they share the same fd and are both registered.
+        Poll::Ready(Ok(()))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use nix::fcntl::OFlag;
+    use tokio::io::{AsyncReadExt, AsyncWriteExt};
+
+    #[tokio::test]
+    // Shutdown should never close the inner fd.
+    async fn test_pipestream_shutdown() {
+        let (_, wfd1) = unistd::pipe2(OFlag::O_CLOEXEC).unwrap();
+        let mut writer1 = PipeStream::new(wfd1).unwrap();
+
+        // if shutdown closed the fd, its number would be reused below
+        // and the test would fail
+        let _ = writer1.shutdown().await.unwrap();
+
+        // let _ = unistd::close(wfd1);
+
+        let (rfd2, wfd2) = unistd::pipe2(OFlag::O_CLOEXEC).unwrap(); // reuse fd number, rfd2 == wfd1
+
+        let mut reader2 = PipeStream::new(rfd2).unwrap();
+        let mut writer2 = PipeStream::new(wfd2).unwrap();
+
+        // dropping writer1 deregisters it; if reader2 had the same fd number it would be
+        // deregistered from epoll as well
+        drop(writer1);
+
+        let _ = writer2.write(b"1").await;
+
+        let mut content = vec![0u8; 1];
+        // Would block here if shutdown had closed the fd.
+        let _ = reader2.read(&mut content).await;
+    }
+}
--- a/src/agent/rustjail/src/process.rs
+++ b/src/agent/rustjail/src/process.rs
@@ -6,7 +6,7 @@
 use libc::pid_t;
 use std::fs::File;
 use std::os::unix::io::RawFd;
-use std::sync::mpsc::Sender;
+use tokio::sync::mpsc::Sender;

 use nix::fcntl::{fcntl, FcntlArg, OFlag};
 use nix::sys::signal::{self, Signal};
@@ -14,18 +14,38 @@ use nix::sys::wait::{self, WaitStatus};
 use nix::unistd::{self, Pid};
 use nix::Result;

-use crate::reaper::Epoller;
 use oci::Process as OCIProcess;
 use slog::Logger;

+use crate::pipestream::PipeStream;
+use std::collections::HashMap;
+use std::sync::Arc;
+use tokio::io::{split, ReadHalf, WriteHalf};
+use tokio::sync::Mutex;
+use tokio::sync::Notify;
+
+/// Identifies one of the process's stream descriptors; `Process::get_fd` maps each variant to
+/// the `Process` field holding the corresponding raw fd.
+#[derive(Debug, PartialEq, Eq, Hash, Clone)]
+pub enum StreamType {
+    Stdin,
+    Stdout,
+    Stderr,
+    TermMaster,
+    ParentStdin,
+    ParentStdout,
+    ParentStderr,
+}
+
+type Reader = Arc<Mutex<ReadHalf<PipeStream>>>;
+type Writer = Arc<Mutex<WriteHalf<PipeStream>>>;
+
 #[derive(Debug)]
 pub struct Process {
    pub exec_id: String,
    pub stdin: Option<RawFd>,
    pub stdout: Option<RawFd>,
    pub stderr: Option<RawFd>,
-    pub exit_pipe_r: Option<RawFd>,
-    pub exit_pipe_w: Option<RawFd>,
+    pub exit_tx: Option<tokio::sync::watch::Sender<bool>>,
+    pub exit_rx: Option<tokio::sync::watch::Receiver<bool>>,
    pub extra_files: Vec<File>,
    pub term_master: Option<RawFd>,
    pub tty: bool,
@@ -41,7 +61,10 @@ pub struct Process {
    pub exit_watchers: Vec<Sender<i32>>,
    pub oci: OCIProcess,
    pub logger: Logger,
-    pub epoller: Option<Epoller>,
+    pub term_exit_notifier: Arc<Notify>,
+
+    readers: HashMap<StreamType, Reader>,
+    writers: HashMap<StreamType, Writer>,
 }

 pub trait ProcessOperations {
@@ -73,14 +96,15 @@ impl Process {
        pipe_size: i32,
    ) -> Result<Self> {
        let logger = logger.new(o!("subsystem" => "process"));
+        let (exit_tx, exit_rx) = tokio::sync::watch::channel(false);

        let mut p = Process {
            exec_id: String::from(id),
            stdin: None,
            stdout: None,
            stderr: None,
-            exit_pipe_w: None,
-            exit_pipe_r: None,
+            exit_tx: Some(exit_tx),
+            exit_rx: Some(exit_rx),
            extra_files: Vec::new(),
            tty: ocip.terminal,
            term_master: None,
@@ -93,7 +117,9 @@ impl Process {
            exit_watchers: Vec::new(),
            oci: ocip.clone(),
            logger: logger.clone(),
-            epoller: None,
+            term_exit_notifier: Arc::new(Notify::new()),
+            readers: HashMap::new(),
+            writers: HashMap::new(),
        };

        info!(logger, "before create console socket!");
@@ -116,27 +142,58 @@ impl Process {
        Ok(p)
    }

-    pub fn close_epoller(&mut self) {
-        if let Some(epoller) = self.epoller.take() {
-            epoller.close();
+    /// Wakes one task waiting on `term_exit_notifier`.
+    pub fn notify_term_close(&mut self) {
+        self.term_exit_notifier.notify_one();
+    }
+
+    /// Returns the raw fd stored for `stream_type`, or `None` if that field is unset.
+    fn get_fd(&self, stream_type: &StreamType) -> Option<RawFd> {
+        match stream_type {
+            StreamType::Stdin => self.stdin,
+            StreamType::Stdout => self.stdout,
+            StreamType::Stderr => self.stderr,
+            StreamType::TermMaster => self.term_master,
+            StreamType::ParentStdin => self.parent_stdin,
+            StreamType::ParentStdout => self.parent_stdout,
+            StreamType::ParentStderr => self.parent_stderr,
         }
     }

-    pub fn create_epoller(&mut self) -> anyhow::Result<()> {
-        match self.term_master {
-            Some(term_master) => {
-                // add epoller to process
-                let epoller = Epoller::new(&self.logger, term_master)?;
-                self.epoller = Some(epoller)
-            }
-            None => {
-                info!(
-                    self.logger,
-                    "try to add epoller to a process without a term master fd"
-                );
-            }
+    /// Wraps the fd of `stream_type` in a `PipeStream`, splits it into a read half and a write
+    /// half, caches both halves in `self.readers` / `self.writers` and returns them.
+    ///
+    /// Returns `None` when no fd is set for `stream_type`.
+    fn get_stream_and_store(&mut self, stream_type: StreamType) -> Option<(Reader, Writer)> {
+        let fd = self.get_fd(&stream_type)?;
+        // NOTE(review): PipeStream::from_fd unwraps internally, so a failure to wrap the fd
+        // would panic here rather than yield None; confirm this is intended.
+        let stream = PipeStream::from_fd(fd);
+
+        let (reader, writer) = split(stream);
+        let reader = Arc::new(Mutex::new(reader));
+        let writer = Arc::new(Mutex::new(writer));
+
+        self.readers.insert(stream_type.clone(), reader.clone());
+        self.writers.insert(stream_type, writer.clone());
+
+        Some((reader, writer))
+    }
+
+    pub fn get_reader(&mut self, stream_type: StreamType) -> Option<Reader> {
+        if let Some(reader) = self.readers.get(&stream_type) {
+            return Some(reader.clone());
        }
-        Ok(())
+
+        let (reader, _) = self.get_stream_and_store(stream_type)?;
+        Some(reader)
+    }
+
+    /// Returns the cached write half for `stream_type`, creating and caching the stream first
+    /// if there is none yet. Returns `None` when no fd is set for `stream_type`.
+    pub fn get_writer(&mut self, stream_type: StreamType) -> Option<Writer> {
+        if let Some(writer) = self.writers.get(&stream_type) {
+            return Some(writer.clone());
+        }
+
+        let (_, writer) = self.get_stream_and_store(stream_type)?;
+        Some(writer)
+    }
+
+    /// Drops the cached read and write halves for `stream_type`, if any. Clones of the halves
+    /// held elsewhere are not affected by this call.
+    pub fn close_stream(&mut self, stream_type: StreamType) {
+        let _ = self.readers.remove(&stream_type);
+        let _ = self.writers.remove(&stream_type);
     }
 }

@@ -195,7 +252,6 @@ mod tests {

        // -1 by default
        assert_eq!(process.pid, -1);
-        assert!(process.wait().is_err());
        // signal to every process in the process
        // group of the calling process.
        process.pid = 0;
--- a/src/agent/rustjail/src/reaper.rs
+++ b/src/agent/rustjail/src/reaper.rs
@@ -1,150 +0,0 @@
-// Copyright (c) 2020 Ant Group
-//
-// SPDX-License-Identifier: Apache-2.0
-//
-
-use nix::fcntl::OFlag;
-use slog::Logger;
-
-use nix::unistd;
-use std::os::unix::io::RawFd;
-
-use anyhow::Result;
-
-const MAX_EVENTS: usize = 2;
-
-#[derive(Debug, Clone)]
-pub struct Epoller {
-    logger: Logger,
-    epoll_fd: RawFd,
-    // rfd and wfd are a pipe's files two ends, this pipe is
-    // used to sync between the readStdio and the process exits.
-    // once the process exits, it will close one end to notify
-    // the readStdio that the process has exited and it should not
-    // wait on the process's terminal which has been inherited
-    // by it's children and hasn't exited.
-    rfd: RawFd,
-    wfd: RawFd,
-}
-
-impl Epoller {
-    pub fn new(logger: &Logger, fd: RawFd) -> Result<Epoller> {
-        let epoll_fd = epoll::create(true)?;
-        let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC)?;
-
-        let mut epoller = Self {
-            logger: logger.clone(),
-            epoll_fd,
-            rfd,
-            wfd,
-        };
-
-        epoller.add(rfd)?;
-        epoller.add(fd)?;
-
-        Ok(epoller)
-    }
-
-    pub fn close_wfd(&self) {
-        let _ = unistd::close(self.wfd);
-    }
-
-    pub fn close(&self) {
-        let _ = unistd::close(self.rfd);
-        let _ = unistd::close(self.wfd);
-        let _ = unistd::close(self.epoll_fd);
-    }
-
-    fn add(&mut self, fd: RawFd) -> Result<()> {
-        info!(self.logger, "Epoller add fd {}", fd);
-        // add creates an epoll which is used to monitor the process's pty's master and
-        // one end of its exit notify pipe. Those files will be registered with level-triggered
-        // notification.
-        epoll::ctl(
-            self.epoll_fd,
-            epoll::ControlOptions::EPOLL_CTL_ADD,
-            fd,
-            epoll::Event::new(
-                epoll::Events::EPOLLHUP
-                    | epoll::Events::EPOLLIN
-                    | epoll::Events::EPOLLERR
-                    | epoll::Events::EPOLLRDHUP,
-                fd as u64,
-            ),
-        )?;
-
-        Ok(())
-    }
-
-    // There will be three cases on the epoller once it poll:
-    // a: only pty's master get an event(other than self.rfd);
-    // b: only the pipe get an event(self.rfd);
-    // c: both of pty and pipe have event occur;
-    // for case a, it means there is output in process's terminal and what needed to do is
-    // just read the terminal and send them out; for case b, it means the process has exited
-    // and there is no data in the terminal, thus just return the "EOF" to end the io;
-    // for case c, it means the process has exited but there is some data in the terminal which
-    // hasn't been send out, thus it should send those data out first and then send "EOF" last to
-    // end the io.
-    pub fn poll(&self) -> Result<RawFd> {
-        let mut rfd = self.rfd;
-        let mut epoll_events = vec![epoll::Event::new(epoll::Events::empty(), 0); MAX_EVENTS];
-
-        loop {
-            let event_count = match epoll::wait(self.epoll_fd, -1, epoll_events.as_mut_slice()) {
-                Ok(ec) => ec,
-                Err(e) => {
-                    info!(self.logger, "loop wait err {:?}", e);
-                    // EINTR: The call was interrupted by a signal handler before either
-                    // any of the requested events occurred or the timeout expired
-                    if e.kind() == std::io::ErrorKind::Interrupted {
-                        continue;
-                    }
-                    return Err(e.into());
-                }
-            };
-
-            for event in epoll_events.iter().take(event_count) {
-                let fd = event.data as i32;
-                // fd has been assigned with one end of process's exited pipe by default, and
-                // here to check is there any event occur on process's terminal, if "yes", it
-                // should be dealt first, otherwise, it means the process has exited and there
-                // is nothing left in the process's terminal needed to be read.
-                if fd != rfd {
-                    rfd = fd;
-                    break;
-                }
-            }
-            break;
-        }
-
-        Ok(rfd)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::Epoller;
-    use nix::fcntl::OFlag;
-    use nix::unistd;
-    use std::thread;
-
-    #[test]
-    fn test_epoller_poll() {
-        let logger = slog::Logger::root(slog::Discard, o!());
-        let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC).unwrap();
-        let epoller = Epoller::new(&logger, rfd).unwrap();
-
-        let child = thread::spawn(move || {
-            let _ = unistd::write(wfd, "temporary file's content".as_bytes());
-        });
-
-        // wait write to finish
-        let _ = child.join();
-
-        let fd = epoller.poll().unwrap();
-        assert_eq!(fd, rfd, "Should get rfd");
-
-        epoller.close();
-    }
-}
--- a/src/agent/rustjail/src/sync.rs
+++ b/src/agent/rustjail/src/sync.rs
@@ -14,8 +14,8 @@ pub const SYNC_SUCCESS: i32 = 1;
 pub const SYNC_FAILED: i32 = 2;
 pub const SYNC_DATA: i32 = 3;

-const DATA_SIZE: usize = 100;
-const MSG_SIZE: usize = mem::size_of::<i32>();
+pub const DATA_SIZE: usize = 100;
+pub const MSG_SIZE: usize = mem::size_of::<i32>();

 #[macro_export]
 macro_rules! log_child {
--- a/src/agent/rustjail/src/sync_with_async.rs
+++ b/src/agent/rustjail/src/sync_with_async.rs
@@ -0,0 +1,140 @@
+// Copyright (c) 2020 Ant Group
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+//! The async version of sync module used for IPC
+
+use crate::pipestream::PipeStream;
+use anyhow::{anyhow, Result};
+use nix::errno::Errno;
+use tokio::io::{AsyncReadExt, AsyncWriteExt};
+
+use crate::sync::{DATA_SIZE, MSG_SIZE, SYNC_DATA, SYNC_FAILED, SYNC_SUCCESS};
+
+/// Writes the first `count` bytes of `buf` to `pipe_w`, retrying writes interrupted by `EINTR`.
+///
+/// `count` is clamped to `buf.len()`. Returns the number of bytes written, which equals the
+/// (clamped) `count` on success.
+///
+/// Errors: any write error other than `EINTR`, or a write that accepts zero bytes while data
+/// is still pending (which would otherwise make this loop spin forever).
+async fn write_count(pipe_w: &mut PipeStream, buf: &[u8], count: usize) -> Result<usize> {
+    let count = count.min(buf.len());
+    let mut len = 0;
+
+    while len < count {
+        match pipe_w.write(&buf[len..count]).await {
+            Ok(0) => {
+                return Err(anyhow!(
+                    "failed to write to pipe: wrote {} of {} bytes",
+                    len,
+                    count
+                ));
+            }
+            Ok(l) => len += l,
+            Err(e) => {
+                // errors without an OS error code are treated like any other non-EINTR error
+                if e.raw_os_error() != Some(Errno::EINTR as i32) {
+                    return Err(e.into());
+                }
+            }
+        }
+    }
+
+    Ok(len)
+}
+
+/// Reads up to `count` bytes from `pipe_r`, retrying reads interrupted by `EINTR`.
+///
+/// Reading stops early at end of stream, so the returned vector may be shorter than `count`;
+/// callers must check its length.
+///
+/// Errors: any read error other than `EINTR`.
+async fn read_count(pipe_r: &mut PipeStream, count: usize) -> Result<Vec<u8>> {
+    let mut v: Vec<u8> = vec![0; count];
+    let mut len = 0;
+
+    loop {
+        match pipe_r.read(&mut v[len..]).await {
+            Ok(l) => {
+                len += l;
+                if len == count || l == 0 {
+                    break;
+                }
+            }
+
+            Err(e) => {
+                // errors without an OS error code are treated like any other non-EINTR error
+                if e.raw_os_error() != Some(Errno::EINTR as i32) {
+                    return Err(e.into());
+                }
+            }
+        }
+    }
+
+    v.truncate(len);
+    Ok(v)
+}
+
+/// Receives one sync message from `pipe_r`.
+///
+/// The message starts with a big-endian `i32` type:
+/// - `SYNC_SUCCESS`: returns an empty vector.
+/// - `SYNC_DATA`: followed by a big-endian `i32` payload length and the payload, which is
+///   returned. The payload may be shorter than announced if the stream ends early.
+/// - `SYNC_FAILED`: followed by an error string read until the stream yields a short chunk;
+///   the string is returned as the error.
+///
+/// Errors: read failures, a truncated type or length header, a negative payload length, an
+/// unknown message type, or a `SYNC_FAILED` message from the peer.
+pub async fn read_async(pipe_r: &mut PipeStream) -> Result<Vec<u8>> {
+    let buf = read_count(pipe_r, MSG_SIZE).await?;
+    if buf.len() != MSG_SIZE {
+        return Err(anyhow!(
+            "process: {} failed to receive async message from peer: got msg length: {}, expected: {}",
+            std::process::id(),
+            buf.len(),
+            MSG_SIZE
+        ));
+    }
+    let buf_array: [u8; MSG_SIZE] = [buf[0], buf[1], buf[2], buf[3]];
+    let msg: i32 = i32::from_be_bytes(buf_array);
+    match msg {
+        SYNC_SUCCESS => Ok(Vec::new()),
+        SYNC_DATA => {
+            let buf = read_count(pipe_r, MSG_SIZE).await?;
+            // a short read here would make the indexing below panic
+            if buf.len() != MSG_SIZE {
+                return Err(anyhow!(
+                    "process: {} failed to receive data length from peer: got msg length: {}, expected: {}",
+                    std::process::id(),
+                    buf.len(),
+                    MSG_SIZE
+                ));
+            }
+            let buf_array: [u8; MSG_SIZE] = [buf[0], buf[1], buf[2], buf[3]];
+            let msg_length: i32 = i32::from_be_bytes(buf_array);
+            // a negative length would turn into a huge allocation when cast to usize
+            if msg_length < 0 {
+                return Err(anyhow!(
+                    "invalid data length {} in sync message",
+                    msg_length
+                ));
+            }
+            let data_buf = read_count(pipe_r, msg_length as usize).await?;
+
+            Ok(data_buf)
+        }
+        SYNC_FAILED => {
+            let mut error_buf = vec![];
+            loop {
+                let buf = read_count(pipe_r, DATA_SIZE).await?;
+
+                error_buf.extend(&buf);
+                // a chunk shorter than DATA_SIZE marks the end of the error text
+                if buf.len() != DATA_SIZE {
+                    break;
+                }
+            }
+
+            let error_str = match std::str::from_utf8(&error_buf) {
+                Ok(v) => String::from(v),
+                Err(e) => {
+                    return Err(
+                        anyhow!(e).context("receive error message from child process failed")
+                    );
+                }
+            };
+
+            Err(anyhow!(error_str))
+        }
+        _ => Err(anyhow!("error in receive sync message")),
+    }
+}
+
+/// Sends one sync message of type `msg_type` over `pipe_w`.
+///
+/// The big-endian `msg_type` is always sent first. For `SYNC_FAILED` the bytes of `data_str`
+/// follow; for `SYNC_DATA` the big-endian `i32` length of `data_str` and then its bytes follow.
+/// Any other type carries no payload and `data_str` is ignored.
+pub async fn write_async(pipe_w: &mut PipeStream, msg_type: i32, data_str: &str) -> Result<()> {
+    let header = msg_type.to_be_bytes();
+    let written = write_count(pipe_w, &header, MSG_SIZE).await?;
+    if written != MSG_SIZE {
+        return Err(anyhow!("error in send sync message"));
+    }
+
+    let payload = data_str.as_bytes();
+    match msg_type {
+        SYNC_FAILED => {
+            write_count(pipe_w, payload, data_str.len())
+                .await
+                .map_err(|e| anyhow!(e).context("error in send message to process"))?;
+        }
+        SYNC_DATA => {
+            let length: i32 = data_str.len() as i32;
+            write_count(pipe_w, &length.to_be_bytes(), MSG_SIZE)
+                .await
+                .map_err(|e| anyhow!(e).context("error in send message to process"))?;
+
+            write_count(pipe_w, payload, data_str.len())
+                .await
+                .map_err(|e| anyhow!(e).context("error in send message to process"))?;
+        }
+
+        _ => {}
+    }
+
+    Ok(())
+}
--- a/src/agent/rustjail/src/utils.rs
+++ b/src/agent/rustjail/src/utils.rs
@@ -0,0 +1,119 @@
+// Copyright (c) 2021 Ant Group
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+use anyhow::{anyhow, Context, Result};
+use libc::gid_t;
+use libc::uid_t;
+use std::fs::File;
+use std::io::{BufRead, BufReader};
+
+const PASSWD_FILE: &str = "/etc/passwd";
+
+// One parsed line of a passwd file; the fields appear in file order.
+#[derive(Debug, PartialEq, PartialOrd)]
+pub struct PasswdEntry {
+    // login name (field 1)
+    pub name: String,
+    // password field (field 2), stored as found in the file
+    pub passwd: String,
+    // numeric user id (field 3)
+    pub uid: uid_t,
+    // numeric group id (field 4)
+    pub gid: gid_t,
+    // user information, also known as the GECOS field (field 5)
+    pub gecos: String,
+    // home directory (field 6)
+    pub dir: String,
+    // user's shell (field 7)
+    pub shell: String,
+}
+
+// Return the first entry of the passwd-format file at `path` whose uid
+// field equals `uid`. When a uid appears on several lines, the first
+// usable line wins.
+//
+// Lines that start with '#', lines that do not consist of exactly seven
+// colon-separated fields, and lines whose uid field is not a valid number
+// are skipped. Every field is trimmed of surrounding whitespace.
+//
+// Fails if the file cannot be opened or read, or if no line matches `uid`.
+fn get_entry_by_uid(uid: uid_t, path: &str) -> Result<PasswdEntry> {
+    let file = File::open(path).with_context(|| format!("open file {}", path))?;
+
+    // Scan line by line and stop at the first match.
+    for line in BufReader::new(file).lines() {
+        let line = line.map_err(|e| anyhow!("failed to read file {} with {:?}", path, e))?;
+
+        // Commented-out entry.
+        if line.starts_with('#') {
+            continue;
+        }
+
+        // Anything but seven fields is not a usable passwd entry.
+        let parts: Vec<&str> = line.split(':').map(|part| part.trim()).collect();
+        if parts.len() != 7 {
+            continue;
+        }
+
+        // A uid field that does not parse is treated like any other mismatch.
+        match parts[2].parse::<uid_t>() {
+            Ok(found) if found == uid => {
+                return Ok(PasswdEntry {
+                    name: parts[0].to_string(),
+                    passwd: parts[1].to_string(),
+                    uid: found,
+                    // NOTE(review): a malformed gid silently becomes 0; left
+                    // unchanged here because callers may rely on it.
+                    gid: parts[3].parse().unwrap_or(0),
+                    gecos: parts[4].to_string(),
+                    dir: parts[5].to_string(),
+                    shell: parts[6].to_string(),
+                });
+            }
+            _ => continue,
+        }
+    }
+
+    // Reaching the end of the file only means that no line matched; the file
+    // may hold other entries, so report the missing uid, not an empty file.
+    Err(anyhow!("no entry for uid {} found in file {}", uid, path))
+}
+
+pub fn home_dir(uid: uid_t) -> Result<String> {
+    Ok(get_entry_by_uid(uid, PASSWD_FILE)?.dir) // `dir` field of the entry matching `uid`
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::io::Write;
+    use tempfile::Builder;
+
+    // Every uid in the fixture must resolve to the home directory of its entry.
+    #[test]
+    fn test_get_entry_by_uid() {
+        let tmpdir = Builder::new().tempdir().unwrap();
+        let temp_passwd = format!("{}/passwd", tmpdir.path().to_str().unwrap());
+
+        let fixture = [
+            "root:x:0:0:root:/root0:/bin/bash",
+            "root:x:1:0:root:/root1:/bin/bash",
+            // commented-out line
+            "#root:x:1:0:root:/rootx:/bin/bash",
+            "root:x:2:0:root:/root2:/bin/bash",
+            // line with only six fields
+            "root:x:3:0:root:/root3",
+            "root:x:3:0:root:/root3:/bin/bash",
+        ];
+        let mut tempf = File::create(temp_passwd.as_str()).unwrap();
+        for row in fixture.iter() {
+            writeln!(tempf, "{}", row).unwrap();
+        }
+
+        for (uid, home) in [(0, "/root0"), (1, "/root1"), (2, "/root2"), (3, "/root3")].iter() {
+            let entry = get_entry_by_uid(*uid, temp_passwd.as_str()).unwrap();
+            assert_eq!(entry.dir.as_str(), *home);
+        }
+    }
+}
--- a/src/agent/rustjail/src/validator.rs
+++ b/src/agent/rustjail/src/validator.rs
@@ -6,7 +6,7 @@
 use crate::container::Config;
 use anyhow::{anyhow, Context, Error, Result};
 use nix::errno::Errno;
-use oci::{Linux, LinuxIDMapping, LinuxNamespace, Spec};
+use oci::{Linux, LinuxIdMapping, LinuxNamespace, Spec};
 use std::collections::HashMap;
 use std::path::{Component, PathBuf};

@@ -78,12 +78,8 @@ fn rootfs(root: &str) -> Result<()> {
    Ok(())
 }

-fn network(_oci: &Spec) -> Result<()> {
-    Ok(())
-}
-
 fn hostname(oci: &Spec) -> Result<()> {
-    if oci.hostname.is_empty() || oci.hostname == "" {
+    if oci.hostname.is_empty() {
        return Ok(());
    }

@@ -111,7 +107,7 @@ fn security(oci: &Spec) -> Result<()> {
    Ok(())
 }

-fn idmapping(maps: &[LinuxIDMapping]) -> Result<()> {
+fn idmapping(maps: &[LinuxIdMapping]) -> Result<()> {
    for map in maps {
        if map.size > 0 {
            return Ok(());
@@ -242,7 +238,7 @@ fn rootless_euid_mapping(oci: &Spec) -> Result<()> {
    Ok(())
 }

-fn has_idmapping(maps: &[LinuxIDMapping], id: u32) -> bool {
+fn has_idmapping(maps: &[LinuxIdMapping], id: u32) -> bool {
    for map in maps {
        if id >= map.container_id && id < map.container_id + map.size {
            return true;
@@ -301,7 +297,6 @@ pub fn validate(conf: &Config) -> Result<()> {
    };

    rootfs(root).context("rootfs")?;
-    network(oci).context("network")?;
    hostname(oci).context("hostname")?;
    security(oci).context("security")?;
    usernamespace(oci).context("usernamespace")?;
@@ -446,7 +441,7 @@ mod tests {
        usernamespace(&spec).unwrap();

        let mut linux = Linux::default();
-        linux.uid_mappings = vec![LinuxIDMapping {
+        linux.uid_mappings = vec![LinuxIdMapping {
            container_id: 0,
            host_id: 1000,
            size: 0,
@@ -455,7 +450,7 @@ mod tests {
        usernamespace(&spec).unwrap_err();

        let mut linux = Linux::default();
-        linux.uid_mappings = vec![LinuxIDMapping {
+        linux.uid_mappings = vec![LinuxIdMapping {
            container_id: 0,
            host_id: 1000,
            size: 100,
@@ -502,12 +497,12 @@ mod tests {
                path: "/sys/cgroups/user".to_owned(),
            },
        ];
-        linux.uid_mappings = vec![LinuxIDMapping {
+        linux.uid_mappings = vec![LinuxIdMapping {
            container_id: 0,
            host_id: 1000,
            size: 1000,
        }];
-        linux.gid_mappings = vec![LinuxIDMapping {
+        linux.gid_mappings = vec![LinuxIdMapping {
            container_id: 0,
            host_id: 1000,
            size: 1000,
--- a/src/agent/src/config.rs
+++ b/src/agent/src/config.rs
@@ -10,6 +10,7 @@ use std::time;
 const DEBUG_CONSOLE_FLAG: &str = "agent.debug_console";
 const DEV_MODE_FLAG: &str = "agent.devmode";
 const LOG_LEVEL_OPTION: &str = "agent.log";
+const SERVER_ADDR_OPTION: &str = "agent.server_addr";
 const HOTPLUG_TIMOUT_OPTION: &str = "agent.hotplug_timeout";
 const DEBUG_CONSOLE_VPORT_OPTION: &str = "agent.debug_console_vport";
 const LOG_VPORT_OPTION: &str = "agent.log_vport";
@@ -26,12 +27,24 @@ const VSOCK_PORT: u16 = 1024;
 const SERVER_ADDR_ENV_VAR: &str = "KATA_AGENT_SERVER_ADDR";
 const LOG_LEVEL_ENV_VAR: &str = "KATA_AGENT_LOG_LEVEL";

-// FIXME: unused
-const TRACE_MODE_FLAG: &str = "agent.trace";
-const USE_VSOCK_FLAG: &str = "agent.use_vsock";
+const ERR_INVALID_LOG_LEVEL: &str = "invalid log level";
+const ERR_INVALID_LOG_LEVEL_PARAM: &str = "invalid log level parameter";
+const ERR_INVALID_GET_VALUE_PARAM: &str = "expected name=value";
+const ERR_INVALID_GET_VALUE_NO_NAME: &str = "name=value parameter missing name";
+const ERR_INVALID_GET_VALUE_NO_VALUE: &str = "name=value parameter missing value";
+const ERR_INVALID_LOG_LEVEL_KEY: &str = "invalid log level key name";
+
+const ERR_INVALID_HOTPLUG_TIMEOUT: &str = "invalid hotplug timeout parameter";
+const ERR_INVALID_HOTPLUG_TIMEOUT_PARAM: &str = "unable to parse hotplug timeout";
+const ERR_INVALID_HOTPLUG_TIMEOUT_KEY: &str = "invalid hotplug timeout key name";
+
+const ERR_INVALID_CONTAINER_PIPE_SIZE: &str = "invalid container pipe size parameter";
+const ERR_INVALID_CONTAINER_PIPE_SIZE_PARAM: &str = "unable to parse container pipe size";
+const ERR_INVALID_CONTAINER_PIPE_SIZE_KEY: &str = "invalid container pipe size key name";
+const ERR_INVALID_CONTAINER_PIPE_NEGATIVE: &str = "container pipe size should not be negative";

 #[derive(Debug)]
-pub struct agentConfig {
+pub struct AgentConfig {
    pub debug_console: bool,
    pub dev_mode: bool,
    pub log_level: slog::Level,
@@ -73,9 +86,9 @@ macro_rules! parse_cmdline_param {
    };
 }

-impl agentConfig {
-    pub fn new() -> agentConfig {
-        agentConfig {
+impl AgentConfig {
+    pub fn new() -> AgentConfig {
+        AgentConfig {
            debug_console: false,
            dev_mode: false,
            log_level: DEFAULT_LOG_LEVEL,
@@ -98,6 +111,12 @@ impl agentConfig {

            // parse cmdline options
            parse_cmdline_param!(param, LOG_LEVEL_OPTION, self.log_level, get_log_level);
+            parse_cmdline_param!(
+                param,
+                SERVER_ADDR_OPTION,
+                self.server_addr,
+                get_string_value
+            );

            // ensure the timeout is a positive value
            parse_cmdline_param!(
@@ -105,7 +124,7 @@ impl agentConfig {
                HOTPLUG_TIMOUT_OPTION,
                self.hotplug_timeout,
                get_hotplug_timeout,
-                |hotplugTimeout: time::Duration| hotplugTimeout.as_secs() > 0
+                |hotplug_timeout: time::Duration| hotplug_timeout.as_secs() > 0
            );

            // vsock port should be positive values
@@ -181,7 +200,7 @@ fn logrus_to_slog_level(logrus_level: &str) -> Result<slog::Level> {
        "trace" => slog::Level::Trace,

        _ => {
-            return Err(anyhow!("invalid log level"));
+            return Err(anyhow!(ERR_INVALID_LOG_LEVEL));
        }
    };

@@ -192,11 +211,11 @@ fn get_log_level(param: &str) -> Result<slog::Level> {
    let fields: Vec<&str> = param.split('=').collect();

    if fields.len() != 2 {
-        return Err(anyhow!("invalid log level parameter"));
+        return Err(anyhow!(ERR_INVALID_LOG_LEVEL_PARAM));
    }

    if fields[0] != LOG_LEVEL_OPTION {
-        Err(anyhow!("invalid log level key name"))
+        Err(anyhow!(ERR_INVALID_LOG_LEVEL_KEY))
    } else {
        Ok(logrus_to_slog_level(fields[1])?)
    }
@@ -206,17 +225,17 @@ fn get_hotplug_timeout(param: &str) -> Result<time::Duration> {
    let fields: Vec<&str> = param.split('=').collect();

    if fields.len() != 2 {
-        return Err(anyhow!("invalid hotplug timeout parameter"));
+        return Err(anyhow!(ERR_INVALID_HOTPLUG_TIMEOUT));
    }

    let key = fields[0];
    if key != HOTPLUG_TIMOUT_OPTION {
-        return Err(anyhow!("invalid hotplug timeout key name"));
+        return Err(anyhow!(ERR_INVALID_HOTPLUG_TIMEOUT_KEY));
    }

    let value = fields[1].parse::<u64>();
    if value.is_err() {
-        return Err(anyhow!("unable to parse hotplug timeout"));
+        return Err(anyhow!(ERR_INVALID_HOTPLUG_TIMEOUT_PARAM));
    }

    Ok(time::Duration::from_secs(value.unwrap()))
@@ -238,26 +257,54 @@ fn get_bool_value(param: &str) -> Result<bool> {
    })
 }

+// Extract the value from a "name=value" string.
+//
+// Note:
+//
+// - Both the name and the value must be non-empty.
+// - Only the first equal sign separates name from value, so the value
+//   itself can contain any number of equal signs.
+// - The name is not checked any further; a whitespace-only name is accepted.
+fn get_string_value(param: &str) -> Result<String> {
+    let mut pieces = param.splitn(2, '=');
+    // `splitn` always yields a first piece, even for an empty `param`.
+    let name = pieces.next().unwrap_or("");
+
+    // A missing second piece means `param` contains no equal sign at all.
+    let value = match pieces.next() {
+        Some(rest) => rest,
+        None => return Err(anyhow!(ERR_INVALID_GET_VALUE_PARAM)),
+    };
+
+    if name.is_empty() {
+        return Err(anyhow!(ERR_INVALID_GET_VALUE_NO_NAME));
+    }
+    if value.is_empty() {
+        return Err(anyhow!(ERR_INVALID_GET_VALUE_NO_VALUE));
+    }
+    Ok(value.to_string())
+}
+
 fn get_container_pipe_size(param: &str) -> Result<i32> {
    let fields: Vec<&str> = param.split('=').collect();

    if fields.len() != 2 {
-        return Err(anyhow!("invalid container pipe size parameter"));
+        return Err(anyhow!(ERR_INVALID_CONTAINER_PIPE_SIZE));
    }

    let key = fields[0];
    if key != CONTAINER_PIPE_SIZE_OPTION {
-        return Err(anyhow!("invalid container pipe size key name"));
+        return Err(anyhow!(ERR_INVALID_CONTAINER_PIPE_SIZE_KEY));
    }

    let res = fields[1].parse::<i32>();
    if res.is_err() {
-        return Err(anyhow!("unable to parse container pipe size"));
+        return Err(anyhow!(ERR_INVALID_CONTAINER_PIPE_SIZE_PARAM));
    }

    let value = res.unwrap();
    if value < 0 {
-        return Err(anyhow!("container pipe size should not be negative"));
+        return Err(anyhow!(ERR_INVALID_CONTAINER_PIPE_NEGATIVE));
    }

    Ok(value)
@@ -272,19 +319,6 @@ mod tests {
    use std::time;
    use tempfile::tempdir;

-    const ERR_INVALID_LOG_LEVEL: &str = "invalid log level";
-    const ERR_INVALID_LOG_LEVEL_PARAM: &str = "invalid log level parameter";
-    const ERR_INVALID_LOG_LEVEL_KEY: &str = "invalid log level key name";
-
-    const ERR_INVALID_HOTPLUG_TIMEOUT: &str = "invalid hotplug timeout parameter";
-    const ERR_INVALID_HOTPLUG_TIMEOUT_PARAM: &str = "unable to parse hotplug timeout";
-    const ERR_INVALID_HOTPLUG_TIMEOUT_KEY: &str = "invalid hotplug timeout key name";
-
-    const ERR_INVALID_CONTAINER_PIPE_SIZE: &str = "invalid container pipe size parameter";
-    const ERR_INVALID_CONTAINER_PIPE_SIZE_PARAM: &str = "unable to parse container pipe size";
-    const ERR_INVALID_CONTAINER_PIPE_SIZE_KEY: &str = "invalid container pipe size key name";
-    const ERR_INVALID_CONTAINER_PIPE_NEGATIVE: &str = "container pipe size should not be negative";
-
    // helper function to make errors less crazy-long
    fn make_err(desc: &str) -> Error {
        anyhow!(desc.to_string())
@@ -300,22 +334,25 @@ mod tests {
            if $expected_result.is_ok() {
                let expected_level = $expected_result.as_ref().unwrap();
                let actual_level = $actual_result.unwrap();
-                assert!(*expected_level == actual_level, $msg);
+                assert!(*expected_level == actual_level, "{}", $msg);
            } else {
                let expected_error = $expected_result.as_ref().unwrap_err();
-                let actual_error = $actual_result.unwrap_err();
-
                let expected_error_msg = format!("{:?}", expected_error);
-                let actual_error_msg = format!("{:?}", actual_error);

-                assert!(expected_error_msg == actual_error_msg, $msg);
+                if let Err(actual_error) = $actual_result {
+                    let actual_error_msg = format!("{:?}", actual_error);
+
+                    assert!(expected_error_msg == actual_error_msg, "{}", $msg);
+                } else {
+                    assert!(expected_error_msg == "expected error, got OK", "{}", $msg);
+                }
            }
        };
    }

    #[test]
    fn test_new() {
-        let config = agentConfig::new();
+        let config = AgentConfig::new();
        assert_eq!(config.debug_console, false);
        assert_eq!(config.dev_mode, false);
        assert_eq!(config.log_level, DEFAULT_LOG_LEVEL);
@@ -813,6 +850,61 @@ mod tests {
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
            },
+            TestData {
+                contents: "server_addr=unix:///tmp/foo.socket",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: TEST_SERVER_ADDR,
+                unified_cgroup_hierarchy: false,
+            },
+            TestData {
+                contents: "agent.server_address=unix:///tmp/foo.socket",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: TEST_SERVER_ADDR,
+                unified_cgroup_hierarchy: false,
+            },
+            TestData {
+                contents: "agent.server_addr=unix:///tmp/foo.socket",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: "unix:///tmp/foo.socket",
+                unified_cgroup_hierarchy: false,
+            },
+            TestData {
+                contents: " agent.server_addr=unix:///tmp/foo.socket",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: "unix:///tmp/foo.socket",
+                unified_cgroup_hierarchy: false,
+            },
+            TestData {
+                contents: " agent.server_addr=unix:///tmp/foo.socket a",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: "unix:///tmp/foo.socket",
+                unified_cgroup_hierarchy: false,
+            },
        ];

        let dir = tempdir().expect("failed to create tmpdir");
@@ -822,7 +914,7 @@ mod tests {

        let filename = file_path.to_str().expect("failed to create filename");

-        let mut config = agentConfig::new();
+        let mut config = AgentConfig::new();
        let result = config.parse_cmdline(&filename.to_owned());
        assert!(result.is_err());

@@ -854,7 +946,7 @@ mod tests {
                vars_to_unset.push(name);
            }

-            let mut config = agentConfig::new();
+            let mut config = AgentConfig::new();
            assert_eq!(config.debug_console, false, "{}", msg);
            assert_eq!(config.dev_mode, false, "{}", msg);
            assert_eq!(config.unified_cgroup_hierarchy, false, "{}", msg);
@@ -1199,4 +1291,82 @@ mod tests {
            assert_result!(d.result, result, msg);
        }
    }
+
+    #[test]
+    fn test_get_string_value() {
+        #[derive(Debug)]
+        struct TestData<'a> {
+            param: &'a str,
+            result: Result<String>,
+        }
+        // Each entry pairs an input with the value or error it must produce.
+        let tests = &[
+            TestData {
+                param: "",
+                result: Err(make_err(ERR_INVALID_GET_VALUE_PARAM)),
+            },
+            TestData {
+                param: "=",
+                result: Err(make_err(ERR_INVALID_GET_VALUE_NO_NAME)),
+            },
+            TestData {
+                param: "==",
+                result: Err(make_err(ERR_INVALID_GET_VALUE_NO_NAME)),
+            },
+            TestData {
+                param: "=x",
+                result: Err(make_err(ERR_INVALID_GET_VALUE_NO_NAME)),
+            },
+            TestData {
+                param: "x=",
+                result: Err(make_err(ERR_INVALID_GET_VALUE_NO_VALUE)),
+            },
+            TestData {
+                param: "x==",
+                result: Ok("=".into()),
+            },
+            TestData {
+                param: "x===",
+                result: Ok("==".into()),
+            },
+            TestData {
+                param: "x==x",
+                result: Ok("=x".into()),
+            },
+            TestData {
+                param: "x=x",
+                result: Ok("x".into()),
+            },
+            TestData {
+                param: "x=x=",
+                result: Ok("x=".into()),
+            },
+            TestData {
+                param: "x=x=x",
+                result: Ok("x=x".into()),
+            },
+            TestData {
+                param: "foo=bar",
+                result: Ok("bar".into()),
+            },
+            TestData {
+                param: "x= =",
+                result: Ok(" =".into()),
+            },
+            TestData {
+                param: "x= = ",
+                result: Ok(" = ".into()),
+            },
+        ];
+
+        for (i, d) in tests.iter().enumerate() {
+            // The failure label is built in two steps: first the test case,
+            // then the result that get_string_value() actually returned.
+            let msg = format!("test[{}]: {:?}", i, d);
+            let result = get_string_value(d.param);
+            let msg = format!("{}: result: {:?}", msg, result);
+
+            assert_result!(d.result, result, msg);
+        }
+    }
 }
--- a/src/agent/src/console.rs
+++ b/src/agent/src/console.rs
@@ -0,0 +1,294 @@
+// Copyright (c) 2021 Ant Group
+// Copyright (c) 2021 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use crate::util;
+use anyhow::{anyhow, Result};
+use nix::fcntl::{self, FcntlArg, FdFlag, OFlag};
+use nix::libc::{STDERR_FILENO, STDIN_FILENO, STDOUT_FILENO};
+use nix::pty::{openpty, OpenptyResult};
+use nix::sys::socket::{self, AddressFamily, SockAddr, SockFlag, SockType};
+use nix::sys::stat::Mode;
+use nix::sys::wait;
+use nix::unistd::{self, close, dup2, fork, setsid, ForkResult, Pid};
+use rustjail::pipestream::PipeStream;
+use slog::Logger;
+use std::ffi::CString;
+use std::os::unix::io::{FromRawFd, RawFd};
+use std::path::PathBuf;
+use std::process::Stdio;
+use std::sync::Arc;
+use std::sync::Mutex as SyncMutex;
+
+use futures::StreamExt;
+use tokio::io::{AsyncRead, AsyncWrite};
+use tokio::select;
+use tokio::sync::watch::Receiver;
+
+const CONSOLE_PATH: &str = "/dev/console";
+
+lazy_static! {
+    // Shells the debug console may launch, tried in this order.
+    // Test builds start with an empty list so tests can add their own.
+    static ref SHELLS: Arc<SyncMutex<Vec<String>>> = {
+        let candidates: Vec<String> = if cfg!(test) {
+            Vec::new()
+        } else {
+            vec!["/bin/bash".to_string(), "/bin/sh".to_string()]
+        };
+        Arc::new(SyncMutex::new(candidates))
+    };
+}
+
+pub fn initialize() {
+    lazy_static::initialize(&SHELLS); // build SHELLS now instead of on first access
+}
+
+// Serve the debug console until `shutdown.changed()` resolves: on vsock `port`
+// when it is non-zero, otherwise through one shell attached to CONSOLE_PATH.
+pub async fn debug_console_handler(
+    logger: Logger,
+    port: u32,
+    mut shutdown: Receiver<bool>,
+) -> Result<()> {
+    let logger = logger.new(o!("subsystem" => "debug-console"));
+    let shells = SHELLS.lock().unwrap().to_vec();
+    let shell = shells
+        .into_iter()
+        .find(|sh| PathBuf::from(sh).exists()) // first candidate that exists on disk
+        .ok_or_else(|| anyhow!("no shell found to launch debug console"))?;
+
+    if port > 0 {
+        let listenfd = socket::socket(
+            AddressFamily::Vsock,
+            SockType::Stream,
+            SockFlag::SOCK_CLOEXEC,
+            None,
+        )?;
+        let addr = SockAddr::new_vsock(libc::VMADDR_CID_ANY, port);
+        socket::bind(listenfd, &addr)?;
+        socket::listen(listenfd, 1)?;
+        // Stream of connection results for `listenfd` (see util::get_vsock_incoming).
+        let mut incoming = util::get_vsock_incoming(listenfd);
+        // Serve until shutdown is signalled or the connection stream ends.
+        loop {
+            select! {
+                _ = shutdown.changed() => {
+                    info!(logger, "debug console got shutdown request");
+                    break;
+                }
+
+                conn = incoming.next() => {
+                    if let Some(conn) = conn {
+                        // Accept a new connection
+                        match conn {
+                            Ok(stream) => {
+                                let logger = logger.clone();
+                                let shell = shell.clone();
+                                // Do not block(await) here, or we'll never receive the shutdown signal
+                                tokio::spawn(async move {
+                                    let _ = run_debug_console_vsock(logger, shell, stream).await;
+                                });
+                            }
+                            Err(e) => {
+                                error!(logger, "{:?}", e);
+                            }
+                        }
+                    } else {
+                        break;
+                    }
+                }
+            }
+        }
+    } else {
+        let mut flags = OFlag::empty();
+        flags.insert(OFlag::O_RDWR);
+        flags.insert(OFlag::O_CLOEXEC);
+        // No vsock port configured: run a single shell on the console device.
+        let fd = fcntl::open(CONSOLE_PATH, flags, Mode::empty())?;
+        // If shutdown wins the race, the pending shell future is dropped.
+        select! {
+            _ = shutdown.changed() => {
+                info!(logger, "debug console got shutdown request");
+            }
+
+            result = run_debug_console_serial(shell.clone(), fd) => {
+               match result {
+                   Ok(_) => {
+                       info!(logger, "run_debug_console_shell session finished");
+                   }
+                   Err(err) => {
+                       error!(logger, "run_debug_console_shell failed: {:?}", err);
+                   }
+               }
+            }
+        }
+    };
+
+    Ok(())
+}
+
+// Child half of the fork: start a new session with `slave_fd` as stdio and
+// controlling terminal, then exec `shell`. Returns only if a step before exec fails.
+fn run_in_child(slave_fd: libc::c_int, shell: String) -> Result<()> {
+    let cmd = CString::new(shell)?; // an interior NUL is reported instead of panicking
+    // create new session with child as session leader
+    setsid()?;
+
+    // dup stdin, stdout, stderr to let child act as a terminal
+    dup2(slave_fd, STDIN_FILENO)?;
+    dup2(slave_fd, STDOUT_FILENO)?;
+    dup2(slave_fd, STDERR_FILENO)?;
+
+    // Make the terminal on stdin the controlling tty; the explicit 0 means "do not steal".
+    unsafe {
+        libc::ioctl(0, libc::TIOCSCTTY, 0);
+    }
+
+    // Run the shell with argv[0] set to its own path; exit if exec itself fails.
+    let _ = unistd::execvp(cmd.as_c_str(), &[cmd.as_c_str()]).map_err(|e| match e {
+        nix::Error::Sys(errno) => {
+            std::process::exit(errno as i32);
+        }
+        _ => std::process::exit(-2),
+    });
+    Ok(())
+}
+
+// Parent half of the fork: relay bytes between `stream` and the pty master until
+// either direction finishes, then wait for the shell process `child_pid`.
+async fn run_in_parent<T: AsyncRead + AsyncWrite>(
+    logger: Logger,
+    stream: T,
+    pseudo: OpenptyResult,
+    child_pid: Pid,
+) -> Result<()> {
+    info!(logger, "get debug shell pid {:?}", child_pid);
+
+    // The slave end is only needed by the child; the parent keeps the master.
+    let _ = close(pseudo.slave);
+    let pty = PipeStream::from_fd(pseudo.master);
+
+    let (mut from_peer, mut to_peer) = tokio::io::split(stream);
+    let (mut from_pty, mut to_pty) = tokio::io::split(pty);
+
+    // Whichever copy direction ends first terminates the session.
+    select! {
+        res = tokio::io::copy(&mut from_pty, &mut to_peer) => {
+            debug!(logger, "master closed: {:?}", res);
+        }
+        res = tokio::io::copy(&mut from_peer, &mut to_pty) => {
+            info!(logger, "socket closed: {:?}", res);
+        }
+    }
+
+    // NOTE(review): waitpid() is a blocking call made from async code, and the
+    // pty halves are still alive here - confirm the shell always exits promptly.
+    let wait_status = wait::waitpid(child_pid, None);
+    info!(logger, "debug console process exit code: {:?}", wait_status);
+
+    Ok(())
+}
+
+// Attach a new `shell` process to `stream` through a freshly opened pty: the
+// forked child becomes the shell, the parent relays data until the session ends.
+async fn run_debug_console_vsock<T: AsyncRead + AsyncWrite>(
+    logger: Logger,
+    shell: String,
+    stream: T,
+) -> Result<()> {
+    let logger = logger.new(o!("subsystem" => "debug-console-shell"));
+    let pseudo = openpty(None, None)?;
+    let _ = fcntl::fcntl(pseudo.master, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC));
+    let _ = fcntl::fcntl(pseudo.slave, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC));
+    match fork() {
+        Ok(ForkResult::Child) => {
+            // Getting past this call means setup failed: exit, never run on as an agent copy.
+            let _ = run_in_child(pseudo.slave, shell);
+            std::process::exit(-1)
+        }
+        Ok(ForkResult::Parent { child }) => run_in_parent(logger, stream, pseudo, child).await,
+        Err(err) => Err(anyhow!("fork error: {:?}", err)),
+    }
+}
+
+// Run an interactive `shell` with stdin, stdout and stderr on `fd` and wait for it to exit.
+async fn run_debug_console_serial(shell: String, fd: RawFd) -> Result<()> {
+    // Each Stdio closes the descriptor it owns, so stdout/stderr get duplicates of `fd`.
+    let stdin = unsafe { Stdio::from_raw_fd(fd) };
+    let stdout = unsafe { Stdio::from_raw_fd(fcntl::fcntl(fd, FcntlArg::F_DUPFD_CLOEXEC(0))?) };
+    let stderr = unsafe { Stdio::from_raw_fd(fcntl::fcntl(fd, FcntlArg::F_DUPFD_CLOEXEC(0))?) };
+    let mut child = tokio::process::Command::new(shell)
+        .arg("-i")
+        .kill_on_drop(true)
+        .stdin(stdin)
+        .stdout(stdout)
+        .stderr(stderr)
+        .spawn()
+        .map_err(|_| anyhow!("failed to spawn shell"))?;
+    child.wait().await?;
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::tempdir;
+    use tokio::sync::watch;
+
+    // Error text the handler reports when none of the listed shells exists.
+    const NO_SHELL_MSG: &str = "no shell found to launch debug console";
+
+    // An empty shell list must make the handler fail.
+    #[tokio::test]
+    async fn test_setup_debug_console_no_shells() {
+        {
+            // Guarantee no shells have been added
+            // (required to avoid racing with
+            // test_setup_debug_console_invalid_shell()).
+            let mut shells = SHELLS.lock().unwrap();
+            shells.clear();
+        }
+
+        let logger = slog_scope::logger();
+        let (_, rx) = watch::channel(true);
+
+        // Port 0 selects the serial console branch of the handler.
+        let outcome = debug_console_handler(logger, 0, rx).await;
+
+        assert!(outcome.is_err());
+        assert_eq!(outcome.unwrap_err().to_string(), NO_SHELL_MSG);
+    }
+
+    // A shell list holding only a non-existent path must fail the same way.
+    #[tokio::test]
+    async fn test_setup_debug_console_invalid_shell() {
+        // A path that cannot exist: an entry inside a fresh, empty temp dir.
+        let missing_shell = {
+            let dir = tempdir().expect("failed to create tmpdir");
+            dir.path()
+                .join("enoent")
+                .to_str()
+                .expect("failed to construct shell path")
+                .to_string()
+        };
+
+        {
+            // Add an invalid shell
+            let mut shells = SHELLS.lock().unwrap();
+            shells.push(missing_shell);
+        }
+
+        let logger = slog_scope::logger();
+        let (_, rx) = watch::channel(true);
+
+        // Port 0 selects the serial console branch of the handler.
+        let outcome = debug_console_handler(logger, 0, rx).await;
+
+        assert!(outcome.is_err());
+        assert_eq!(outcome.unwrap_err().to_string(), NO_SHELL_MSG);
+    }
+}
--- a/src/agent/src/device.rs
+++ b/src/agent/src/device.rs
@@ -5,16 +5,20 @@

 use libc::{c_uint, major, minor};
 use nix::sys::stat;
+use regex::Regex;
 use std::collections::HashMap;
 use std::fs;
 use std::os::unix::fs::MetadataExt;
 use std::path::Path;
-use std::sync::{mpsc, Arc, Mutex};
+use std::str::FromStr;
+use std::sync::Arc;
+use tokio::sync::Mutex;

 use crate::linux_abi::*;
-use crate::mount::{DRIVERBLKTYPE, DRIVERMMIOBLKTYPE, DRIVERNVDIMMTYPE, DRIVERSCSITYPE};
+use crate::mount::{DRIVER_BLK_TYPE, DRIVER_MMIO_BLK_TYPE, DRIVER_NVDIMM_TYPE, DRIVER_SCSI_TYPE};
+use crate::pci;
 use crate::sandbox::Sandbox;
-use crate::{AGENT_CONFIG, GLOBAL_DEVICE_WATCHER};
+use crate::uevent::{wait_for_uevent, Uevent, UeventMatcher};
 use anyhow::{anyhow, Result};
 use oci::{LinuxDeviceCgroup, LinuxResources, Spec};
 use protocols::agent::Device;
@@ -35,22 +39,6 @@ struct DevIndexEntry {

 struct DevIndex(HashMap<String, DevIndexEntry>);

-// DeviceHandler is the type of callback to be defined to handle every type of device driver.
-type DeviceHandler = fn(&Device, &mut Spec, &Arc<Mutex<Sandbox>>, &DevIndex) -> Result<()>;
-
-// DEVICEHANDLERLIST lists the supported drivers.
-#[rustfmt::skip]
-lazy_static! {
-    static ref DEVICEHANDLERLIST: HashMap<&'static str, DeviceHandler> = {
-        let mut m: HashMap<&'static str, DeviceHandler> = HashMap::new();
-        m.insert(DRIVERBLKTYPE, virtio_blk_device_handler);
-        m.insert(DRIVERMMIOBLKTYPE, virtiommio_blk_device_handler);
-        m.insert(DRIVERNVDIMMTYPE, virtio_nvdimm_device_handler);
-        m.insert(DRIVERSCSITYPE, virtio_scsi_device_handler);
-        m
-    };
-}
-
 pub fn rescan_pci_bus() -> Result<()> {
    online_device(SYSFS_PCI_BUS_RESCAN_FILE)
 }
@@ -60,107 +48,156 @@ pub fn online_device(path: &str) -> Result<()> {
    Ok(())
 }

-// get_pci_device_address fetches the complete PCI address in sysfs, based on the PCI
-// identifier provided. This should be in the format: "bridgeAddr/deviceAddr".
-// Here, bridgeAddr is the address at which the bridge is attached on the root bus,
-// while deviceAddr is the address at which the device is attached on the bridge.
-fn get_pci_device_address(pci_id: &str) -> Result<String> {
-    let tokens: Vec<&str> = pci_id.split('/').collect();
+// pcipath_to_sysfs fetches the sysfs path for a PCI path, relative to
+// the sysfs path for the PCI host bridge, based on the PCI path
+// provided.
+fn pcipath_to_sysfs(root_bus_sysfs: &str, pcipath: &pci::Path) -> Result<String> {
+    let mut bus = "0000:00".to_string();
+    let mut relpath = String::new();

-    if tokens.len() != 2 {
-        return Err(anyhow!(
-            "PCI Identifier for device should be of format [bridgeAddr/deviceAddr], got {}",
-            pci_id
-        ));
-    }
+    for i in 0..pcipath.len() {
+        let bdf = format!("{}:{}.0", bus, pcipath[i]);

-    let bridge_id = tokens[0];
-    let device_id = tokens[1];
+        relpath = format!("{}/{}", relpath, bdf);

-    // Deduce the complete bridge address based on the bridge address identifier passed
-    // and the fact that bridges are attached on the main bus with function 0.
-    let pci_bridge_addr = format!("0000:00:{}.0", bridge_id);
-
-    // Find out the bus exposed by bridge
-    let bridge_bus_path = format!("{}/{}/pci_bus/", SYSFS_PCI_BUS_PREFIX, pci_bridge_addr);
-
-    let files_slice: Vec<_> = fs::read_dir(&bridge_bus_path)
-        .unwrap()
-        .map(|res| res.unwrap().path())
-        .collect();
-    let bus_num = files_slice.len();
-
-    if bus_num != 1 {
-        return Err(anyhow!(
-            "Expected an entry for bus in {}, got {} entries instead",
-            bridge_bus_path,
-            bus_num
-        ));
-    }
-
-    let bus = files_slice[0].file_name().unwrap().to_str().unwrap();
-
-    // Device address is based on the bus of the bridge to which it is attached.
-    // We do not pass devices as multifunction, hence the trailing 0 in the address.
-    let pci_device_addr = format!("{}:{}.0", bus, device_id);
-
-    let bridge_device_pci_addr = format!("{}/{}", pci_bridge_addr, pci_device_addr);
-
-    info!(
-        sl!(),
-        "Fetched PCI address for device PCIAddr:{}\n", bridge_device_pci_addr
-    );
-
-    Ok(bridge_device_pci_addr)
-}
-
-fn get_device_name(sandbox: &Arc<Mutex<Sandbox>>, dev_addr: &str) -> Result<String> {
-    // Keep the same lock order as uevent::handle_block_add_event(), otherwise it may cause deadlock.
-    let mut w = GLOBAL_DEVICE_WATCHER.lock().unwrap();
-    let sb = sandbox.lock().unwrap();
-    for (key, value) in sb.pci_device_map.iter() {
-        if key.contains(dev_addr) {
-            info!(sl!(), "Device {} found in pci device map", dev_addr);
-            return Ok(format!("{}/{}", SYSTEM_DEV_PATH, value));
+        if i == pcipath.len() - 1 {
+            // Final device need not be a bridge
+            break;
        }
+
+        // Find out the bus exposed by bridge
+        let bridgebuspath = format!("{}{}/pci_bus", root_bus_sysfs, relpath);
+        let mut files: Vec<_> = fs::read_dir(&bridgebuspath)?.collect();
+
+        if files.len() != 1 {
+            return Err(anyhow!(
+                "Expected exactly one PCI bus in {}, got {} instead",
+                bridgebuspath,
+                files.len()
+            ));
+        }
+
+        // unwrap is safe, because of the length test above
+        let busfile = files.pop().unwrap()?;
+        bus = busfile
+            .file_name()
+            .into_string()
+            .map_err(|e| anyhow!("Bad filename under {}: {:?}", &bridgebuspath, e))?;
    }
-    drop(sb);

-    // If device is not found in the device map, hotplug event has not
-    // been received yet, create and add channel to the watchers map.
-    // The key of the watchers map is the device we are interested in.
-    // Note this is done inside the lock, not to miss any events from the
-    // global udev listener.
-    let (tx, rx) = mpsc::channel::<String>();
-    w.insert(dev_addr.to_string(), tx);
-    drop(w);
-
-    info!(sl!(), "Waiting on channel for device notification\n");
-    let hotplug_timeout = AGENT_CONFIG.read().unwrap().hotplug_timeout;
-    let dev_name = rx.recv_timeout(hotplug_timeout).map_err(|_| {
-        GLOBAL_DEVICE_WATCHER.lock().unwrap().remove_entry(dev_addr);
-        anyhow!(
-            "Timeout reached after {:?} waiting for device {}",
-            hotplug_timeout,
-            dev_addr
-        )
-    })?;
-
-    Ok(format!("{}/{}", SYSTEM_DEV_PATH, &dev_name))
+    Ok(relpath)
 }

-pub fn get_scsi_device_name(sandbox: &Arc<Mutex<Sandbox>>, scsi_addr: &str) -> Result<String> {
-    let dev_sub_path = format!("{}{}/{}", SCSI_HOST_CHANNEL, scsi_addr, SCSI_BLOCK_SUFFIX);
+// Matches the "block" uevent of a SCSI disk by looking for the
+// "/0:0:<scsi_addr>/block/" component in the event's devpath.
+//
+// FIXME: This matcher is only correct if the guest has at most one
+// SCSI host.
+#[derive(Debug)]
+struct ScsiBlockMatcher {
+    // Devpath fragment that identifies the disk.
+    search: String,
+}
+
+impl ScsiBlockMatcher {
+    // Build a matcher for the disk at the given SCSI address.
+    fn new(scsi_addr: &str) -> Self {
+        Self {
+            search: format!(r"/0:0:{}/block/", scsi_addr),
+        }
+    }
+}
+
+impl UeventMatcher for ScsiBlockMatcher {
+    // An event matches when it is a named block device whose devpath
+    // contains the search fragment.
+    fn is_match(&self, uev: &Uevent) -> bool {
+        if uev.subsystem != "block" || uev.devname.is_empty() {
+            return false;
+        }
+
+        uev.devpath.contains(&self.search)
+    }
+}
+
+pub async fn get_scsi_device_name(
+    sandbox: &Arc<Mutex<Sandbox>>,
+    scsi_addr: &str,
+) -> Result<String> {
+    let matcher = ScsiBlockMatcher::new(scsi_addr);

    scan_scsi_bus(scsi_addr)?;
-    get_device_name(sandbox, &dev_sub_path)
+    let uev = wait_for_uevent(sandbox, matcher).await?;
+    Ok(format!("{}/{}", SYSTEM_DEV_PATH, &uev.devname))
 }

-pub fn get_pci_device_name(sandbox: &Arc<Mutex<Sandbox>>, pci_id: &str) -> Result<String> {
-    let pci_addr = get_pci_device_address(pci_id)?;
+// Matches the "block" uevent of a virtio-blk device located at a given
+// sysfs path below the PCI root bus.
+#[derive(Debug)]
+struct VirtioBlkPciMatcher {
+    // Anchored pattern for "<root bus><relpath>/virtioN/block/".
+    rex: Regex,
+}
+
+impl VirtioBlkPciMatcher {
+    // relpath is the device's sysfs path relative to the PCI root bus,
+    // e.g. as produced by pcipath_to_sysfs().
+    fn new(relpath: &str) -> VirtioBlkPciMatcher {
+        let root_bus = create_pci_root_bus_path();
+        // Both path parts are literal text, so escape them: PCI
+        // addresses contain '.', which would otherwise match any
+        // character and let an unrelated devpath satisfy the pattern.
+        let re = format!(
+            r"^{}{}/virtio[0-9]+/block/",
+            regex::escape(&root_bus),
+            regex::escape(relpath)
+        );
+        VirtioBlkPciMatcher {
+            // unwrap is safe: after escaping, the only pattern syntax
+            // left is the fixed fragment above.
+            rex: Regex::new(&re).unwrap(),
+        }
+    }
+}
+
+impl UeventMatcher for VirtioBlkPciMatcher {
+    // An event matches when it is a named block device whose devpath
+    // starts with the expected virtio-blk location.
+    fn is_match(&self, uev: &Uevent) -> bool {
+        uev.subsystem == "block" && self.rex.is_match(&uev.devpath) && !uev.devname.is_empty()
+    }
+}
+
+pub async fn get_virtio_blk_pci_device_name(
+    sandbox: &Arc<Mutex<Sandbox>>,
+    pcipath: &pci::Path,
+) -> Result<String> {
+    let root_bus_sysfs = format!("{}{}", SYSFS_DIR, create_pci_root_bus_path());
+    let sysfs_rel_path = pcipath_to_sysfs(&root_bus_sysfs, pcipath)?;
+    let matcher = VirtioBlkPciMatcher::new(&sysfs_rel_path);

    rescan_pci_bus()?;
-    get_device_name(sandbox, &pci_addr)
+
+    let uev = wait_for_uevent(sandbox, matcher).await?;
+    Ok(format!("{}/{}", SYSTEM_DEV_PATH, &uev.devname))
+}
+
+// Matches the "block" uevent of a pmem device: the devpath must start
+// with ACPI_DEV_PATH and end in "/block/<devname>".
+#[derive(Debug)]
+struct PmemBlockMatcher {
+    // Expected tail of the devpath.
+    suffix: String,
+}
+
+impl PmemBlockMatcher {
+    // Build a matcher for the block device called devname.
+    fn new(devname: &str) -> Self {
+        Self {
+            suffix: format!(r"/block/{}", devname),
+        }
+    }
+}
+
+impl UeventMatcher for PmemBlockMatcher {
+    fn is_match(&self, uev: &Uevent) -> bool {
+        let is_named_block = uev.subsystem == "block" && !uev.devname.is_empty();
+        let path_matches =
+            uev.devpath.starts_with(ACPI_DEV_PATH) && uev.devpath.ends_with(&self.suffix);
+
+        is_named_block && path_matches
+    }
+}
+
+// Wait for the uevent of the pmem block device named by devpath, which
+// must be of the form "/dev/<name>".
+//
+// Fails if devpath lacks the "/dev/" prefix, if waiting for the uevent
+// fails, or if the matched event carries a different device name.
+pub async fn wait_for_pmem_device(sandbox: &Arc<Mutex<Sandbox>>, devpath: &str) -> Result<()> {
+    let devname = devpath
+        .strip_prefix("/dev/")
+        .ok_or_else(|| anyhow!("Storage source '{}' must start with /dev/", devpath))?;
+
+    let uev = wait_for_uevent(sandbox, PmemBlockMatcher::new(devname)).await?;
+
+    if uev.devname == devname {
+        Ok(())
+    } else {
+        Err(anyhow!(
+            "Unexpected device name {} for pmem device (expected {})",
+            uev.devname,
+            devname
+        ))
+    }
+ }

 /// Scan SCSI bus for the given SCSI address(SCSI-Id and LUN)
@@ -204,7 +241,7 @@ fn update_spec_device_list(device: &Device, spec: &mut Spec, devidx: &DevIndex)

    // If no container_path is provided, we won't be able to match and
    // update the device in the OCI spec device list. This is an error.
-    if device.container_path == "" {
+    if device.container_path.is_empty() {
        return Err(anyhow!(
            "container_path cannot empty for device {:?}",
            device
@@ -274,58 +311,53 @@ fn update_spec_device_list(device: &Device, spec: &mut Spec, devidx: &DevIndex)

 // device.Id should be the predicted device name (vda, vdb, ...)
 // device.VmPath already provides a way to send it in
-fn virtiommio_blk_device_handler(
+async fn virtiommio_blk_device_handler(
    device: &Device,
    spec: &mut Spec,
    _sandbox: &Arc<Mutex<Sandbox>>,
    devidx: &DevIndex,
 ) -> Result<()> {
-    if device.vm_path == "" {
+    if device.vm_path.is_empty() {
        return Err(anyhow!("Invalid path for virtio mmio blk device"));
    }

    update_spec_device_list(device, spec, devidx)
 }

-// device.Id should be the PCI address in the format  "bridgeAddr/deviceAddr".
-// Here, bridgeAddr is the address at which the brige is attached on the root bus,
-// while deviceAddr is the address at which the device is attached on the bridge.
-fn virtio_blk_device_handler(
+// device.Id should be a PCI path string
+async fn virtio_blk_device_handler(
    device: &Device,
    spec: &mut Spec,
    sandbox: &Arc<Mutex<Sandbox>>,
    devidx: &DevIndex,
 ) -> Result<()> {
    let mut dev = device.clone();
+    let pcipath = pci::Path::from_str(&device.id)?;

-    // When "Id (PCIAddr)" is not set, we allow to use the predicted "VmPath" passed from kata-runtime
-    // Note this is a special code path for cloud-hypervisor when BDF information is not available
-    if device.id != "" {
-        dev.vm_path = get_pci_device_name(sandbox, &device.id)?;
-    }
+    dev.vm_path = get_virtio_blk_pci_device_name(sandbox, &pcipath).await?;

    update_spec_device_list(&dev, spec, devidx)
 }

 // device.Id should be the SCSI address of the disk in the format "scsiID:lunID"
-fn virtio_scsi_device_handler(
+async fn virtio_scsi_device_handler(
    device: &Device,
    spec: &mut Spec,
    sandbox: &Arc<Mutex<Sandbox>>,
    devidx: &DevIndex,
 ) -> Result<()> {
    let mut dev = device.clone();
-    dev.vm_path = get_scsi_device_name(sandbox, &device.id)?;
+    dev.vm_path = get_scsi_device_name(sandbox, &device.id).await?;
    update_spec_device_list(&dev, spec, devidx)
 }

-fn virtio_nvdimm_device_handler(
+async fn virtio_nvdimm_device_handler(
    device: &Device,
    spec: &mut Spec,
    _sandbox: &Arc<Mutex<Sandbox>>,
    devidx: &DevIndex,
 ) -> Result<()> {
-    if device.vm_path == "" {
+    if device.vm_path.is_empty() {
        return Err(anyhow!("Invalid path for nvdimm device"));
    }

@@ -357,7 +389,7 @@ impl DevIndex {
    }
 }

-pub fn add_devices(
+pub async fn add_devices(
    devices: &[Device],
    spec: &mut Spec,
    sandbox: &Arc<Mutex<Sandbox>>,
@@ -365,13 +397,13 @@ pub fn add_devices(
    let devidx = DevIndex::new(spec);

    for device in devices.iter() {
-        add_device(device, spec, sandbox, &devidx)?;
+        add_device(device, spec, sandbox, &devidx).await?;
    }

    Ok(())
 }

-fn add_device(
+async fn add_device(
    device: &Device,
    spec: &mut Spec,
    sandbox: &Arc<Mutex<Sandbox>>,
@@ -381,21 +413,24 @@ fn add_device(
    info!(sl!(), "device-id: {}, device-type: {}, device-vm-path: {}, device-container-path: {}, device-options: {:?}",
          device.id, device.field_type, device.vm_path, device.container_path, device.options);

-    if device.field_type == "" {
+    if device.field_type.is_empty() {
        return Err(anyhow!("invalid type for device {:?}", device));
    }

-    if device.id == "" && device.vm_path == "" {
+    if device.id.is_empty() && device.vm_path.is_empty() {
        return Err(anyhow!("invalid ID and VM path for device {:?}", device));
    }

-    if device.container_path == "" {
+    if device.container_path.is_empty() {
        return Err(anyhow!("invalid container path for device {:?}", device));
    }

-    match DEVICEHANDLERLIST.get(device.field_type.as_str()) {
-        None => Err(anyhow!("Unknown device type {}", device.field_type)),
-        Some(dev_handler) => dev_handler(device, spec, sandbox, devidx),
+    match device.field_type.as_str() {
+        DRIVER_BLK_TYPE => virtio_blk_device_handler(device, spec, sandbox, devidx).await,
+        DRIVER_MMIO_BLK_TYPE => virtiommio_blk_device_handler(device, spec, sandbox, devidx).await,
+        DRIVER_NVDIMM_TYPE => virtio_nvdimm_device_handler(device, spec, sandbox, devidx).await,
+        DRIVER_SCSI_TYPE => virtio_scsi_device_handler(device, spec, sandbox, devidx).await,
+        _ => Err(anyhow!("Unknown device type {}", device.field_type)),
    }
 }

@@ -432,13 +467,16 @@ pub fn update_device_cgroup(spec: &mut Spec) -> Result<()> {
 #[cfg(test)]
 mod tests {
    use super::*;
+    use crate::uevent::spawn_test_watcher;
    use oci::Linux;
+    use tempfile::tempdir;

    #[test]
    fn test_update_device_cgroup() {
-        let mut spec = Spec::default();
-
-        spec.linux = Some(Linux::default());
+        let mut spec = Spec {
+            linux: Some(Linux::default()),
+            ..Default::default()
+        };

        update_device_cgroup(&mut spec).unwrap();

@@ -712,4 +750,171 @@ mod tests {
        assert_eq!(Some(host_major), specresources.devices[1].major);
        assert_eq!(Some(host_minor), specresources.devices[1].minor);
    }
+
+    // Exercise pcipath_to_sysfs() against a mock sysfs tree that is
+    // built up one bridge at a time under a temporary directory. After
+    // each step, paths of depth 1, 2 and 3 are resolved again to check
+    // which of them can now be translated.
+    #[test]
+    fn test_pcipath_to_sysfs() {
+        let testdir = tempdir().expect("failed to create tmpdir");
+        let rootbuspath = testdir.path().to_str().unwrap();
+
+        let path2 = pci::Path::from_str("02").unwrap();
+        let path23 = pci::Path::from_str("02/03").unwrap();
+        let path234 = pci::Path::from_str("02/03/04").unwrap();
+
+        // Empty tree: a single-element path resolves without touching
+        // the filesystem, longer paths fail because no bridge exists
+        let relpath = pcipath_to_sysfs(rootbuspath, &path2);
+        assert_eq!(relpath.unwrap(), "/0000:00:02.0");
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path23);
+        assert!(relpath.is_err());
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path234);
+        assert!(relpath.is_err());
+
+        // Create mock sysfs files for the device at 0000:00:02.0
+        let bridge2path = format!("{}{}", rootbuspath, "/0000:00:02.0");
+
+        fs::create_dir_all(&bridge2path).unwrap();
+
+        // The device exists but has no pci_bus entry, so it still
+        // cannot be used as a bridge
+        let relpath = pcipath_to_sysfs(rootbuspath, &path2);
+        assert_eq!(relpath.unwrap(), "/0000:00:02.0");
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path23);
+        assert!(relpath.is_err());
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path234);
+        assert!(relpath.is_err());
+
+        // Create mock sysfs files to indicate that 0000:00:02.0 is a bridge to bus 01
+        let bridge2bus = "0000:01";
+        let bus2path = format!("{}/pci_bus/{}", bridge2path, bridge2bus);
+
+        fs::create_dir_all(bus2path).unwrap();
+
+        // Depth 2 now resolves through the first bridge; depth 3 still
+        // lacks its second bridge
+        let relpath = pcipath_to_sysfs(rootbuspath, &path2);
+        assert_eq!(relpath.unwrap(), "/0000:00:02.0");
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path23);
+        assert_eq!(relpath.unwrap(), "/0000:00:02.0/0000:01:03.0");
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path234);
+        assert!(relpath.is_err());
+
+        // Create mock sysfs files for a bridge at 0000:01:03.0 to bus 02
+        let bridge3path = format!("{}/0000:01:03.0", bridge2path);
+        let bridge3bus = "0000:02";
+        let bus3path = format!("{}/pci_bus/{}", bridge3path, bridge3bus);
+
+        fs::create_dir_all(bus3path).unwrap();
+
+        // With both bridges present every path resolves
+        let relpath = pcipath_to_sysfs(rootbuspath, &path2);
+        assert_eq!(relpath.unwrap(), "/0000:00:02.0");
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path23);
+        assert_eq!(relpath.unwrap(), "/0000:00:02.0/0000:01:03.0");
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path234);
+        assert_eq!(relpath.unwrap(), "/0000:00:02.0/0000:01:03.0/0000:02:04.0");
+    }
+
+    // Simplified stand-in for the device specific lookup functions,
+    // which have some complications that make them troublesome to unit
+    // test: wait for a virtio-blk uevent under relpath and return the
+    // device name it carries.
+    async fn example_get_device_name(
+        sandbox: &Arc<Mutex<Sandbox>>,
+        relpath: &str,
+    ) -> Result<String> {
+        let uev = wait_for_uevent(sandbox, VirtioBlkPciMatcher::new(relpath)).await?;
+        Ok(uev.devname)
+    }
+
+    // Look up the same virtio-blk device twice: first with its uevent
+    // already stored in the sandbox's uevent_map, then with the map
+    // entry removed and the event handed to spawn_test_watcher().
+    #[tokio::test]
+    async fn test_get_device_name() {
+        let devname = "vda";
+        let root_bus = create_pci_root_bus_path();
+        let relpath = "/0000:00:0a.0/0000:03:0b.0";
+        let devpath = format!("{}{}/virtio4/block/{}", root_bus, relpath, devname);
+
+        // The "add" event the lookups are expected to find
+        let mut uev = crate::uevent::Uevent::default();
+        uev.action = crate::linux_abi::U_EVENT_ACTION_ADD.to_string();
+        uev.subsystem = "block".to_string();
+        uev.devpath = devpath.clone();
+        uev.devname = devname.to_string();
+
+        let logger = slog::Logger::root(slog::Discard, o!());
+        let sandbox = Arc::new(Mutex::new(Sandbox::new(&logger).unwrap()));
+
+        // Case 1: the event is already recorded in the sandbox
+        let mut sb = sandbox.lock().await;
+        sb.uevent_map.insert(devpath.clone(), uev);
+        drop(sb); // unlock
+
+        let name = example_get_device_name(&sandbox, relpath).await;
+        assert!(name.is_ok(), "{}", name.unwrap_err());
+        assert_eq!(name.unwrap(), devname);
+
+        // Case 2: take the event back out of the map so the lookup
+        // cannot be served from it
+        let mut sb = sandbox.lock().await;
+        let uev = sb.uevent_map.remove(&devpath).unwrap();
+        drop(sb); // unlock
+
+        // NOTE(review): presumably this delivers the event to the
+        // sandbox asynchronously - confirm against uevent.rs
+        spawn_test_watcher(sandbox.clone(), uev);
+
+        let name = example_get_device_name(&sandbox, relpath).await;
+        assert!(name.is_ok(), "{}", name.unwrap_err());
+        assert_eq!(name.unwrap(), devname);
+    }
+
+    // Two matchers built for different PCI paths must each accept only
+    // the block uevent located under their own path.
+    #[tokio::test]
+    async fn test_virtio_blk_matcher() {
+        let devname = "vda";
+        let root_bus = create_pci_root_bus_path();
+        let relpath_a = "/0000:00:0a.0";
+        let relpath_b = "/0000:00:0a.0/0000:00:0b.0";
+
+        // Event for a disk attached directly at relpath_a
+        let mut uev_a = crate::uevent::Uevent::default();
+        uev_a.action = crate::linux_abi::U_EVENT_ACTION_ADD.to_string();
+        uev_a.subsystem = "block".to_string();
+        uev_a.devname = devname.to_string();
+        uev_a.devpath = format!("{}{}/virtio4/block/{}", root_bus, relpath_a, devname);
+
+        // Same event, relocated to the device at relpath_b
+        let mut uev_b = uev_a.clone();
+        uev_b.devpath = format!("{}{}/virtio0/block/{}", root_bus, relpath_b, devname);
+
+        let matcher_a = VirtioBlkPciMatcher::new(relpath_a);
+        let matcher_b = VirtioBlkPciMatcher::new(relpath_b);
+
+        assert!(matcher_a.is_match(&uev_a));
+        assert!(matcher_b.is_match(&uev_b));
+        assert!(!matcher_b.is_match(&uev_a));
+        assert!(!matcher_a.is_match(&uev_b));
+    }
+
+    // Matchers built for different SCSI addresses must each accept only
+    // the block uevent whose devpath carries their own address.
+    #[tokio::test]
+    async fn test_scsi_block_matcher() {
+        let root_bus = create_pci_root_bus_path();
+
+        // Disk "sda" at SCSI address 0:0
+        let devname_a = "sda";
+        let addr_a = "0:0";
+        let mut uev_a = crate::uevent::Uevent::default();
+        uev_a.action = crate::linux_abi::U_EVENT_ACTION_ADD.to_string();
+        uev_a.subsystem = "block".to_string();
+        uev_a.devname = devname_a.to_string();
+        uev_a.devpath = format!(
+            "{}/0000:00:00.0/virtio0/host0/target0:0:0/0:0:{}/block/{}",
+            root_bus, addr_a, devname_a
+        );
+        let matcher_a = ScsiBlockMatcher::new(addr_a);
+
+        // Disk "sdb" at SCSI address 2:0. The devname is updated along
+        // with the devpath so the cloned event stays self-consistent
+        // instead of still claiming to be "sda".
+        let devname_b = "sdb";
+        let addr_b = "2:0";
+        let mut uev_b = uev_a.clone();
+        uev_b.devname = devname_b.to_string();
+        uev_b.devpath = format!(
+            "{}/0000:00:00.0/virtio0/host0/target0:0:2/0:0:{}/block/{}",
+            root_bus, addr_b, devname_b
+        );
+        let matcher_b = ScsiBlockMatcher::new(addr_b);
+
+        assert!(matcher_a.is_match(&uev_a));
+        assert!(matcher_b.is_match(&uev_b));
+        assert!(!matcher_b.is_match(&uev_a));
+        assert!(!matcher_a.is_match(&uev_b));
+    }
 }
--- a/src/agent/src/linux_abi.rs
+++ b/src/agent/src/linux_abi.rs
@@ -9,7 +9,6 @@
 use std::fs;

 pub const SYSFS_DIR: &str = "/sys";
-pub const SYSFS_PCI_BUS_PREFIX: &str = "/sys/bus/pci/devices";
 pub const SYSFS_PCI_BUS_RESCAN_FILE: &str = "/sys/bus/pci/rescan";
 #[cfg(any(
    target_arch = "powerpc64",
@@ -25,10 +24,18 @@ pub fn create_pci_root_bus_path() -> String {
 pub fn create_pci_root_bus_path() -> String {
    let ret = String::from("/devices/platform/4010000000.pcie/pci0000:00");

+    let acpi_root_bus_path = String::from("/devices/pci0000:00");
+    let mut acpi_sysfs_dir = String::from(SYSFS_DIR);
    let mut sysfs_dir = String::from(SYSFS_DIR);
    let mut start_root_bus_path = String::from("/devices/platform/");
    let end_root_bus_path = String::from("/pci0000:00");

+    // check if there is pci bus path for acpi
+    acpi_sysfs_dir.push_str(&acpi_root_bus_path);
+    if let Ok(_) = fs::metadata(&acpi_sysfs_dir) {
+        return acpi_root_bus_path;
+    }
+
    sysfs_dir.push_str(&start_root_bus_path);
    let entries = match fs::read_dir(sysfs_dir) {
        Ok(e) => e,
@@ -58,17 +65,19 @@ pub fn create_pci_root_bus_path() -> String {
    ret
 }

+// From https://www.kernel.org/doc/Documentation/acpi/namespace.txt
+// The Linux kernel's core ACPI subsystem creates struct acpi_device
+// objects for ACPI namespace objects representing devices, power resources
+// processors, thermal zones. Those objects are exported to user space via
+// sysfs as directories in the subtree under /sys/devices/LNXSYSTM:00
+pub const ACPI_DEV_PATH: &str = "/devices/LNXSYSTM";
+
 pub const SYSFS_CPU_ONLINE_PATH: &str = "/sys/devices/system/cpu";

 pub const SYSFS_MEMORY_BLOCK_SIZE_PATH: &str = "/sys/devices/system/memory/block_size_bytes";
 pub const SYSFS_MEMORY_HOTPLUG_PROBE_PATH: &str = "/sys/devices/system/memory/probe";
 pub const SYSFS_MEMORY_ONLINE_PATH: &str = "/sys/devices/system/memory";

-// Here in "0:0", the first number is the SCSI host number because
-// only one SCSI controller has been plugged, while the second number
-// is always 0.
-pub const SCSI_HOST_CHANNEL: &str = "0:0:";
-pub const SCSI_BLOCK_SUFFIX: &str = "block";
 pub const SYSFS_SCSI_HOST_PATH: &str = "/sys/class/scsi_host";

 pub const SYSFS_CGROUPPATH: &str = "/sys/fs/cgroup";
--- a/src/agent/src/main.rs
+++ b/src/agent/src/main.rs
@@ -3,11 +3,6 @@
 // SPDX-License-Identifier: Apache-2.0
 //

-#![allow(non_camel_case_types)]
-#![allow(unused_parens)]
-#![allow(unused_unsafe)]
-#![allow(dead_code)]
-#![allow(non_snake_case)]
 #[macro_use]
 extern crate lazy_static;
 extern crate oci;
@@ -15,79 +10,76 @@ extern crate prctl;
 extern crate prometheus;
 extern crate protocols;
 extern crate regex;
-extern crate rustjail;
 extern crate scan_fmt;
 extern crate serde_json;
-extern crate signal_hook;

 #[macro_use]
 extern crate scopeguard;

 #[macro_use]
 extern crate slog;
-extern crate netlink;

-use crate::netlink::{RtnlHandle, NETLINK_ROUTE};
 use anyhow::{anyhow, Context, Result};
-use nix::fcntl::{self, OFlag};
-use nix::fcntl::{FcntlArg, FdFlag};
-use nix::libc::{STDERR_FILENO, STDIN_FILENO, STDOUT_FILENO};
-use nix::pty;
-use nix::sys::select::{select, FdSet};
+use nix::fcntl::OFlag;
 use nix::sys::socket::{self, AddressFamily, SockAddr, SockFlag, SockType};
-use nix::sys::wait::{self, WaitStatus};
-use nix::unistd::{self, close, dup, dup2, fork, setsid, ForkResult};
-use prctl::set_child_subreaper;
-use signal_hook::{iterator::Signals, SIGCHLD};
-use std::collections::HashMap;
+use nix::unistd::{self, dup, Pid};
 use std::env;
-use std::ffi::{CStr, CString, OsStr};
+use std::ffi::OsStr;
 use std::fs::{self, File};
-use std::io::{Read, Write};
 use std::os::unix::ffi::OsStrExt;
 use std::os::unix::fs as unixfs;
 use std::os::unix::io::AsRawFd;
 use std::path::Path;
-use std::sync::mpsc::{self, Sender};
-use std::sync::{Arc, Mutex, RwLock};
-use std::{io, thread, thread::JoinHandle};
-use unistd::Pid;
+use std::process::exit;
+use std::sync::Arc;

 mod config;
+mod console;
 mod device;
 mod linux_abi;
 mod metrics;
 mod mount;
 mod namespace;
+mod netlink;
 mod network;
+mod pci;
 pub mod random;
 mod sandbox;
+mod signal;
 #[cfg(test)]
 mod test_utils;
 mod uevent;
+mod util;
 mod version;

 use mount::{cgroups_mount, general_mount};
 use sandbox::Sandbox;
+use signal::setup_signal_handler;
 use slog::Logger;
 use uevent::watch_uevents;

+use futures::future::join_all;
+use rustjail::pipestream::PipeStream;
+use tokio::{
+    io::AsyncWrite,
+    sync::{
+        watch::{channel, Receiver},
+        Mutex, RwLock,
+    },
+    task::JoinHandle,
+};
+
 mod rpc;

 const NAME: &str = "kata-agent";
 const KERNEL_CMDLINE_FILE: &str = "/proc/cmdline";
-const CONSOLE_PATH: &str = "/dev/console";
-
-const DEFAULT_BUF_SIZE: usize = 8 * 1024;

 lazy_static! {
-    static ref GLOBAL_DEVICE_WATCHER: Arc<Mutex<HashMap<String, Sender<String>>>> =
-        Arc::new(Mutex::new(HashMap::new()));
-    static ref AGENT_CONFIG: Arc<RwLock<agentConfig>> =
-        Arc::new(RwLock::new(config::agentConfig::new()));
+    static ref AGENT_CONFIG: Arc<RwLock<AgentConfig>> =
+        Arc::new(RwLock::new(config::AgentConfig::new()));
 }

-fn announce(logger: &Logger, config: &agentConfig) {
+fn announce(logger: &Logger, config: &AgentConfig) {
    info!(logger, "announce";
    "agent-commit" => version::VERSION_COMMIT,

@@ -100,7 +92,147 @@ fn announce(logger: &Logger, config: &agentConfig) {
    );
 }

-fn main() -> Result<()> {
+// Body of the task that handles reading from the logger pipe. The task
+// will output to the vsock port specified, or stdout.
+//
+// rfd is the read end of the logger pipe. When vsock_port is non-zero
+// a vsock socket is bound to that port and the log data is written to
+// the stream obtained from util::get_vsock_stream(); otherwise it is
+// written to stdout. The shutdown receiver is passed on to the copier.
+//
+// Returns an error if the vsock socket cannot be created, bound or
+// listened on, or if no vsock stream can be obtained.
+async fn create_logger_task(rfd: RawFd, vsock_port: u32, shutdown: Receiver<bool>) -> Result<()> {
+    let mut reader = PipeStream::from_fd(rfd);
+    let mut writer: Box<dyn AsyncWrite + Unpin + Send>;
+
+    if vsock_port > 0 {
+        let listenfd = socket::socket(
+            AddressFamily::Vsock,
+            SockType::Stream,
+            SockFlag::SOCK_CLOEXEC,
+            None,
+        )?;
+
+        // Report setup failures through the returned Result rather than
+        // panicking inside the task.
+        let addr = SockAddr::new_vsock(libc::VMADDR_CID_ANY, vsock_port);
+        socket::bind(listenfd, &addr)?;
+        socket::listen(listenfd, 1)?;
+
+        writer = Box::new(util::get_vsock_stream(listenfd).await?);
+    } else {
+        writer = Box::new(tokio::io::stdout());
+    }
+
+    // The outcome of the copy is discarded; the task itself reports Ok.
+    let _ = util::interruptable_io_copier(&mut reader, &mut writer, shutdown).await;
+
+    Ok(())
+}
+
+// Async body of the agent: set up logging, parse the kernel command
+// line into AGENT_CONFIG, run the sandbox until start_sandbox() returns,
+// then request shutdown and wait for the collected tasks.
+async fn real_main() -> std::result::Result<(), Box<dyn std::error::Error>> {
+    env::set_var("RUST_BACKTRACE", "full");
+
+    // List of tasks that need to be stopped for a clean shutdown
+    let mut tasks: Vec<JoinHandle<Result<()>>> = vec![];
+
+    console::initialize();
+
+    lazy_static::initialize(&AGENT_CONFIG);
+
+    // Pipe carrying log output: loggers write to wfd, the logger task
+    // reads from rfd (this is what supports logging over vsock)
+    let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC)?;
+
+    let (shutdown_tx, shutdown_rx) = channel(true);
+
+    let agent_config = AGENT_CONFIG.clone();
+
+    // The agent acts as the init process when it runs as PID 1
+    let init_mode = unistd::getpid() == Pid::from_raw(1);
+    if init_mode {
+        // dup a new file descriptor for this temporary logger writer,
+        // since this logger will be dropped and its writer will be
+        // closed at the end of this code block.
+        let newwfd = dup(wfd)?;
+        let writer = unsafe { File::from_raw_fd(newwfd) };
+
+        // Init a temporary logger used by the agent as init process:
+        // before the base mounts are done it cannot access "/proc/cmdline"
+        // to get the customized debug level.
+        let (logger, logger_async_guard) =
+            logging::create_logger(NAME, "agent", slog::Level::Debug, writer);
+
+        // Must mount proc fs before parsing kernel command line
+        general_mount(&logger).map_err(|e| {
+            error!(logger, "fail general mount: {}", e);
+            e
+        })?;
+
+        let mut config = agent_config.write().await;
+        config.parse_cmdline(KERNEL_CMDLINE_FILE)?;
+
+        init_agent_as_init(&logger, config.unified_cgroup_hierarchy)?;
+        drop(logger_async_guard);
+    } else {
+        // Once the cmdline is parsed and the config is set, release the
+        // write lock as soon as possible in case another task wants a
+        // read lock on it.
+        let mut config = agent_config.write().await;
+        config.parse_cmdline(KERNEL_CMDLINE_FILE)?;
+    }
+    let config = agent_config.read().await;
+
+    let log_vport = config.log_vport as u32;
+
+    let log_handle = tokio::spawn(create_logger_task(rfd, log_vport, shutdown_rx.clone()));
+
+    tasks.push(log_handle);
+
+    let writer = unsafe { File::from_raw_fd(wfd) };
+
+    // Recreate a logger with the log level read from "/proc/cmdline".
+    let (logger, logger_async_guard) =
+        logging::create_logger(NAME, "agent", config.log_level, writer);
+
+    announce(&logger, &config);
+
+    // This variable is required as it enables the global (and crucially static) logger,
+    // which is required to satisfy the lifetime constraints of the auto-generated gRPC code.
+    let global_logger = slog_scope::set_global_logger(logger.new(o!("subsystem" => "rpc")));
+
+    // Allow the global logger to be modified later (for shutdown)
+    global_logger.cancel_reset();
+
+    let mut ttrpc_log_guard: Result<(), log::SetLoggerError> = Ok(());
+
+    if config.log_level == slog::Level::Trace {
+        // Redirect ttrpc log calls to slog iff full debug requested
+        ttrpc_log_guard = Ok(slog_stdlog::init().map_err(|e| e)?);
+    }
+
+    // Start the sandbox and wait for its ttRPC server to end
+    start_sandbox(&logger, &config, init_mode, &mut tasks, shutdown_rx.clone()).await?;
+
+    // Install a NOP logger for the remainder of the shutdown sequence
+    // to ensure any log calls made by local crates using the scope logger
+    // don't fail.
+    let global_logger_guard2 =
+        slog_scope::set_global_logger(slog::Logger::root(slog::Discard, o!()));
+    global_logger_guard2.cancel_reset();
+
+    drop(logger_async_guard);
+
+    drop(ttrpc_log_guard);
+
+    // Trigger a controlled shutdown
+    shutdown_tx
+        .send(true)
+        .map_err(|e| anyhow!(e).context("failed to request shutdown"))?;
+
+    // Wait for all tasks to finish
+    let results = join_all(tasks).await;
+
+    // NOTE(review): only the join result of each task is inspected
+    // here; an Err returned by the task itself is not reported -
+    // confirm this is intended.
+    for result in results {
+        if let Err(e) = result {
+            return Err(anyhow!(e).into());
+        }
+    }
+
+    eprintln!("{} shutdown complete", NAME);
+
+    Ok(())
+}
+
+fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let args: Vec<String> = env::args().collect();

    if args.len() == 2 && args[1] == "--version" {
@@ -116,244 +248,67 @@ fn main() -> Result<()> {
    }

    if args.len() == 2 && args[1] == "init" {
+        reset_sigpipe();
        rustjail::container::init_child();
        exit(0);
    }

-    env::set_var("RUST_BACKTRACE", "full");
+    let rt = tokio::runtime::Builder::new_multi_thread()
+        .enable_all()
+        .build()?;

-    lazy_static::initialize(&SHELLS);
-
-    lazy_static::initialize(&AGENT_CONFIG);
-
-    // support vsock log
-    let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC)?;
-
-    let agentConfig = AGENT_CONFIG.clone();
-
-    let init_mode = unistd::getpid() == Pid::from_raw(1);
-    if init_mode {
-        // dup a new file descriptor for this temporary logger writer,
-        // since this logger would be dropped and it's writer would
-        // be closed out of this code block.
-        let newwfd = dup(wfd)?;
-        let writer = unsafe { File::from_raw_fd(newwfd) };
-
-        // Init a temporary logger used by init agent as init process
-        // since before do the base mount, it wouldn't access "/proc/cmdline"
-        // to get the customzied debug level.
-        let logger = logging::create_logger(NAME, "agent", slog::Level::Debug, writer);
-
-        // Must mount proc fs before parsing kernel command line
-        general_mount(&logger).map_err(|e| {
-            error!(logger, "fail general mount: {}", e);
-            e
-        })?;
-
-        let mut config = agentConfig.write().unwrap();
-        config.parse_cmdline(KERNEL_CMDLINE_FILE)?;
-
-        init_agent_as_init(&logger, config.unified_cgroup_hierarchy)?;
-    } else {
-        // once parsed cmdline and set the config, release the write lock
-        // as soon as possible in case other thread would get read lock on
-        // it.
-        let mut config = agentConfig.write().unwrap();
-        config.parse_cmdline(KERNEL_CMDLINE_FILE)?;
-    }
-    let config = agentConfig.read().unwrap();
-
-    let log_vport = config.log_vport as u32;
-    let log_handle = thread::spawn(move || -> Result<()> {
-        let mut reader = unsafe { File::from_raw_fd(rfd) };
-        if log_vport > 0 {
-            let listenfd = socket::socket(
-                AddressFamily::Vsock,
-                SockType::Stream,
-                SockFlag::SOCK_CLOEXEC,
-                None,
-            )?;
-            let addr = SockAddr::new_vsock(libc::VMADDR_CID_ANY, log_vport);
-            socket::bind(listenfd, &addr)?;
-            socket::listen(listenfd, 1)?;
-            let datafd = socket::accept4(listenfd, SockFlag::SOCK_CLOEXEC)?;
-            let mut log_writer = unsafe { File::from_raw_fd(datafd) };
-            let _ = io::copy(&mut reader, &mut log_writer)?;
-            let _ = unistd::close(listenfd);
-            let _ = unistd::close(datafd);
-        }
-        // copy log to stdout
-        let mut stdout_writer = io::stdout();
-        let _ = io::copy(&mut reader, &mut stdout_writer)?;
-        Ok(())
-    });
-
-    let writer = unsafe { File::from_raw_fd(wfd) };
-    // Recreate a logger with the log level get from "/proc/cmdline".
-    let logger = logging::create_logger(NAME, "agent", config.log_level, writer);
-
-    announce(&logger, &config);
-
-    // This "unused" variable is required as it enables the global (and crucially static) logger,
-    // which is required to satisfy the the lifetime constraints of the auto-generated gRPC code.
-    let _guard = slog_scope::set_global_logger(logger.new(o!("subsystem" => "rpc")));
-
-    let mut _log_guard: Result<(), log::SetLoggerError> = Ok(());
-
-    if config.log_level == slog::Level::Trace {
-        // Redirect ttrpc log calls to slog iff full debug requested
-        _log_guard = Ok(slog_stdlog::init().map_err(|e| e)?);
-    }
-
-    start_sandbox(&logger, &config, init_mode)?;
-
-    let _ = log_handle.join();
-
-    Ok(())
+    rt.block_on(real_main())
 }

-fn start_sandbox(logger: &Logger, config: &agentConfig, init_mode: bool) -> Result<()> {
-    let shells = SHELLS.clone();
+async fn start_sandbox(
+    logger: &Logger,
+    config: &AgentConfig,
+    init_mode: bool,
+    tasks: &mut Vec<JoinHandle<Result<()>>>,
+    shutdown: Receiver<bool>,
+) -> Result<()> {
    let debug_console_vport = config.debug_console_vport as u32;

-    let mut shell_handle: Option<JoinHandle<()>> = None;
    if config.debug_console {
-        let thread_logger = logger.clone();
+        let debug_console_task = tokio::task::spawn(console::debug_console_handler(
+            logger.clone(),
+            debug_console_vport,
+            shutdown.clone(),
+        ));

-        let builder = thread::Builder::new();
-
-        let handle = builder.spawn(move || {
-            let shells = shells.lock().unwrap();
-            let result = setup_debug_console(&thread_logger, shells.to_vec(), debug_console_vport);
-            if result.is_err() {
-                // Report error, but don't fail
-                warn!(thread_logger, "failed to setup debug console";
-                    "error" => format!("{}", result.unwrap_err()));
-            }
-        })?;
-
-        shell_handle = Some(handle);
+        tasks.push(debug_console_task);
    }

    // Initialize unique sandbox structure.
-    let mut s = Sandbox::new(&logger).context("Failed to create sandbox")?;
-
+    let s = Sandbox::new(&logger).context("Failed to create sandbox")?;
    if init_mode {
-        let mut rtnl = RtnlHandle::new(NETLINK_ROUTE, 0).unwrap();
-        rtnl.handle_localhost()?;
-
-        s.rtnl = Some(rtnl);
+        s.rtnl.handle_localhost().await?;
    }

    let sandbox = Arc::new(Mutex::new(s));

-    setup_signal_handler(&logger, sandbox.clone()).unwrap();
-    watch_uevents(sandbox.clone());
+    let signal_handler_task = tokio::spawn(setup_signal_handler(
+        logger.clone(),
+        sandbox.clone(),
+        shutdown.clone(),
+    ));

-    let (tx, rx) = mpsc::channel::<i32>();
-    sandbox.lock().unwrap().sender = Some(tx);
+    tasks.push(signal_handler_task);
+
+    let uevents_handler_task = tokio::spawn(watch_uevents(sandbox.clone(), shutdown.clone()));
+
+    tasks.push(uevents_handler_task);
+
+    let (tx, rx) = tokio::sync::oneshot::channel();
+    sandbox.lock().await.sender = Some(tx);

    // vsock:///dev/vsock, port
-    let mut server = rpc::start(sandbox, config.server_addr.as_str());
+    let mut server = rpc::start(sandbox.clone(), config.server_addr.as_str());
+    server.start().await?;

-    let _ = server.start().unwrap();
+    let _ = rx.await?;
+    server.shutdown().await?;

-    let _ = rx.recv()?;
-
-    server.shutdown();
-
-    if let Some(handle) = shell_handle {
-        handle.join().map_err(|e| anyhow!("{:?}", e))?;
-    }
-
-    Ok(())
-}
-
-use nix::sys::wait::WaitPidFlag;
-
-fn setup_signal_handler(logger: &Logger, sandbox: Arc<Mutex<Sandbox>>) -> Result<()> {
-    let logger = logger.new(o!("subsystem" => "signals"));
-
-    set_child_subreaper(true)
-        .map_err(|err| anyhow!(err).context("failed to setup agent as a child subreaper"))?;
-
-    let signals = Signals::new(&[SIGCHLD])?;
-
-    thread::spawn(move || {
-        'outer: for sig in signals.forever() {
-            info!(logger, "received signal"; "signal" => sig);
-
-            // sevral signals can be combined together
-            // as one. So loop around to reap all
-            // exited children
-            'inner: loop {
-                let wait_status = match wait::waitpid(
-                    Some(Pid::from_raw(-1)),
-                    Some(WaitPidFlag::WNOHANG | WaitPidFlag::__WALL),
-                ) {
-                    Ok(s) => {
-                        if s == WaitStatus::StillAlive {
-                            continue 'outer;
-                        }
-                        s
-                    }
-                    Err(e) => {
-                        info!(
-                            logger,
-                            "waitpid reaper failed";
-                            "error" => e.as_errno().unwrap().desc()
-                        );
-                        continue 'outer;
-                    }
-                };
-                info!(logger, "wait_status"; "wait_status result" => format!("{:?}", wait_status));
-
-                let pid = wait_status.pid();
-                if let Some(pid) = pid {
-                    let raw_pid = pid.as_raw();
-                    let child_pid = format!("{}", raw_pid);
-
-                    let logger = logger.new(o!("child-pid" => child_pid));
-
-                    let mut sandbox = sandbox.lock().unwrap();
-                    let process = sandbox.find_process(raw_pid);
-                    if process.is_none() {
-                        info!(logger, "child exited unexpectedly");
-                        continue 'inner;
-                    }
-
-                    let mut p = process.unwrap();
-
-                    if p.exit_pipe_w.is_none() {
-                        error!(logger, "the process's exit_pipe_w isn't set");
-                        continue 'inner;
-                    }
-                    let pipe_write = p.exit_pipe_w.unwrap();
-                    let ret: i32;
-
-                    match wait_status {
-                        WaitStatus::Exited(_, c) => ret = c,
-                        WaitStatus::Signaled(_, sig, _) => ret = sig as i32,
-                        _ => {
-                            info!(logger, "got wrong status for process";
-                                  "child-status" => format!("{:?}", wait_status));
-                            continue 'inner;
-                        }
-                    }
-
-                    p.exit_code = ret;
-                    let _ = unistd::close(pipe_write);
-
-                    if let Some(ref poller) = p.epoller {
-                        info!(logger, "close epoller");
-                        // close the socket file to notify readStdio to close terminal specifically
-                        // in case this process's terminal has been inherited by its children.
-                        poller.close_wfd()
-                    }
-                }
-            }
-        }
-    });
    Ok(())
 }

@@ -374,7 +329,7 @@ fn init_agent_as_init(logger: &Logger, unified_cgroup_hierarchy: bool) -> Result
    unistd::setsid()?;

    unsafe {
-        libc::ioctl(io::stdin().as_raw_fd(), libc::TIOCSCTTY, 1);
+        libc::ioctl(std::io::stdin().as_raw_fd(), libc::TIOCSCTTY, 1);
    }

    env::set_var("PATH", "/bin:/sbin/:/usr/bin/:/usr/sbin/");
@@ -404,295 +359,16 @@ fn sethostname(hostname: &OsStr) -> Result<()> {
    }
 }

-lazy_static! {
-    static ref SHELLS: Arc<Mutex<Vec<String>>> = {
-        let mut v = Vec::new();
-
-        if !cfg!(test) {
-            v.push("/bin/bash".to_string());
-            v.push("/bin/sh".to_string());
-        }
-
-        Arc::new(Mutex::new(v))
-    };
+// The Rust standard library suppresses the default SIGPIPE behavior,
+// see https://github.com/rust-lang/rust/pull/13158.
+// Since the parent's signal disposition is inherited by its child process,
+// we re-enable the standard SIGPIPE behavior as a workaround to
+// fix the issue of https://github.com/kata-containers/kata-containers/issues/1887.
+fn reset_sigpipe() {
+    unsafe {
+        libc::signal(libc::SIGPIPE, libc::SIG_DFL);
+    }
 }

-// pub static mut LOG_LEVEL: ;
-// pub static mut TRACE_MODE: ;
-
-use crate::config::agentConfig;
-use nix::sys::stat::Mode;
+use crate::config::AgentConfig;
 use std::os::unix::io::{FromRawFd, RawFd};
-use std::path::PathBuf;
-use std::process::exit;
-
-fn setup_debug_console(logger: &Logger, shells: Vec<String>, port: u32) -> Result<()> {
-    let mut shell: &str = "";
-    for sh in shells.iter() {
-        let binary = PathBuf::from(sh);
-        if binary.exists() {
-            shell = sh;
-            break;
-        }
-    }
-
-    if shell == "" {
-        return Err(anyhow!("no shell found to launch debug console"));
-    }
-
-    if port > 0 {
-        let listenfd = socket::socket(
-            AddressFamily::Vsock,
-            SockType::Stream,
-            SockFlag::SOCK_CLOEXEC,
-            None,
-        )?;
-        let addr = SockAddr::new_vsock(libc::VMADDR_CID_ANY, port);
-        socket::bind(listenfd, &addr)?;
-        socket::listen(listenfd, 1)?;
-        loop {
-            let f: RawFd = socket::accept4(listenfd, SockFlag::SOCK_CLOEXEC)?;
-            match run_debug_console_shell(logger, shell, f) {
-                Ok(_) => {
-                    info!(logger, "run_debug_console_shell session finished");
-                }
-                Err(err) => {
-                    error!(logger, "run_debug_console_shell failed: {:?}", err);
-                }
-            }
-        }
-    } else {
-        let mut flags = OFlag::empty();
-        flags.insert(OFlag::O_RDWR);
-        flags.insert(OFlag::O_CLOEXEC);
-        loop {
-            let f: RawFd = fcntl::open(CONSOLE_PATH, flags, Mode::empty())?;
-            match run_debug_console_shell(logger, shell, f) {
-                Ok(_) => {
-                    info!(logger, "run_debug_console_shell session finished");
-                }
-                Err(err) => {
-                    error!(logger, "run_debug_console_shell failed: {:?}", err);
-                }
-            }
-        }
-    };
-}
-
-fn io_copy<R: ?Sized, W: ?Sized>(reader: &mut R, writer: &mut W) -> io::Result<u64>
-where
-    R: Read,
-    W: Write,
-{
-    let mut buf = [0; DEFAULT_BUF_SIZE];
-    let buf_len;
-
-    match reader.read(&mut buf) {
-        Ok(0) => return Ok(0),
-        Ok(len) => buf_len = len,
-        Err(err) => return Err(err),
-    };
-
-    // write and return
-    match writer.write_all(&buf[..buf_len]) {
-        Ok(_) => Ok(buf_len as u64),
-        Err(err) => Err(err),
-    }
-}
-
-fn run_debug_console_shell(logger: &Logger, shell: &str, socket_fd: RawFd) -> Result<()> {
-    let pseduo = pty::openpty(None, None)?;
-    let _ = fcntl::fcntl(pseduo.master, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC));
-    let _ = fcntl::fcntl(pseduo.slave, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC));
-
-    let slave_fd = pseduo.slave;
-
-    match fork() {
-        Ok(ForkResult::Child) => {
-            // create new session with child as session leader
-            setsid()?;
-
-            // dup stdin, stdout, stderr to let child act as a terminal
-            dup2(slave_fd, STDIN_FILENO)?;
-            dup2(slave_fd, STDOUT_FILENO)?;
-            dup2(slave_fd, STDERR_FILENO)?;
-
-            // set tty
-            unsafe {
-                libc::ioctl(0, libc::TIOCSCTTY);
-            }
-
-            let cmd = CString::new(shell).unwrap();
-            let args: Vec<&CStr> = vec![];
-
-            // run shell
-            let _ = unistd::execvp(cmd.as_c_str(), args.as_slice()).map_err(|e| match e {
-                nix::Error::Sys(errno) => {
-                    std::process::exit(errno as i32);
-                }
-                _ => std::process::exit(-2),
-            });
-        }
-
-        Ok(ForkResult::Parent { child: child_pid }) => {
-            info!(logger, "get debug shell pid {:?}", child_pid);
-
-            let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC)?;
-            let master_fd = pseduo.master;
-            let debug_shell_logger = logger.clone();
-
-            // channel that used to sync between thread and main process
-            let (tx, rx) = mpsc::channel::<i32>();
-
-            // start a thread to do IO copy between socket and pseduo.master
-            thread::spawn(move || {
-                let mut master_reader = unsafe { File::from_raw_fd(master_fd) };
-                let mut master_writer = unsafe { File::from_raw_fd(master_fd) };
-                let mut socket_reader = unsafe { File::from_raw_fd(socket_fd) };
-                let mut socket_writer = unsafe { File::from_raw_fd(socket_fd) };
-
-                loop {
-                    let mut fd_set = FdSet::new();
-                    fd_set.insert(rfd);
-                    fd_set.insert(master_fd);
-                    fd_set.insert(socket_fd);
-
-                    match select(
-                        Some(fd_set.highest().unwrap() + 1),
-                        &mut fd_set,
-                        None,
-                        None,
-                        None,
-                    ) {
-                        Ok(_) => (),
-                        Err(e) => {
-                            if e == nix::Error::from(nix::errno::Errno::EINTR) {
-                                continue;
-                            } else {
-                                error!(debug_shell_logger, "select error {:?}", e);
-                                tx.send(1).unwrap();
-                                break;
-                            }
-                        }
-                    }
-
-                    if fd_set.contains(rfd) {
-                        info!(
-                            debug_shell_logger,
-                            "debug shell process {} exited", child_pid
-                        );
-                        tx.send(1).unwrap();
-                        break;
-                    }
-
-                    if fd_set.contains(master_fd) {
-                        match io_copy(&mut master_reader, &mut socket_writer) {
-                            Ok(0) => {
-                                debug!(debug_shell_logger, "master fd closed");
-                                tx.send(1).unwrap();
-                                break;
-                            }
-                            Ok(_) => {}
-                            Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
-                            Err(e) => {
-                                error!(debug_shell_logger, "read master fd error {:?}", e);
-                                tx.send(1).unwrap();
-                                break;
-                            }
-                        }
-                    }
-
-                    if fd_set.contains(socket_fd) {
-                        match io_copy(&mut socket_reader, &mut master_writer) {
-                            Ok(0) => {
-                                debug!(debug_shell_logger, "socket fd closed");
-                                tx.send(1).unwrap();
-                                break;
-                            }
-                            Ok(_) => {}
-                            Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
-                            Err(e) => {
-                                error!(debug_shell_logger, "read socket fd error {:?}", e);
-                                tx.send(1).unwrap();
-                                break;
-                            }
-                        }
-                    }
-                }
-            });
-
-            let wait_status = wait::waitpid(child_pid, None);
-            info!(logger, "debug console process exit code: {:?}", wait_status);
-
-            info!(logger, "notify debug monitor thread to exit");
-            // close pipe to exit select loop
-            let _ = close(wfd);
-
-            // wait for thread exit.
-            let _ = rx.recv().unwrap();
-            info!(logger, "debug monitor thread has exited");
-
-            // close files
-            let _ = close(rfd);
-            let _ = close(master_fd);
-            let _ = close(slave_fd);
-        }
-        Err(err) => {
-            return Err(anyhow!("fork error: {:?}", err));
-        }
-    }
-
-    Ok(())
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use tempfile::tempdir;
-
-    #[test]
-    fn test_setup_debug_console_no_shells() {
-        // Guarantee no shells have been added
-        // (required to avoid racing with
-        // test_setup_debug_console_invalid_shell()).
-        let shells_ref = SHELLS.clone();
-        let mut shells = shells_ref.lock().unwrap();
-        shells.clear();
-        let logger = slog_scope::logger();
-
-        let result = setup_debug_console(&logger, shells.to_vec(), 0);
-
-        assert!(result.is_err());
-        assert_eq!(
-            result.unwrap_err().to_string(),
-            "no shell found to launch debug console"
-        );
-    }
-
-    #[test]
-    fn test_setup_debug_console_invalid_shell() {
-        let shells_ref = SHELLS.clone();
-        let mut shells = shells_ref.lock().unwrap();
-
-        let dir = tempdir().expect("failed to create tmpdir");
-
-        // Add an invalid shell
-        let shell = dir
-            .path()
-            .join("enoent")
-            .to_str()
-            .expect("failed to construct shell path")
-            .to_string();
-
-        shells.push(shell);
-        let logger = slog_scope::logger();
-
-        let result = setup_debug_console(&logger, shells.to_vec(), 0);
-
-        assert!(result.is_err());
-        assert_eq!(
-            result.unwrap_err().to_string(),
-            "no shell found to launch debug console"
-        );
-    }
-}
--- a/src/agent/src/metrics.rs
+++ b/src/agent/src/metrics.rs
@@ -187,9 +187,9 @@ fn update_guest_metrics() {
            info!(sl!(), "failed to get guest KernelStats: {:?}", err);
        }
        Ok(kernel_stats) => {
-            set_gauge_vec_CPU_time(&GUEST_CPU_TIME, "total", &kernel_stats.total);
+            set_gauge_vec_cpu_time(&GUEST_CPU_TIME, "total", &kernel_stats.total);
            for (i, cpu_time) in kernel_stats.cpu_time.iter().enumerate() {
-                set_gauge_vec_CPU_time(&GUEST_CPU_TIME, format!("{}", i).as_str(), &cpu_time);
+                set_gauge_vec_cpu_time(&GUEST_CPU_TIME, format!("{}", i).as_str(), &cpu_time);
            }
        }
    }
@@ -332,7 +332,7 @@ fn set_gauge_vec_meminfo(gv: &prometheus::GaugeVec, meminfo: &procfs::Meminfo) {
        .set(meminfo.k_reclaimable.unwrap_or(0) as f64);
 }

-fn set_gauge_vec_CPU_time(gv: &prometheus::GaugeVec, cpu: &str, cpu_time: &procfs::CpuTime) {
+fn set_gauge_vec_cpu_time(gv: &prometheus::GaugeVec, cpu: &str, cpu_time: &procfs::CpuTime) {
    gv.with_label_values(&[cpu, "user"])
        .set(cpu_time.user as f64);
    gv.with_label_values(&[cpu, "nice"])
--- a/src/agent/src/mount.rs
+++ b/src/agent/src/mount.rs
@@ -7,37 +7,47 @@ use std::collections::HashMap;
 use std::ffi::CString;
 use std::fs;
 use std::io;
-use std::iter::FromIterator;
-use std::os::unix::fs::PermissionsExt;
+use std::os::unix::fs::{MetadataExt, PermissionsExt};

 use std::path::Path;
 use std::ptr::null;
-use std::sync::{Arc, Mutex};
+use std::str::FromStr;
+use std::sync::Arc;
+use tokio::sync::Mutex;

 use libc::{c_void, mount};
 use nix::mount::{self, MsFlags};
+use nix::unistd::Gid;

 use regex::Regex;
 use std::fs::File;
 use std::io::{BufRead, BufReader};

-use crate::device::{get_pci_device_name, get_scsi_device_name, online_device};
+use crate::device::{
+    get_scsi_device_name, get_virtio_blk_pci_device_name, online_device, wait_for_pmem_device,
+};
 use crate::linux_abi::*;
+use crate::pci;
 use crate::protocols::agent::Storage;
 use crate::Sandbox;
 use anyhow::{anyhow, Context, Result};
 use slog::Logger;

-pub const DRIVER9PTYPE: &str = "9p";
-pub const DRIVERVIRTIOFSTYPE: &str = "virtio-fs";
-pub const DRIVERBLKTYPE: &str = "blk";
-pub const DRIVERMMIOBLKTYPE: &str = "mmioblk";
-pub const DRIVERSCSITYPE: &str = "scsi";
-pub const DRIVERNVDIMMTYPE: &str = "nvdimm";
-pub const DRIVEREPHEMERALTYPE: &str = "ephemeral";
-pub const DRIVERLOCALTYPE: &str = "local";
+pub const DRIVER_9P_TYPE: &str = "9p";
+pub const DRIVER_VIRTIOFS_TYPE: &str = "virtio-fs";
+pub const DRIVER_BLK_TYPE: &str = "blk";
+pub const DRIVER_MMIO_BLK_TYPE: &str = "mmioblk";
+pub const DRIVER_SCSI_TYPE: &str = "scsi";
+pub const DRIVER_NVDIMM_TYPE: &str = "nvdimm";
+pub const DRIVER_EPHEMERAL_TYPE: &str = "ephemeral";
+pub const DRIVER_LOCAL_TYPE: &str = "local";

-pub const TYPEROOTFS: &str = "rootfs";
+pub const TYPE_ROOTFS: &str = "rootfs";
+
+pub const MOUNT_GUEST_TAG: &str = "kataShared";
+
+// Allocating an FSGroup that owns the pod's volumes
+const FS_GID: &str = "fsgid";

 #[rustfmt::skip]
 lazy_static! {
@@ -81,7 +91,7 @@ lazy_static! {
 }

 #[derive(Debug, PartialEq)]
-pub struct INIT_MOUNT {
+pub struct InitMount {
    fstype: &'static str,
    src: &'static str,
    dest: &'static str,
@@ -111,42 +121,26 @@ lazy_static!{

 #[rustfmt::skip]
 lazy_static! {
-    pub static ref INIT_ROOTFS_MOUNTS: Vec<INIT_MOUNT> = vec![
-        INIT_MOUNT{fstype: "proc", src: "proc", dest: "/proc", options: vec!["nosuid", "nodev", "noexec"]},
-        INIT_MOUNT{fstype: "sysfs", src: "sysfs", dest: "/sys", options: vec!["nosuid", "nodev", "noexec"]},
-        INIT_MOUNT{fstype: "devtmpfs", src: "dev", dest: "/dev", options: vec!["nosuid"]},
-        INIT_MOUNT{fstype: "tmpfs", src: "tmpfs", dest: "/dev/shm", options: vec!["nosuid", "nodev"]},
-        INIT_MOUNT{fstype: "devpts", src: "devpts", dest: "/dev/pts", options: vec!["nosuid", "noexec"]},
-        INIT_MOUNT{fstype: "tmpfs", src: "tmpfs", dest: "/run", options: vec!["nosuid", "nodev"]},
+    pub static ref INIT_ROOTFS_MOUNTS: Vec<InitMount> = vec![
+        InitMount{fstype: "proc", src: "proc", dest: "/proc", options: vec!["nosuid", "nodev", "noexec"]},
+        InitMount{fstype: "sysfs", src: "sysfs", dest: "/sys", options: vec!["nosuid", "nodev", "noexec"]},
+        InitMount{fstype: "devtmpfs", src: "dev", dest: "/dev", options: vec!["nosuid"]},
+        InitMount{fstype: "tmpfs", src: "tmpfs", dest: "/dev/shm", options: vec!["nosuid", "nodev"]},
+        InitMount{fstype: "devpts", src: "devpts", dest: "/dev/pts", options: vec!["nosuid", "noexec"]},
+        InitMount{fstype: "tmpfs", src: "tmpfs", dest: "/run", options: vec!["nosuid", "nodev"]},
    ];
 }

-// StorageHandler is the type of callback to be defined to handle every
-// type of storage driver.
-type StorageHandler = fn(&Logger, &Storage, Arc<Mutex<Sandbox>>) -> Result<String>;
-
-// STORAGEHANDLERLIST lists the supported drivers.
-#[rustfmt::skip]
-lazy_static! {
-    pub static ref STORAGEHANDLERLIST: HashMap<&'static str, StorageHandler> = {
-    	let mut m = HashMap::new();
-    let blk: StorageHandler = virtio_blk_storage_handler;
-        m.insert(DRIVERBLKTYPE, blk);
-	let p9: StorageHandler= virtio9p_storage_handler;
-        m.insert(DRIVER9PTYPE, p9);
-	let virtiofs: StorageHandler = virtiofs_storage_handler;
-        m.insert(DRIVERVIRTIOFSTYPE, virtiofs);
-    let ephemeral: StorageHandler = ephemeral_storage_handler;
-        m.insert(DRIVEREPHEMERALTYPE, ephemeral);
-    let virtiommio: StorageHandler = virtiommio_blk_storage_handler;
-        m.insert(DRIVERMMIOBLKTYPE, virtiommio);
-    let local: StorageHandler = local_storage_handler;
-        m.insert(DRIVERLOCALTYPE, local);
-    let scsi: StorageHandler = virtio_scsi_storage_handler;
-        m.insert(DRIVERSCSITYPE, scsi);
-        m
-    };
-}
+pub const STORAGE_HANDLER_LIST: [&str; 8] = [
+    DRIVER_BLK_TYPE,
+    DRIVER_9P_TYPE,
+    DRIVER_VIRTIOFS_TYPE,
+    DRIVER_EPHEMERAL_TYPE,
+    DRIVER_MMIO_BLK_TYPE,
+    DRIVER_LOCAL_TYPE,
+    DRIVER_SCSI_TYPE,
+    DRIVER_NVDIMM_TYPE,
+];

 #[derive(Debug, Clone)]
 pub struct BareMount<'a> {
@@ -238,12 +232,12 @@ impl<'a> BareMount<'a> {
    }
 }

-fn ephemeral_storage_handler(
+async fn ephemeral_storage_handler(
    logger: &Logger,
    storage: &Storage,
    sandbox: Arc<Mutex<Sandbox>>,
 ) -> Result<String> {
-    let mut sb = sandbox.lock().unwrap();
+    let mut sb = sandbox.lock().await;
    let new_storage = sb.set_sandbox_storage(&storage.mount_point);

    if !new_storage {
@@ -251,17 +245,45 @@ fn ephemeral_storage_handler(
    }

    fs::create_dir_all(Path::new(&storage.mount_point))?;
-    common_storage_handler(logger, storage)?;
+
+    // For now we only support one option field: "fsGroup", which
+    // isn't a valid mount option, thus we should remove it when
+    // doing the mount.
+    if storage.options.len() > 0 {
+        // ephemeral_storage doesn't support mount options except fsGroup.
+        let mut new_storage = storage.clone();
+        new_storage.options = protobuf::RepeatedField::default();
+        common_storage_handler(logger, &new_storage)?;
+
+        let opts_vec: Vec<String> = storage.options.to_vec();
+
+        let opts = parse_options(opts_vec);
+
+        if let Some(fsgid) = opts.get(FS_GID) {
+            let gid = fsgid.parse::<u32>()?;
+
+            nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?;
+
+            let meta = fs::metadata(&storage.mount_point)?;
+            let mut permission = meta.permissions();
+
+            let o_mode = meta.mode() | 0o2000;
+            permission.set_mode(o_mode);
+            fs::set_permissions(&storage.mount_point, permission)?;
+        }
+    } else {
+        common_storage_handler(logger, &storage)?;
+    }

    Ok("".to_string())
 }

-fn local_storage_handler(
+async fn local_storage_handler(
    _logger: &Logger,
    storage: &Storage,
    sandbox: Arc<Mutex<Sandbox>>,
 ) -> Result<String> {
-    let mut sb = sandbox.lock().unwrap();
+    let mut sb = sandbox.lock().await;
    let new_storage = sb.set_sandbox_storage(&storage.mount_point);

    if !new_storage {
@@ -276,11 +298,24 @@ fn local_storage_handler(
    let opts_vec: Vec<String> = storage.options.to_vec();

    let opts = parse_options(opts_vec);
-    let mode = opts.get("mode");
-    if let Some(mode) = mode {
+
+    let mut need_set_fsgid = false;
+    if let Some(fsgid) = opts.get(FS_GID) {
+        let gid = fsgid.parse::<u32>()?;
+
+        nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?;
+        need_set_fsgid = true;
+    }
+
+    if let Some(mode) = opts.get("mode") {
        let mut permission = fs::metadata(&storage.mount_point)?.permissions();

-        let o_mode = u32::from_str_radix(mode, 8)?;
+        let mut o_mode = u32::from_str_radix(mode, 8)?;
+
+        if need_set_fsgid {
+            // Set the setgid bit in the mode mask.
+            o_mode |= 0o2000;
+        }
        permission.set_mode(o_mode);

        fs::set_permissions(&storage.mount_point, permission)?;
@@ -289,7 +324,7 @@ fn local_storage_handler(
    Ok("".to_string())
 }

-fn virtio9p_storage_handler(
+async fn virtio9p_storage_handler(
    logger: &Logger,
    storage: &Storage,
    _sandbox: Arc<Mutex<Sandbox>>,
@@ -298,7 +333,7 @@ fn virtio9p_storage_handler(
 }

 // virtiommio_blk_storage_handler handles the storage for mmio blk driver.
-fn virtiommio_blk_storage_handler(
+async fn virtiommio_blk_storage_handler(
    logger: &Logger,
    storage: &Storage,
    _sandbox: Arc<Mutex<Sandbox>>,
@@ -308,7 +343,7 @@ fn virtiommio_blk_storage_handler(
 }

 // virtiofs_storage_handler handles the storage for virtio-fs.
-fn virtiofs_storage_handler(
+async fn virtiofs_storage_handler(
    logger: &Logger,
    storage: &Storage,
    _sandbox: Arc<Mutex<Sandbox>>,
@@ -317,14 +352,14 @@ fn virtiofs_storage_handler(
 }

 // virtio_blk_storage_handler handles the storage for blk driver.
-fn virtio_blk_storage_handler(
+async fn virtio_blk_storage_handler(
    logger: &Logger,
    storage: &Storage,
    sandbox: Arc<Mutex<Sandbox>>,
 ) -> Result<String> {
    let mut storage = storage.clone();
-    // If hot-plugged, get the device node path based on the PCI address else
-    // use the virt path provided in Storage Source
+    // If hot-plugged, get the device node path based on the PCI path
+    // otherwise use the virt path provided in Storage Source
    if storage.source.starts_with("/dev") {
        let metadata = fs::metadata(&storage.source)
            .context(format!("get metadata on file {:?}", &storage.source))?;
@@ -334,7 +369,8 @@ fn virtio_blk_storage_handler(
            return Err(anyhow!("Invalid device {}", &storage.source));
        }
    } else {
-        let dev_path = get_pci_device_name(&sandbox, &storage.source)?;
+        let pcipath = pci::Path::from_str(&storage.source)?;
+        let dev_path = get_virtio_blk_pci_device_name(&sandbox, &pcipath).await?;
        storage.source = dev_path;
    }

@@ -342,7 +378,7 @@ fn virtio_blk_storage_handler(
 }

 // virtio_scsi_storage_handler handles the storage for scsi driver.
-fn virtio_scsi_storage_handler(
+async fn virtio_scsi_storage_handler(
    logger: &Logger,
    storage: &Storage,
    sandbox: Arc<Mutex<Sandbox>>,
@@ -350,7 +386,7 @@ fn virtio_scsi_storage_handler(
    let mut storage = storage.clone();

    // Retrieve the device path from SCSI address.
-    let dev_path = get_scsi_device_name(&sandbox, &storage.source)?;
+    let dev_path = get_scsi_device_name(&sandbox, &storage.source).await?;
    storage.source = dev_path;

    common_storage_handler(logger, &storage)
@@ -363,12 +399,37 @@ fn common_storage_handler(logger: &Logger, storage: &Storage) -> Result<String>
    mount_storage(logger, storage).and(Ok(mount_point))
 }

+// nvdimm_storage_handler handles the storage for NVDIMM driver.
+//
+// It waits (via `wait_for_pmem_device`) for the pmem device named by
+// `storage.source`, then hands the storage to `common_storage_handler`,
+// whose result (the mount point on success) is returned unchanged.
+async fn nvdimm_storage_handler(
+    logger: &Logger,
+    storage: &Storage,
+    sandbox: Arc<Mutex<Sandbox>>,
+) -> Result<String> {
+    // The device path is already `storage.source`; nothing is rewritten, so
+    // `storage` is borrowed as-is rather than cloned.
+    wait_for_pmem_device(&sandbox, &storage.source).await?;
+
+    common_storage_handler(logger, storage)
+}
+
 // mount_storage performs the mount described by the storage structure.
 fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
    let logger = logger.new(o!("subsystem" => "mount"));

+    // Check share before attempting to mount to see if the destination is already a mount point.
+    // If so, skip doing the mount. This facilitates mounting the sharedfs automatically
+    // in the guest before the agent service starts.
+    if storage.source == MOUNT_GUEST_TAG && is_mounted(&storage.mount_point)? {
+        warn!(
+            logger,
+            "{} already mounted on {}, ignoring...", MOUNT_GUEST_TAG, &storage.mount_point
+        );
+        return Ok(());
+    }
+
    match storage.fstype.as_str() {
-        DRIVER9PTYPE | DRIVERVIRTIOFSTYPE => {
+        DRIVER_9P_TYPE | DRIVER_VIRTIOFS_TYPE => {
            let dest_path = Path::new(storage.mount_point.as_str());
            if !dest_path.exists() {
                fs::create_dir_all(dest_path).context("Create mount destination failed")?;
@@ -380,7 +441,7 @@ fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
    }

    let options_vec = storage.options.to_vec();
-    let options_vec = Vec::from_iter(options_vec.iter().map(String::as_str));
+    let options_vec = options_vec.iter().map(String::as_str).collect();
    let (flags, options) = parse_mount_flags_and_options(options_vec);

    info!(logger, "mounting storage";
@@ -402,6 +463,24 @@ fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
    bare_mount.mount()
 }

+/// Reports whether `mount_point` appears as a mount target in /proc/mounts.
+///
+/// Trailing slashes are ignored when comparing, except that a path made only
+/// of slashes is treated as the root directory "/". Returns `Ok(false)` when
+/// the path does not exist (in which case /proc/mounts is not read), and an
+/// error only if /proc/mounts cannot be read.
+///
+/// NOTE: the kernel octal-escapes some characters in /proc/mounts (e.g. a
+/// space is written as `\040`), so mount points containing them never match.
+fn is_mounted(mount_point: &str) -> Result<bool> {
+    let trimmed = mount_point.trim_end_matches('/');
+    // Trimming "/" (or "//") leaves an empty string; map that back to the
+    // root so the root filesystem is not reported as unmounted.
+    let mount_point = if trimmed.is_empty() && !mount_point.is_empty() {
+        "/"
+    } else {
+        trimmed
+    };
+
+    if fs::metadata(mount_point).is_err() {
+        return Ok(false);
+    }
+
+    // The 2nd whitespace-separated column of each line is the mount target.
+    let found = fs::read_to_string("/proc/mounts")?
+        .lines()
+        .any(|line| line.split_whitespace().nth(1) == Some(mount_point));
+
+    Ok(found)
+}
+
 fn parse_mount_flags_and_options(options_vec: Vec<&str>) -> (MsFlags, String) {
    let mut flags = MsFlags::empty();
    let mut options: String = "".to_string();
@@ -430,7 +509,7 @@ fn parse_mount_flags_and_options(options_vec: Vec<&str>) -> (MsFlags, String) {
 // associated operations such as waiting for the device to show up, and mount
 // it to a specific location, according to the type of handler chosen, and for
 // each storage.
-pub fn add_storages(
+pub async fn add_storages(
    logger: Logger,
    storages: Vec<Storage>,
    sandbox: Arc<Mutex<Sandbox>>,
@@ -443,17 +522,33 @@ pub fn add_storages(
            "subsystem" => "storage",
            "storage-type" => handler_name.to_owned()));

-        let handler = STORAGEHANDLERLIST
-            .get(&handler_name.as_str())
-            .ok_or_else(|| {
-                anyhow!(
+        let res = match handler_name.as_str() {
+            DRIVER_BLK_TYPE => virtio_blk_storage_handler(&logger, &storage, sandbox.clone()).await,
+            DRIVER_9P_TYPE => virtio9p_storage_handler(&logger, &storage, sandbox.clone()).await,
+            DRIVER_VIRTIOFS_TYPE => {
+                virtiofs_storage_handler(&logger, &storage, sandbox.clone()).await
+            }
+            DRIVER_EPHEMERAL_TYPE => {
+                ephemeral_storage_handler(&logger, &storage, sandbox.clone()).await
+            }
+            DRIVER_MMIO_BLK_TYPE => {
+                virtiommio_blk_storage_handler(&logger, &storage, sandbox.clone()).await
+            }
+            DRIVER_LOCAL_TYPE => local_storage_handler(&logger, &storage, sandbox.clone()).await,
+            DRIVER_SCSI_TYPE => {
+                virtio_scsi_storage_handler(&logger, &storage, sandbox.clone()).await
+            }
+            DRIVER_NVDIMM_TYPE => nvdimm_storage_handler(&logger, &storage, sandbox.clone()).await,
+            _ => {
+                return Err(anyhow!(
                    "Failed to find the storage handler {}",
                    storage.driver.to_owned()
-                )
-            })?;
+                ));
+            }
+        };

        // Todo need to rollback the mounted storage if err met.
-        let mount_point = handler(&logger, &storage, sandbox.clone())?;
+        let mount_point = res?;

        if !mount_point.is_empty() {
            mount_list.push(mount_point);
@@ -463,7 +558,7 @@ pub fn add_storages(
    Ok(mount_list)
 }

-fn mount_to_rootfs(logger: &Logger, m: &INIT_MOUNT) -> Result<()> {
+fn mount_to_rootfs(logger: &Logger, m: &InitMount) -> Result<()> {
    let options_vec: Vec<&str> = m.options.clone();

    let (flags, options) = parse_mount_flags_and_options(options_vec);
@@ -506,7 +601,7 @@ pub fn get_mount_fs_type(mount_point: &str) -> Result<String> {
 // get_mount_fs_type_from_file returns the FS type corresponding to the passed mount point and
 // any error encountered.
 pub fn get_mount_fs_type_from_file(mount_file: &str, mount_point: &str) -> Result<String> {
-    if mount_point == "" {
+    if mount_point.is_empty() {
        return Err(anyhow!("Invalid mount point {}", mount_point));
    }

@@ -539,11 +634,11 @@ pub fn get_cgroup_mounts(
    logger: &Logger,
    cg_path: &str,
    unified_cgroup_hierarchy: bool,
-) -> Result<Vec<INIT_MOUNT>> {
+) -> Result<Vec<InitMount>> {
    // cgroup v2
    // https://github.com/kata-containers/agent/blob/8c9bbadcd448c9a67690fbe11a860aaacc69813c/agent.go#L1249
    if unified_cgroup_hierarchy {
-        return Ok(vec![INIT_MOUNT {
+        return Ok(vec![InitMount {
            fstype: "cgroup2",
            src: "cgroup2",
            dest: "/sys/fs/cgroup",
@@ -555,7 +650,7 @@ pub fn get_cgroup_mounts(
    let reader = BufReader::new(file);

    let mut has_device_cgroup = false;
-    let mut cg_mounts: Vec<INIT_MOUNT> = vec![INIT_MOUNT {
+    let mut cg_mounts: Vec<InitMount> = vec![InitMount {
        fstype: "tmpfs",
        src: "tmpfs",
        dest: SYSFS_CGROUPPATH,
@@ -591,7 +686,7 @@ pub fn get_cgroup_mounts(
            }
        }

-        if fields[0] == "" {
+        if fields[0].is_empty() {
            continue;
        }

@@ -601,7 +696,7 @@ pub fn get_cgroup_mounts(

        if let Some(value) = CGROUPS.get(&fields[0]) {
            let key = CGROUPS.keys().find(|&&f| f == fields[0]).unwrap();
-            cg_mounts.push(INIT_MOUNT {
+            cg_mounts.push(InitMount {
                fstype: "cgroup",
                src: "cgroup",
                dest: *value,
@@ -615,7 +710,7 @@ pub fn get_cgroup_mounts(
        return Ok(Vec::new());
    }

-    cg_mounts.push(INIT_MOUNT {
+    cg_mounts.push(InitMount {
        fstype: "tmpfs",
        src: "tmpfs",
        dest: SYSFS_CGROUPPATH,
@@ -798,7 +893,7 @@ mod tests {
            let src_filename: String;
            let dest_filename: String;

-            if d.src != "" {
+            if !d.src.is_empty() {
                src = dir.path().join(d.src.to_string());
                src_filename = src
                    .to_str()
@@ -808,7 +903,7 @@ mod tests {
                src_filename = "".to_owned();
            }

-            if d.dest != "" {
+            if !d.dest.is_empty() {
                dest = dir.path().join(d.dest.to_string());
                dest_filename = dest
                    .to_str()
@@ -820,7 +915,7 @@ mod tests {

            // Create the mount directories
            for d in [src_filename.clone(), dest_filename.clone()].iter() {
-                if d == "" {
+                if d.is_empty() {
                    continue;
                }

@@ -840,8 +935,8 @@ mod tests {

            let msg = format!("{}: result: {:?}", msg, result);

-            if d.error_contains == "" {
-                assert!(result.is_ok(), msg);
+            if d.error_contains.is_empty() {
+                assert!(result.is_ok(), "{}", msg);

                // Cleanup
                unsafe {
@@ -853,7 +948,7 @@ mod tests {

                    let msg = format!("{}: umount result: {:?}", msg, result);

-                    assert!(ret == 0, msg);
+                    assert!(ret == 0, "{}", msg);
                };

                continue;
@@ -861,10 +956,18 @@ mod tests {

            let err = result.unwrap_err();
            let error_msg = format!("{}", err);
-            assert!(error_msg.contains(d.error_contains), msg);
+            assert!(error_msg.contains(d.error_contains), "{}", msg);
        }
    }

+    #[test]
+    fn test_is_mounted() {
+        // /proc is expected to be a mount point wherever these tests run.
+        assert!(is_mounted("/proc").unwrap());
+
+        // Empty, relative-garbage and non-existent paths are never mounted.
+        for absent in ["", "!", "/not_existing_path"].iter() {
+            assert!(!is_mounted(absent).unwrap());
+        }
+    }
+
    #[test]
    fn test_remove_mounts() {
        skip_if_not_root!();
@@ -958,14 +1061,14 @@ mod tests {

            let msg = format!("{}: result: {:?}", msg, result);

-            if d.error_contains == "" {
-                assert!(result.is_ok(), msg);
+            if d.error_contains.is_empty() {
+                assert!(result.is_ok(), "{}", msg);
                continue;
            }

            let error_msg = format!("{:#}", result.unwrap_err());

-            assert!(error_msg.contains(d.error_contains), msg);
+            assert!(error_msg.contains(d.error_contains), "{}", msg);
        }
    }

@@ -1041,6 +1144,7 @@ mod tests {

            assert!(
                format!("{}", err).contains("No such file or directory"),
+                "{}",
                msg
            );
        }
@@ -1066,16 +1170,16 @@ mod tests {
            // add more details if an assertion fails
            let msg = format!("{}: result: {:?}", msg, result);

-            if d.error_contains == "" {
+            if d.error_contains.is_empty() {
                let fs_type = result.unwrap();

-                assert!(d.fs_type == fs_type, msg);
+                assert!(d.fs_type == fs_type, "{}", msg);

                continue;
            }

            let error_msg = format!("{}", result.unwrap_err());
-            assert!(error_msg.contains(d.error_contains), msg);
+            assert!(error_msg.contains(d.error_contains), "{}", msg);
        }
    }

@@ -1113,21 +1217,21 @@ mod tests {
        let drain = slog::Discard;
        let logger = slog::Logger::root(drain, o!());

-        let first_mount = INIT_MOUNT {
+        let first_mount = InitMount {
            fstype: "tmpfs",
            src: "tmpfs",
            dest: SYSFS_CGROUPPATH,
            options: vec!["nosuid", "nodev", "noexec", "mode=755"],
        };

-        let last_mount = INIT_MOUNT {
+        let last_mount = InitMount {
            fstype: "tmpfs",
            src: "tmpfs",
            dest: SYSFS_CGROUPPATH,
            options: vec!["remount", "ro", "nosuid", "nodev", "noexec", "mode=755"],
        };

-        let cg_devices_mount = INIT_MOUNT {
+        let cg_devices_mount = InitMount {
            fstype: "cgroup",
            src: "cgroup",
            dest: "/sys/fs/cgroup/devices",
@@ -1223,35 +1327,35 @@ mod tests {
            let result = get_cgroup_mounts(&logger, filename, false);
            let msg = format!("{}: result: {:?}", msg, result);

-            if d.error_contains != "" {
-                assert!(result.is_err(), msg);
+            if !d.error_contains.is_empty() {
+                assert!(result.is_err(), "{}", msg);

                let error_msg = format!("{}", result.unwrap_err());
-                assert!(error_msg.contains(d.error_contains), msg);
+                assert!(error_msg.contains(d.error_contains), "{}", msg);
                continue;
            }

-            assert!(result.is_ok(), msg);
+            assert!(result.is_ok(), "{}", msg);

            let mounts = result.unwrap();
            let count = mounts.len();

            if !d.devices_cgroup {
-                assert!(count == 0, msg);
+                assert!(count == 0, "{}", msg);
                continue;
            }

            // get_cgroup_mounts() adds the device cgroup plus two other mounts.
-            assert!(count == (1 + 2), msg);
+            assert!(count == (1 + 2), "{}", msg);

            // First mount
-            assert!(mounts[0].eq(&first_mount), msg);
+            assert!(mounts[0].eq(&first_mount), "{}", msg);

            // Last mount
-            assert!(mounts[2].eq(&last_mount), msg);
+            assert!(mounts[2].eq(&last_mount), "{}", msg);

            // Devices cgroup
-            assert!(mounts[1].eq(&cg_devices_mount), msg);
+            assert!(mounts[1].eq(&cg_devices_mount), "{}", msg);
        }
    }
 }
--- a/src/agent/src/namespace.rs
+++ b/src/agent/src/namespace.rs
@@ -11,7 +11,6 @@ use std::fmt;
 use std::fs;
 use std::fs::File;
 use std::path::{Path, PathBuf};
-use std::thread::{self};

 use crate::mount::{BareMount, FLAGS};
 use slog::Logger;
@@ -46,29 +45,30 @@ impl Namespace {
            logger: logger.clone(),
            path: String::from(""),
            persistent_ns_dir: String::from(PERSISTENT_NS_DIR),
-            ns_type: NamespaceType::IPC,
+            ns_type: NamespaceType::Ipc,
            hostname: None,
        }
    }

    pub fn get_ipc(mut self) -> Self {
-        self.ns_type = NamespaceType::IPC;
+        self.ns_type = NamespaceType::Ipc;
        self
    }

    pub fn get_uts(mut self, hostname: &str) -> Self {
-        self.ns_type = NamespaceType::UTS;
-        if hostname != "" {
+        self.ns_type = NamespaceType::Uts;
+        if !hostname.is_empty() {
            self.hostname = Some(String::from(hostname));
        }
        self
    }

    pub fn get_pid(mut self) -> Self {
-        self.ns_type = NamespaceType::PID;
+        self.ns_type = NamespaceType::Pid;
        self
    }

+    #[allow(dead_code)]
    pub fn set_root_dir(mut self, dir: &str) -> Self {
        self.persistent_ns_dir = dir.to_string();
        self
@@ -76,12 +76,12 @@ impl Namespace {

    // setup creates persistent namespace without switching to it.
    // Note, pid namespaces cannot be persisted.
-    pub fn setup(mut self) -> Result<Self> {
+    pub async fn setup(mut self) -> Result<Self> {
        fs::create_dir_all(&self.persistent_ns_dir)?;

        let ns_path = PathBuf::from(&self.persistent_ns_dir);
        let ns_type = self.ns_type;
-        if ns_type == NamespaceType::PID {
+        if ns_type == NamespaceType::Pid {
            return Err(anyhow!("Cannot persist namespace of PID type"));
        }
        let logger = self.logger.clone();
@@ -93,45 +93,51 @@ impl Namespace {
        self.path = new_ns_path.clone().into_os_string().into_string().unwrap();
        let hostname = self.hostname.clone();

-        let new_thread = thread::spawn(move || -> Result<()> {
-            let origin_ns_path = get_current_thread_ns_path(&ns_type.get());
+        let new_thread = tokio::spawn(async move {
+            if let Err(err) = || -> Result<()> {
+                let origin_ns_path = get_current_thread_ns_path(&ns_type.get());

-            File::open(Path::new(&origin_ns_path))?;
+                File::open(Path::new(&origin_ns_path))?;

-            // Create a new netns on the current thread.
-            let cf = ns_type.get_flags();
+                // Create a new netns on the current thread.
+                let cf = ns_type.get_flags();

-            unshare(cf)?;
+                unshare(cf)?;

-            if ns_type == NamespaceType::UTS && hostname.is_some() {
-                nix::unistd::sethostname(hostname.unwrap())?;
+                if ns_type == NamespaceType::Uts && hostname.is_some() {
+                    nix::unistd::sethostname(hostname.unwrap())?;
+                }
+                // Bind mount the new namespace from the current thread onto the mount point to persist it.
+                let source: &str = origin_ns_path.as_str();
+                let destination: &str = new_ns_path.as_path().to_str().unwrap_or("none");
+
+                let mut flags = MsFlags::empty();
+
+                if let Some(x) = FLAGS.get("rbind") {
+                    let (_, f) = *x;
+                    flags |= f;
+                };
+
+                let bare_mount = BareMount::new(source, destination, "none", flags, "", &logger);
+                bare_mount.mount().map_err(|e| {
+                    anyhow!(
+                        "Failed to mount {} to {} with err:{:?}",
+                        source,
+                        destination,
+                        e
+                    )
+                })?;
+
+                Ok(())
+            }() {
+                return Err(err);
            }
-            // Bind mount the new namespace from the current thread onto the mount point to persist it.
-            let source: &str = origin_ns_path.as_str();
-            let destination: &str = new_ns_path.as_path().to_str().unwrap_or("none");
-
-            let mut flags = MsFlags::empty();
-
-            if let Some(x) = FLAGS.get("rbind") {
-                let (_, f) = *x;
-                flags |= f;
-            };
-
-            let bare_mount = BareMount::new(source, destination, "none", flags, "", &logger);
-            bare_mount.mount().map_err(|e| {
-                anyhow!(
-                    "Failed to mount {} to {} with err:{:?}",
-                    source,
-                    destination,
-                    e
-                )
-            })?;

            Ok(())
        });

        new_thread
-            .join()
+            .await
            .map_err(|e| anyhow!("Failed to join thread {:?}!", e))??;

        Ok(self)
@@ -141,27 +147,27 @@ impl Namespace {
 /// Represents the Namespace type.
 #[derive(Clone, Copy, PartialEq)]
 enum NamespaceType {
-    IPC,
-    UTS,
-    PID,
+    Ipc,
+    Uts,
+    Pid,
 }

 impl NamespaceType {
    /// Get the string representation of the namespace type.
    pub fn get(&self) -> &str {
        match *self {
-            Self::IPC => "ipc",
-            Self::UTS => "uts",
-            Self::PID => "pid",
+            Self::Ipc => "ipc",
+            Self::Uts => "uts",
+            Self::Pid => "pid",
        }
    }

    /// Get the associate flags with the namespace type.
    pub fn get_flags(&self) -> CloneFlags {
        match *self {
-            Self::IPC => CloneFlags::CLONE_NEWIPC,
-            Self::UTS => CloneFlags::CLONE_NEWUTS,
-            Self::PID => CloneFlags::CLONE_NEWPID,
+            Self::Ipc => CloneFlags::CLONE_NEWIPC,
+            Self::Uts => CloneFlags::CLONE_NEWUTS,
+            Self::Pid => CloneFlags::CLONE_NEWPID,
        }
    }
 }
@@ -172,12 +178,6 @@ impl fmt::Debug for NamespaceType {
    }
 }

-impl Default for NamespaceType {
-    fn default() -> Self {
-        NamespaceType::IPC
-    }
-}
-
 #[cfg(test)]
 mod tests {
    use super::{Namespace, NamespaceType};
@@ -185,8 +185,8 @@ mod tests {
    use nix::sched::CloneFlags;
    use tempfile::Builder;

-    #[test]
-    fn test_setup_persistent_ns() {
+    #[tokio::test]
+    async fn test_setup_persistent_ns() {
        skip_if_not_root!();
        // Create dummy logger and temp folder.
        let logger = slog::Logger::root(slog::Discard, o!());
@@ -195,7 +195,8 @@ mod tests {
        let ns_ipc = Namespace::new(&logger)
            .get_ipc()
            .set_root_dir(tmpdir.path().to_str().unwrap())
-            .setup();
+            .setup()
+            .await;

        assert!(ns_ipc.is_ok());
        assert!(remove_mounts(&[ns_ipc.unwrap().path]).is_ok());
@@ -206,7 +207,8 @@ mod tests {
        let ns_uts = Namespace::new(&logger)
            .get_uts("test_hostname")
            .set_root_dir(tmpdir.path().to_str().unwrap())
-            .setup();
+            .setup()
+            .await;

        assert!(ns_uts.is_ok());
        assert!(remove_mounts(&[ns_uts.unwrap().path]).is_ok());
@@ -218,22 +220,23 @@ mod tests {
        let ns_pid = Namespace::new(&logger)
            .get_pid()
            .set_root_dir(tmpdir.path().to_str().unwrap())
-            .setup();
+            .setup()
+            .await;

        assert!(ns_pid.is_err());
    }

    #[test]
    fn test_namespace_type() {
-        let ipc = NamespaceType::IPC;
+        let ipc = NamespaceType::Ipc;
        assert_eq!("ipc", ipc.get());
        assert_eq!(CloneFlags::CLONE_NEWIPC, ipc.get_flags());

-        let uts = NamespaceType::UTS;
+        let uts = NamespaceType::Uts;
        assert_eq!("uts", uts.get());
        assert_eq!(CloneFlags::CLONE_NEWUTS, uts.get_flags());

-        let pid = NamespaceType::PID;
+        let pid = NamespaceType::Pid;
        assert_eq!("pid", pid.get());
        assert_eq!(CloneFlags::CLONE_NEWPID, pid.get_flags());
    }
--- a/src/agent/src/netlink.rs
+++ b/src/agent/src/netlink.rs
--- a/src/agent/src/network.rs
+++ b/src/agent/src/network.rs
@@ -139,10 +139,10 @@ mod tests {
        assert_eq!(true, content.is_ok());
        let content = content.unwrap();

-        let expected_DNS: Vec<&str> = content.split('\n').collect();
+        let expected_dns: Vec<&str> = content.split('\n').collect();

        // assert the data are the same as /run/kata-containers/sandbox/resolv.conf
-        assert_eq!(dns, expected_DNS);
+        assert_eq!(dns, expected_dns);

        // umount /etc/resolv.conf
        let _ = mount::umount(dst_filename);
--- a/src/agent/src/pci.rs
+++ b/src/agent/src/pci.rs
@@ -0,0 +1,168 @@
+// Copyright Red Hat.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+use std::convert::TryInto;
+use std::fmt;
+use std::ops::Deref;
+use std::str::FromStr;
+
+use anyhow::anyhow;
+
+// The PCI spec reserves 5 bits for slot number (a.k.a. device
+// number), giving slots 0..31 (both ends inclusive).
+const SLOT_BITS: u8 = 5;
+// Largest valid slot number: all SLOT_BITS bits set, i.e. 31 (0x1f).
+const SLOT_MAX: u8 = (1 << SLOT_BITS) - 1;
+
+// Represents a PCI function's slot number (a.k.a. device number),
+// giving its location on a single bus. Values built through
+// `Slot::new` or `Slot::from_str` never exceed SLOT_MAX.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub struct Slot(u8);
+
+impl Slot {
+    // Builds a Slot from any value convertible to `u8`.
+    //
+    // Fails with an error naming the offending value when `v` does not fit
+    // in a `u8` (e.g. it is negative) or is larger than SLOT_MAX.
+    pub fn new<T: TryInto<u8> + fmt::Display + Copy>(v: T) -> anyhow::Result<Self> {
+        match v.try_into() {
+            Ok(raw) if raw <= SLOT_MAX => Ok(Slot(raw)),
+            // Either the conversion failed or the value is out of range.
+            _ => Err(anyhow!(
+                "PCI slot {} should be in range [0..{:#x}]",
+                v,
+                SLOT_MAX
+            )),
+        }
+    }
+}
+
+impl FromStr for Slot {
+    type Err = anyhow::Error;
+
+    // Parses `s` as a hexadecimal number, then validates it with `Slot::new`.
+    // Parsing into `isize` first lets out-of-range input reach `Slot::new`,
+    // which produces the range error.
+    fn from_str(s: &str) -> anyhow::Result<Self> {
+        isize::from_str_radix(s, 16)
+            .map_err(anyhow::Error::from)
+            .and_then(Slot::new)
+    }
+}
+
+impl fmt::Display for Slot {
+    // Formats the slot as zero-padded lowercase hex, at least two digits
+    // wide (e.g. "1f").
+    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+        let Slot(raw) = *self;
+        f.write_fmt(format_args!("{:02x}", raw))
+    }
+}
+
+// A non-empty, ordered list of PCI slots. It is written as slot numbers
+// separated by '/', e.g. "03/04".
+// NOTE(review): presumably one slot per bridge on the way to the device;
+// confirm against the callers that build these paths.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct Path(Vec<Slot>);
+
+impl Path {
+    // Wraps `slots` into a Path, refusing an empty list.
+    pub fn new(slots: Vec<Slot>) -> anyhow::Result<Self> {
+        if slots.is_empty() {
+            Err(anyhow!("PCI path must have at least one element"))
+        } else {
+            Ok(Path(slots))
+        }
+    }
+}
+
+// Expose the underlying slots as a read-only slice, so slice operations
+// (`len()`, indexing, iteration) work directly on a Path.
+impl Deref for Path {
+    type Target = [Slot];
+
+    fn deref(&self) -> &[Slot] {
+        self.0.as_slice()
+    }
+}
+
+impl fmt::Display for Path {
+    // Writes each slot in its own Display form, separated by '/'.
+    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+        for (idx, slot) in self.0.iter().enumerate() {
+            // The separator goes between elements only, never before the first.
+            if idx > 0 {
+                f.write_str("/")?;
+            }
+            write!(f, "{}", slot)?;
+        }
+        Ok(())
+    }
+}
+
+impl FromStr for Path {
+    type Err = anyhow::Error;
+
+    // Parses a '/'-separated list of hexadecimal slot numbers.
+    // The first component that is not a valid Slot aborts the parse with
+    // that component's error.
+    fn from_str(s: &str) -> anyhow::Result<Self> {
+        let mut slots = Vec::new();
+        for component in s.split('/') {
+            slots.push(component.parse::<Slot>()?);
+        }
+        Path::new(slots)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::pci::{Path, Slot};
+    use std::str::FromStr;
+
+    // Slot construction, parsing and formatting, at and beyond the valid range.
+    #[test]
+    fn test_slot() {
+        // Valid slots
+        let zero = Slot::new(0x00).unwrap();
+        assert_eq!(zero.to_string(), "00");
+        assert_eq!(Slot::from_str("00").unwrap().to_string(), "00");
+        assert_eq!(Slot::new(31).unwrap(), Slot::from_str("1f").unwrap());
+
+        // Bad slots: numbers outside the valid range
+        assert!(Slot::new(-1).is_err());
+        assert!(Slot::new(32).is_err());
+
+        // Bad slots: out-of-range, non-hex, trailing separator, empty
+        for bad in ["20", "xy", "00/", ""].iter() {
+            assert!(Slot::from_str(bad).is_err(), "{:?} should not parse", bad);
+        }
+    }
+
+    // Path construction, parsing, formatting and slice access.
+    #[test]
+    fn test_path() {
+        let slot3 = Slot::new(0x03).unwrap();
+        let slot4 = Slot::new(0x04).unwrap();
+        let slot5 = Slot::new(0x05).unwrap();
+
+        // Valid paths of one, two and three elements: each must format to
+        // the expected text, parse back to an equal value, and expose its
+        // slots through the slice view.
+        let cases = vec![
+            (vec![slot3], "03"),
+            (vec![slot3, slot4], "03/04"),
+            (vec![slot3, slot4, slot5], "03/04/05"),
+        ];
+        for (slots, text) in cases {
+            let pcipath = Path::new(slots.clone()).unwrap();
+            assert_eq!(pcipath.to_string(), text);
+            assert_eq!(pcipath, Path::from_str(text).unwrap());
+            assert_eq!(pcipath.len(), slots.len());
+            assert_eq!(&pcipath[..], slots.as_slice());
+        }
+
+        // Bad paths
+        assert!(Path::new(vec![]).is_err());
+        for bad in ["20", "//", "xyz"].iter() {
+            assert!(Path::from_str(bad).is_err(), "{:?} should not parse", bad);
+        }
+    }
+}
--- a/src/agent/src/rpc.rs
+++ b/src/agent/src/rpc.rs
--- a/src/agent/src/sandbox.rs
+++ b/src/agent/src/sandbox.rs
@@ -4,12 +4,13 @@
 //

 use crate::linux_abi::*;
-use crate::mount::{get_mount_fs_type, remove_mounts, TYPEROOTFS};
+use crate::mount::{get_mount_fs_type, remove_mounts, TYPE_ROOTFS};
 use crate::namespace::Namespace;
+use crate::netlink::Handle;
 use crate::network::Network;
+use crate::uevent::{Uevent, UeventMatcher};
 use anyhow::{anyhow, Context, Result};
 use libc::pid_t;
-use netlink::{RtnlHandle, NETLINK_ROUTE};
 use oci::{Hook, Hooks};
 use protocols::agent::OnlineCPUMemRequest;
 use regex::Regex;
@@ -22,9 +23,13 @@ use std::collections::HashMap;
 use std::fs;
 use std::os::unix::fs::PermissionsExt;
 use std::path::Path;
-use std::sync::mpsc::{self, Receiver, Sender};
-use std::sync::{Arc, Mutex};
+use std::sync::Arc;
 use std::{thread, time};
+use tokio::sync::mpsc::{channel, Receiver, Sender};
+use tokio::sync::oneshot;
+use tokio::sync::Mutex;
+
+type UeventWatcher = (Box<dyn UeventMatcher>, oneshot::Sender<Uevent>);

 #[derive(Debug)]
 pub struct Sandbox {
@@ -35,25 +40,26 @@ pub struct Sandbox {
    pub network: Network,
    pub mounts: Vec<String>,
    pub container_mounts: HashMap<String, Vec<String>>,
-    pub pci_device_map: HashMap<String, String>,
+    pub uevent_map: HashMap<String, Uevent>,
+    pub uevent_watchers: Vec<Option<UeventWatcher>>,
    pub shared_utsns: Namespace,
    pub shared_ipcns: Namespace,
    pub sandbox_pidns: Option<Namespace>,
    pub storages: HashMap<String, u32>,
    pub running: bool,
    pub no_pivot_root: bool,
-    pub sender: Option<Sender<i32>>,
-    pub rtnl: Option<RtnlHandle>,
+    pub sender: Option<tokio::sync::oneshot::Sender<i32>>,
+    pub rtnl: Handle,
    pub hooks: Option<Hooks>,
    pub event_rx: Arc<Mutex<Receiver<String>>>,
-    pub event_tx: Sender<String>,
+    pub event_tx: Option<Sender<String>>,
 }

 impl Sandbox {
    pub fn new(logger: &Logger) -> Result<Self> {
        let fs_type = get_mount_fs_type("/")?;
        let logger = logger.new(o!("subsystem" => "sandbox"));
-        let (tx, rx) = mpsc::channel::<String>();
+        let (tx, rx) = channel::<String>(100);
        let event_rx = Arc::new(Mutex::new(rx));

        Ok(Sandbox {
@@ -64,18 +70,19 @@ impl Sandbox {
            containers: HashMap::new(),
            mounts: Vec::new(),
            container_mounts: HashMap::new(),
-            pci_device_map: HashMap::new(),
+            uevent_map: HashMap::new(),
+            uevent_watchers: Vec::new(),
            shared_utsns: Namespace::new(&logger),
            shared_ipcns: Namespace::new(&logger),
            sandbox_pidns: None,
            storages: HashMap::new(),
            running: false,
-            no_pivot_root: fs_type.eq(TYPEROOTFS),
+            no_pivot_root: fs_type.eq(TYPE_ROOTFS),
            sender: None,
-            rtnl: Some(RtnlHandle::new(NETLINK_ROUTE, 0).unwrap()),
+            rtnl: Handle::new()?,
            hooks: None,
            event_rx,
-            event_tx: tx,
+            event_tx: Some(tx),
        })
    }

@@ -149,25 +156,19 @@ impl Sandbox {
        Ok(())
    }

-    pub fn is_running(&self) -> bool {
-        self.running
-    }
-
-    pub fn set_hostname(&mut self, hostname: String) {
-        self.hostname = hostname;
-    }
-
-    pub fn setup_shared_namespaces(&mut self) -> Result<bool> {
+    pub async fn setup_shared_namespaces(&mut self) -> Result<bool> {
        // Set up shared IPC namespace
        self.shared_ipcns = Namespace::new(&self.logger)
            .get_ipc()
            .setup()
+            .await
            .context("Failed to setup persistent IPC namespace")?;

        // // Set up shared UTS namespace
        self.shared_utsns = Namespace::new(&self.logger)
            .get_uts(self.hostname.as_str())
            .setup()
+            .await
            .context("Failed to setup persistent UTS namespace")?;

        Ok(true)
@@ -214,9 +215,9 @@ impl Sandbox {
        None
    }

-    pub fn destroy(&mut self) -> Result<()> {
+    pub async fn destroy(&mut self) -> Result<()> {
        for ctr in self.containers.values_mut() {
-            ctr.destroy()?;
+            ctr.destroy().await?;
        }
        Ok(())
    }
@@ -315,15 +316,32 @@ impl Sandbox {
        Ok(hooks)
    }

-    pub fn run_oom_event_monitor(&self, rx: Receiver<String>, container_id: String) {
-        let tx = self.event_tx.clone();
+    pub async fn run_oom_event_monitor(&self, mut rx: Receiver<String>, container_id: String) {
        let logger = self.logger.clone();

-        thread::spawn(move || {
-            for event in rx {
+        if self.event_tx.is_none() {
+            error!(
+                logger,
+                "sandbox.event_tx not found in run_oom_event_monitor"
+            );
+            return;
+        }
+
+        let tx = self.event_tx.as_ref().unwrap().clone();
+
+        tokio::spawn(async move {
+            loop {
+                let event = rx.recv().await;
+                // None means the container has exited,
+                // and sender in OOM notifier is dropped.
+                if event.is_none() {
+                    return;
+                }
                info!(logger, "got an OOM event {:?}", event);
+
                let _ = tx
                    .send(container_id.clone())
+                    .await
                    .map_err(|e| error!(logger, "failed to send message: {:?}", e));
            }
        });
@@ -383,7 +401,7 @@ fn online_cpus(logger: &Logger, num: i32) -> Result<i32> {
            logger,
            SYSFS_CPU_ONLINE_PATH,
            r"cpu[0-9]+",
-            (num - onlined_count),
+            num - onlined_count,
        );
        if r.is_err() {
            return r;
@@ -428,8 +446,8 @@ mod tests {
        baremount.mount()
    }

-    #[test]
-    fn set_sandbox_storage() {
+    #[tokio::test]
+    async fn set_sandbox_storage() {
        let logger = slog::Logger::root(slog::Discard, o!());
        let mut s = Sandbox::new(&logger).unwrap();

@@ -462,8 +480,8 @@ mod tests {
        );
    }

-    #[test]
-    fn remove_sandbox_storage() {
+    #[tokio::test]
+    async fn remove_sandbox_storage() {
        skip_if_not_root!();

        let logger = slog::Logger::root(slog::Discard, o!());
@@ -518,9 +536,9 @@ mod tests {
        assert!(s.remove_sandbox_storage(destdir_path).is_ok());
    }

-    #[test]
+    #[tokio::test]
    #[allow(unused_assignments)]
-    fn unset_and_remove_sandbox_storage() {
+    async fn unset_and_remove_sandbox_storage() {
        skip_if_not_root!();

        let logger = slog::Logger::root(slog::Discard, o!());
@@ -570,8 +588,8 @@ mod tests {
        assert!(s.unset_and_remove_sandbox_storage(&other_dir_str).is_err());
    }

-    #[test]
-    fn unset_sandbox_storage() {
+    #[tokio::test]
+    async fn unset_sandbox_storage() {
        let logger = slog::Logger::root(slog::Discard, o!());
        let mut s = Sandbox::new(&logger).unwrap();

@@ -624,13 +642,16 @@ mod tests {
    }

    fn create_dummy_opts() -> CreateOpts {
-        let mut root = Root::default();
-        root.path = String::from("/");
+        let root = Root {
+            path: String::from("/"),
+            ..Default::default()
+        };

-        let linux = Linux::default();
-        let mut spec = Spec::default();
-        spec.root = Some(root);
-        spec.linux = Some(linux);
+        let spec = Spec {
+            linux: Some(Linux::default()),
+            root: Some(root),
+            ..Default::default()
+        };

        CreateOpts {
            cgroup_name: "".to_string(),
@@ -653,8 +674,8 @@ mod tests {
        .unwrap()
    }

-    #[test]
-    fn get_container_entry_exist() {
+    #[tokio::test]
+    async fn get_container_entry_exist() {
        skip_if_not_root!();
        let logger = slog::Logger::root(slog::Discard, o!());
        let mut s = Sandbox::new(&logger).unwrap();
@@ -666,8 +687,8 @@ mod tests {
        assert!(cnt.is_some());
    }

-    #[test]
-    fn get_container_no_entry() {
+    #[tokio::test]
+    async fn get_container_no_entry() {
        let logger = slog::Logger::root(slog::Discard, o!());
        let mut s = Sandbox::new(&logger).unwrap();

@@ -675,8 +696,8 @@ mod tests {
        assert!(cnt.is_none());
    }

-    #[test]
-    fn add_and_get_container() {
+    #[tokio::test]
+    async fn add_and_get_container() {
        skip_if_not_root!();
        let logger = slog::Logger::root(slog::Discard, o!());
        let mut s = Sandbox::new(&logger).unwrap();
@@ -685,8 +706,9 @@ mod tests {
        s.add_container(linux_container);
        assert!(s.get_container("some_id").is_some());
    }
-    #[test]
-    fn update_shared_pidns() {
+
+    #[tokio::test]
+    async fn update_shared_pidns() {
        skip_if_not_root!();
        let logger = slog::Logger::root(slog::Discard, o!());
        let mut s = Sandbox::new(&logger).unwrap();
@@ -702,8 +724,9 @@ mod tests {
        let ns_path = format!("/proc/{}/ns/pid", test_pid);
        assert_eq!(s.sandbox_pidns.unwrap().path, ns_path);
    }
-    #[test]
-    fn add_guest_hooks() {
+
+    #[tokio::test]
+    async fn add_guest_hooks() {
        let logger = slog::Logger::root(slog::Discard, o!());
        let mut s = Sandbox::new(&logger).unwrap();
        let tmpdir = Builder::new().tempdir().unwrap();
@@ -725,30 +748,11 @@ mod tests {
        assert!(s.hooks.as_ref().unwrap().poststop.is_empty());
    }

-    #[test]
-    pub fn test_sandbox_is_running() {
+    #[tokio::test]
+    async fn test_sandbox_set_destroy() {
        let logger = slog::Logger::root(slog::Discard, o!());
        let mut s = Sandbox::new(&logger).unwrap();
-        s.running = true;
-        assert!(s.is_running());
-        s.running = false;
-        assert!(!s.is_running());
-    }
-
-    #[test]
-    fn test_sandbox_set_hostname() {
-        let logger = slog::Logger::root(slog::Discard, o!());
-        let mut s = Sandbox::new(&logger).unwrap();
-        let hostname = "abc123";
-        s.set_hostname(hostname.to_string());
-        assert_eq!(s.hostname, hostname);
-    }
-
-    #[test]
-    fn test_sandbox_set_destroy() {
-        let logger = slog::Logger::root(slog::Discard, o!());
-        let mut s = Sandbox::new(&logger).unwrap();
-        let ret = s.destroy();
+        let ret = s.destroy().await;
        assert!(ret.is_ok());
    }
 }
--- a/src/agent/src/signal.rs
+++ b/src/agent/src/signal.rs
@@ -0,0 +1,155 @@
+// Copyright (c) 2019-2020 Ant Financial
+// Copyright (c) 2020 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use crate::sandbox::Sandbox;
+use anyhow::{anyhow, Result};
+use nix::sys::wait::WaitPidFlag;
+use nix::sys::wait::{self, WaitStatus};
+use nix::unistd;
+use prctl::set_child_subreaper;
+use slog::{error, info, o, Logger};
+use std::sync::Arc;
+use tokio::select;
+use tokio::signal::unix::{signal, SignalKind};
+use tokio::sync::watch::Receiver;
+use tokio::sync::Mutex;
+use unistd::Pid;
+
+// Reap exited children without blocking and record each exit status on the
+// matching sandbox process. Returns Ok(()) once waitpid() reports that no
+// further child has changed state; a waitpid() failure is returned as an error.
+async fn handle_sigchild(logger: Logger, sandbox: Arc<Mutex<Sandbox>>) -> Result<()> {
+    info!(logger, "handling signal"; "signal" => "SIGCHLD");
+
+    loop {
+        // Avoid reaping a child that something else wants to wait for, e.g.
+        // execute_hook's. The guard is a temporary, so the lock is released
+        // again as soon as it has been acquired.
+        rustjail::container::WAIT_PID_LOCKER.lock().await;
+
+        let wait_status = match wait::waitpid(
+            Some(Pid::from_raw(-1)),
+            Some(WaitPidFlag::WNOHANG | WaitPidFlag::__WALL),
+        ) {
+            Ok(WaitStatus::StillAlive) => return Ok(()),
+            Ok(status) => status,
+            Err(e) => return Err(anyhow!(e).context("waitpid reaper failed")),
+        };
+
+        info!(logger, "wait_status"; "wait_status result" => format!("{:?}", wait_status));
+
+        // A status that carries no pid needs no bookkeeping.
+        let raw_pid = match wait_status.pid() {
+            Some(pid) => pid.as_raw(),
+            None => continue,
+        };
+
+        let logger = logger.new(o!("child-pid" => format!("{}", raw_pid)));
+
+        // Held until the end of this loop iteration.
+        let mut sb = sandbox.lock().await;
+
+        let mut p = match sb.find_process(raw_pid) {
+            Some(process) => process,
+            None => {
+                info!(logger, "child exited unexpectedly");
+                continue;
+            }
+        };
+
+        let ret: i32 = match wait_status {
+            WaitStatus::Exited(_, code) => code,
+            WaitStatus::Signaled(_, sig, _) => sig as i32,
+            _ => {
+                info!(logger, "got wrong status for process";
+                              "child-status" => format!("{:?}", wait_status));
+                continue;
+            }
+        };
+
+        // Record the result and drop the process's exit sender.
+        p.exit_code = ret;
+        let _ = p.exit_tx.take();
+
+        info!(logger, "notify term to close");
+        // close the socket file to notify readStdio to close terminal specifically
+        // in case this process's terminal has been inherited by its children.
+        p.notify_term_close();
+    }
+}
+
+// Make the agent a child subreaper and call handle_sigchild() every time the
+// SIGCHLD stream yields, until `shutdown.changed()` completes. Fails only if
+// the subreaper or the signal stream cannot be set up.
+pub async fn setup_signal_handler(
+    logger: Logger,
+    sandbox: Arc<Mutex<Sandbox>>,
+    mut shutdown: Receiver<bool>,
+) -> Result<()> {
+    let logger = logger.new(o!("subsystem" => "signals"));
+
+    if let Err(err) = set_child_subreaper(true) {
+        return Err(anyhow!(err).context("failed to setup agent as a child subreaper"));
+    }
+
+    let mut sigchld = signal(SignalKind::child())?;
+
+    loop {
+        select! {
+            // Any completion of changed() ends the loop.
+            _ = shutdown.changed() => {
+                info!(logger, "got shutdown request");
+                break;
+            }
+
+            _ = sigchld.recv() => {
+                // Log errors, but don't abort - just wait for more signals!
+                if let Err(e) = handle_sigchild(logger.clone(), sandbox.clone()).await {
+                    error!(logger, "failed to handle signal"; "error" => format!("{:?}", e));
+                }
+            }
+        }
+    }
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tokio::pin;
+    use tokio::sync::watch::channel;
+    use tokio::time::Duration;
+
+    #[tokio::test]
+    async fn test_setup_signal_handler() {
+        let logger = slog::Logger::root(slog::Discard, o!());
+        let s = Sandbox::new(&logger).unwrap();
+
+        let sandbox = Arc::new(Mutex::new(s));
+
+        let (tx, rx) = channel(true);
+
+        let handle = tokio::spawn(setup_signal_handler(logger, sandbox, rx));
+
+        let timeout = tokio::time::sleep(Duration::from_secs(1));
+        pin!(timeout);
+
+        tx.send(true).expect("failed to request shutdown");
+
+        loop {
+            select! {
+                _ = handle => {
+                    println!("INFO: task completed");
+                    break;
+                },
+                _ = &mut timeout => {
+                    panic!("signal thread failed to stop");
+                }
+            }
+        }
+    }
+}
--- a/src/agent/src/uevent.rs
+++ b/src/agent/src/uevent.rs
@@ -6,20 +6,38 @@
 use crate::device::online_device;
 use crate::linux_abi::*;
 use crate::sandbox::Sandbox;
-use crate::GLOBAL_DEVICE_WATCHER;
-use netlink::{RtnlHandle, NETLINK_UEVENT};
+use crate::AGENT_CONFIG;
 use slog::Logger;
-use std::sync::{Arc, Mutex};
-use std::thread;

-#[derive(Debug, Default)]
-struct Uevent {
-    action: String,
-    devpath: String,
-    devname: String,
-    subsystem: String,
+use anyhow::{anyhow, Result};
+use netlink_sys::{protocols, SocketAddr, TokioSocket};
+use nix::errno::Errno;
+use std::fmt::Debug;
+use std::os::unix::io::FromRawFd;
+use std::sync::Arc;
+use tokio::select;
+use tokio::sync::watch::Receiver;
+use tokio::sync::Mutex;
+
+// Convenience macro to obtain the scope logger
+macro_rules! sl {
+    () => {
+        slog_scope::logger().new(o!("subsystem" => "uevent"))
+    };
+}
+
+#[derive(Debug, Default, Clone, PartialEq, Eq)]
+pub struct Uevent {
+    pub action: String,
+    pub devpath: String,
+    pub devname: String,
+    pub subsystem: String,
    seqnum: String,
-    interface: String,
+    pub interface: String,
+}
+
+pub trait UeventMatcher: Sync + Send + Debug + 'static {
+    fn is_match(&self, uev: &Uevent) -> bool;
 }

 impl Uevent {
@@ -46,105 +64,216 @@ impl Uevent {
        event
    }

-    // Check whether this is a block device hot-add event.
-    fn is_block_add_event(&self) -> bool {
-        let pci_root_bus_path = create_pci_root_bus_path();
-        self.action == U_EVENT_ACTION_ADD
-            && self.subsystem == "block"
-            && self.devpath.starts_with(&pci_root_bus_path)
-            && self.devname != ""
-    }
+    async fn process_add(&self, logger: &Logger, sandbox: &Arc<Mutex<Sandbox>>) {
+        // Special case for memory hot-adds first
+        let online_path = format!("{}/{}/online", SYSFS_DIR, &self.devpath);
+        if online_path.starts_with(SYSFS_MEMORY_ONLINE_PATH) {
+            let _ = online_device(online_path.as_ref()).map_err(|e| {
+                error!(
+                    *logger,
+                    "failed to online device";
+                    "device" => &self.devpath,
+                    "error" => format!("{}", e),
+                )
+            });
+            return;
+        }

-    fn handle_block_add_event(&self, sandbox: &Arc<Mutex<Sandbox>>) {
-        let pci_root_bus_path = create_pci_root_bus_path();
+        let mut sb = sandbox.lock().await;

-        // Keep the same lock order as device::get_device_name(), otherwise it may cause deadlock.
-        let mut w = GLOBAL_DEVICE_WATCHER.lock().unwrap();
-        let mut sb = sandbox.lock().unwrap();
-
-        // Add the device node name to the pci device map.
-        sb.pci_device_map
-            .insert(self.devpath.clone(), self.devname.clone());
+        // Record the event by sysfs path
+        sb.uevent_map.insert(self.devpath.clone(), self.clone());

        // Notify watchers that are interested in the udev event.
-        // Close the channel after watcher has been notified.
-        let devpath = self.devpath.clone();
-        let empties: Vec<_> = w
-            .iter()
-            .filter(|(dev_addr, _)| {
-                let pci_p = format!("{}/{}", pci_root_bus_path, *dev_addr);
-
-                // blk block device
-                devpath.starts_with(pci_p.as_str()) ||
-                    // scsi block device
-                    {
-                        (*dev_addr).ends_with(SCSI_BLOCK_SUFFIX) &&
-                            devpath.contains(*dev_addr)
-                    }
-            })
-            .map(|(k, sender)| {
-                let devname = self.devname.clone();
-                let _ = sender.send(devname);
-                k.clone()
-            })
-            .collect();
-
-        // Remove notified nodes from the watcher map.
-        for empty in empties {
-            w.remove(&empty);
+        for watch in &mut sb.uevent_watchers {
+            if let Some((matcher, _)) = watch {
+                if matcher.is_match(&self) {
+                    let (_, sender) = watch.take().unwrap();
+                    let _ = sender.send(self.clone());
+                }
+            }
        }
    }

-    fn process(&self, logger: &Logger, sandbox: &Arc<Mutex<Sandbox>>) {
-        if self.is_block_add_event() {
-            return self.handle_block_add_event(sandbox);
-        } else if self.action == U_EVENT_ACTION_ADD {
-            let online_path = format!("{}/{}/online", SYSFS_DIR, &self.devpath);
-            // It's a memory hot-add event.
-            if online_path.starts_with(SYSFS_MEMORY_ONLINE_PATH) {
-                let _ = online_device(online_path.as_ref()).map_err(|e| {
-                    error!(
-                        *logger,
-                        "failed to online device";
-                        "device" => &self.devpath,
-                        "error" => format!("{}", e),
-                    )
-                });
-                return;
-            }
+    async fn process(&self, logger: &Logger, sandbox: &Arc<Mutex<Sandbox>>) {
+        if self.action == U_EVENT_ACTION_ADD {
+            return self.process_add(logger, sandbox).await;
        }
        debug!(*logger, "ignoring event"; "uevent" => format!("{:?}", self));
    }
 }

-pub fn watch_uevents(sandbox: Arc<Mutex<Sandbox>>) {
-    thread::spawn(move || {
-        let rtnl = RtnlHandle::new(NETLINK_UEVENT, 1).unwrap();
-        let logger = sandbox
-            .lock()
-            .unwrap()
-            .logger
-            .new(o!("subsystem" => "uevent"));
+// Return a previously recorded uevent accepted by `matcher`, or wait (up to
+// the configured hotplug timeout) for a new one. Errors on timeout.
+pub async fn wait_for_uevent(
+    sandbox: &Arc<Mutex<Sandbox>>,
+    matcher: impl UeventMatcher,
+) -> Result<Uevent> {
+    let mut sb = sandbox.lock().await;
+    if let Some(uev) = sb.uevent_map.values().find(|uev| matcher.is_match(uev)) {
+        info!(sl!(), "Device {:?} found in pci device map", uev);
+        return Ok(uev.clone());
+    }

-        loop {
-            match rtnl.recv_message() {
-                Err(e) => {
-                    error!(logger, "receive uevent message failed"; "error" => format!("{}", e))
-                }
-                Ok(data) => {
-                    let text = String::from_utf8(data);
-                    match text {
-                        Err(e) => {
-                            error!(logger, "failed to convert bytes to text"; "error" => format!("{}", e))
+    // No matching event has been recorded, so the hotplug event has not
+    // been received yet: register a watcher (matcher plus oneshot sender).
+    // Note this is done inside the lock, not to miss any events from the
+    // global udev listener.
+    let (tx, rx) = tokio::sync::oneshot::channel::<Uevent>();
+    let idx = sb.uevent_watchers.len();
+    sb.uevent_watchers.push(Some((Box::new(matcher), tx)));
+    drop(sb); // unlock
+
+    info!(sl!(), "Waiting on channel for uevent notification\n");
+    let hotplug_timeout = AGENT_CONFIG.read().await.hotplug_timeout;
+
+    let uev = match tokio::time::timeout(hotplug_timeout, rx).await {
+        Ok(v) => v?,
+        Err(_) => {
+            // A racing uevent may already have taken this watcher slot, so
+            // an empty slot must not be treated as a bug (no unwrap here).
+            let mut sb = sandbox.lock().await;
+            let matcher = sb.uevent_watchers[idx].take().map(|w| w.0);
+            return Err(anyhow!(
+                "Timeout after {:?} waiting for uevent {:?}",
+                hotplug_timeout,
+                &matcher
+            ));
+        }
+    };
+
+    Ok(uev)
+}
+
+// Listen for kernel uevents on a netlink socket and process each of them
+// until `shutdown` fires. Fails if the socket cannot be created or bound.
+pub async fn watch_uevents(
+    sandbox: Arc<Mutex<Sandbox>>,
+    mut shutdown: Receiver<bool>,
+) -> Result<()> {
+    // The lock guard is a temporary, so the sandbox is unlocked again at the
+    // end of this statement, which allows a successful shutdown.
+    let logger = sandbox.lock().await.logger.new(o!("subsystem" => "uevent"));
+
+    info!(logger, "starting uevents handler");
+
+    // socket(2) returns -1 on failure: never wrap an invalid descriptor.
+    let fd = unsafe {
+        libc::socket(
+            libc::AF_NETLINK,
+            libc::SOCK_DGRAM | libc::SOCK_CLOEXEC,
+            protocols::NETLINK_KOBJECT_UEVENT as libc::c_int,
+        )
+    };
+    if fd < 0 {
+        return Err(anyhow!("failed to create uevent socket: {}", Errno::last()));
+    }
+    let mut socket = unsafe { TokioSocket::from_raw_fd(fd) };
+    socket.bind(&SocketAddr::new(0, 1))?;
+
+    loop {
+        select! {
+            _ = shutdown.changed() => {
+                info!(logger, "got shutdown request");
+                break;
+            }
+            result = socket.recv_from_full() => {
+                match result {
+                    Err(e) => {
+                        error!(logger, "failed to receive uevent"; "error" => format!("{}", e))
+                    }
+                    Ok((buf, addr)) => {
+                        if addr.port_number() != 0 {
+                            // not our netlink message
+                            let err_msg = format!("{:?}", nix::Error::Sys(Errno::EBADMSG));
+                            error!(logger, "receive uevent message failed"; "error" => err_msg);
+                            continue;
                         }
-                        Ok(text) => {
-                            let event = Uevent::new(&text);
-                            info!(logger, "got uevent message"; "event" => format!("{:?}", event));
-                            event.process(&logger, &sandbox);
+
+                        let text = String::from_utf8(buf);
+                        match text {
+                            Err(e) => {
+                                error!(logger, "failed to convert bytes to text"; "error" => format!("{}", e))
+                            }
+                            Ok(text) => {
+                                let event = Uevent::new(&text);
+                                info!(logger, "got uevent message"; "event" => format!("{:?}", event));
+                                event.process(&logger, &sandbox).await;
+                            }
                         }
                     }
                 }
             }
         }
+    }
+
+    Ok(())
+}
+
+// Used in the device module unit tests
+#[cfg(test)]
+pub(crate) fn spawn_test_watcher(sandbox: Arc<Mutex<Sandbox>>, uev: Uevent) {
+    tokio::spawn(async move {
+        loop {
+            let mut sb = sandbox.lock().await;
+            for w in &mut sb.uevent_watchers {
+                if let Some((matcher, _)) = w {
+                    if matcher.is_match(&uev) {
+                        let (_, sender) = w.take().unwrap();
+                        let _ = sender.send(uev);
+                        return;
+                    }
+                }
+            }
+            drop(sb); // unlock
+        }
    });
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[derive(Debug, Clone, Copy)]
+    struct AlwaysMatch();
+
+    impl UeventMatcher for AlwaysMatch {
+        fn is_match(&self, _: &Uevent) -> bool {
+            true
+        }
+    }
+
+    #[tokio::test]
+    async fn test_wait_for_uevent() {
+        let uev = Uevent {
+            action: crate::linux_abi::U_EVENT_ACTION_ADD.to_string(),
+            subsystem: "test".to_string(),
+            devpath: "/test/sysfs/path".to_string(),
+            devname: "testdevname".to_string(),
+            ..Default::default()
+        };
+
+        let matcher = AlwaysMatch();
+
+        let logger = slog::Logger::root(slog::Discard, o!());
+        let sandbox = Arc::new(Mutex::new(Sandbox::new(&logger).unwrap()));
+
+        let mut sb = sandbox.lock().await;
+        sb.uevent_map.insert(uev.devpath.clone(), uev.clone());
+        drop(sb); // unlock
+
+        let uev2 = wait_for_uevent(&sandbox, matcher).await;
+        assert!(uev2.is_ok());
+        assert_eq!(uev2.unwrap(), uev);
+
+        let mut sb = sandbox.lock().await;
+        sb.uevent_map.remove(&uev.devpath).unwrap();
+        drop(sb); // unlock
+
+        spawn_test_watcher(sandbox.clone(), uev.clone());
+
+        let uev2 = wait_for_uevent(&sandbox, matcher).await;
+        assert!(uev2.is_ok());
+        assert_eq!(uev2.unwrap(), uev);
+    }
+}
--- a/src/agent/src/util.rs
+++ b/src/agent/src/util.rs
@@ -0,0 +1,355 @@
+// Copyright (c) 2021 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use anyhow::Result;
+use futures::StreamExt;
+use std::io;
+use std::io::ErrorKind;
+use std::os::unix::io::{FromRawFd, RawFd};
+use tokio::io::{AsyncReadExt, AsyncWriteExt};
+use tokio::sync::watch::Receiver;
+use tokio_vsock::{Incoming, VsockListener, VsockStream};
+
+// Size of I/O read buffer
+const BUF_SIZE: usize = 8192;
+
+// Copy data from `reader` to `writer` until EOF, an I/O error, or a change on
+// the `shutdown` channel (an interruptable version of "io::copy()").
+// On success, returns the number of bytes copied.
+pub async fn interruptable_io_copier<R: Sized, W: Sized>(
+    mut reader: R,
+    mut writer: W,
+    mut shutdown: Receiver<bool>,
+) -> io::Result<u64>
+where
+    R: tokio::io::AsyncRead + Unpin,
+    W: tokio::io::AsyncWrite + Unpin,
+{
+    let mut copied = 0u64;
+    let mut chunk = [0u8; BUF_SIZE];
+
+    loop {
+        tokio::select! {
+            _ = shutdown.changed() => {
+                eprintln!("INFO: interruptable_io_copier: got shutdown request");
+                break;
+            },
+
+            outcome = reader.read(&mut chunk) => {
+                let len = match outcome {
+                    Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
+                    Err(e) => return Err(e),
+                    // A zero-length read means end of input.
+                    Ok(0) => break,
+                    Ok(n) => n,
+                };
+
+                // Count first, then write; a failed write returns the error.
+                copied += len as u64;
+                writer.write_all(&chunk[..len]).await?;
+            },
+        };
+    }
+
+    Ok(copied)
+}
+
+pub fn get_vsock_incoming(fd: RawFd) -> Incoming {
+    unsafe { VsockListener::from_raw_fd(fd).incoming() }
+}
+
+// Return the first connection on vsock listener `fd`; panics if the stream ends.
+pub async fn get_vsock_stream(fd: RawFd) -> Result<VsockStream> {
+    Ok(get_vsock_incoming(fd).next().await.unwrap()?)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::io;
+    use std::io::Cursor;
+    use std::io::Write;
+    use std::pin::Pin;
+    use std::sync::{Arc, Mutex};
+    use std::task::{Context, Poll, Poll::Ready};
+    use tokio::pin;
+    use tokio::select;
+    use tokio::sync::watch::channel;
+    use tokio::task::JoinError;
+    use tokio::time::Duration;
+
+    #[derive(Debug, Default, Clone)]
+    struct BufWriter {
+        data: Arc<Mutex<Vec<u8>>>,
+        slow_write: bool,
+        write_delay: Duration,
+    }
+
+    impl BufWriter {
+        fn new() -> Self {
+            BufWriter {
+                data: Arc::new(Mutex::new(Vec::<u8>::new())),
+                slow_write: false,
+                write_delay: Duration::new(0, 0),
+            }
+        }
+
+        fn write_vec(&mut self, buf: &[u8]) -> io::Result<usize> {
+            let vec_ref = self.data.clone();
+
+            let mut vec_locked = vec_ref.lock();
+
+            let mut v = vec_locked.as_deref_mut().unwrap();
+
+            if self.write_delay.as_nanos() > 0 {
+                std::thread::sleep(self.write_delay);
+            }
+
+            std::io::Write::write(&mut v, buf)
+        }
+    }
+
+    impl Write for BufWriter {
+        fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+            self.write_vec(buf)
+        }
+
+        fn flush(&mut self) -> io::Result<()> {
+            let vec_ref = self.data.clone();
+
+            let mut vec_locked = vec_ref.lock();
+
+            let v = vec_locked.as_deref_mut().unwrap();
+
+            std::io::Write::flush(v)
+        }
+    }
+
+    impl tokio::io::AsyncWrite for BufWriter {
+        fn poll_write(
+            mut self: Pin<&mut Self>,
+            _cx: &mut Context<'_>,
+            buf: &[u8],
+        ) -> Poll<Result<usize, io::Error>> {
+            let result = self.write_vec(buf);
+
+            Ready(result)
+        }
+
+        fn poll_flush(
+            self: Pin<&mut Self>,
+            _cx: &mut Context<'_>,
+        ) -> Poll<Result<(), std::io::Error>> {
+            // NOP
+            Ready(Ok(()))
+        }
+
+        fn poll_shutdown(
+            self: Pin<&mut Self>,
+            _cx: &mut Context<'_>,
+        ) -> Poll<Result<(), std::io::Error>> {
+            // NOP
+            Ready(Ok(()))
+        }
+    }
+
+    impl ToString for BufWriter {
+        fn to_string(&self) -> String {
+            let data_ref = self.data.clone();
+            let output = data_ref.lock().unwrap();
+            let s = (*output).clone();
+
+            String::from_utf8(s).unwrap()
+        }
+    }
+
+    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+    async fn test_interruptable_io_copier_reader() {
+        #[derive(Debug)]
+        struct TestData {
+            reader_value: String,
+            result: io::Result<u64>,
+        }
+
+        let tests = &[
+            TestData {
+                reader_value: "".into(),
+                result: Ok(0),
+            },
+            TestData {
+                reader_value: "a".into(),
+                result: Ok(1),
+            },
+            TestData {
+                reader_value: "foo".into(),
+                result: Ok(3),
+            },
+            TestData {
+                reader_value: "b".repeat(BUF_SIZE - 1),
+                result: Ok((BUF_SIZE - 1) as u64),
+            },
+            TestData {
+                reader_value: "c".repeat(BUF_SIZE),
+                result: Ok((BUF_SIZE) as u64),
+            },
+            TestData {
+                reader_value: "d".repeat(BUF_SIZE + 1),
+                result: Ok((BUF_SIZE + 1) as u64),
+            },
+            TestData {
+                reader_value: "e".repeat((2 * BUF_SIZE) - 1),
+                result: Ok(((2 * BUF_SIZE) - 1) as u64),
+            },
+            TestData {
+                reader_value: "f".repeat(2 * BUF_SIZE),
+                result: Ok((2 * BUF_SIZE) as u64),
+            },
+            TestData {
+                reader_value: "g".repeat((2 * BUF_SIZE) + 1),
+                result: Ok(((2 * BUF_SIZE) + 1) as u64),
+            },
+        ];
+
+        for (i, d) in tests.iter().enumerate() {
+            // Create a string containing details of the test
+            let msg = format!("test[{}]: {:?}", i, d);
+
+            let (tx, rx) = channel(true);
+            let reader = Cursor::new(d.reader_value.clone());
+            let writer = BufWriter::new();
+
+            // XXX: Pass a copy of the writer to the copier to allow the
+            // result of the write operation to be checked below.
+            let handle = tokio::spawn(interruptable_io_copier(reader, writer.clone(), rx));
+
+            // Allow time for the thread to be spawned.
+            tokio::time::sleep(Duration::from_secs(1)).await;
+
+            let timeout = tokio::time::sleep(Duration::from_secs(1));
+            pin!(timeout);
+
+            // Since the readers only specify a small number of bytes, the
+            // copier will quickly read zero and kill the task, closing the
+            // Receiver.
+            assert!(tx.is_closed(), "{}", msg);
+
+            let spawn_result: std::result::Result<
+                std::result::Result<u64, std::io::Error>,
+                JoinError,
+            >;
+
+            let result: std::result::Result<u64, std::io::Error>;
+
+            select! {
+                res = handle => spawn_result = res,
+                _ = &mut timeout => panic!("timed out"),
+            }
+
+            assert!(spawn_result.is_ok());
+
+            result = spawn_result.unwrap();
+
+            assert!(result.is_ok());
+
+            let byte_count = result.unwrap() as usize;
+            assert_eq!(byte_count, d.reader_value.len(), "{}", msg);
+
+            let value = writer.to_string();
+            assert_eq!(value, d.reader_value, "{}", msg);
+        }
+    }
+
+    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+    async fn test_interruptable_io_copier_eof() {
+        // Create an async reader that always returns EOF
+        let reader = tokio::io::empty();
+
+        let (tx, rx) = channel(true);
+        let writer = BufWriter::new();
+
+        let handle = tokio::spawn(interruptable_io_copier(reader, writer.clone(), rx));
+
+        // Allow time for the thread to be spawned.
+        tokio::time::sleep(Duration::from_secs(1)).await;
+
+        let timeout = tokio::time::sleep(Duration::from_secs(1));
+        pin!(timeout);
+
+        assert!(tx.is_closed());
+
+        let spawn_result: std::result::Result<std::result::Result<u64, std::io::Error>, JoinError>;
+
+        let result: std::result::Result<u64, std::io::Error>;
+
+        select! {
+            res = handle => spawn_result = res,
+            _ = &mut timeout => panic!("timed out"),
+        }
+
+        assert!(spawn_result.is_ok());
+
+        result = spawn_result.unwrap();
+
+        assert!(result.is_ok());
+
+        let byte_count = result.unwrap();
+        assert_eq!(byte_count, 0);
+    }
+
+    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+    async fn test_interruptable_io_copier_shutdown() {
+        // Create an async reader that creates an infinite stream of bytes
+        // (which allows us to interrupt it, since we know it is always busy ;)
+        const REPEAT_CHAR: u8 = b'r';
+
+        let reader = tokio::io::repeat(REPEAT_CHAR);
+
+        let (tx, rx) = channel(true);
+        let writer = BufWriter::new();
+
+        let handle = tokio::spawn(interruptable_io_copier(reader, writer.clone(), rx));
+
+        // Allow time for the thread to be spawned.
+        tokio::time::sleep(Duration::from_secs(1)).await;
+
+        let timeout = tokio::time::sleep(Duration::from_secs(1));
+        pin!(timeout);
+
+        assert!(!tx.is_closed());
+
+        tx.send(true).expect("failed to request shutdown");
+
+        let spawn_result: std::result::Result<std::result::Result<u64, std::io::Error>, JoinError>;
+
+        let result: std::result::Result<u64, std::io::Error>;
+
+        select! {
+            res = handle => spawn_result = res,
+            _ = &mut timeout => panic!("timed out"),
+        }
+
+        assert!(spawn_result.is_ok());
+
+        result = spawn_result.unwrap();
+
+        assert!(result.is_ok());
+
+        let byte_count = result.unwrap();
+
+        let value = writer.to_string();
+
+        let writer_byte_count = value.len() as u64;
+
+        assert_eq!(byte_count, writer_byte_count);
+
+        // Remove the char used as a payload. If anything else remains,
+        // something went wrong.
+        let mut remainder = value;
+
+        remainder.retain(|c| c != REPEAT_CHAR as char);
+
+        assert_eq!(remainder.len(), 0);
+    }
+}
--- a/src/agent/src/version.rs.in
+++ b/src/agent/src/version.rs.in
@@ -7,6 +7,8 @@
 // WARNING: This file is auto-generated - DO NOT EDIT!
 //

+#![allow(dead_code)]
+
 pub const AGENT_VERSION: &str = "@AGENT_VERSION@";
 pub const API_VERSION: &str = "@API_VERSION@";
 pub const VERSION_COMMIT: &str = "@VERSION_COMMIT@";
--- a/src/runtime/.gitignore
+++ b/src/runtime/.gitignore
@@ -8,9 +8,7 @@ coverage.html
 /cli/config/configuration-acrn.toml
 /cli/config/configuration-clh.toml
 /cli/config/configuration-fc.toml
-/cli/config/configuration-nemu.toml
 /cli/config/configuration-qemu.toml
-/cli/config/configuration-qemu-virtiofs.toml
 /cli/config/configuration-clh.toml
 /cli/config-generated.go
 /cli/containerd-shim-kata-v2/config-generated.go
--- a/src/runtime/Makefile
+++ b/src/runtime/Makefile
@@ -4,14 +4,6 @@
 # SPDX-License-Identifier: Apache-2.0
 #

-distro := $(shell \
-for file in /etc/os-release /usr/lib/os-release; do \
-    if [ -e $$file ]; then \
-        grep ^ID= $$file|cut -d= -f2-|tr -d '"'; \
-        break; \
-    fi \
-done)
-
 SKIP_GO_VERSION_CHECK=
 include golang.mk

@@ -57,7 +49,6 @@ BINLIBEXECLIST :=
 BIN_PREFIX = $(PROJECT_TYPE)
 PROJECT_DIR = $(PROJECT_TAG)
 IMAGENAME = $(PROJECT_TAG).img
-INITRDNAME = $(PROJECT_TAG)-initrd.img

 TARGET = $(BIN_PREFIX)-runtime
 TARGET_OUTPUT = $(CURDIR)/$(TARGET)
@@ -91,7 +82,6 @@ SHAREDIR := $(PREFIX)/share
 DEFAULTSDIR := $(SHAREDIR)/defaults

 COLLECT_SCRIPT = data/kata-collect-data.sh
-COLLECT_SCRIPT_SRC = $(COLLECT_SCRIPT).in

 # @RUNTIME_NAME@ should be replaced with the target in generated files
 RUNTIME_NAME = $(TARGET)
@@ -111,13 +101,11 @@ BASH_COMPLETIONS := data/completions/bash/kata-runtime
 BASH_COMPLETIONSDIR := $(SHAREDIR)/bash-completion/completions

 PKGDATADIR := $(PREFIXDEPS)/share/$(PROJECT_DIR)
-PKGLIBDIR := $(LOCALSTATEDIR)/lib/$(PROJECT_DIR)
 PKGRUNDIR := $(LOCALSTATEDIR)/run/$(PROJECT_DIR)
 PKGLIBEXECDIR := $(LIBEXECDIR)/$(PROJECT_DIR)

 KERNELDIR := $(PKGDATADIR)

-INITRDPATH := $(PKGDATADIR)/$(INITRDNAME)
 IMAGEPATH := $(PKGDATADIR)/$(IMAGENAME)
 FIRMWAREPATH :=

@@ -126,7 +114,6 @@ CONFIG_FILE = configuration.toml

 HYPERVISOR_ACRN = acrn
 HYPERVISOR_FC = firecracker
-JAILER_FC = jailer
 HYPERVISOR_QEMU = qemu
 HYPERVISOR_CLH = cloud-hypervisor

@@ -140,7 +127,6 @@ QEMUPATH := $(QEMUBINDIR)/$(QEMUCMD)
 QEMUVALIDHYPERVISORPATHS := [\"$(QEMUPATH)\"]

 QEMUVIRTIOFSPATH := $(QEMUBINDIR)/$(QEMUVIRTIOFSCMD)
-QEMUVALIDVIRTIOFSPATHS := [\"$(QEMUVIRTIOFSPATH)\"]

 CLHPATH := $(CLHBINDIR)/$(CLHCMD)
 CLHVALIDHYPERVISORPATHS := [\"$(CLHPATH)\"]
@@ -155,9 +141,6 @@ ACRNVALIDHYPERVISORPATHS := [\"$(ACRNPATH)\"]
 ACRNCTLPATH := $(ACRNBINDIR)/$(ACRNCTLCMD)
 ACRNVALIDCTLPATHS := [\"$(ACRNCTLPATH)\"]

-SHIMCMD := $(BIN_PREFIX)-shim
-SHIMPATH := $(PKGLIBEXECDIR)/$(SHIMCMD)
-
 NETMONCMD := $(BIN_PREFIX)-netmon
 NETMONPATH := $(PKGLIBEXECDIR)/$(NETMONCMD)

@@ -183,15 +166,15 @@ DEFAULTEXPFEATURES := []

 #Default entropy source
 DEFENTROPYSOURCE := /dev/urandom
+DEFVALIDENTROPYSOURCES := [\"/dev/urandom\",\"/dev/random\",\"\"]

 DEFDISABLEBLOCK := false
-DEFSHAREDFS := virtio-9p
 DEFSHAREDFS_QEMU_VIRTIOFS := virtio-fs
 DEFVIRTIOFSDAEMON := $(LIBEXECDIR)/kata-qemu/virtiofsd
 DEFVALIDVIRTIOFSDAEMONPATHS := [\"$(DEFVIRTIOFSDAEMON)\"]
 # Default DAX mapping cache size in MiB
 #if value is 0, DAX is not enabled
-DEFVIRTIOFSCACHESIZE := 0
+DEFVIRTIOFSCACHESIZE ?= 0
 DEFVIRTIOFSCACHE ?= auto
 # Format example:
 #   [\"-o\", \"arg1=xxx,arg2\", \"-o\", \"hello world\", \"--arg3=yyy\"]
@@ -200,23 +183,18 @@ DEFVIRTIOFSCACHE ?= auto
 # Make sure you quote args.
 DEFVIRTIOFSEXTRAARGS ?= [\"--thread-pool-size=1\"]
 DEFENABLEIOTHREADS := false
-DEFENABLEMEMPREALLOC := false
-DEFENABLEHUGEPAGES := false
 DEFENABLEVHOSTUSERSTORE := false
 DEFVHOSTUSERSTOREPATH := $(PKGRUNDIR)/vhost-user
 DEFVALIDVHOSTUSERSTOREPATHS := [\"$(DEFVHOSTUSERSTOREPATH)\"]
 DEFFILEMEMBACKEND := ""
 DEFVALIDFILEMEMBACKENDS := [\"$(DEFFILEMEMBACKEND)\"]
-DEFENABLESWAP := false
-DEFENABLEDEBUG := false
-DEFDISABLENESTINGCHECKS := false
 DEFMSIZE9P := 8192
-DEFHOTPLUGVFIOONROOTBUS := false
-DEFPCIEROOTPORT := 0

 # Default cgroup model
 DEFSANDBOXCGROUPONLY ?= false

+DEFBINDMOUNTS := []
+
 # Features
 FEATURE_SELINUX ?= check

@@ -283,10 +261,9 @@ ifneq (,$(CLHCMD))

    # CLH-specific options (all should be suffixed by "_CLH")
    # currently, huge pages are required for virtiofsd support
-    DEFENABLEHUGEPAGES_CLH := true
    DEFNETWORKMODEL_CLH := tcfilter
    KERNELTYPE_CLH = uncompressed
-    KERNEL_NAME_CLH = $(call MAKE_KERNEL_VIRTIOFS_NAME,$(KERNELTYPE_CLH))
+    KERNEL_NAME_CLH = $(call MAKE_KERNEL_NAME,$(KERNELTYPE_CLH))
    KERNELPATH_CLH = $(KERNELDIR)/$(KERNEL_NAME_CLH)
 endif

@@ -408,8 +385,6 @@ USER_VARS += FCVALIDJAILERPATHS
 USER_VARS += SYSCONFIG
 USER_VARS += IMAGENAME
 USER_VARS += IMAGEPATH
-USER_VARS += INITRDNAME
-USER_VARS += INITRDPATH
 USER_VARS += MACHINETYPE
 USER_VARS += KERNELDIR
 USER_VARS += KERNELTYPE
@@ -429,7 +404,6 @@ USER_VARS += KERNELPARAMS
 USER_VARS += LIBEXECDIR
 USER_VARS += LOCALSTATEDIR
 USER_VARS += PKGDATADIR
-USER_VARS += PKGLIBDIR
 USER_VARS += PKGLIBEXECDIR
 USER_VARS += PKGRUNDIR
 USER_VARS += PREFIX
@@ -447,10 +421,8 @@ USER_VARS += QEMUPATH
 USER_VARS += QEMUVALIDHYPERVISORPATHS
 USER_VARS += QEMUVIRTIOFSCMD
 USER_VARS += QEMUVIRTIOFSPATH
-USER_VARS += QEMUVALIDVIRTIOFSPATHS
 USER_VARS += RUNTIME_NAME
 USER_VARS += SHAREDIR
-USER_VARS += SHIMPATH
 USER_VARS += SYSCONFDIR
 USER_VARS += DEFVCPUS
 USER_VARS += DEFMAXVCPUS
@@ -468,7 +440,6 @@ USER_VARS += DEFDISABLEBLOCK
 USER_VARS += DEFBLOCKSTORAGEDRIVER_ACRN
 USER_VARS += DEFBLOCKSTORAGEDRIVER_FC
 USER_VARS += DEFBLOCKSTORAGEDRIVER_QEMU
-USER_VARS += DEFSHAREDFS
 USER_VARS += DEFSHAREDFS_QEMU_VIRTIOFS
 USER_VARS += DEFVIRTIOFSDAEMON
 USER_VARS += DEFVALIDVIRTIOFSDAEMONPATHS
@@ -477,21 +448,16 @@ USER_VARS += DEFVIRTIOFSCACHE
 USER_VARS += DEFVIRTIOFSEXTRAARGS
 USER_VARS += DEFENABLEANNOTATIONS
 USER_VARS += DEFENABLEIOTHREADS
-USER_VARS += DEFENABLEMEMPREALLOC
-USER_VARS += DEFENABLEHUGEPAGES
 USER_VARS += DEFENABLEVHOSTUSERSTORE
 USER_VARS += DEFVHOSTUSERSTOREPATH
 USER_VARS += DEFVALIDVHOSTUSERSTOREPATHS
 USER_VARS += DEFFILEMEMBACKEND
 USER_VARS += DEFVALIDFILEMEMBACKENDS
-USER_VARS += DEFENABLESWAP
-USER_VARS += DEFENABLEDEBUG
-USER_VARS += DEFDISABLENESTINGCHECKS
 USER_VARS += DEFMSIZE9P
-USER_VARS += DEFHOTPLUGVFIOONROOTBUS
-USER_VARS += DEFPCIEROOTPORT
 USER_VARS += DEFENTROPYSOURCE
+USER_VARS += DEFVALIDENTROPYSOURCES
 USER_VARS += DEFSANDBOXCGROUPONLY
+USER_VARS += DEFBINDMOUNTS
 USER_VARS += FEATURE_SELINUX
 USER_VARS += BUILDFLAGS

@@ -605,8 +571,8 @@ $(SHIMV2_OUTPUT): $(SOURCES) $(GENERATED_FILES) $(MAKEFILE_LIST)
 	$(QUIET_BUILD)(cd $(SHIMV2_DIR)/ && go build $(KATA_LDFLAGS) $(BUILDFLAGS) -o $@ .)

 $(MONITOR_OUTPUT): $(SOURCES) $(GENERATED_FILES) $(MAKEFILE_LIST) .git-commit
-	$(QUIET_BUILD)(cd $(MONITOR_DIR)/ && go build \
-		--ldflags "-X main.GitCommit=$(shell cat .git-commit)" -o $@ .)
+	$(QUIET_BUILD)(cd $(MONITOR_DIR)/ && CGO_ENABLED=0 go build \
+		--ldflags "-X main.GitCommit=$(shell cat .git-commit)" $(BUILDFLAGS) -buildmode=exe -o $@ .)

 .PHONY: \
 	check \
@@ -633,13 +599,19 @@ generate-config: $(CONFIGS)

 check: check-go-static

-test: go-test
+test: install-hook go-test
+
+# Build the "hook" target in the virtcontainers directory and, when make is
+# run as root (uid 0, evaluated while the Makefile is parsed), also run the
+# virtcontainers "install" target.
+#
+# $(MAKE) is used rather than a literal "make" so that the sub-make is the
+# same make program and inherits the command-line flags (-n, -k, -j and the
+# jobserver) of the parent invocation.
+.PHONY: install-hook
+install-hook:
+	$(MAKE) -C virtcontainers hook
+ifeq ($(shell id -u), 0)
+	echo "installing mock hook"
+	$(MAKE) -C virtcontainers install
+endif

 go-test: $(GENERATED_FILES)
 	go test -v -mod=vendor ./...

 check-go-static:
-	$(QUIET_CHECK)../../ci/static-checks.sh
 	$(QUIET_CHECK)../../ci/go-no-os-exit.sh ./cli
 	$(QUIET_CHECK)../../ci/go-no-os-exit.sh ./virtcontainers

@@ -696,7 +668,8 @@ show-usage: show-header
 	@printf "• Additional targets:\n"
 	@printf "\n"
 	@printf "\tbuild                      : standard build (build everything).\n"
-	@printf "\tcheck                      : run tests.\n"
+	@printf "\ttest                       : run tests.\n"
+	@printf "\tcheck                      : run code checks.\n"
 	@printf "\tclean                      : remove built files.\n"
 	@printf "\tcontainerd-shim-v2         : only build containerd shim v2.\n"
 	@printf "\tcoverage                   : run coverage tests.\n"
--- a/src/runtime/README.md
+++ b/src/runtime/README.md
@@ -129,14 +129,14 @@ The below command lists the full paths to the configuration files that the
 runtime attempts to load. The first path that exists will be used:

 ```bash
-$ kata-runtime --kata-show-default-config-paths
+$ kata-runtime --show-default-config-paths
 ```

 Aside from the built-in locations, it is possible to specify the path to a
-custom configuration file using the `--kata-config` option:
+custom configuration file using the `--config` option:

 ```bash
-$ kata-runtime --kata-config=/some/where/configuration.toml ...
+$ kata-runtime --config=/some/where/configuration.toml ...
 ```

 The runtime will log the full path to the configuration file it is using. See
--- a/src/runtime/VERSION
+++ b/src/runtime/VERSION
@@ -1 +0,0 @@
-2.0.0
--- a/src/runtime/VERSION
+++ b/src/runtime/VERSION
@@ -0,0 +1 @@
+../../VERSION
--- a/Show More
+++ b/Show More