Merge pull request #1569 from bergwolf/2.1-alpha1-branch-bump

# Kata Containers 2.1-alpha1
release: Kata Containers 2.1-alpha1
2026-03-17 10:12:24 +00:00 · 2021-03-30 06:05:54 -07:00 · 2021-03-30 07:36:36 +00:00 · 2021-03-30 14:01:56 +08:00 · 2021-03-29 15:56:03 -07:00 · 2021-03-29 14:54:09 -07:00
780 changed files with 62897 additions and 34773 deletions
--- a/.github/workflows/kata-deploy-test.yaml
+++ b/.github/workflows/kata-deploy-test.yaml
@@ -2,6 +2,8 @@ on: issue_comment
 name: test-kata-deploy
 jobs:
  check_comments:
+    if: ${{ github.event.issue.pull_request }}
+    types: [created, edited]
    runs-on: ubuntu-latest
    steps:
      - name: Check for Command
@@ -34,7 +36,7 @@ jobs:
        id: build-container-image
        run: |
            PR_SHA=$(git log --format=format:%H -n1)
-            VERSION=$(curl https://raw.githubusercontent.com/kata-containers/kata-containers/2.0-dev/VERSION)
+            VERSION=$(curl https://raw.githubusercontent.com/kata-containers/kata-containers/main/VERSION)
            ARTIFACT_URL="https://github.com/kata-containers/kata-containers/releases/download/${VERSION}/kata-static-${VERSION}-x86_64.tar.xz"
            wget "${ARTIFACT_URL}" -O ./kata-deploy/kata-static.tar.xz
            docker build --build-arg KATA_ARTIFACTS=kata-static.tar.xz -t katadocker/kata-deploy-ci:${PR_SHA} ./kata-deploy
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -103,59 +103,6 @@ jobs:
          name: kata-artifacts
          path: kata-static-qemu.tar.gz

-  build-nemu:
-    runs-on: ubuntu-16.04
-    needs: get-artifact-list
-    env:
-      buildstr: "install_nemu"
-    steps:
-      - uses: actions/checkout@v1
-      - name: get-artifact-list
-        uses: actions/download-artifact@master
-        with:
-          name: artifact-list
-      - name: build-nemu
-        run: |
-         if grep -q $buildstr ./artifact-list/artifact-list.txt; then
-           $GITHUB_WORKSPACE/.github/workflows/generate-artifact-tarball.sh $buildstr
-           echo "artifact-built=true" >> $GITHUB_ENV
-         else
-           echo "artifact-built=false" >> $GITHUB_ENV
-         fi
-      - name: store-artifacts
-        if: ${{ env.artifact-built }} == 'true'
-        uses: actions/upload-artifact@master
-        with:
-          name: kata-artifacts
-          path: kata-static-nemu.tar.gz
-
-  # Job for building the QEMU binaries with virtiofs support
-  build-qemu-virtiofsd:
-    runs-on: ubuntu-16.04
-    needs: get-artifact-list
-    env:
-      buildstr: "install_qemu_virtiofsd"
-    steps:
-      - uses: actions/checkout@v1
-      - name: get-artifact-list
-        uses: actions/download-artifact@master
-        with:
-          name: artifact-list
-      - name: build-qemu-virtiofsd
-        run: |
-         if grep -q $buildstr ./artifact-list/artifact-list.txt; then
-           $GITHUB_WORKSPACE/.github/workflows/generate-artifact-tarball.sh $buildstr
-           echo "artifact-built=true" >> $GITHUB_ENV
-         else
-           echo "artifact-built=false" >> $GITHUB_ENV
-         fi
-      - name: store-artifacts
-        if: ${{ env.artifact-built }} == 'true'
-        uses: actions/upload-artifact@master
-        with:
-          name: kata-artifacts
-          path: kata-static-qemu-virtiofsd.tar.gz
-
  # Job for building the image
  build-image:
    runs-on: ubuntu-16.04
@@ -266,7 +213,7 @@ jobs:

  gather-artifacts:
    runs-on: ubuntu-16.04
-    needs: [build-experimental-kernel, build-kernel, build-qemu, build-qemu-virtiofsd, build-image, build-firecracker, build-kata-components, build-nemu, build-clh]
+    needs: [build-experimental-kernel, build-kernel, build-qemu, build-image, build-firecracker, build-kata-components, build-clh]
    steps:
      - uses: actions/checkout@v1
      - name: get-artifacts
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -104,32 +104,6 @@ jobs:
          name: kata-artifacts
          path: kata-static-qemu.tar.gz

-  build-qemu-virtiofsd:
-    runs-on: ubuntu-16.04
-    needs: get-artifact-list
-    env:
-      buildstr: "install_qemu_virtiofsd"
-    steps:
-      - uses: actions/checkout@v2
-      - name: get-artifact-list
-        uses: actions/download-artifact@v2
-        with:
-          name: artifact-list
-      - name: build-qemu-virtiofsd
-        run: |
-         if grep -q $buildstr artifact-list.txt; then
-           $GITHUB_WORKSPACE/.github/workflows/generate-local-artifact-tarball.sh $buildstr
-           echo "artifact-built=true" >> $GITHUB_ENV
-         else
-           echo "artifact-built=false" >> $GITHUB_ENV
-         fi
-      - name: store-artifacts
-        if: ${{ env.artifact-built }} == 'true'
-        uses: actions/upload-artifact@v2
-        with:
-          name: kata-artifacts
-          path: kata-static-qemu-virtiofsd.tar.gz
-
  build-image:
    runs-on: ubuntu-16.04
    needs: get-artifact-list
@@ -237,7 +211,7 @@ jobs:

  gather-artifacts:
    runs-on: ubuntu-16.04
-    needs: [build-experimental-kernel, build-kernel, build-qemu, build-qemu-virtiofsd, build-image, build-firecracker, build-kata-components, build-clh]
+    needs: [build-experimental-kernel, build-kernel, build-qemu, build-image, build-firecracker, build-kata-components, build-clh]
    steps:
      - uses: actions/checkout@v2
      - name: get-artifacts
--- a/.github/workflows/snap-release.yaml
+++ b/.github/workflows/snap-release.yaml
@@ -21,8 +21,8 @@ jobs:
          kata_url="https://github.com/kata-containers/kata-containers"
          latest_version=$(git ls-remote --tags ${kata_url}  | egrep -o "refs.*" | egrep -v "\-alpha|\-rc|{}" | egrep -o "[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+" | sort -V -r | head -1)
          current_version="$(echo ${GITHUB_REF} | cut -d/ -f3)"
-          # Check if the current tag is the latest tag
-          if echo -e "$latest_version\n$current_version" | sort -C -V; then
+          # Check semantic versioning format (x.y.z) and if the current tag is the latest tag
+          if echo "${current_version}" | grep -q "^[[:digit:]]\+\.[[:digit:]]\+\.[[:digit:]]\+$" && echo -e "$latest_version\n$current_version" | sort -C -V; then
            # Current version is the latest version, build it
            snapcraft -d snap --destructive-mode
          fi
--- a/.github/workflows/snap.yaml
+++ b/.github/workflows/snap.yaml
@@ -1,15 +1,5 @@
 name: snap CI
-on:
-  pull_request:
-    paths:
-      - "**/Makefile"
-      - "**/*.go"
-      - "**/*.mk"
-      - "**/*.rs"
-      - "**/*.sh"
-      - "**/*.toml"
-      - "**/*.yaml"
-      - "**/*.yml"
+on: ["pull_request"]
 jobs:
  test:
    runs-on: ubuntu-20.04
--- a/.github/workflows/static-checks.yaml
+++ b/.github/workflows/static-checks.yaml
@@ -5,16 +5,14 @@ jobs:
    strategy:
      matrix:
        go-version: [1.13.x, 1.14.x, 1.15.x]
-        os: [ubuntu-18.04]
+        os: [ubuntu-20.04]
    runs-on: ${{ matrix.os }}
    env:
-      GO111MODULE: off
      TRAVIS: "true"
      TRAVIS_BRANCH: ${{ github.base_ref }}
      TRAVIS_PULL_REQUEST_BRANCH: ${{ github.head_ref }}
      TRAVIS_PULL_REQUEST_SHA : ${{ github.event.pull_request.head.sha }}
      RUST_BACKTRACE: "1"
-      RUST_AGENT: "yes"
      target_branch: ${TRAVIS_BRANCH}
    steps:
    - name: Install Go
@@ -25,9 +23,6 @@ jobs:
        GOPATH: ${{ runner.workspace }}/kata-containers
    - name: Setup GOPATH
      run: |
-        gopath_org=$(go env GOPATH)/src/github.com/kata-containers/
-        mkdir -p ${gopath_org}
-        ln -s ${PWD} ${gopath_org}
        echo "TRAVIS_BRANCH: ${TRAVIS_BRANCH}"
        echo "TRAVIS_PULL_REQUEST_BRANCH: ${TRAVIS_PULL_REQUEST_BRANCH}"
        echo "TRAVIS_PULL_REQUEST_SHA: ${TRAVIS_PULL_REQUEST_SHA}"
@@ -43,26 +38,29 @@ jobs:
        path: ./src/github.com/${{ github.repository }}
    - name: Setup travis references
      run: |
-        echo "TRAVIS_BRANCH=${TRAVIS_BRANCH:-$(echo $GITHUB_REF | awk 'BEGIN { FS = \"/\" } ; { print $3 }')}" 
+        echo "TRAVIS_BRANCH=${TRAVIS_BRANCH:-$(echo $GITHUB_REF | awk 'BEGIN { FS = \"/\" } ; { print $3 }')}"
        target_branch=${TRAVIS_BRANCH}
    - name: Setup
      run: |
-        cd ${GOPATH}/src/github.com/kata-containers/kata-containers && ./ci/setup.sh
+        cd ${GOPATH}/src/github.com/${{ github.repository }} && ./ci/setup.sh
      env:
        GOPATH: ${{ runner.workspace }}/kata-containers
    - name: Building rust
      run: |
-        cd ${GOPATH}/src/github.com/kata-containers/kata-containers && ./ci/install_rust.sh
+        cd ${GOPATH}/src/github.com/${{ github.repository }} && ./ci/install_rust.sh
        PATH=$PATH:"$HOME/.cargo/bin"
-    - name: Make clippy
+        rustup target add x86_64-unknown-linux-musl
+        rustup component add rustfmt clippy
+    # Must build before static checks as we depend on some generated code in runtime and agent
+    - name: Build
      run: |
-        cd ${GOPATH}/src/github.com/kata-containers/kata-containers/src/agent && rustup target add x86_64-unknown-linux-musl && rustup component add rustfmt && rustup component add clippy && make clippy
-    - name: Static checks
+        cd ${GOPATH}/src/github.com/${{ github.repository }} && make
+    - name: Static Checks
      run: |
-        cd ${GOPATH}/src/github.com/kata-containers/kata-containers && ./ci/static-checks.sh
-    - name: Build agent
+        cd ${GOPATH}/src/github.com/${{ github.repository }} && ./ci/static-checks.sh
+    - name: Run Compiler Checks
      run: |
-        cd ${GOPATH}/src/github.com/kata-containers/kata-containers/src/agent && make
-    - name: Run agent unit tests
+        cd ${GOPATH}/src/github.com/${{ github.repository }} && make check
+    - name: Run Unit Tests
      run: |
-        cd ${GOPATH}/src/github.com/kata-containers/kata-containers/src/agent && make check
+        cd ${GOPATH}/src/github.com/${{ github.repository }} && make test
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 **/*.bk
+**/*~
 **/*.orig
 **/*.rej
 **/target
--- a/README.md
+++ b/README.md
@@ -126,9 +126,9 @@ The following repositories are used by both the current and first generation Kat

 | Component | Description | Current | First generation | Notes |
 |-|-|-|-|-|
-| CI | Continuous Integration configuration files and scripts. | [Kata 2.x](https://github.com/kata-containers/ci/tree/2.0-dev) | [Kata 1.x](https://github.com/kata-containers/ci/tree/master) | |
+| CI | Continuous Integration configuration files and scripts. | [Kata 2.x](https://github.com/kata-containers/ci/tree/main) | [Kata 1.x](https://github.com/kata-containers/ci/tree/master) | |
 | kernel | The Linux kernel used by the hypervisor to boot the guest image. | [Kata 2.x][kernel] | [Kata 1.x][kernel] | Patches are stored in the packaging component. |
-| tests | Test code. | [Kata 2.x](https://github.com/kata-containers/tests/tree/2.0-dev) | [Kata 1.x](https://github.com/kata-containers/tests/tree/master) | Excludes unit tests which live with the main code. |
+| tests | Test code. | [Kata 2.x](https://github.com/kata-containers/tests/tree/main) | [Kata 1.x](https://github.com/kata-containers/tests/tree/master) | Excludes unit tests which live with the main code. |
 | www.katacontainers.io | Contains the source for the [main web site](https://www.katacontainers.io). | [Kata 2.x][github-katacontainers.io] | [Kata 1.x][github-katacontainers.io] | | |

 ### Packaging and releases
--- a/2
+++ b/2
@@ -1 +1 @@
-2.1-alpha0
+2.1-alpha1
--- a/ci/install_musl.sh
+++ b/ci/install_musl.sh
@@ -12,10 +12,11 @@ install_aarch64_musl() {
 		local musl_tar="${arch}-linux-musl-native.tgz"
 		local musl_dir="${arch}-linux-musl-native"
 		pushd /tmp
-		curl -sLO https://musl.cc/${musl_tar}
-		tar -zxf ${musl_tar}
-		mkdir -p /usr/local/musl/
-		cp -r ${musl_dir}/* /usr/local/musl/
+		if curl -sLO --fail https://musl.cc/${musl_tar}; then
+			tar -zxf ${musl_tar}
+			mkdir -p /usr/local/musl/
+			cp -r ${musl_dir}/* /usr/local/musl/
+		fi
 		popd
 	fi
 }
--- a/ci/install_yq.sh
+++ b/ci/install_yq.sh
@@ -56,7 +56,7 @@ function install_yq() {
 		die "Please install curl"
 	fi

-	local yq_version=3.1.0
+	local yq_version=3.4.1

 	## NOTE: ${var,,} => gives lowercase value of var
 	local yq_url="https://${yq_pkg}/releases/download/${yq_version}/yq_${goos,,}_${goarch}"
--- a/ci/lib.sh
+++ b/ci/lib.sh
@@ -5,7 +5,7 @@

 export tests_repo="${tests_repo:-github.com/kata-containers/tests}"
 export tests_repo_dir="$GOPATH/src/$tests_repo"
-export branch="${branch:-2.0-dev}"
+export branch="${branch:-main}"

 clone_tests_repo()
 {
--- a/docs/Developer-Guide.md
+++ b/docs/Developer-Guide.md
@@ -37,7 +37,6 @@
    * [Set up a debug console](#set-up-a-debug-console)
      * [Simple debug console setup](#simple-debug-console-setup)
          * [Enable agent debug console](#enable-agent-debug-console)
-          * [Start `kata-monitor`](#start-kata-monitor)
          * [Connect to debug console](#connect-to-debug-console)
      * [Traditional debug console setup](#traditional-debug-console-setup)
          * [Create a custom image containing a shell](#create-a-custom-image-containing-a-shell)
@@ -404,7 +403,7 @@ To build a version of QEMU using the same options as the default `qemu-lite` ver
 ```
 $ go get -d github.com/kata-containers/kata-containers/tools/packaging
 $ cd $your_qemu_directory
-$ ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/scripts/configure-hypervisor.sh qemu > kata.cfg
+$ ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/scripts/configure-hypervisor.sh kata-qemu > kata.cfg
 $ eval ./configure "$(cat kata.cfg)"
 $ make -j $(nproc)
 $ sudo -E make install
@@ -477,17 +476,6 @@ debug_console_enabled = true

 This will pass `agent.debug_console agent.debug_console_vport=1026` to the agent as kernel parameters, and sandboxes created using these parameters will start a shell in the guest when a new connection is accepted from VSOCK.

-#### Start `kata-monitor`
-
-The `kata-runtime exec` command needs `kata-monitor` to get the sandbox's `vsock` address to connect to, first start `kata-monitor`.
-
-```
-$ sudo kata-monitor
-```
-
-`kata-monitor` will serve at `localhost:8090` by default.
-
-
 #### Connect to debug console

 Command `kata-runtime exec` is used to connect to the debug console.
@@ -502,6 +490,10 @@ bash-4.2# exit
 exit
 ```

+`kata-runtime exec` has a command-line option `runtime-namespace`, which is used to specify under which [runtime namespace](https://github.com/containerd/containerd/blob/master/docs/namespaces.md) the particular pod was created. By default, it is set to `k8s.io` and works for containerd when configured
+ with Kubernetes. For CRI-O, the namespace should be set to `default` explicitly. This should not be confused with [Kubernetes namespaces](https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/).
+For other CRI-runtimes and configurations, you may need to set the namespace utilizing the `runtime-namespace` option.
+
 If you want to access the guest OS in the traditional way, see [Traditional debug console setup](#traditional-debug-console-setup).

 ### Traditional debug console setup
--- a/docs/Documentation-Requirements.md
+++ b/docs/Documentation-Requirements.md
@@ -25,7 +25,7 @@ All documents must:
 - Have a `.md` file extension.
 - Include a TOC (table of contents) at the top of the document with links to
  all heading sections. We recommend using the
-  [`check-markdown`](https://github.com/kata-containers/tests/tree/master/cmd/check-markdown)
+  [`kata-check-markdown`](https://github.com/kata-containers/tests/tree/master/cmd/check-markdown)
  tool to generate the TOC.
 - Be linked to from another document in the same repository.

--- a/docs/Licensing-strategy.md
+++ b/docs/Licensing-strategy.md
@@ -22,4 +22,4 @@ licensing and allows automated tooling to check the license of individual
 files.

 This SPDX licence identifier requirement is enforced by the
-[CI (Continuous Integration) system](https://github.com/kata-containers/tests/blob/master/.ci/static-checks.sh).
+[CI (Continuous Integration) system](https://github.com/kata-containers/tests/blob/main/.ci/static-checks.sh).
--- a/docs/README.md
+++ b/docs/README.md
@@ -49,6 +49,7 @@ Documents that help to understand and contribute to Kata Containers.
 ### Design and Implementations

 * [Kata Containers Architecture](design/architecture.md): Architectural overview of Kata Containers
+* [Kata Containers E2E Flow](design/end-to-end-flow.md): The entire end-to-end flow of Kata Containers
 * [Kata Containers design](./design/README.md): More Kata Containers design documents

 ### How to Contribute
--- a/docs/Release-Process.md
+++ b/docs/Release-Process.md
@@ -79,9 +79,9 @@
  ```
  $ cd ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/release
  # Note: OLD_VERSION is where the script should start to get changes.
-  $ ./runtime-release-notes.sh ${OLD_VERSION} ${NEW_VERSION} > notes.md
+  $ ./release-notes.sh ${OLD_VERSION} ${NEW_VERSION} > notes.md
  # Edit the `notes.md` file to review and make any changes to the release notes.
-  # Add the release notes in GitHub runtime.
+  # Add the release notes in the project's GitHub.
  $ hub release edit -F notes.md "${NEW_VERSION}"
  ```

--- a/docs/design/arch-images/katacontainers-e2e-with-bg.jpg
+++ b/docs/design/arch-images/katacontainers-e2e-with-bg.jpg
--- a/docs/design/arch-images/katacontainers-e2e.svg
+++ b/docs/design/arch-images/katacontainers-e2e.svg
--- a/docs/design/architecture.md
+++ b/docs/design/architecture.md
@@ -137,7 +137,7 @@ The runtime uses a TOML format configuration file called `configuration.toml`. B

 The actual configuration file paths can be determined by running:
 ```
-$ kata-runtime --kata-show-default-config-paths
+$ kata-runtime --show-default-config-paths
 ```
 Most users will not need to modify the configuration file.

--- a/docs/design/end-to-end-flow.md
+++ b/docs/design/end-to-end-flow.md
@@ -0,0 +1,4 @@
+# Kata Containers E2E Flow
+
+
+![Kata containers e2e flow](arch-images/katacontainers-e2e-with-bg.jpg)
--- a/docs/how-to/how-to-import-kata-logs-with-fluentd.md
+++ b/docs/how-to/how-to-import-kata-logs-with-fluentd.md
@@ -185,7 +185,7 @@ in Kibana:
 ![Kata tags in EFK](./images/efk_syslog_entry_detail.png).

 We can however further sub-parse the Kata entries using the
-[Fluentd plugins](https://docs.fluentbit.io/manual/parser/logfmt) that will parse
+[Fluentd plugins](https://docs.fluentbit.io/manual/pipeline/parsers/logfmt) that will parse
 `logfmt` formatted data. We can utilise these to parse the sub-fields using a Fluentd filter
 section. At the same time, we will prefix the new fields with `kata_` to make it clear where
 they have come from:
@@ -222,7 +222,7 @@ test to check the parsing works. The resulting output from Fluentd is:
  "_COMM":"kata-runtime",
  "_EXE":"/opt/kata/bin/kata-runtime",
  "SYSLOG_TIMESTAMP":"Feb 21 10:31:27 ",
-  "_CMDLINE":"/opt/kata/bin/kata-runtime --kata-config /opt/kata/share/defaults/kata-containers/configuration-qemu.toml --root /run/runc state 7cdd31660d8705facdadeb8598d2c0bd008e8142c54e3b3069abd392c8d58997",
+  "_CMDLINE":"/opt/kata/bin/kata-runtime --config /opt/kata/share/defaults/kata-containers/configuration-qemu.toml --root /run/runc state 7cdd31660d8705facdadeb8598d2c0bd008e8142c54e3b3069abd392c8d58997",
  "SYSLOG_PID":"14314",
  "_PID":"14314",
  "MESSAGE":"time=\"2020-02-21T10:31:27.810781647Z\" level=info msg=\"release sandbox\" arch=amd64 command=state container=7cdd31660d8705facdadeb8598d2c0bd008e8142c54e3b3069abd392c8d58997 name=kata-runtime pid=14314 sandbox=1c3e77cad66aa2b6d8cc846f818370f79cb0104c0b840f67d0f502fd6562b68c source=virtcontainers subsystem=sandbox",
@@ -281,7 +281,7 @@ own file (rather than into the system journal).

 ```bash
 #!/bin/bash
-/opt/kata/bin/kata-runtime --kata-config "/opt/kata/share/defaults/kata-containers/configuration-qemu.toml" --log-format=json --log=/var/log/kata-runtime.log $@
+/opt/kata/bin/kata-runtime --config "/opt/kata/share/defaults/kata-containers/configuration-qemu.toml" --log-format=json --log=/var/log/kata-runtime.log $@
 ```

 And then we'll add the Fluentd config section to parse that file. Note, we inform the parser that Kata is
--- a/docs/how-to/how-to-set-prometheus-in-k8s.md
+++ b/docs/how-to/how-to-set-prometheus-in-k8s.md
@@ -34,7 +34,7 @@ Also you should ensure that `kubectl` working correctly.
 Start Prometheus by utilizing our sample manifest:

 ```
-$ kubectl apply -f https://raw.githubusercontent.com/kata-containers/kata-containers/2.0-dev/docs/how-to/data/prometheus.yml
+$ kubectl apply -f https://raw.githubusercontent.com/kata-containers/kata-containers/main/docs/how-to/data/prometheus.yml
 ```

 This will create a new namespace, `prometheus`, and create the following resources:
@@ -60,7 +60,7 @@ go_gc_duration_seconds{quantile="0.75"} 0.000229911
 `kata-monitor` can be started on the cluster as follows:

 ```
-$ kubectl apply -f https://raw.githubusercontent.com/kata-containers/kata-containers/2.0-dev/docs/how-to/data/kata-monitor-daemonset.yml
+$ kubectl apply -f https://raw.githubusercontent.com/kata-containers/kata-containers/main/docs/how-to/data/kata-monitor-daemonset.yml
 ```

 This will create a new namespace `kata-system` and a `daemonset` in it.
@@ -73,7 +73,7 @@ Once the `daemonset` is running, Prometheus should discover `kata-monitor` as a
 Run this command to run Grafana in Kubernetes:

 ```
-$ kubectl apply -f https://raw.githubusercontent.com/kata-containers/kata-containers/2.0-dev/docs/how-to/data/grafana.yml
+$ kubectl apply -f https://raw.githubusercontent.com/kata-containers/kata-containers/main/docs/how-to/data/grafana.yml
 ```

 This will create deployment and service for Grafana under namespace `prometheus`.
@@ -99,7 +99,7 @@ You can import this dashboard using Grafana UI, or using `curl` command in conso
 $ curl -XPOST -i localhost:3000/api/dashboards/import \
    -u admin:admin \
    -H "Content-Type: application/json" \
-	-d "{\"dashboard\":$(curl -sL https://raw.githubusercontent.com/kata-containers/kata-containers/2.0-dev/docs/how-to/data/dashboard.json )}"
+	-d "{\"dashboard\":$(curl -sL https://raw.githubusercontent.com/kata-containers/kata-containers/main/docs/how-to/data/dashboard.json )}"
 ```

 ## References
--- a/docs/how-to/how-to-use-kata-containers-with-acrn.md
+++ b/docs/how-to/how-to-use-kata-containers-with-acrn.md
@@ -91,7 +91,7 @@ To configure Kata Containers with ACRN, copy the generated `configuration-acrn.t
 The following command shows full paths to the `configuration.toml` files that the runtime loads. It will use the first path that exists. (Please make sure the kernel and image paths are set correctly in the `configuration.toml` file)

 ```bash
-$ sudo kata-runtime --kata-show-default-config-paths
+$ sudo kata-runtime --show-default-config-paths
 ```

 >**Warning:** Please offline CPUs using [this](offline_cpu.sh) script, else VM launches will fail.
--- a/docs/how-to/how-to-use-virtio-fs-with-kata.md
+++ b/docs/how-to/how-to-use-virtio-fs-with-kata.md
@@ -1,61 +1,12 @@
 # Kata Containers with virtio-fs

- [Introduction](#introduction)
- [Pre-requisites](#pre-requisites)
- [Install Kata Containers with virtio-fs support](#install-kata-containers-with-virtio-fs-support)
- [Run a Kata Container utilizing virtio-fs](#run-a-kata-container-utilizing-virtio-fs)
+- [Kata Containers with virtio-fs](#kata-containers-with-virtio-fs)
+  - [Introduction](#introduction)

 ## Introduction

 Container deployments utilize explicit or implicit file sharing between host filesystem and containers. From a trust perspective, avoiding a shared file-system between the trusted host and untrusted container is recommended. This is not always feasible. In Kata Containers, block-based volumes are preferred as they allow usage of either device pass through or `virtio-blk` for access within the virtual machine.

-As of the 1.7 release of Kata Containers, [9pfs](https://www.kernel.org/doc/Documentation/filesystems/9p.txt) is the default filesystem sharing mechanism. While this does allow for workload compatibility, it does so with degraded performance and potential for POSIX compliance limitations.
+As of the 2.0 release of Kata Containers, [virtio-fs](https://virtio-fs.gitlab.io/) is the default filesystem sharing mechanism.

-To help address these limitations, [virtio-fs](https://virtio-fs.gitlab.io/) has been developed. virtio-fs is a shared file system that lets virtual machines access a directory tree on the host. In Kata Containers, virtio-fs can be used to share container volumes, secrets, config-maps, configuration files (hostname, hosts, `resolv.conf`) and the container rootfs on the host with the guest.  virtio-fs provides significant performance and POSIX compliance improvements compared to 9pfs.
-
-Enabling of virtio-fs requires changes in the guest kernel as well as the VMM. For Kata Containers, experimental virtio-fs support is enabled through `qemu` and `cloud-hypervisor` VMMs.
-
-**Note: virtio-fs support is experimental in the 1.7 release of Kata Containers. Work is underway to improve stability, performance and upstream integration. This is available for early preview - use at your own risk**
-
-This document describes how to get Kata Containers to work with virtio-fs.
-
-## Pre-requisites
-
-Before Kata 1.8 this feature required the host to have hugepages support enabled. Enable this with the `sysctl vm.nr_hugepages=1024` command on the host.In later versions of Kata, virtio-fs leverages `/dev/shm` as the shared memory backend. The default size of `/dev/shm` on a system is typically half of the total system memory. This can pose a physical limit to the maximum number of pods that can be launched with virtio-fs. This can be overcome by increasing the size of `/dev/shm` as shown below:
-
-```bash
-$ mount -o remount,size=${desired_shm_size} /dev/shm
-```
- 
-## Install Kata Containers with virtio-fs support
-
-The Kata Containers `qemu` configuration with virtio-fs and the `virtiofs` daemon are available in the [Kata Container release](https://github.com/kata-containers/runtime/releases) artifacts starting with the 1.9 release. Installation is available through [distribution packages](https://github.com/kata-containers/documentation/blob/master/install/README.md#supported-distributions) as well through [`kata-deploy`](https://github.com/kata-containers/packaging/tree/master/kata-deploy).
-
-**Note: Support for virtio-fs was first introduced in `NEMU` hypervisor in Kata 1.8 release. This hypervisor has been deprecated.**
-
-Install the latest release of Kata with `kata-deploy` as follows:
-```
-docker run --runtime=runc -v /opt/kata:/opt/kata -v /var/run/dbus:/var/run/dbus -v /run/systemd:/run/systemd -v /etc/docker:/etc/docker -it katadocker/kata-deploy kata-deploy-docker install
-```
-
-This will place the Kata release artifacts in `/opt/kata`, and update Docker's configuration to include a runtime target, `kata-qemu-virtiofs`. Learn more about `kata-deploy` and how to use `kata-deploy` in Kubernetes [here](https://github.com/kata-containers/packaging/tree/master/kata-deploy#kubernetes-quick-start).
-
-## Run a Kata Container utilizing virtio-fs
-
-Once installed, start a new container, utilizing `qemu` + `virtiofs`:
-```bash
-$ docker run --runtime=kata-qemu-virtiofs -it busybox
-```
-
-Verify the new container is running with the `qemu` hypervisor as well as using `virtiofsd`. To do this look for the hypervisor path and the `virtiofs` daemon process on the host:
-```bash
-$ ps -aux | grep virtiofs
-root ... /home/foo/build-x86_64_virt/x86_64_virt-softmmu/qemu-system-x86_64_virt
-...  -machine virt,accel=kvm,kernel_irqchip,nvdimm ...
-root ... /home/foo/build-x86_64_virt/virtiofsd-x86_64 ...
-```
-
-You can also try out virtio-fs using `cloud-hypervisor` VMM:
-```bash
-$ docker run --runtime=kata-clh -it busybox
-```
+virtio-fs support works out of the box for `cloud-hypervisor` and `qemu`, when Kata Containers is deployed using `kata-deploy`. Learn more about `kata-deploy` and how to use `kata-deploy` in Kubernetes [here](https://github.com/kata-containers/packaging/tree/master/kata-deploy#kubernetes-quick-start).
--- a/docs/install/snap-installation-guide.md
+++ b/docs/install/snap-installation-guide.md
@@ -110,8 +110,8 @@ can be used as runtime.

 Read the following documents to know how to run Kata Containers 2.x with `containerd`.

-* [How to use Kata Containers and Containerd](https://github.com/kata-containers/kata-containers/blob/2.0-dev/docs/how-to/containerd-kata.md)
-* [Install Kata Containers with containerd](https://github.com/kata-containers/kata-containers/blob/2.0-dev/docs/install/container-manager/containerd/containerd-install.md)
+* [How to use Kata Containers and Containerd](https://github.com/kata-containers/kata-containers/blob/main/docs/how-to/containerd-kata.md)
+* [Install Kata Containers with containerd](https://github.com/kata-containers/kata-containers/blob/main/docs/install/container-manager/containerd/containerd-install.md)


 ## Remove Kata Containers snap package
--- a/docs/use-cases/using-Intel-QAT-and-kata.md
+++ b/docs/use-cases/using-Intel-QAT-and-kata.md
@@ -1,56 +1,62 @@
 # Table of Contents

-* [Table of Contents](#table-of-contents)
-* [Introduction](#introduction)
-    * [Helpful Links before starting](#helpful-links-before-starting)
-    * [Steps to enable Intel QAT in Kata Containers](#steps-to-enable-intel-qat-in-kata-containers)
-    * [Script variables](#script-variables)
-        * [Set environment variables (Every Reboot)](#set-environment-variables-every-reboot)
-    * [Prepare the Clear Linux Host](#prepare-the-clear-linux-host)
-        * [Identify which PCI Bus the Intel QAT card is on](#identify-which-pci-bus-the-intel-qat-card-is-on)
-        * [Install necessary bundles for Clear Linux](#install-necessary-bundles-for-clear-linux)
-        * [Download Intel QAT drivers](#download-intel-qat-drivers)
-        * [Copy Intel QAT configuration files and enable Virtual Functions](#copy-intel-qat-configuration-files-and-enable-virtual-functions)
-        * [Expose and Bind Intel QAT virtual functions to VFIO-PCI (Every reboot)](#expose-and-bind-intel-qat-virtual-functions-to-vfio-pci-every-reboot)
-        * [Check Intel QAT virtual functions are enabled](#check-intel-qat-virtual-functions-are-enabled)
-    * [Prepare Kata Containers](#prepare-kata-containers)
-        * [Download Kata kernel Source](#download-kata-kernel-source)
-        * [Build Kata kernel](#build-kata-kernel)
-        * [Copy Kata kernel](#copy-kata-kernel)
-        * [Prepare Kata root filesystem](#prepare-kata-root-filesystem)
-        * [Compile Intel QAT drivers for Kata Containers kernel and add to Kata Containers rootfs](#compile-intel-qat-drivers-for-kata-containers-kernel-and-add-to-kata-containers-rootfs)
-        * [Copy Kata rootfs](#copy-kata-rootfs)
-        * [Update Kata configuration to point to custom kernel and rootfs](#update-kata-configuration-to-point-to-custom-kernel-and-rootfs)
-    * [Verify Intel QAT works in a Docker Kata Containers container](#verify-intel-qat-works-in-a-docker-kata-containers-container)
-    * [Build OpenSSL Intel QAT engine container](#build-openssl-intel-qat-engine-container)
-        * [Test Intel QAT in Docker](#test-intel-qat-in-docker)
-        * [Troubleshooting](#troubleshooting)
-    * [Optional Scripts](#optional-scripts)
-        * [Verify Intel QAT card counters are incremented](#verify-intel-qat-card-counters-are-incremented)
+- [Table of Contents](#table-of-contents)
+- [Introduction](#introduction)
+  - [Helpful Links before starting](#helpful-links-before-starting)
+  - [Steps to enable Intel® QAT in Kata Containers](#steps-to-enable-intel-qat-in-kata-containers)
+  - [Script variables](#script-variables)
+    - [Set environment variables (Every Reboot)](#set-environment-variables-every-reboot)
+  - [Prepare the Ubuntu Host](#prepare-the-ubuntu-host)
+    - [Identify which PCI Bus the Intel® QAT card is on](#identify-which-pci-bus-the-intel-qat-card-is-on)
+    - [Install necessary packages for Ubuntu](#install-necessary-packages-for-ubuntu)
+    - [Download Intel® QAT drivers](#download-intel-qat-drivers)
+    - [Copy Intel® QAT configuration files and enable virtual functions](#copy-intel-qat-configuration-files-and-enable-virtual-functions)
+    - [Expose and Bind Intel® QAT virtual functions to VFIO-PCI (Every reboot)](#expose-and-bind-intel-qat-virtual-functions-to-vfio-pci-every-reboot)
+    - [Check Intel® QAT virtual functions are enabled](#check-intel-qat-virtual-functions-are-enabled)
+  - [Prepare Kata Containers](#prepare-kata-containers)
+    - [Download Kata kernel Source](#download-kata-kernel-source)
+    - [Build Kata kernel](#build-kata-kernel)
+    - [Copy Kata kernel](#copy-kata-kernel)
+    - [Prepare Kata root filesystem](#prepare-kata-root-filesystem)
+    - [Compile Intel® QAT drivers for Kata Containers kernel and add to Kata Containers rootfs](#compile-intel-qat-drivers-for-kata-containers-kernel-and-add-to-kata-containers-rootfs)
+    - [Copy Kata rootfs](#copy-kata-rootfs)
+  - [Verify Intel® QAT works in a container](#verify-intel-qat-works-in-a-container)
+    - [Build OpenSSL Intel® QAT engine container](#build-openssl-intel-qat-engine-container)
+    - [Test Intel® QAT with the ctr tool](#test-intel-qat-with-the-ctr-tool)
+    - [Test Intel® QAT in Kubernetes](#test-intel-qat-in-kubernetes)
+    - [Troubleshooting](#troubleshooting)
+  - [Optional Scripts](#optional-scripts)
+    - [Verify Intel® QAT card counters are incremented](#verify-intel-qat-card-counters-are-incremented)

 # Introduction

-Intel QuickAssist Technology (Intel QAT) provides hardware acceleration 
+Intel® QuickAssist Technology (QAT) provides hardware acceleration 
 for security (cryptography) and compression. These instructions cover the 
-steps for [Clear Linux](https://clearlinux.org) but can be adapted to any 
-Linux distribution. Your distribution may already have the Intel QAT 
-drivers, but it is likely they do not contain the necessary user space 
-components. These instructions guide the user on how to download the kernel 
-sources, compile kernel driver modules against those sources, and load them 
-onto the host as well as preparing a specially built Kata Containers kernel 
-and custom Kata Containers rootfs.  
+steps for the latest [Ubuntu LTS release](https://ubuntu.com/download/desktop) 
+which already includes the QAT host driver. These instructions can be adapted to 
+any Linux distribution. These instructions guide the user on how to download 
+the kernel sources, compile kernel driver modules against those sources, and 
+load them onto the host as well as preparing a specially built Kata Containers 
+kernel and custom Kata Containers rootfs.
+
+* Download kernel sources
+* Compile Kata kernel
+* Compile kernel driver modules against those sources
+* Download rootfs
+* Add driver modules to rootfs
+* Build rootfs image 

 ## Helpful Links before starting

-[Intel QAT Engine](https://github.com/intel/QAT_Engine)
+[Intel® QuickAssist Technology at `01.org`](https://01.org/intel-quickassist-technology)

-[Intel QuickAssist Technology at `01.org`](https://01.org/intel-quickassist-technology)
+[Intel® QuickAssist Technology Engine for OpenSSL](https://github.com/intel/QAT_Engine)

 [Intel Device Plugin for Kubernetes](https://github.com/intel/intel-device-plugins-for-kubernetes)

-[Intel QuickAssist Crypto Poll Mode Driver](https://dpdk-docs.readthedocs.io/en/latest/cryptodevs/qat.html)
+[Intel® QuickAssist Technology for Crypto Poll Mode Driver](https://dpdk-docs.readthedocs.io/en/latest/cryptodevs/qat.html)

-## Steps to enable Intel QAT in Kata Containers
+## Steps to enable Intel® QAT in Kata Containers

 There are some steps to complete only once, some steps to complete with every
 reboot, and some steps to complete when the host kernel changes.
@@ -67,91 +73,95 @@ needed to point to updated drivers or different install locations.
 Make sure to check [`01.org`](https://01.org/intel-quickassist-technology) for 
 the latest driver.

-```sh
-$ export QAT_DRIVER_VER=qat1.7.l.4.8.0-00005.tar.gz 
-$ export QAT_DRIVER_URL=https://01.org/sites/default/files/downloads/${QAT_DRIVER_VER}
+```bash
+$ export QAT_DRIVER_VER=qat1.7.l.4.12.0-00011.tar.gz
+$ export QAT_DRIVER_URL=https://downloadmirror.intel.com/30178/eng/${QAT_DRIVER_VER}
 $ export QAT_CONF_LOCATION=~/QAT_conf
 $ export QAT_DOCKERFILE=https://raw.githubusercontent.com/intel/intel-device-plugins-for-kubernetes/master/demo/openssl-qat-engine/Dockerfile
 $ export QAT_SRC=~/src/QAT
 $ export GOPATH=~/src/go
-$ export OSBUILDER=~/src/osbuilder
 $ export KATA_KERNEL_LOCATION=~/kata
 $ export KATA_ROOTFS_LOCATION=~/kata
 ```

-## Prepare the Clear Linux Host
+## Prepare the Ubuntu Host

 The host could be a bare metal instance or a virtual machine. If using a 
 virtual machine, make sure that KVM nesting is enabled. The following 
-instructions reference an Intel QAT. Some of the instructions must be 
-modified if using a different Intel QAT device. You can identify the Intel QAT
-chipset by executing the following.
+instructions reference an Intel® C62X chipset. Some of the instructions must be 
+modified if using a different Intel® QAT device. The Intel® QAT chipset can be
+identified by executing the following.

-### Identify which PCI Bus the Intel QAT card is on
+### Identify which PCI Bus the Intel® QAT card is on

-```sh
+```bash
 $ for i in 0434 0435 37c8 1f18 1f19; do lspci -d 8086:$i; done
 ```

-### Install necessary bundles for Clear Linux
+### Install necessary packages for Ubuntu

-Clear Linux version 30780 (Released August 13, 2019) includes a 
-`linux-firmware-qat` bundle that has the necessary QAT firmware along with a
-functional QAT host driver that works with Kata Containers. 
+These packages are necessary to compile the Kata kernel, Intel® QAT driver, and to
+prepare the rootfs for Kata. [Docker](https://docs.docker.com/engine/install/ubuntu/)
+also needs to be installed to be able to build the rootfs. To test that 
+everything works a Kubernetes pod is started requesting Intel® QAT resources. For the
+pass through of the virtual functions the kernel boot parameter needs to have
+`intel_iommu=on`.

-```sh
-$ sudo swupd bundle-add network-basic linux-firmware-qat make c-basic go-basic containers-virt dev-utils devpkg-elfutils devpkg-systemd devpkg-ssl
-$ sudo clr-boot-manager update
-$ sudo systemctl enable --now docker
+```bash
+$ sudo apt update
+$ sudo apt install -y golang-go build-essential python pkg-config zlib1g-dev libudev-dev bison libelf-dev flex libtool automake autotools-dev autoconf bc libpixman-1-dev coreutils libssl-dev
+$ sudo sed -i 's/GRUB_CMDLINE_LINUX_DEFAULT=""/GRUB_CMDLINE_LINUX_DEFAULT="intel_iommu=on"/' /etc/default/grub
+$ sudo update-grub
 $ sudo reboot
 ```

-### Download Intel QAT drivers
+### Download Intel® QAT drivers

-This will download the Intel QAT drivers from [`01.org`](https://01.org/intel-quickassist-technology). 
+This will download the [Intel® QAT drivers](https://01.org/intel-quickassist-technology). 
 Make sure to check the website for the latest version.

-```sh
+```bash
 $ mkdir -p $QAT_SRC
 $ cd $QAT_SRC
 $ curl -L $QAT_DRIVER_URL | tar zx
 ```

-### Copy Intel QAT configuration files and enable Virtual Functions
+### Copy Intel® QAT configuration files and enable virtual functions

-Modify the instructions below as necessary if using a different QAT hardware 
+Modify the instructions below as necessary if using a different Intel® QAT hardware 
 platform. You can learn more about customizing configuration files at the 
-[Intel QAT Engine repository](https://github.com/intel/QAT_Engine/#copy-the-correct-intel-quickassist-technology-driver-config-files)
+[Intel® QAT Engine repository](https://github.com/intel/QAT_Engine/#copy-the-correct-intel-quickassist-technology-driver-config-files)
 This section starts from a base config file and changes the `SSL` section to 
 `SHIM` to support the OpenSSL engine. There are more tweaks that you can make
-depending on the use case and how many Intel QAT engines should be run. You
+depending on the use case and how many Intel® QAT engines should be run. You
 can find more information about how to customize in the 
 [Intel® QuickAssist Technology Software for Linux* - Programmer's Guide.](https://01.org/sites/default/files/downloads/336210qatswprogrammersguiderev006.pdf) 

-> **Note: This section assumes that a QAT `c6xx` platform is used.**
+> **Note: This section assumes that an Intel® QAT `c6xx` platform is used.**

-```sh
+```bash
 $ mkdir -p $QAT_CONF_LOCATION
 $ cp $QAT_SRC/quickassist/utilities/adf_ctl/conf_files/c6xxvf_dev0.conf.vm $QAT_CONF_LOCATION/c6xxvf_dev0.conf
 $ sed -i 's/\[SSL\]/\[SHIM\]/g' $QAT_CONF_LOCATION/c6xxvf_dev0.conf
 ```

-### Expose and Bind Intel QAT virtual functions to VFIO-PCI (Every reboot)
+### Expose and Bind Intel® QAT virtual functions to VFIO-PCI (Every reboot)

 To enable virtual functions, the host OS should have IOMMU groups enabled. In 
-the UEFI Firmware Intel Virtualization Technology for Directed I/O 
-(Intel VT-d) must be enabled. Also, the kernel boot parameter should be 
-`intel_iommu=on` or `intel_iommu=ifgx_off`. The default in Clear Linux currently 
-is `intel_iommu=igfx_off` which should work with the Intel QAT device. The 
-following commands assume you installed an Intel QAT card, IOMMU is on, and
+the UEFI Firmware Intel® Virtualization Technology for Directed I/O 
+(Intel® VT-d) must be enabled. Also, the kernel boot parameter should be 
+`intel_iommu=on` or `intel_iommu=igfx_off`. This should have been set from
+the instructions above. Check the output of `/proc/cmdline` to confirm. The 
+following commands assume you installed an Intel® QAT card, IOMMU is on, and
 VT-d is enabled. The vendor and device ID add to the `VFIO-PCI` driver so that
 each exposed virtual function can be bound to the `VFIO-PCI` driver. Once
 complete, each virtual function passes into a Kata Containers container using
-the PCIe device passthrough feature. For Kubernetes, the Intel device plugin
-for Kubernetes handles the binding of the driver but the VF’s still must be
+the PCIe device passthrough feature. For Kubernetes, the 
+[Intel device plugin](https://github.com/intel/intel-device-plugins-for-kubernetes)
+for Kubernetes handles the binding of the driver, but the VF’s still must be
 enabled.

-```sh
+```bash
 $ sudo modprobe vfio-pci
 $ QAT_PCI_BUS_PF_NUMBERS=$((lspci -d :435 && lspci -d :37c8 && lspci -d :19e2 && lspci -d :6f54) | cut -d ' ' -f 1)
 $ QAT_PCI_BUS_PF_1=$(echo $QAT_PCI_BUS_PF_NUMBERS | cut -d ' ' -f 1)
@@ -160,8 +170,10 @@ $ QAT_PCI_ID_VF=$(cat /sys/bus/pci/devices/0000:${QAT_PCI_BUS_PF_1}/virtfn0/ueve
 $ QAT_VENDOR_AND_ID_VF=$(echo ${QAT_PCI_ID_VF/PCI_ID=} | sed 's/:/ /')
 $ echo $QAT_VENDOR_AND_ID_VF | sudo tee --append /sys/bus/pci/drivers/vfio-pci/new_id
 ```
+
 Loop through all the virtual functions and bind to the VFIO driver
-```sh
+
+```bash
 $ for f in /sys/bus/pci/devices/0000:$QAT_PCI_BUS_PF_1/virtfn*
  do QAT_PCI_BUS_VF=$(basename $(readlink $f))
   echo $QAT_PCI_BUS_VF | sudo tee --append /sys/bus/pci/drivers/c6xxvf/unbind
@@ -169,22 +181,23 @@ $ for f in /sys/bus/pci/devices/0000:$QAT_PCI_BUS_PF_1/virtfn*
  done
 ```

-### Check Intel QAT virtual functions are enabled
+### Check Intel® QAT virtual functions are enabled

 If the following command returns empty, then the virtual functions are not 
 properly enabled. This command checks the enumerated device IDs for just the 
-virtual functions. Using the Intel QAT as an example, the physical device ID 
+virtual functions. Using the Intel® QAT as an example, the physical device ID 
 is `37c8` and virtual function device ID is `37c9`. The following command checks 
-if VF's are enabled for any of the currently known Intel QAT device ID's. The
+if VF's are enabled for any of the currently known Intel® QAT device ID's. The
 following `ls` command should show the 16 VF's bound to `VFIO-PCI`.

-```sh
+```bash
 $ for i in 0442 0443 37c9 19e3; do lspci -d 8086:$i; done
 ```

 Another way to check is to see what PCI devices that `VFIO-PCI` is mapped to.
 It should match the device ID's of the VF's.
-```sh
+
+```bash
 $ ls -la /sys/bus/pci/drivers/vfio-pci
 ```

@@ -201,16 +214,16 @@ There are some patches that must be installed as well, which the
 `build-kernel.sh` script should automatically apply. If you are using a
 different kernel version, then you might need to manually apply them. Since
 the Kata Containers kernel has a minimal set of kernel flags set, you must
-create a QAT kernel fragment with the necessary `CONFIG_CRYPTO_*` options set.
+create an Intel® QAT kernel fragment with the necessary `CONFIG_CRYPTO_*` options set.
 Update the config to set some of the `CRYPTO` flags to enabled. This might
-change with different kernel versions. We tested the following instructions
-with kernel `v4.19.28-41`.
+change with different kernel versions. The following instructions were tested
+with kernel `v5.4.0-64-generic`.

-```sh
+```bash
 $ mkdir -p $GOPATH
 $ cd $GOPATH
-$ go get -v github.com/kata-containers/packaging
-$ cat << EOF > $GOPATH/src/github.com/kata-containers/packaging/kernel/configs/fragments/common/qat.conf
+$ go get -v github.com/kata-containers/kata-containers
+$ cat << EOF > $GOPATH/src/github.com/kata-containers/kata-containers/tools/packaging/kernel/configs/fragments/common/qat.conf
 CONFIG_PCIEAER=y
 CONFIG_UIO=y
 CONFIG_CRYPTO_HW=y
@@ -221,61 +234,70 @@ CONFIG_MODULE_SIG=y
 CONFIG_CRYPTO_AUTHENC=y
 CONFIG_CRYPTO_DH=y
 EOF
-$ $GOPATH/src/github.com/kata-containers/packaging/kernel/build-kernel.sh setup
+$ $GOPATH/src/github.com/kata-containers/kata-containers/tools/packaging/kernel/build-kernel.sh setup
 ```

 ### Build Kata kernel

-```sh
-$ export LINUX_VER=$(ls -d kata*)
+```bash
+$ cd $GOPATH
+$ export LINUX_VER=$(ls -d kata-linux-*)
 $ sed -i 's/EXTRAVERSION =/EXTRAVERSION = .qat.container/' $LINUX_VER/Makefile
-$ $GOPATH/src/github.com/kata-containers/packaging/kernel/build-kernel.sh build
+$ $GOPATH/src/github.com/kata-containers/kata-containers/tools/packaging/kernel/build-kernel.sh build
 ```

-
 ### Copy Kata kernel

-```sh
+```bash
+$ export KATA_KERNEL_NAME=vmlinux-${LINUX_VER}_qat
 $ mkdir -p $KATA_KERNEL_LOCATION
-$ cp $LINUX_VER/arch/x86/boot/bzImage $KATA_KERNEL_LOCATION/vmlinuz-${LINUX_VER}_qat
+$ cp ${GOPATH}/${LINUX_VER}/vmlinux ${KATA_KERNEL_LOCATION}/${KATA_KERNEL_NAME}
 ```

 ### Prepare Kata root filesystem

 These instructions build upon the OS builder instructions located in the 
-[Developer Guide](../Developer-Guide.md). The following instructions use Clear
-Linux (Kata Containers default) as the root filesystem with systemd as the 
-init and will add in the `kmod` binary, which is not a standard binary in a 
-Kata rootfs image. The `kmod` binary is necessary to load the QAT kernel 
-modules when the virtual machine rootfs boots. You should install Docker on
-your system before running the following commands. If you need to use a custom 
-`kata-agent`, then refer to the previous link on how to add it in.
+[Developer Guide](../Developer-Guide.md). At this point it is recommended that
+[Docker](https://docs.docker.com/engine/install/ubuntu/) is installed first, and
+then [Kata-deploy](https://github.com/kata-containers/kata-containers/tree/main/tools/packaging/kata-deploy)
+is used to install Kata. This will make sure that the correct `agent` version 
+is installed into the rootfs in the steps below.

-```sh
-$ mkdir -p $OSBUILDER
-$ cd $OSBUILDER
-$ git clone https://github.com/kata-containers/osbuilder.git
-$ export ROOTFS_DIR=${OSBUILDER}/osbuilder/rootfs-builder/rootfs
+The following instructions use Debian as the root filesystem with systemd as 
+the init and will add in the `kmod` binary, which is not a standard binary in 
+a Kata rootfs image. The `kmod` binary is necessary to load the Intel® QAT 
+kernel modules when the virtual machine rootfs boots. 
+
+```bash
+$ export OSBUILDER=$GOPATH/src/github.com/kata-containers/kata-containers/tools/osbuilder
+$ export ROOTFS_DIR=${OSBUILDER}/rootfs-builder/rootfs
 $ export EXTRA_PKGS='kmod'
 ```
+
 Make sure that the `kata-agent` version matches the installed `kata-runtime`
-version.
-```sh
+version. Also make sure the `kata-runtime` install location is in your `PATH` 
+variable. The following `AGENT_VERSION` can be set manually to match
+the `kata-runtime` version if the following commands don't work.
+
+```bash
+$ export PATH=$PATH:/opt/kata/bin
+$ cd $GOPATH
 $ export AGENT_VERSION=$(kata-runtime version | head -n 1 | grep -o "[0-9.]\+")
-$ cd ${OSBUILDER}/osbuilder/rootfs-builder
+$ cd ${OSBUILDER}/rootfs-builder
 $ sudo rm -rf ${ROOTFS_DIR}
-$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true SECCOMP=no ./rootfs.sh clearlinux'
+$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true SECCOMP=no ./rootfs.sh debian'
 ```

-### Compile Intel QAT drivers for Kata Containers kernel and add to Kata Containers rootfs
+### Compile Intel® QAT drivers for Kata Containers kernel and add to Kata Containers rootfs

 After the Kata Containers kernel builds with the proper configuration flags, 
-you must build the Intel QAT drivers against that Kata Containers kernel
+you must build the Intel® QAT drivers against that Kata Containers kernel
 version in a similar way they were previously built for the host OS. You must 
 set the `KERNEL_SOURCE_ROOT` variable to the Kata Containers kernel source 
-directory and build the Intel QAT drivers again.
+directory and build the Intel® QAT drivers again. The `make` command will
+install the Intel® QAT modules into the Kata rootfs.

-```sh
+```bash
 $ cd $GOPATH
 $ export LINUX_VER=$(ls -d kata*)
 $ export KERNEL_MAJOR_VERSION=$(awk '/^VERSION =/{print $NF}' $GOPATH/$LINUX_VER/Makefile)
@@ -284,16 +306,18 @@ $ export KERNEL_SUBLEVEL=$(awk '/^SUBLEVEL =/{print $NF}' $GOPATH/$LINUX_VER/Mak
 $ export KERNEL_EXTRAVERSION=$(awk '/^EXTRAVERSION =/{print $NF}' $GOPATH/$LINUX_VER/Makefile)
 $ export KERNEL_ROOTFS_DIR=${KERNEL_MAJOR_VERSION}.${KERNEL_PATHLEVEL}.${KERNEL_SUBLEVEL}${KERNEL_EXTRAVERSION}
 $ cd $QAT_SRC
-$ KERNEL_SOURCE_ROOT=$GOPATH/$LINUX_VER ./configure --disable-qat-lkcf --enable-icp-sriov=guest
+$ KERNEL_SOURCE_ROOT=$GOPATH/$LINUX_VER ./configure --enable-icp-sriov=guest
 $ sudo -E make all -j$(nproc)
 $ sudo -E make INSTALL_MOD_PATH=$ROOTFS_DIR qat-driver-install -j$(nproc)
 ```
+
 The `usdm_drv` module also needs to be copied into the rootfs modules path and
 `depmod` should be run. 
-```sh
-$ sudo cp $QAT_SRC/build/usdm_drv.ko $ROOTFS_DIR/usr/lib/modules/${KERNEL_ROOTFS_DIR}/updates/drivers  
+
+```bash
+$ sudo cp $QAT_SRC/build/usdm_drv.ko $ROOTFS_DIR/lib/modules/${KERNEL_ROOTFS_DIR}/updates/drivers  
 $ sudo depmod -a -b ${ROOTFS_DIR} ${KERNEL_ROOTFS_DIR}
-$ cd ${OSBUILDER}/osbuilder/image-builder
+$ cd ${OSBUILDER}/image-builder
 $ script -fec 'sudo -E USE_DOCKER=true ./image_builder.sh ${ROOTFS_DIR}'
 ```

@@ -302,84 +326,225 @@ $ script -fec 'sudo -E USE_DOCKER=true ./image_builder.sh ${ROOTFS_DIR}'

 ### Copy Kata rootfs

-```sh
+```bash
 $ mkdir -p $KATA_ROOTFS_LOCATION
-$ cp ${OSBUILDER}/osbuilder/image-builder/kata-containers.img $KATA_ROOTFS_LOCATION
+$ cp ${OSBUILDER}/image-builder/kata-containers.img $KATA_ROOTFS_LOCATION
 ```

-### Update Kata configuration to point to custom kernel and rootfs
+## Verify Intel® QAT works in a container

-You must update the `configuration.toml` for Kata Containers to point to the 
-custom kernel, custom rootfs, and to specify which modules to load when the 
-virtual machine is booted when a container is run. The following example
-assumes you installed an Intel QAT, and you need to load those modules.
-
-```sh
-$ sudo mkdir -p /etc/kata-containers
-$ sudo cp /usr/share/defaults/kata-containers/configuration-qemu.toml /etc/kata-containers/configuration.toml
-$ sudo sed -i "s|kernel_params = \"\"|kernel_params = \"modules-load=usdm_drv,qat_c62xvf\"|g" /etc/kata-containers/configuration.toml
-$ sudo sed -i "s|\/usr\/share\/kata-containers\/kata-containers.img|${KATA_KERNEL_LOCATION}\/kata-containers.img|g" /etc/kata-containers/configuration.toml
-$ sudo sed -i "s|\/usr\/share\/kata-containers\/vmlinuz.container|${KATA_ROOTFS_LOCATION}\/vmlinuz-${LINUX_VER}_qat|g" /etc/kata-containers/configuration.toml
-```
-
-## Verify Intel QAT works in a Docker Kata Containers container
-
-The following instructions leverage an OpenSSL Dockerfile that builds the 
-Intel QAT engine to allow OpenSSL to offload crypto functions. It is a 
-convenient way to test that VFIO device passthrough for the Intel QAT VF’s are
+The following instructions use an OpenSSL Dockerfile that builds the 
+Intel® QAT engine to allow OpenSSL to offload crypto functions. It is a 
+convenient way to test that VFIO device passthrough for the Intel® QAT VF’s are
 working properly with the Kata Containers VM.

-## Build OpenSSL Intel QAT engine container
+### Build OpenSSL Intel® QAT engine container

-Use the OpenSSL Intel QAT [Dockerfile](https://github.com/intel/intel-device-plugins-for-kubernetes/tree/master/demo/openssl-qat-engine) 
+Use the OpenSSL Intel® QAT [Dockerfile](https://github.com/intel/intel-device-plugins-for-kubernetes/tree/master/demo/openssl-qat-engine) 
 to build a container image with an optimized OpenSSL engine for 
-Intel QAT. Using `docker build` with the Kata Containers runtime can sometimes
-have issues. Therefore, we recommended you change the default runtime to
-`runc` before doing a build. Instructions for this are below.
+Intel® QAT. Using `docker build` with the Kata Containers runtime can sometimes
+have issues. Therefore, make sure that `runc` is the default Docker container 
+runtime.

-```sh
+```bash
 $ cd $QAT_SRC
 $ curl -O $QAT_DOCKERFILE
-$ sudo sed -i 's/kata-runtime/runc/g' /etc/systemd/system/docker.service.d/50-runtime.conf
-$ sudo systemctl daemon-reload && sudo systemctl restart docker
 $ sudo docker build -t openssl-qat-engine .
 ```

-> **Note: The Intel QAT driver version in this container might not match the 
-> Intel QAT driver compiled and loaded on the host when compiling.**
+> **Note: The Intel® QAT driver version in this container might not match the 
+> Intel® QAT driver compiled and loaded on the host when compiling.**

-### Test Intel QAT in Docker
+### Test Intel® QAT with the ctr tool

-The host should already be setup with 16 virtual functions of the Intel QAT 
-card bound to `VFIO-PCI`. Verify this by looking in `/dev/vfio` for a listing
-of devices. Replace the number 90 with one of the VF’s exposed in `/dev/vfio`.
-It might require you to add an `IPC_LOCK` capability to your Docker runtime
-depending on which rootfs you use.
+The `ctr` tool can be used to interact with the containerd daemon. It may be 
+more convenient to use this tool to verify the kernel and image instead of
+setting up a Kubernetes cluster. The correct Kata runtimes need to be added
+to the containerd `config.toml`. Below is a sample snippet that can be added
+to allow QEMU and Cloud Hypervisor (CLH) to work with `ctr`.

-```sh
-$ sudo docker run -it --runtime=kata-runtime --cap-add=IPC_LOCK --cap-add=SYS_ADMIN --device=/dev/vfio/90 -v /dev:/dev -v ${QAT_CONF_LOCATION}:/etc openssl-qat-engine bash
+```
+[plugins.cri.containerd.runtimes.kata-qemu]
+  runtime_type = "io.containerd.kata-qemu.v2"
+  privileged_without_host_devices = true
+  pod_annotations = ["io.katacontainers.*"]
+  [plugins.cri.containerd.runtimes.kata-qemu.options]
+    ConfigPath = "/opt/kata/share/defaults/kata-containers/configuration-qemu.toml"
+[plugins.cri.containerd.runtimes.kata-clh]
+  runtime_type = "io.containerd.kata-clh.v2"
+  privileged_without_host_devices = true
+  pod_annotations = ["io.katacontainers.*"]
+  [plugins.cri.containerd.runtimes.kata-clh.options]
+    ConfigPath = "/opt/kata/share/defaults/kata-containers/configuration-clh.toml"
 ```

-Below are some commands to run in the container image to verify Intel QAT is 
+In addition, containerd expects the shim binaries to be in `/usr/local/bin`, so 
+add the following small wrapper scripts there. Each one redirects to the Kata 
+shim with the QEMU or Cloud Hypervisor configuration file, respectively.
+
+```bash
+$ echo '#!/bin/bash' | sudo tee /usr/local/bin/containerd-shim-kata-qemu-v2
+$ echo 'KATA_CONF_FILE=/opt/kata/share/defaults/kata-containers/configuration-qemu.toml /opt/kata/bin/containerd-shim-kata-v2 $@' | sudo tee -a /usr/local/bin/containerd-shim-kata-qemu-v2
+$ sudo chmod +x /usr/local/bin/containerd-shim-kata-qemu-v2
+$ echo '#!/bin/bash' | sudo tee /usr/local/bin/containerd-shim-kata-clh-v2
+$ echo 'KATA_CONF_FILE=/opt/kata/share/defaults/kata-containers/configuration-clh.toml /opt/kata/bin/containerd-shim-kata-v2 $@' | sudo tee -a /usr/local/bin/containerd-shim-kata-clh-v2
+$ sudo chmod +x /usr/local/bin/containerd-shim-kata-clh-v2
+```
+
+After the OpenSSL image is built and imported into containerd, an Intel® QAT 
+virtual function exposed in the step above can be added to the `ctr` command. 
+Make sure to change the `/dev/vfio` number to one that actually exists on the 
+host system. When using the `ctr` tool, the `configuration.toml` for Kata needs 
+to point to the custom Kata kernel and rootfs built above and the Intel® QAT 
+modules in the Kata rootfs need to load at boot. The following steps assume that 
+`kata-deploy` was used to install Kata and QEMU is being tested. If using a 
+different hypervisor, different install method for Kata, or a different 
+Intel® QAT chipset then the command will need to be modified. 
+
+> **Note: The following was tested with 
+[containerd v1.3.9](https://github.com/containerd/containerd/releases/tag/v1.3.9).**
+
+```bash
+$ config_file="/opt/kata/share/defaults/kata-containers/configuration-qemu.toml"
+$ sudo sed -i "/kernel =/c kernel = "\"${KATA_KERNEL_LOCATION}/${KATA_KERNEL_NAME}\""" $config_file
+$ sudo sed -i "/image =/c image = "\"${KATA_ROOTFS_LOCATION}/kata-containers.img\""" $config_file
+$ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 modules-load=usdm_drv,qat_c62xvf"/g' $config_file 
+$ sudo docker save -o openssl-qat-engine.tar openssl-qat-engine:latest
+$ sudo ctr images import openssl-qat-engine.tar
+$ sudo ctr run --runtime io.containerd.run.kata-qemu.v2 --privileged -t --rm --device=/dev/vfio/180 --mount type=bind,src=/dev,dst=/dev,options=rbind:rw --mount type=bind,src=${QAT_CONF_LOCATION}/c6xxvf_dev0.conf,dst=/etc/c6xxvf_dev0.conf,options=rbind:rw  docker.io/library/openssl-qat-engine:latest bash
+```
+
+Below are some commands to run in the container image to verify Intel® QAT is 
 working

 ```sh
-bash-5.0# cat /proc/modules
-bash-5.0# adf_ctl restart
-bash-5.0# adf_ctl status
-bash-5.0# openssl engine -c -t qat
+root@67561dc2757a/ # cat /proc/modules
+qat_c62xvf 16384 - - Live 0xffffffffc00d9000 (OE)
+usdm_drv 86016 - - Live 0xffffffffc00e8000 (OE)
+intel_qat 249856 - - Live 0xffffffffc009b000 (OE)
+
+root@67561dc2757a/ # adf_ctl restart
+Restarting all devices.
+Processing /etc/c6xxvf_dev0.conf
+
+root@67561dc2757a/ # adf_ctl status
+Checking status of all devices.
+There is 1 QAT acceleration device(s) in the system:
+ qat_dev0 - type: c6xxvf,  inst_id: 0,  node_id: 0,  bsf: 0000:01:01.0,  #accel: 1 #engines: 1 state: up
+
+root@67561dc2757a/ # openssl engine -c -t qat-hw
+(qat-hw) Reference implementation of QAT crypto engine v0.6.1
+ [RSA, DSA, DH, AES-128-CBC-HMAC-SHA1, AES-128-CBC-HMAC-SHA256, AES-256-CBC-HMAC-SHA1, AES-256-CBC-HMAC-SHA256, TLS1-PRF, HKDF, X25519, X448]
+     [ available ]
 ```

-Test with Intel QAT card acceleration
+### Test Intel® QAT in Kubernetes

-```sh
-bash-5.0# openssl speed -engine qat -elapsed -async_jobs 72 rsa2048 
+Start a Kubernetes cluster with containerd as the CRI. The host should 
+already be set up with 16 virtual functions of the Intel® QAT card bound to 
+`VFIO-PCI`. Verify this by looking in `/dev/vfio` for a listing of devices. 
+You might need to disable Docker before initializing Kubernetes. Be aware 
+that the OpenSSL container image built above will need to be exported from
+Docker and imported into containerd.
+
+If Kata is installed through [`kata-deploy`](https://github.com/kata-containers/kata-containers/blob/stable-2.0/tools/packaging/kata-deploy/README.md)
+there will be multiple `configuration.toml` files associated with different 
+hypervisors. Rather than add in the custom Kata kernel, Kata rootfs, and 
+kernel modules to each `configuration.toml` as the default, instead use
+[annotations](https://github.com/kata-containers/kata-containers/blob/stable-2.0/docs/how-to/how-to-load-kernel-modules-with-kata.md)
+in the Kubernetes YAML file to tell Kata which kernel and rootfs to use. The 
+easy way to do this is to use `kata-deploy` which will install the Kata binaries
+to `/opt` and properly configure the `/etc/containerd/config.toml` with annotation 
+support. However, the `configuration.toml` needs to enable support for
+annotations as well. The following configures both QEMU and Cloud Hypervisor
+`configuration.toml` files that are currently available with Kata Containers 
+versions 2.0 and higher.
+
+```bash
+$ sudo sed -i 's/enable_annotations\s=\s\[\]/enable_annotations = [".*"]/' /opt/kata/share/defaults/kata-containers/configuration-qemu.toml
+$ sudo sed -i 's/enable_annotations\s=\s\[\]/enable_annotations = [".*"]/' /opt/kata/share/defaults/kata-containers/configuration-clh.toml
 ```

-Test with CPU acceleration
+Export the OpenSSL image from Docker and import into containerd.
+
+```bash
+$ sudo docker save -o openssl-qat-engine.tar openssl-qat-engine:latest
+$ sudo ctr -n=k8s.io images import openssl-qat-engine.tar
+```
+
+The [Intel® QAT Plugin](https://github.com/intel/intel-device-plugins-for-kubernetes/blob/master/cmd/qat_plugin/README.md)
+needs to be started so that the virtual functions can be discovered and
+used by Kubernetes. 
+
+The following YAML file can be used to start a Kata container with Intel® QAT
+support. If Kata is installed with `kata-deploy`, then the containerd 
+`configuration.toml` should have all of the Kata runtime classes already 
+populated and annotations supported. To use an Intel® QAT virtual function, the 
+Intel® QAT plugin needs to be started after the VF's are bound to `VFIO-PCI` as 
+described [above](#expose-and-bind-intel-qat-virtual-functions-to-vfio-pci-every-reboot). 
+Edit the following to point to the correct Kata kernel and rootfs location 
+built with Intel® QAT support.
+
+```bash
+$ cat << EOF > kata-openssl-qat.yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  name: kata-openssl-qat
+  labels:
+    app: kata-openssl-qat
+  annotations:
+    io.katacontainers.config.hypervisor.kernel: "$KATA_KERNEL_LOCATION/$KATA_KERNEL_NAME"
+    io.katacontainers.config.hypervisor.image: "$KATA_ROOTFS_LOCATION/kata-containers.img"
+    io.katacontainers.config.hypervisor.kernel_params: "modules-load=usdm_drv,qat_c62xvf"
+spec:
+  runtimeClassName: kata-qemu
+  containers:
+  - name: kata-openssl-qat
+    image: docker.io/library/openssl-qat-engine:latest
+    imagePullPolicy: IfNotPresent
+    resources:
+      limits:
+        qat.intel.com/generic: 1
+        cpu: 1
+    securityContext:
+      capabilities:
+        add: ["IPC_LOCK", "SYS_ADMIN"]
+    volumeMounts:
+      - mountPath: /etc/c6xxvf_dev0.conf
+        name: etc-mount
+      - mountPath: /dev
+        name: dev-mount
+  volumes:
+    - name: dev-mount
+      hostPath:
+        path: /dev
+    - name: etc-mount
+      hostPath:
+        path: $QAT_CONF_LOCATION/c6xxvf_dev0.conf
+EOF
+```
+
+Use `kubectl` to start the pod. Verify that Intel® QAT card acceleration is 
+working with the Intel® QAT engine.
+```bash
+$ kubectl apply -f kata-openssl-qat.yaml
+```

 ```sh
-bash-5.0# openssl speed -elapsed rsa2048
+$ kubectl exec -it kata-openssl-qat -- adf_ctl restart
+Restarting all devices.
+Processing /etc/c6xxvf_dev0.conf
+
+$ kubectl exec -it kata-openssl-qat -- adf_ctl status
+Checking status of all devices.
+There is 1 QAT acceleration device(s) in the system:
+ qat_dev0 - type: c6xxvf,  inst_id: 0,  node_id: 0,  bsf: 0000:01:01.0,  #accel: 1 #engines: 1 state: up
+
+$ kubectl exec -it kata-openssl-qat -- openssl engine -c -t qat-hw
+(qat-hw) Reference implementation of QAT crypto engine v0.6.1
+ [RSA, DSA, DH, AES-128-CBC-HMAC-SHA1, AES-128-CBC-HMAC-SHA256, AES-256-CBC-HMAC-SHA1, AES-256-CBC-HMAC-SHA256, TLS1-PRF, HKDF, X25519, X448]
+     [ available ]
 ```

 ### Troubleshooting
@@ -412,9 +577,9 @@ c6xxvf_dev10.conf  c6xxvf_dev13.conf  c6xxvf_dev2.conf   c6xxvf_dev5.conf c6xxvf
 ```

 * Check `dmesg` inside the container to see if there are any issues with the 
-Intel QAT driver.
+Intel® QAT driver.

-* If there are issues building the OpenSSL Intel QAT container image, then 
+* If there are issues building the OpenSSL Intel® QAT container image, then 
 check to make sure that runc is the default runtime for building container.

 ```sh
@@ -425,17 +590,18 @@ Environment="DOCKER_DEFAULT_RUNTIME=--default-runtime runc"

 ## Optional Scripts

-### Verify Intel QAT card counters are incremented
+### Verify Intel® QAT card counters are incremented

-Use the `lspci` command to figure out which PCI bus the Intel QAT accelerators
-are on. The counters will increase when the accelerator is actively being
-used. To verify QAT is actively accelerating the containerized application,
-use the following instructions to check if any of the counters are
-incrementing. You will have to change the PCI device ID to match your system.
+To check the built-in firmware counters, the Intel® QAT driver must be compiled
+and installed on the host; the built-in host driver cannot be relied on. The
+counters will increase when the accelerator is actively being used. To verify
+that Intel® QAT is actively accelerating the containerized application, use the
+following instructions to check whether any of the counters increment. Make
+sure to change the PCI device ID to match what is in the system.

-```sh
+```bash
 $ for i in 0434 0435 37c8 1f18 1f19; do lspci -d 8086:$i; done
 $ sudo watch cat /sys/kernel/debug/qat_c6xx_0000\:b1\:00.0/fw_counters
 $ sudo watch cat /sys/kernel/debug/qat_c6xx_0000\:b3\:00.0/fw_counters
 $ sudo watch cat /sys/kernel/debug/qat_c6xx_0000\:b5\:00.0/fw_counters
-```
+```
--- a/pkg/logging/src/lib.rs
+++ b/pkg/logging/src/lib.rs
@@ -21,7 +21,12 @@ const LOG_LEVELS: &[(&str, slog::Level)] = &[
 ];

 // XXX: 'writer' param used to make testing possible.
-pub fn create_logger<W>(name: &str, source: &str, level: slog::Level, writer: W) -> slog::Logger
+pub fn create_logger<W>(
+    name: &str,
+    source: &str,
+    level: slog::Level,
+    writer: W,
+) -> (slog::Logger, slog_async::AsyncGuard)
 where
    W: Write + Send + Sync + 'static,
 {
@@ -37,17 +42,21 @@ where
    let filter_drain = RuntimeLevelFilter::new(unique_drain, level).fuse();

    // Ensure the logger is thread-safe
-    let async_drain = slog_async::Async::new(filter_drain).build().fuse();
+    let (async_drain, guard) = slog_async::Async::new(filter_drain)
+        .thread_name("slog-async-logger".into())
+        .build_with_guard();

    // Add some "standard" fields
-    slog::Logger::root(
+    let logger = slog::Logger::root(
        async_drain.fuse(),
        o!("version" => env!("CARGO_PKG_VERSION"),
            "subsystem" => "root",
            "pid" => process::id().to_string(),
            "name" => name.to_string(),
            "source" => source.to_string()),
-    )
+    );
+
+    (logger, guard)
 }

 pub fn get_log_levels() -> Vec<&'static str> {
--- a/snap/snapcraft.yaml
+++ b/snap/snapcraft.yaml
@@ -69,7 +69,7 @@ parts:
      tar -xf ${tarfile} --strip-components=1

  image:
-    after: [godeps]
+    after: [godeps, qemu, kernel]
    plugin: nil
    build-packages:
      - docker.io
@@ -89,6 +89,8 @@ parts:
      export GOROOT=${SNAPCRAFT_STAGE}
      export PATH="${GOROOT}/bin:${PATH}"

+      http_proxy=${http_proxy:-""}
+      https_proxy=${https_proxy:-""}
      if [ -n "$http_proxy" ]; then
        echo "Setting proxy $http_proxy"
        sudo -E systemctl set-environment http_proxy=$http_proxy || true
@@ -169,7 +171,7 @@ parts:
      fi

  kernel:
-    after: [godeps, image]
+    after: [godeps]
    plugin: nil
    build-packages:
      - libelf-dev
@@ -183,8 +185,8 @@ parts:

      cd ${kata_dir}/tools/packaging/kernel

-      # Say 'no' to everithing, fix issues with incomplete .config files
-      yes "n" | ./build-kernel.sh setup
+      # Setup and build kernel
+      ./build-kernel.sh -d setup
      kernel_dir_prefix="kata-linux-"
      cd ${kernel_dir_prefix}*
      version=$(basename ${PWD} | sed 's|'"${kernel_dir_prefix}"'||' | cut -d- -f1)
@@ -206,7 +208,7 @@ parts:

  qemu:
    plugin: make
-    after: [godeps, runtime]
+    after: [godeps]
    build-packages:
      - gcc
      - python3
@@ -226,6 +228,7 @@ parts:
      - libffi-dev
      - libmount-dev
      - libselinux1-dev
+      - ninja-build
    override-build: |
      yq=${SNAPCRAFT_STAGE}/yq
      export GOPATH=${SNAPCRAFT_STAGE}/gopath
@@ -242,10 +245,11 @@ parts:
        ;;

        *)
-          branch="$(${yq} r ${versions_file} assets.hypervisor.qemu.tag)"
+          branch="$(${yq} r ${versions_file} assets.hypervisor.qemu.version)"
          url="$(${yq} r ${versions_file} assets.hypervisor.qemu.url)"
          commit=""
          patches_dir="${kata_dir}/tools/packaging/qemu/patches/$(echo ${branch} | sed -e 's/.[[:digit:]]*$//' -e 's/^v//').x"
+          patches_version_dir="${kata_dir}/tools/packaging/qemu/patches/tag_patches/${branch}"
        ;;
      esac

@@ -258,31 +262,23 @@ parts:
      [ -n "$(ls -A ui/keycodemapdb)" ] || git clone https://github.com/qemu/keycodemapdb ui/keycodemapdb/
      [ -n "$(ls -A capstone)" ] || git clone https://github.com/qemu/capstone capstone

-      # Apply patches
-      for patch in ${patches_dir}/*.patch; do
-        echo "Applying $(basename "$patch") ..."
-        patch \
-          --batch \
-          --forward \
-          --strip 1 \
-          --input "$patch"
-      done
+      # Apply branch patches, then tag-specific patches
+      ${kata_dir}/tools/packaging/scripts/apply_patches.sh "${patches_dir}"
+      ${kata_dir}/tools/packaging/scripts/apply_patches.sh "${patches_version_dir}"

      # Only x86_64 supports libpmem
      [ "$(uname -m)" = "x86_64" ] && sudo apt-get --no-install-recommends install -y apt-utils ca-certificates libpmem-dev libseccomp-dev

      configure_hypervisor=${kata_dir}/tools/packaging/scripts/configure-hypervisor.sh
      chmod +x ${configure_hypervisor}
-      # static build
-      echo "$(${configure_hypervisor} -s qemu) \
-        --disable-rbd
-        --prefix=/snap/${SNAPCRAFT_PROJECT_NAME}/current/usr \
-        --datadir=/snap/${SNAPCRAFT_PROJECT_NAME}/current/usr/share \
-        --libexecdir=/snap/${SNAPCRAFT_PROJECT_NAME}/current/usr/libexec/qemu" \
+      # static build. The --prefix, --libdir, --libexecdir, --datadir arguments are
+      # based on PREFIX and set by configure-hypervisor.sh
+      echo "$(PREFIX=/snap/${SNAPCRAFT_PROJECT_NAME}/current/usr ${configure_hypervisor} -s kata-qemu) \
+        --disable-rbd " \
        | xargs ./configure

      # Copy QEMU configurations (Kconfigs)
-      cp -a ${kata_dir}/tools/packaging/qemu/default-configs/* default-configs/
+      cp -a ${kata_dir}/tools/packaging/qemu/default-configs/* default-configs/devices/

      # build and install
      make -j $(($(nproc)-1))
@@ -293,7 +289,6 @@ parts:
      - -usr/bin/qemu-pr-helper
      - -usr/bin/virtfs-proxy-helper
      - -usr/include/
-      - -usr/libexec/
      - -usr/share/applications/
      - -usr/share/icons/
      - -usr/var/
--- a/src/agent/.gitignore
+++ b/src/agent/.gitignore
@@ -0,0 +1 @@
+tarpaulin-report.html
--- a/src/agent/.rustfmt.toml
+++ b/src/agent/.rustfmt.toml
@@ -0,0 +1 @@
+edition = "2018"
--- a/src/agent/Cargo.lock
+++ b/src/agent/Cargo.lock
--- a/src/agent/Cargo.toml
+++ b/src/agent/Cargo.toml
@@ -9,19 +9,26 @@ oci = { path = "oci" }
 logging = { path = "../../pkg/logging" }
 rustjail = { path = "rustjail" }
 protocols = { path = "protocols" }
-netlink = { path = "netlink", features = ["with-log", "with-agent-handler"] }
 lazy_static = "1.3.0"
-ttrpc = "0.3.0"
+ttrpc = { version = "0.5.0", features = ["async", "protobuf-codec"], default-features = false }
 protobuf = "=2.14.0"
 libc = "0.2.58"
 nix = "0.17.0"
 prctl = "1.0.0"
 serde_json = "1.0.39"
-signal-hook = "0.1.9"
 scan_fmt = "0.2.3"
 scopeguard = "1.0.0"
 regex = "1"

+async-trait = "0.1.42"
+tokio = { version = "1.2.0", features = ["rt", "rt-multi-thread", "sync", "macros", "io-util", "time", "signal", "io-std", "process", "fs"] }
+futures = "0.3.12"
+netlink-sys = { version = "0.6.0", features = ["tokio_socket",]}
+tokio-vsock = "0.3.0"
+rtnetlink = "0.7.0"
+netlink-packet-utils = "0.4.0"
+ipnetwork = "0.17.0"
+
 # slog:
 # - Dynamic keys required to allow HashMap keys to be slog::Serialized.
 # - The 'max_*' features allow changing the log level at runtime
@@ -38,11 +45,10 @@ tempfile = "3.1.0"
 prometheus = { version = "0.9.0", features = ["process"] }
 procfs = "0.7.9"
 anyhow = "1.0.32"
-cgroups = { package = "cgroups-rs", version = "0.2.0" }
+cgroups = { package = "cgroups-rs", version = "0.2.2" }

 [workspace]
 members = [
-    "netlink",
    "oci",
    "protocols",
    "rustjail",
--- a/src/agent/Makefile
+++ b/src/agent/Makefile
@@ -3,6 +3,11 @@
 # SPDX-License-Identifier: Apache-2.0
 #

+# To show variables or targets help on `make help`
+# Use the following format:
+# '##VAR VARIABLE_NAME: help about variable'
+# '##TARGET TARGET_NAME: help about target'
+
 PROJECT_NAME = Kata Containers
 PROJECT_URL = https://github.com/kata-containers
 PROJECT_COMPONENT = kata-agent
@@ -16,16 +21,18 @@ SOURCES := \
 VERSION_FILE := ./VERSION
 VERSION := $(shell grep -v ^\# $(VERSION_FILE))
 COMMIT_NO := $(shell git rev-parse HEAD 2>/dev/null || true)
-COMMIT_NO_SHORT := $(shell git rev-parse --short HEAD 2>/dev/null || true)
 COMMIT := $(if $(shell git status --porcelain --untracked-files=no 2>/dev/null || true),${COMMIT_NO}-dirty,${COMMIT_NO})
 COMMIT_MSG = $(if $(COMMIT),$(COMMIT),unknown)

 # Exported to allow cargo to see it
 export VERSION_COMMIT := $(if $(COMMIT),$(VERSION)-$(COMMIT),$(VERSION))

+##VAR BUILD_TYPE=release|debug type of rust build
 BUILD_TYPE = release

+##VAR ARCH=arch target to build (format: uname -m)
 ARCH = $(shell uname -m)
+##VAR LIBC=musl|gnu C library used in the Rust target triple
 LIBC ?= musl
 ifneq ($(LIBC),musl)
    ifeq ($(LIBC),gnu)
@@ -41,6 +48,11 @@ ifeq ($(ARCH), ppc64le)
    $(warning "WARNING: powerpc64le-unknown-linux-musl target is unavailable")
 endif

+ifeq ($(ARCH), s390x)
+    override LIBC = gnu
+    $(warning "WARNING: s390x-unknown-linux-musl target is unavailable")
+endif
+

 EXTRA_RUSTFLAGS :=
 ifeq ($(ARCH), aarch64)
@@ -52,10 +64,12 @@ TRIPLE = $(ARCH)-unknown-linux-$(LIBC)

 TARGET_PATH = target/$(TRIPLE)/$(BUILD_TYPE)/$(TARGET)

+##VAR DESTDIR=<path> is a directory prepended to each installed target file
 DESTDIR :=
+##VAR BINDIR=<path> is a directory for installing executable programs
 BINDIR := /usr/bin

-# Define if agent will be installed as init
+##VAR INIT=yes|no define if agent will be installed as init
 INIT := no

 # Path to systemd unit directory if installed as not init.
@@ -103,6 +117,7 @@ define INSTALL_FILE
 	install -D -m 644 $1 $(DESTDIR)$2/$1 || exit 1;
 endef

+##TARGET default: build code
 default: $(TARGET) show-header

 $(TARGET): $(GENERATED_CODE) $(TARGET_PATH)
@@ -110,42 +125,55 @@ $(TARGET): $(GENERATED_CODE) $(TARGET_PATH)
 $(TARGET_PATH): $(SOURCES) | show-summary
 	@RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE)

+$(GENERATED_FILES): %: %.in
+	@sed $(foreach r,$(GENERATED_REPLACEMENTS),-e 's|@$r@|$($r)|g') "$<" > "$@"
+
+##TARGET optimize: optimized build
 optimize: $(SOURCES) | show-summary show-header
 	@RUSTFLAGS="-C link-arg=-s $(EXTRA_RUSTFLAGS) --deny-warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE)

-show-header:
-	@printf "%s - version %s (commit %s)\n\n" "$(TARGET)" "$(VERSION)" "$(COMMIT_MSG)"

+##TARGET clippy: run clippy linter
 clippy: $(GENERATED_CODE)
 	cargo clippy --all-targets --all-features --release \
 		-- \
 		-Aclippy::redundant_allocation \
 		-D warnings

-$(GENERATED_FILES): %: %.in
-	@sed $(foreach r,$(GENERATED_REPLACEMENTS),-e 's|@$r@|$($r)|g') "$<" > "$@"
+format:
+	cargo fmt -- --check

-install: build-service
+
+##TARGET install: install agent
+install: install-services
 	@install -D $(TARGET_PATH) $(DESTDIR)/$(BINDIR)/$(TARGET)

+##TARGET clean: clean build
 clean:
 	@cargo clean
 	@rm -f $(GENERATED_FILES)
+	@rm -f tarpaulin-report.html

+##TARGET test: run cargo tests
 test:
 	@cargo test --all --target $(TRIPLE)

-check: test
+##TARGET check: run clippy and format checks
+check: clippy format

+##TARGET run: build and run agent
 run:
 	@cargo run --target $(TRIPLE)

-build-service: $(GENERATED_FILES)
+install-services: $(GENERATED_FILES)
 ifeq ($(INIT),no)
 	@echo "Installing systemd unit files..."
 	$(foreach f,$(UNIT_FILES),$(call INSTALL_FILE,$f,$(UNIT_DIR)))
 endif

+show-header:
+	@printf "%s - version %s (commit %s)\n\n" "$(TARGET)" "$(VERSION)" "$(COMMIT_MSG)"
+
 show-summary: show-header
 	@printf "project:\n"
 	@printf "  name: $(PROJECT_NAME)\n"
@@ -161,7 +189,35 @@ show-summary: show-header
 	@printf "  %s\n" "$(call get_toolchain_version)"
 	@printf "\n"

-help: show-summary
+##TARGET help: show help comments that start with `##VAR` and `##TARGET`
+help: Makefile show-summary
+	@echo "==========================Help============================="
+	@echo "Variables:"
+	@sed -n 's/^##VAR//p' $< | sort
+	@echo ""
+	@echo "Targets:"
+	@sed -n 's/^##TARGET//p' $< | sort
+
+TARPAULIN_ARGS:=-v --workspace
+install-tarpaulin:
+	cargo install cargo-tarpaulin
+
+# Check if cargo tarpaulin is installed
+HAS_TARPAULIN:= $(shell cargo --list | grep tarpaulin 2>/dev/null)
+check_tarpaulin:
+ifndef  HAS_TARPAULIN
+	$(error "tarpaulin is not available please: run make install-tarpaulin ")
+else
+	$(info OK: tarpaulin installed)
+endif
+
+##TARGET codecov: Generate code coverage report
+codecov: check_tarpaulin
+	cargo tarpaulin $(TARPAULIN_ARGS)
+
+##TARGET codecov-html: Generate code coverage html report
+codecov-html: check_tarpaulin
+	cargo tarpaulin $(TARPAULIN_ARGS) -o Html

 .PHONY: \
 	help \
@@ -169,5 +225,6 @@ help: show-summary
 	show-summary \
 	optimize

+##TARGET generate-protocols: generate/update grpc agent protocols
 generate-protocols:
 	protocols/hack/update-generated-proto.sh all
--- a/src/agent/README.md
+++ b/src/agent/README.md
@@ -49,6 +49,11 @@ $ rustup target add "${arch}-unknown-linux-musl"
 $ sudo ln -s /usr/bin/g++ /bin/musl-g++
 ```

+ppc64le-only: Manually install `protoc`, e.g.
+```bash
+$ sudo dnf install protobuf-compiler
+```
+
 Download the source files in the Kata containers repository and build the agent:
 ```bash
 $ GOPATH="${GOPATH:-$HOME/go}"
--- a/src/agent/netlink/Cargo.toml
+++ b/src/agent/netlink/Cargo.toml
@@ -1,20 +0,0 @@
-[package]
-name = "netlink"
-version = "0.1.0"
-authors = ["The Kata Containers community <kata-dev@lists.katacontainers.io>"]
-edition = "2018"
-
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-
-[dependencies]
-libc = "0.2.58"
-nix = "0.17.0"
-
-protobuf = { version = "=2.14.0", optional = true }
-protocols = { path = "../protocols", optional = true }
-slog = { version = "2.5.2", features = ["dynamic-keys", "max_level_trace", "release_max_level_info"], optional = true }
-slog-scope = { version = "4.1.2", optional = true }
-
-[features]
-with-log = ["slog", "slog-scope"]
-with-agent-handler = ["protobuf", "protocols"]
--- a/src/agent/netlink/src/agent_handler.rs
+++ b/src/agent/netlink/src/agent_handler.rs
@@ -1,572 +0,0 @@
-// Copyright (c) 2020 Ant Financial
-// Copyright (C) 2020 Alibaba Cloud. All rights reserved.
-//
-// SPDX-License-Identifier: Apache-2.0
-//
-
-//! Dedicated Netlink interfaces for Kata agent protocol handler.
-
-use std::convert::TryFrom;
-
-use protobuf::RepeatedField;
-use protocols::types::{ARPNeighbor, IPAddress, IPFamily, Interface, Route};
-
-use super::*;
-
-#[cfg(feature = "with-log")]
-// Convenience macro to obtain the scope logger
-macro_rules! sl {
-    () => {
-        slog_scope::logger().new(o!("subsystem" => "netlink"))
-    };
-}
-
-impl super::RtnlHandle {
-    pub fn update_interface(&mut self, iface: &Interface) -> Result<Interface> {
-        // the reliable way to find link is using hardware address
-        // as filter. However, hardware filter might not be supported
-        // by netlink, we may have to dump link list and the find the
-        // target link. filter using name or family is supported, but
-        // we cannot use that to find target link.
-        // let's try if hardware address filter works. -_-
-
-        let ifinfo = self.find_link_by_hwaddr(iface.hwAddr.as_str())?;
-
-        // bring down interface if it is up
-        if ifinfo.ifi_flags & libc::IFF_UP as u32 != 0 {
-            self.set_link_status(&ifinfo, false)?;
-        }
-
-        // delete all addresses associated with the link
-        let del_addrs: Vec<RtIPAddr> = self.get_link_addresses(&ifinfo)?;
-        self.delete_all_addrs(&ifinfo, del_addrs.as_ref())?;
-
-        // add new ip addresses in request
-        for grpc_addr in &iface.IPAddresses {
-            let rtip = RtIPAddr::try_from(grpc_addr.clone())?;
-            self.add_one_address(&ifinfo, &rtip)?;
-        }
-
-        let mut v: Vec<u8> = vec![0; DEFAULT_NETLINK_BUF_SIZE];
-        // Safe because we have allocated enough buffer space.
-        let nlh = unsafe { &mut *(v.as_mut_ptr() as *mut nlmsghdr) };
-        let ifi = unsafe { &mut *(NLMSG_DATA!(nlh) as *mut ifinfomsg) };
-
-        // set name, set mtu, IFF_NOARP. in one rtnl_talk.
-        nlh.nlmsg_len = NLMSG_LENGTH!(mem::size_of::<ifinfomsg>() as u32) as __u32;
-        nlh.nlmsg_type = RTM_NEWLINK;
-        nlh.nlmsg_flags = NLM_F_REQUEST;
-        self.assign_seqnum(nlh);
-
-        ifi.ifi_family = ifinfo.ifi_family;
-        ifi.ifi_type = ifinfo.ifi_type;
-        ifi.ifi_index = ifinfo.ifi_index;
-        if iface.raw_flags & libc::IFF_NOARP as u32 != 0 {
-            ifi.ifi_change |= libc::IFF_NOARP as u32;
-            ifi.ifi_flags |= libc::IFF_NOARP as u32;
-        }
-
-        // Safe because we have allocated enough buffer space.
-        unsafe {
-            nlh.addattr32(IFLA_MTU, iface.mtu as u32);
-
-            // if str is null terminated, use addattr_var.
-            // otherwise, use addattr_str
-            nlh.addattr_var(IFLA_IFNAME, iface.name.as_ref());
-        }
-
-        self.rtnl_talk(v.as_mut_slice(), false)?;
-
-        // TODO: why the result is ignored here?
-        let _ = self.set_link_status(&ifinfo, true);
-
-        Ok(iface.clone())
-    }
-
-    /// Delete this interface/link per request
-    pub fn remove_interface(&mut self, iface: &Interface) -> Result<Interface> {
-        let ifinfo = self.find_link_by_hwaddr(iface.hwAddr.as_str())?;
-
-        self.set_link_status(&ifinfo, false)?;
-
-        let mut v: Vec<u8> = vec![0; DEFAULT_NETLINK_BUF_SIZE];
-        // Safe because we have allocated enough buffer space.
-        let nlh = unsafe { &mut *(v.as_mut_ptr() as *mut nlmsghdr) };
-        let ifi = unsafe { &mut *(NLMSG_DATA!(nlh) as *mut ifinfomsg) };
-
-        // No attributes needed?
-        nlh.nlmsg_len = NLMSG_LENGTH!(mem::size_of::<ifinfomsg>()) as __u32;
-        nlh.nlmsg_type = RTM_DELLINK;
-        nlh.nlmsg_flags = NLM_F_REQUEST;
-        self.assign_seqnum(nlh);
-
-        ifi.ifi_family = ifinfo.ifi_family;
-        ifi.ifi_index = ifinfo.ifi_index;
-        ifi.ifi_type = ifinfo.ifi_type;
-
-        self.rtnl_talk(v.as_mut_slice(), false)?;
-
-        Ok(iface.clone())
-    }
-
-    pub fn list_interfaces(&mut self) -> Result<Vec<Interface>> {
-        let mut ifaces: Vec<Interface> = Vec::new();
-        let (_slv, lv) = self.dump_all_links()?;
-        let (_sav, av) = self.dump_all_addresses(0)?;
-
-        for link in &lv {
-            // Safe because dump_all_links() returns valid pointers.
-            let nlh = unsafe { &**link };
-            if nlh.nlmsg_type != RTM_NEWLINK && nlh.nlmsg_type != RTM_DELLINK {
-                continue;
-            }
-
-            if nlh.nlmsg_len < NLMSG_SPACE!(mem::size_of::<ifinfomsg>()) {
-                info!(
-                    sl!(),
-                    "invalid nlmsg! nlmsg_len: {}, nlmsg_space: {}",
-                    nlh.nlmsg_len,
-                    NLMSG_SPACE!(mem::size_of::<ifinfomsg>())
-                );
-                break;
-            }
-
-            // Safe because we have just validated available buffer space above.
-            let ifi = unsafe { &*(NLMSG_DATA!(nlh) as *const ifinfomsg) };
-            let rta: *mut rtattr = IFLA_RTA!(ifi as *const ifinfomsg) as *mut rtattr;
-            let rtalen = IFLA_PAYLOAD!(nlh) as u32;
-            let attrs = unsafe { parse_attrs(rta, rtalen, (IFLA_MAX + 1) as usize)? };
-
-            // fill out some fields of Interface,
-            let mut iface: Interface = Interface::default();
-
-            // Safe because parse_attrs() returns valid pointers.
-            unsafe {
-                if !attrs[IFLA_IFNAME as usize].is_null() {
-                    let t = attrs[IFLA_IFNAME as usize];
-                    iface.name = String::from_utf8(getattr_var(t as *const rtattr))?;
-                }
-
-                if !attrs[IFLA_MTU as usize].is_null() {
-                    let t = attrs[IFLA_MTU as usize];
-                    iface.mtu = getattr32(t) as u64;
-                }
-
-                if !attrs[IFLA_ADDRESS as usize].is_null() {
-                    let alen = RTA_PAYLOAD!(attrs[IFLA_ADDRESS as usize]);
-                    let a: *const u8 = RTA_DATA!(attrs[IFLA_ADDRESS as usize]) as *const u8;
-                    iface.hwAddr = parser::format_address(a, alen as u32)?;
-                }
-            }
-
-            // get ip address info from av
-            let mut ads: Vec<IPAddress> = Vec::new();
-            for address in &av {
-                // Safe because dump_all_addresses() returns valid pointers.
-                let alh = unsafe { &**address };
-                if alh.nlmsg_type != RTM_NEWADDR {
-                    continue;
-                }
-
-                let tlen = NLMSG_SPACE!(mem::size_of::<ifaddrmsg>());
-                if alh.nlmsg_len < tlen {
-                    info!(
-                        sl!(),
-                        "invalid nlmsg! nlmsg_len: {}, nlmsg_space: {}", alh.nlmsg_len, tlen
-                    );
-                    break;
-                }
-
-                // Safe becahse we have checked avialable buffer space by NLMSG_SPACE above.
-                let ifa = unsafe { &*(NLMSG_DATA!(alh) as *const ifaddrmsg) };
-                let arta: *mut rtattr = IFA_RTA!(ifa) as *mut rtattr;
-                let artalen = IFA_PAYLOAD!(alh) as u32;
-
-                if ifa.ifa_index as u32 == ifi.ifi_index as u32 {
-                    // found target addresses, parse attributes and fill out Interface
-                    let addrs = unsafe { parse_attrs(arta, artalen, (IFA_MAX + 1) as usize)? };
-
-                    // fill address field of Interface
-                    let mut one: IPAddress = IPAddress::default();
-                    let tattr: *const rtattr = if !addrs[IFA_ADDRESS as usize].is_null() {
-                        addrs[IFA_ADDRESS as usize]
-                    } else {
-                        addrs[IFA_LOCAL as usize]
-                    };
-
-                    one.mask = format!("{}", ifa.ifa_prefixlen);
-                    one.family = IPFamily::v4;
-                    if ifa.ifa_family == libc::AF_INET6 as u8 {
-                        one.family = IPFamily::v6;
-                    }
-
-                    // Safe because parse_attrs() returns valid pointers.
-                    unsafe {
-                        let a: *const u8 = RTA_DATA!(tattr) as *const u8;
-                        let alen = RTA_PAYLOAD!(tattr);
-                        one.address = parser::format_address(a, alen as u32)?;
-                    }
-
-                    ads.push(one);
-                }
-            }
-
-            iface.IPAddresses = RepeatedField::from_vec(ads);
-            ifaces.push(iface);
-        }
-
-        Ok(ifaces)
-    }
-
-    pub fn update_routes(&mut self, rt: &[Route]) -> Result<Vec<Route>> {
-        let rs = self.get_all_routes()?;
-        self.delete_all_routes(&rs)?;
-
-        for grpcroute in rt {
-            if grpcroute.gateway.as_str() == "" {
-                let r = RtRoute::try_from(grpcroute.clone())?;
-                if r.index == -1 {
-                    continue;
-                }
-                self.add_one_route(&r)?;
-            }
-        }
-
-        for grpcroute in rt {
-            if grpcroute.gateway.as_str() != "" {
-                let r = RtRoute::try_from(grpcroute.clone())?;
-                if r.index == -1 {
-                    continue;
-                }
-                self.add_one_route(&r)?;
-            }
-        }
-
-        Ok(rt.to_owned())
-    }
-
-    pub fn list_routes(&mut self) -> Result<Vec<Route>> {
-        // currently, only dump routes from main table for ipv4
-        // ie, rtmsg.rtmsg_family = AF_INET, set RT_TABLE_MAIN
-        // attribute in dump request
-        // Fix Me: think about othe tables, ipv6..
-        let mut rs: Vec<Route> = Vec::new();
-        let (_srv, rv) = self.dump_all_routes()?;
-
-        // parse out routes and store in rs
-        for r in &rv {
-            // Safe because dump_all_routes() returns valid pointers.
-            let nlh = unsafe { &**r };
-            if nlh.nlmsg_type != RTM_NEWROUTE && nlh.nlmsg_type != RTM_DELROUTE {
-                info!(sl!(), "not route message!");
-                continue;
-            }
-            let tlen = NLMSG_SPACE!(mem::size_of::<rtmsg>());
-            if nlh.nlmsg_len < tlen {
-                info!(
-                    sl!(),
-                    "invalid nlmsg! nlmsg_len: {}, nlmsg_spae: {}", nlh.nlmsg_len, tlen
-                );
-                break;
-            }
-
-            // Safe because we have just validated available buffer space above.
-            let rtm = unsafe { &mut *(NLMSG_DATA!(nlh) as *mut rtmsg) };
-            if rtm.rtm_table != RT_TABLE_MAIN as u8 {
-                continue;
-            }
-            let rta: *mut rtattr = RTM_RTA!(rtm) as *mut rtattr;
-            let rtalen = RTM_PAYLOAD!(nlh) as u32;
-            let attrs = unsafe { parse_attrs(rta, rtalen, (RTA_MAX + 1) as usize)? };
-
-            let t = attrs[RTA_TABLE as usize];
-            if !t.is_null() {
-                // Safe because parse_attrs() returns valid pointers
-                let table = unsafe { getattr32(t) };
-                if table != RT_TABLE_MAIN {
-                    continue;
-                }
-            }
-
-            // find source, destination, gateway, scope, and and device name
-            let mut t = attrs[RTA_DST as usize];
-            let mut rte: Route = Route::default();
-
-            // Safe because parse_attrs() returns valid pointers
-            unsafe {
-                // destination
-                if !t.is_null() {
-                    let data: *const u8 = RTA_DATA!(t) as *const u8;
-                    let len = RTA_PAYLOAD!(t) as u32;
-                    rte.dest =
-                        format!("{}/{}", parser::format_address(data, len)?, rtm.rtm_dst_len);
-                }
-
-                // gateway
-                t = attrs[RTA_GATEWAY as usize];
-                if !t.is_null() {
-                    let data: *const u8 = RTA_DATA!(t) as *const u8;
-                    let len = RTA_PAYLOAD!(t) as u32;
-                    rte.gateway = parser::format_address(data, len)?;
-
-                    // for gateway, destination is 0.0.0.0
-                    rte.dest = "0.0.0.0".to_string();
-                }
-
-                // source
-                t = attrs[RTA_SRC as usize];
-                if t.is_null() {
-                    t = attrs[RTA_PREFSRC as usize];
-                }
-                if !t.is_null() {
-                    let data: *const u8 = RTA_DATA!(t) as *const u8;
-                    let len = RTA_PAYLOAD!(t) as u32;
-                    rte.source = parser::format_address(data, len)?;
-
-                    if rtm.rtm_src_len != 0 {
-                        rte.source = format!("{}/{}", rte.source.as_str(), rtm.rtm_src_len);
-                    }
-                }
-
-                // scope
-                rte.scope = rtm.rtm_scope as u32;
-
-                // oif
-                t = attrs[RTA_OIF as usize];
-                if !t.is_null() {
-                    let data = &*(RTA_DATA!(t) as *const i32);
-                    assert_eq!(RTA_PAYLOAD!(t), 4);
-
-                    rte.device = self
-                        .get_name_by_index(*data)
-                        .unwrap_or_else(|_| "unknown".to_string());
-                }
-            }
-
-            rs.push(rte);
-        }
-
-        Ok(rs)
-    }
-
-    pub fn add_arp_neighbors(&mut self, neighs: &[ARPNeighbor]) -> Result<()> {
-        for neigh in neighs {
-            self.add_one_arp_neighbor(&neigh)?;
-        }
-
-        Ok(())
-    }
-
-    pub fn add_one_arp_neighbor(&mut self, neigh: &ARPNeighbor) -> Result<()> {
-        let to_ip = match neigh.toIPAddress.as_ref() {
-            None => return nix_errno(Errno::EINVAL),
-            Some(v) => {
-                if v.address.is_empty() {
-                    return nix_errno(Errno::EINVAL);
-                }
-                v.address.as_ref()
-            }
-        };
-
-        let dev = self.find_link_by_name(&neigh.device)?;
-
-        let mut v: Vec<u8> = vec![0; DEFAULT_NETLINK_BUF_SIZE];
-        // Safe because we have allocated enough buffer space.
-        let nlh = unsafe { &mut *(v.as_mut_ptr() as *mut nlmsghdr) };
-        let ndm = unsafe { &mut *(NLMSG_DATA!(nlh) as *mut ndmsg) };
-
-        nlh.nlmsg_len = NLMSG_LENGTH!(std::mem::size_of::<ndmsg>()) as u32;
-        nlh.nlmsg_type = RTM_NEWNEIGH;
-        nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
-        self.assign_seqnum(nlh);
-
-        ndm.ndm_family = libc::AF_UNSPEC as __u8;
-        ndm.ndm_state = IFA_F_PERMANENT as __u16;
-        // process lladdr
-        if neigh.lladdr != "" {
-            let llabuf = parser::parse_mac_addr(&neigh.lladdr)?;
-
-            // Safe because we have allocated enough buffer space.
-            unsafe { nlh.addattr_var(NDA_LLADDR, llabuf.as_ref()) };
-        }
-
-        let (family, ip_data) = parser::parse_ip_addr_with_family(&to_ip)?;
-        ndm.ndm_family = family;
-        // Safe because we have allocated enough buffer space.
-        unsafe { nlh.addattr_var(NDA_DST, ip_data.as_ref()) };
-
-        // process state
-        if neigh.state != 0 {
-            ndm.ndm_state = neigh.state as __u16;
-        }
-
-        // process flags
-        ndm.ndm_flags = (*ndm).ndm_flags | neigh.flags as __u8;
-
-        // process dev
-        ndm.ndm_ifindex = dev.ifi_index;
-
-        // send
-        self.rtnl_talk(v.as_mut_slice(), false)?;
-
-        Ok(())
-    }
-}
-
-impl TryFrom<IPAddress> for RtIPAddr {
-    type Error = nix::Error;
-
-    fn try_from(ipi: IPAddress) -> std::result::Result<Self, Self::Error> {
-        let ip_family = if ipi.family == IPFamily::v4 {
-            libc::AF_INET
-        } else {
-            libc::AF_INET6
-        } as __u8;
-
-        let ip_mask = parser::parse_u8(ipi.mask.as_str(), 10)?;
-        let addr = parser::parse_ip_addr(ipi.address.as_ref())?;
-
-        Ok(Self {
-            ip_family,
-            ip_mask,
-            addr,
-        })
-    }
-}
-
-impl TryFrom<Route> for RtRoute {
-    type Error = nix::Error;
-
-    fn try_from(r: Route) -> std::result::Result<Self, Self::Error> {
-        // only handle ipv4
-
-        let index = {
-            let mut rh = RtnlHandle::new(NETLINK_ROUTE, 0)?;
-            match rh.find_link_by_name(r.device.as_str()) {
-                Ok(ifi) => ifi.ifi_index,
-                Err(_) => -1,
-            }
-        };
-
-        let (dest, dst_len) = if r.dest.is_empty() {
-            (Some(vec![0 as u8; 4]), 0)
-        } else {
-            let (dst, mask) = parser::parse_cidr(r.dest.as_str())?;
-            (Some(dst), mask)
-        };
-
-        let (source, src_len) = if r.source.is_empty() {
-            (None, 0)
-        } else {
-            let (src, mask) = parser::parse_cidr(r.source.as_str())?;
-            (Some(src), mask)
-        };
-
-        let gateway = if r.gateway.is_empty() {
-            None
-        } else {
-            Some(parser::parse_ip_addr(r.gateway.as_str())?)
-        };
-
-        Ok(Self {
-            dest,
-            source,
-            src_len,
-            dst_len,
-            index,
-            gateway,
-            scope: r.scope as u8,
-            protocol: RTPROTO_UNSPEC,
-        })
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::{RtnlHandle, NETLINK_ROUTE};
-    use protocols::types::IPAddress;
-    use std::process::Command;
-
-    fn clean_env_for_test_add_one_arp_neighbor(dummy_name: &str, ip: &str) {
-        // ip link delete dummy
-        Command::new("ip")
-            .args(&["link", "delete", dummy_name])
-            .output()
-            .expect("prepare: failed to delete dummy");
-
-        // ip neigh del dev dummy ip
-        Command::new("ip")
-            .args(&["neigh", "del", dummy_name, ip])
-            .output()
-            .expect("prepare: failed to delete neigh");
-    }
-
-    fn prepare_env_for_test_add_one_arp_neighbor(dummy_name: &str, ip: &str) {
-        clean_env_for_test_add_one_arp_neighbor(dummy_name, ip);
-        // modprobe dummy
-        Command::new("modprobe")
-            .arg("dummy")
-            .output()
-            .expect("failed to run modprobe dummy");
-
-        // ip link add dummy type dummy
-        Command::new("ip")
-            .args(&["link", "add", dummy_name, "type", "dummy"])
-            .output()
-            .expect("failed to add dummy interface");
-
-        // ip addr add 192.168.0.2/16 dev dummy
-        Command::new("ip")
-            .args(&["addr", "add", "192.168.0.2/16", "dev", dummy_name])
-            .output()
-            .expect("failed to add ip for dummy");
-
-        // ip link set dummy up;
-        Command::new("ip")
-            .args(&["link", "set", dummy_name, "up"])
-            .output()
-            .expect("failed to up dummy");
-    }
-
-    #[test]
-    fn test_add_one_arp_neighbor() {
-        // skip_if_not_root
-        if !nix::unistd::Uid::effective().is_root() {
-            println!("INFO: skipping {} which needs root", module_path!());
-            return;
-        }
-
-        let mac = "6a:92:3a:59:70:aa";
-        let to_ip = "169.254.1.1";
-        let dummy_name = "dummy_for_arp";
-
-        prepare_env_for_test_add_one_arp_neighbor(dummy_name, to_ip);
-
-        let mut ip_address = IPAddress::new();
-        ip_address.set_address(to_ip.to_string());
-
-        let mut neigh = ARPNeighbor::new();
-        neigh.set_toIPAddress(ip_address);
-        neigh.set_device(dummy_name.to_string());
-        neigh.set_lladdr(mac.to_string());
-        neigh.set_state(0x80);
-
-        let mut rtnl = RtnlHandle::new(NETLINK_ROUTE, 0).unwrap();
-
-        rtnl.add_one_arp_neighbor(&neigh).unwrap();
-
-        // ip neigh show dev dummy ip
-        let stdout = Command::new("ip")
-            .args(&["neigh", "show", "dev", dummy_name, to_ip])
-            .output()
-            .expect("failed to show neigh")
-            .stdout;
-
-        let stdout = std::str::from_utf8(&stdout).expect("failed to conveert stdout");
-
-        assert_eq!(stdout, format!("{} lladdr {} PERMANENT\n", to_ip, mac));
-
-        clean_env_for_test_add_one_arp_neighbor(dummy_name, to_ip);
-    }
-}
--- a/src/agent/netlink/src/lib.rs
+++ b/src/agent/netlink/src/lib.rs
--- a/src/agent/netlink/src/parser.rs
+++ b/src/agent/netlink/src/parser.rs
@@ -1,201 +0,0 @@
-// Copyright (c) 2019 Ant Financial
-//
-// SPDX-License-Identifier: Apache-2.0
-
-//! Parser for IPv4/IPv6/MAC addresses.
-
-use std::net::{Ipv4Addr, Ipv6Addr};
-use std::str::FromStr;
-
-use super::{Errno, Result, __u8, nix_errno};
-
-#[inline]
-pub(crate) fn parse_u8(s: &str, radix: u32) -> Result<u8> {
-    if radix >= 2 && radix <= 36 {
-        u8::from_str_radix(s, radix).map_err(|_| nix::Error::Sys(Errno::EINVAL))
-    } else {
-        u8::from_str(s).map_err(|_| nix::Error::Sys(Errno::EINVAL))
-    }
-}
-
-pub fn parse_ipv4_addr(s: &str) -> Result<Vec<u8>> {
-    match Ipv4Addr::from_str(s) {
-        Ok(v) => Ok(Vec::from(v.octets().as_ref())),
-        Err(_e) => nix_errno(Errno::EINVAL),
-    }
-}
-
-pub fn parse_ip_addr(s: &str) -> Result<Vec<u8>> {
-    if let Ok(v6) = Ipv6Addr::from_str(s) {
-        Ok(Vec::from(v6.octets().as_ref()))
-    } else {
-        parse_ipv4_addr(s)
-    }
-}
-
-pub fn parse_ip_addr_with_family(ip_address: &str) -> Result<(__u8, Vec<u8>)> {
-    if let Ok(v6) = Ipv6Addr::from_str(ip_address) {
-        Ok((libc::AF_INET6 as __u8, Vec::from(v6.octets().as_ref())))
-    } else {
-        parse_ipv4_addr(ip_address).map(|v| (libc::AF_INET as __u8, v))
-    }
-}
-
-pub fn parse_ipv4_cidr(s: &str) -> Result<(Vec<u8>, u8)> {
-    let fields: Vec<&str> = s.split('/').collect();
-
-    if fields.len() != 2 {
-        nix_errno(Errno::EINVAL)
-    } else {
-        Ok((parse_ipv4_addr(fields[0])?, parse_u8(fields[1], 10)?))
-    }
-}
-
-pub fn parse_cidr(s: &str) -> Result<(Vec<u8>, u8)> {
-    let fields: Vec<&str> = s.split('/').collect();
-
-    if fields.len() != 2 {
-        nix_errno(Errno::EINVAL)
-    } else {
-        Ok((parse_ip_addr(fields[0])?, parse_u8(fields[1], 10)?))
-    }
-}
-
-pub fn parse_mac_addr(hwaddr: &str) -> Result<Vec<u8>> {
-    let fields: Vec<&str> = hwaddr.split(':').collect();
-
-    if fields.len() != 6 {
-        nix_errno(Errno::EINVAL)
-    } else {
-        Ok(vec![
-            parse_u8(fields[0], 16)?,
-            parse_u8(fields[1], 16)?,
-            parse_u8(fields[2], 16)?,
-            parse_u8(fields[3], 16)?,
-            parse_u8(fields[4], 16)?,
-            parse_u8(fields[5], 16)?,
-        ])
-    }
-}
-
-/// Format an IPv4/IPv6/MAC address.
-///
-/// # Safety
-/// Caller needs to ensure that addr and len are valid.
-pub unsafe fn format_address(addr: *const u8, len: u32) -> Result<String> {
-    let mut a: String;
-    if len == 4 {
-        // ipv4
-        let mut i = 1;
-        let mut p = addr as i64;
-
-        a = format!("{}", *(p as *const u8));
-        while i < len {
-            p += 1;
-            i += 1;
-            a.push_str(format!(".{}", *(p as *const u8)).as_str());
-        }
-
-        return Ok(a);
-    }
-
-    if len == 6 {
-        // hwaddr
-        let mut i = 1;
-        let mut p = addr as i64;
-
-        a = format!("{:0>2X}", *(p as *const u8));
-        while i < len {
-            p += 1;
-            i += 1;
-            a.push_str(format!(":{:0>2X}", *(p as *const u8)).as_str());
-        }
-
-        return Ok(a);
-    }
-
-    if len == 16 {
-        // ipv6
-        let p = addr as *const u8 as *const libc::c_void;
-        let mut ar: [u8; 16] = [0; 16];
-        let mut v: Vec<u8> = vec![0; 16];
-        let dp: *mut libc::c_void = v.as_mut_ptr() as *mut libc::c_void;
-        libc::memcpy(dp, p, 16);
-
-        ar.copy_from_slice(v.as_slice());
-
-        return Ok(Ipv6Addr::from(ar).to_string());
-    }
-
-    nix_errno(Errno::EINVAL)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use libc;
-
-    #[test]
-    fn test_ip_addr() {
-        let ip = parse_ipv4_addr("1.2.3.4").unwrap();
-        assert_eq!(ip, vec![0x1u8, 0x2u8, 0x3u8, 0x4u8]);
-        parse_ipv4_addr("1.2.3.4.5").unwrap_err();
-        parse_ipv4_addr("1.2.3-4").unwrap_err();
-        parse_ipv4_addr("1.2.3.a").unwrap_err();
-        parse_ipv4_addr("1.2.3.x").unwrap_err();
-        parse_ipv4_addr("-1.2.3.4").unwrap_err();
-        parse_ipv4_addr("+1.2.3.4").unwrap_err();
-
-        let (family, _) = parse_ip_addr_with_family("192.168.1.1").unwrap();
-        assert_eq!(family, libc::AF_INET as __u8);
-
-        let (family, ip) =
-            parse_ip_addr_with_family("2001:0db8:85a3:0000:0000:8a2e:0370:7334").unwrap();
-        assert_eq!(family, libc::AF_INET6 as __u8);
-        assert_eq!(ip.len(), 16);
-        parse_ip_addr_with_family("2001:0db8:85a3:0000:0000:8a2e:0370:73345").unwrap_err();
-
-        let ip = parse_ip_addr("::1").unwrap();
-        assert_eq!(ip[0], 0x0);
-        assert_eq!(ip[15], 0x1);
-    }
-
-    #[test]
-    fn test_parse_cidr() {
-        let (_, mask) = parse_ipv4_cidr("1.2.3.4/31").unwrap();
-        assert_eq!(mask, 31);
-
-        parse_ipv4_cidr("1.2.3/4/31").unwrap_err();
-        parse_ipv4_cidr("1.2.3.4/f").unwrap_err();
-        parse_ipv4_cidr("1.2.3/8").unwrap_err();
-        parse_ipv4_cidr("1.2.3.4.8").unwrap_err();
-
-        let (ip, mask) = parse_cidr("2001:db8:a::123/64").unwrap();
-        assert_eq!(mask, 64);
-        assert_eq!(ip[0], 0x20);
-        assert_eq!(ip[15], 0x23);
-    }
-
-    #[test]
-    fn test_parse_mac_addr() {
-        let mac = parse_mac_addr("FF:FF:FF:FF:FF:FE").unwrap();
-        assert_eq!(mac.len(), 6);
-        assert_eq!(mac[0], 0xff);
-        assert_eq!(mac[5], 0xfe);
-
-        parse_mac_addr("FF:FF:FF:FF:FF:FE:A0").unwrap_err();
-        parse_mac_addr("FF:FF:FF:FF:FF:FX").unwrap_err();
-        parse_mac_addr("FF:FF:FF:FF:FF").unwrap_err();
-    }
-
-    #[test]
-    fn test_format_address() {
-        let buf = [1u8, 2u8, 3u8, 4u8];
-        let addr = unsafe { format_address(&buf as *const u8, 4).unwrap() };
-        assert_eq!(addr, "1.2.3.4");
-
-        let buf = [1u8, 2u8, 3u8, 4u8, 5u8, 6u8];
-        let addr = unsafe { format_address(&buf as *const u8, 6).unwrap() };
-        assert_eq!(addr, "01:02:03:04:05:06");
-    }
-}
--- a/src/agent/oci/src/lib.rs
+++ b/src/agent/oci/src/lib.rs
@@ -784,7 +784,17 @@ pub struct LinuxIntelRdt {
    pub l3_cache_schema: String,
 }

-#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)]
+#[derive(Debug, Serialize, Deserialize, Copy, Clone, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum ContainerState {
+    CREATING,
+    CREATED,
+    RUNNING,
+    STOPPED,
+    PAUSED,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
 pub struct State {
    #[serde(
        default,
@@ -794,8 +804,7 @@ pub struct State {
    pub version: String,
    #[serde(default, skip_serializing_if = "String::is_empty")]
    pub id: String,
-    #[serde(default, skip_serializing_if = "String::is_empty")]
-    pub status: String,
+    pub status: ContainerState,
    #[serde(default)]
    pub pid: i32,
    #[serde(default, skip_serializing_if = "String::is_empty")]
@@ -806,6 +815,8 @@ pub struct State {

 #[cfg(test)]
 mod tests {
+    use super::*;
+
    #[test]
    fn test_deserialize_state() {
        let data = r#"{
@@ -818,10 +829,10 @@ mod tests {
                "myKey": "myValue"
            }
        }"#;
-        let expected = crate::State {
+        let expected = State {
            version: "0.2.0".to_string(),
            id: "oci-container1".to_string(),
-            status: "running".to_string(),
+            status: ContainerState::RUNNING,
            pid: 4422,
            bundle: "/containers/redis".to_string(),
            annotations: [("myKey".to_string(), "myValue".to_string())]
--- a/src/agent/protocols/Cargo.toml
+++ b/src/agent/protocols/Cargo.toml
@@ -5,9 +5,9 @@ authors = ["The Kata Containers community <kata-dev@lists.katacontainers.io>"]
 edition = "2018"

 [dependencies]
-ttrpc = "0.3.0"
+ttrpc = { version = "0.5.0", features = ["async"] }
+async-trait = "0.1.42"
 protobuf = "=2.14.0"
-futures = "0.1.27"

 [build-dependencies]
-ttrpc-codegen = "0.1.2"
+ttrpc-codegen = "0.2.0"
--- a/src/agent/protocols/build.rs
+++ b/src/agent/protocols/build.rs
@@ -3,8 +3,8 @@
 // SPDX-License-Identifier: Apache-2.0
 //

-use std::fs::File;
-use std::io::{Read, Write};
+use std::fs;
+use ttrpc_codegen::{Codegen, Customize};

 fn main() {
    let protos = vec![
@@ -15,16 +15,15 @@ fn main() {
        "protos/oci.proto",
    ];

-    // Tell Cargo that if the .proto files changed, to rerun this build script.
-    protos
-        .iter()
-        .for_each(|p| println!("cargo:rerun-if-changed={}", &p));
-
-    ttrpc_codegen::Codegen::new()
+    Codegen::new()
        .out_dir("src")
        .inputs(&protos)
        .include("protos")
        .rust_protobuf()
+        .customize(Customize {
+            async_server: true,
+            ..Default::default()
+        })
        .run()
        .expect("Gen codes failed.");

@@ -40,16 +39,6 @@ fn main() {
 }

 fn replace_text_in_file(file_name: &str, from: &str, to: &str) -> Result<(), std::io::Error> {
-    let mut src = File::open(file_name)?;
-    let mut contents = String::new();
-    src.read_to_string(&mut contents).unwrap();
-    drop(src);
-
-    let new_contents = contents.replace(from, to);
-
-    let mut dst = File::create(&file_name)?;
-    dst.write_all(new_contents.as_bytes())?;
-
-    Ok(())
+    let new_contents = fs::read_to_string(file_name)?.replace(from, to);
+    fs::write(&file_name, new_contents.as_bytes())
 }
-
--- a/src/agent/protocols/protos/types.proto
+++ b/src/agent/protocols/protos/types.proto
@@ -29,10 +29,8 @@ message Interface {
 	uint64 mtu = 4;
 	string hwAddr = 5;

-	// pciAddr is the PCI address in the format  "bridgeAddr/deviceAddr".
-	// Here, bridgeAddr is the address at which the bridge is attached on the root bus,
-	// while deviceAddr is the address at which the network device is attached on the bridge.
-	string pciAddr = 6;
+	// PCI path for the device (see the pci::Path (Rust) or types.PciPath (Go) type for format details)
+	string pciPath = 6;

 	// Type defines the type of interface described by this structure.
 	// The expected values are the one that are defined by the netlink
--- a/src/agent/rustjail/Cargo.toml
+++ b/src/agent/rustjail/Cargo.toml
@@ -10,23 +10,27 @@ serde_json = "1.0.39"
 serde_derive = "1.0.91"
 oci = { path = "../oci" }
 protocols = { path ="../protocols" }
-caps = "0.3.0"
+caps = "0.5.0"
 nix = "0.17.0"
 scopeguard = "1.0.0"
 prctl = "1.0.0"
 lazy_static = "1.3.0"
 libc = "0.2.58"
-protobuf = "2.8.1"
+protobuf = "=2.14.0"
 slog = "2.5.2"
 slog-scope = "4.1.2"
 scan_fmt = "0.2"
 regex = "1.1"
 path-absolutize = "1.2.0"
-dirs = "3.0.1"
 anyhow = "1.0.32"
-cgroups = { package = "cgroups-rs", version = "0.2.0" }
+cgroups = { package = "cgroups-rs", version = "0.2.1" }
 tempfile = "3.1.0"
-epoll = "4.3.1"
+rlimit = "0.5.3"
+
+tokio = { version = "1.2.0", features = ["sync", "io-util", "process", "time", "macros"] }
+futures = "0.3"
+async-trait = "0.1.31"
+inotify = "0.9.2"

 [dev-dependencies]
 serial_test = "0.5.0"
--- a/src/agent/rustjail/src/capabilities.rs
+++ b/src/agent/rustjail/src/capabilities.rs
@@ -9,97 +9,44 @@
 use crate::log_child;
 use crate::sync::write_count;
 use anyhow::{anyhow, Result};
-use caps::{self, CapSet, Capability, CapsHashSet};
+use caps::{self, runtime, CapSet, Capability, CapsHashSet};
 use oci::LinuxCapabilities;
-use std::collections::HashMap;
 use std::os::unix::io::RawFd;
-
-lazy_static! {
-    pub static ref CAPSMAP: HashMap<String, Capability> = {
-        let mut m = HashMap::new();
-        m.insert("CAP_CHOWN".to_string(), Capability::CAP_CHOWN);
-        m.insert("CAP_DAC_OVERRIDE".to_string(), Capability::CAP_DAC_OVERRIDE);
-        m.insert(
-            "CAP_DAC_READ_SEARCH".to_string(),
-            Capability::CAP_DAC_READ_SEARCH,
-        );
-        m.insert("CAP_FOWNER".to_string(), Capability::CAP_FOWNER);
-        m.insert("CAP_FSETID".to_string(), Capability::CAP_FSETID);
-        m.insert("CAP_KILL".to_string(), Capability::CAP_KILL);
-        m.insert("CAP_SETGID".to_string(), Capability::CAP_SETGID);
-        m.insert("CAP_SETUID".to_string(), Capability::CAP_SETUID);
-        m.insert("CAP_SETPCAP".to_string(), Capability::CAP_SETPCAP);
-        m.insert(
-            "CAP_LINUX_IMMUTABLE".to_string(),
-            Capability::CAP_LINUX_IMMUTABLE,
-        );
-        m.insert(
-            "CAP_NET_BIND_SERVICE".to_string(),
-            Capability::CAP_NET_BIND_SERVICE,
-        );
-        m.insert(
-            "CAP_NET_BROADCAST".to_string(),
-            Capability::CAP_NET_BROADCAST,
-        );
-        m.insert("CAP_NET_ADMIN".to_string(), Capability::CAP_NET_ADMIN);
-        m.insert("CAP_NET_RAW".to_string(), Capability::CAP_NET_RAW);
-        m.insert("CAP_IPC_LOCK".to_string(), Capability::CAP_IPC_LOCK);
-        m.insert("CAP_IPC_OWNER".to_string(), Capability::CAP_IPC_OWNER);
-        m.insert("CAP_SYS_MODULE".to_string(), Capability::CAP_SYS_MODULE);
-        m.insert("CAP_SYS_RAWIO".to_string(), Capability::CAP_SYS_RAWIO);
-        m.insert("CAP_SYS_CHROOT".to_string(), Capability::CAP_SYS_CHROOT);
-        m.insert("CAP_SYS_PTRACE".to_string(), Capability::CAP_SYS_PTRACE);
-        m.insert("CAP_SYS_PACCT".to_string(), Capability::CAP_SYS_PACCT);
-        m.insert("CAP_SYS_ADMIN".to_string(), Capability::CAP_SYS_ADMIN);
-        m.insert("CAP_SYS_BOOT".to_string(), Capability::CAP_SYS_BOOT);
-        m.insert("CAP_SYS_NICE".to_string(), Capability::CAP_SYS_NICE);
-        m.insert("CAP_SYS_RESOURCE".to_string(), Capability::CAP_SYS_RESOURCE);
-        m.insert("CAP_SYS_TIME".to_string(), Capability::CAP_SYS_TIME);
-        m.insert(
-            "CAP_SYS_TTY_CONFIG".to_string(),
-            Capability::CAP_SYS_TTY_CONFIG,
-        );
-        m.insert("CAP_MKNOD".to_string(), Capability::CAP_MKNOD);
-        m.insert("CAP_LEASE".to_string(), Capability::CAP_LEASE);
-        m.insert("CAP_AUDIT_WRITE".to_string(), Capability::CAP_AUDIT_WRITE);
-        m.insert("CAP_AUDIT_CONTROL".to_string(), Capability::CAP_AUDIT_WRITE);
-        m.insert("CAP_SETFCAP".to_string(), Capability::CAP_SETFCAP);
-        m.insert("CAP_MAC_OVERRIDE".to_string(), Capability::CAP_MAC_OVERRIDE);
-        m.insert("CAP_SYSLOG".to_string(), Capability::CAP_SYSLOG);
-        m.insert("CAP_WAKE_ALARM".to_string(), Capability::CAP_WAKE_ALARM);
-        m.insert(
-            "CAP_BLOCK_SUSPEND".to_string(),
-            Capability::CAP_BLOCK_SUSPEND,
-        );
-        m.insert("CAP_AUDIT_READ".to_string(), Capability::CAP_AUDIT_READ);
-        m
-    };
-}
+use std::str::FromStr;

 fn to_capshashset(cfd_log: RawFd, caps: &[String]) -> CapsHashSet {
    let mut r = CapsHashSet::new();

    for cap in caps.iter() {
-        let c = CAPSMAP.get(cap);
-
-        if c.is_none() {
-            log_child!(cfd_log, "{} is not a cap", cap);
-            continue;
-        }
-
-        r.insert(*c.unwrap());
+        match Capability::from_str(cap) {
+            Err(_) => {
+                log_child!(cfd_log, "{} is not a cap", cap);
+                continue;
+            }
+            Ok(c) => r.insert(c),
+        };
    }

    r
 }

+pub fn get_all_caps() -> CapsHashSet {
+    let mut caps_set =
+        runtime::procfs_all_supported(None).unwrap_or_else(|_| runtime::thread_all_supported());
+    if caps_set.is_empty() {
+        caps_set = caps::all();
+    }
+    caps_set
+}
+
 pub fn reset_effective() -> Result<()> {
-    caps::set(None, CapSet::Effective, caps::all()).map_err(|e| anyhow!(e.to_string()))?;
+    let all = get_all_caps();
+    caps::set(None, CapSet::Effective, &all).map_err(|e| anyhow!(e.to_string()))?;
    Ok(())
 }

 pub fn drop_privileges(cfd_log: RawFd, caps: &LinuxCapabilities) -> Result<()> {
-    let all = caps::all();
+    let all = get_all_caps();

    for c in all.difference(&to_capshashset(cfd_log, caps.bounding.as_ref())) {
        caps::drop(None, CapSet::Bounding, *c).map_err(|e| anyhow!(e.to_string()))?;
@@ -108,26 +55,26 @@ pub fn drop_privileges(cfd_log: RawFd, caps: &LinuxCapabilities) -> Result<()> {
    caps::set(
        None,
        CapSet::Effective,
-        to_capshashset(cfd_log, caps.effective.as_ref()),
+        &to_capshashset(cfd_log, caps.effective.as_ref()),
    )
    .map_err(|e| anyhow!(e.to_string()))?;
    caps::set(
        None,
        CapSet::Permitted,
-        to_capshashset(cfd_log, caps.permitted.as_ref()),
+        &to_capshashset(cfd_log, caps.permitted.as_ref()),
    )
    .map_err(|e| anyhow!(e.to_string()))?;
    caps::set(
        None,
        CapSet::Inheritable,
-        to_capshashset(cfd_log, caps.inheritable.as_ref()),
+        &to_capshashset(cfd_log, caps.inheritable.as_ref()),
    )
    .map_err(|e| anyhow!(e.to_string()))?;

    let _ = caps::set(
        None,
        CapSet::Ambient,
-        to_capshashset(cfd_log, caps.ambient.as_ref()),
+        &to_capshashset(cfd_log, caps.ambient.as_ref()),
    )
    .map_err(|_| log_child!(cfd_log, "failed to set ambient capability"));

--- a/src/agent/rustjail/src/cgroups/fs/mod.rs
+++ b/src/agent/rustjail/src/cgroups/fs/mod.rs
@@ -37,6 +37,8 @@ use std::collections::HashMap;
 use std::fs;
 use std::path::Path;

+const GUEST_CPUS_PATH: &str = "/sys/devices/system/cpu/online";
+
 // Convenience macro to obtain the scope logger
 macro_rules! sl {
    () => {
@@ -60,7 +62,6 @@ pub struct Manager {
    pub cpath: String,
    #[serde(skip)]
    cgroup: cgroups::Cgroup,
-    relative_paths: HashMap<String, String>,
 }

 // set_resource is used to set reources by cgroup controller.
@@ -104,21 +105,21 @@ impl CgroupManager for Manager {

        // set block_io resources
        if let Some(blkio) = &r.block_io {
-            set_block_io_resources(&self.cgroup, blkio, res)?;
+            set_block_io_resources(&self.cgroup, blkio, res);
        }

        // set hugepages resources
        if !r.hugepage_limits.is_empty() {
-            set_hugepages_resources(&self.cgroup, &r.hugepage_limits, res)?;
+            set_hugepages_resources(&self.cgroup, &r.hugepage_limits, res);
        }

        // set network resources
        if let Some(network) = &r.network {
-            set_network_resources(&self.cgroup, network, res)?;
+            set_network_resources(&self.cgroup, network, res);
        }

        // set devices resources
-        set_devices_resources(&self.cgroup, &r.devices, res)?;
+        set_devices_resources(&self.cgroup, &r.devices, res);
        info!(sl!(), "resources after processed {:?}", res);

        // apply resources
@@ -199,7 +200,7 @@ fn set_network_resources(
    _cg: &cgroups::Cgroup,
    network: &LinuxNetwork,
    res: &mut cgroups::Resources,
-) -> Result<()> {
+) {
    info!(sl!(), "cgroup manager set network");

    // set classid
@@ -220,14 +221,13 @@ fn set_network_resources(
    }

    res.network.priorities = priorities;
-    Ok(())
 }

 fn set_devices_resources(
    _cg: &cgroups::Cgroup,
    device_resources: &[LinuxDeviceCgroup],
    res: &mut cgroups::Resources,
-) -> Result<()> {
+) {
    info!(sl!(), "cgroup manager set devices");
    let mut devices = vec![];

@@ -250,15 +250,13 @@ fn set_devices_resources(
    }

    res.devices.devices = devices;
-
-    Ok(())
 }

 fn set_hugepages_resources(
    _cg: &cgroups::Cgroup,
    hugepage_limits: &[LinuxHugepageLimit],
    res: &mut cgroups::Resources,
-) -> Result<()> {
+) {
    info!(sl!(), "cgroup manager set hugepage");
    let mut limits = vec![];

@@ -270,41 +268,25 @@ fn set_hugepages_resources(
        limits.push(hr);
    }
    res.hugepages.limits = limits;
-
-    Ok(())
 }

 fn set_block_io_resources(
-    cg: &cgroups::Cgroup,
+    _cg: &cgroups::Cgroup,
    blkio: &LinuxBlockIO,
    res: &mut cgroups::Resources,
-) -> Result<()> {
+) {
    info!(sl!(), "cgroup manager set block io");

-    if cg.v2() {
-        res.blkio.weight = convert_blk_io_to_v2_value(blkio.weight);
-        res.blkio.leaf_weight = convert_blk_io_to_v2_value(blkio.leaf_weight);
-    } else {
-        res.blkio.weight = blkio.weight;
-        res.blkio.leaf_weight = blkio.leaf_weight;
-    }
+    res.blkio.weight = blkio.weight;
+    res.blkio.leaf_weight = blkio.leaf_weight;

    let mut blk_device_resources = vec![];
    for d in blkio.weight_device.iter() {
-        let (w, lw) = if cg.v2() {
-            (
-                convert_blk_io_to_v2_value(blkio.weight),
-                convert_blk_io_to_v2_value(blkio.leaf_weight),
-            )
-        } else {
-            (blkio.weight, blkio.leaf_weight)
-        };
-
        let dr = BlkIoDeviceResource {
            major: d.blk.major as u64,
            minor: d.blk.minor as u64,
-            weight: w,
-            leaf_weight: lw,
+            weight: blkio.weight,
+            leaf_weight: blkio.leaf_weight,
        };
        blk_device_resources.push(dr);
    }
@@ -318,8 +300,6 @@ fn set_block_io_resources(
        build_blk_io_device_throttle_resource(&blkio.throttle_read_iops_device);
    res.blkio.throttle_write_iops_device =
        build_blk_io_device_throttle_resource(&blkio.throttle_write_iops_device);
-
-    Ok(())
 }

 fn set_cpu_resources(cg: &cgroups::Cgroup, cpu: &LinuxCPU) -> Result<()> {
@@ -387,7 +367,7 @@ fn set_memory_resources(cg: &cgroups::Cgroup, memory: &LinuxMemory, update: bool
    }

    if let Some(swappiness) = memory.swappiness {
-        if swappiness >= 0 && swappiness <= 100 {
+        if (0..=100).contains(&swappiness) {
            mem_controller.set_swappiness(swappiness as u64)?;
        } else {
            return Err(anyhow!(
@@ -946,38 +926,28 @@ pub fn get_mounts() -> Result<HashMap<String, String>> {
    Ok(m)
 }

-fn new_cgroup(
-    h: Box<dyn cgroups::Hierarchy>,
-    path: &str,
-    relative_paths: HashMap<String, String>,
-) -> Cgroup {
+fn new_cgroup(h: Box<dyn cgroups::Hierarchy>, path: &str) -> Cgroup {
    let valid_path = path.trim_start_matches('/').to_string();
-    cgroups::Cgroup::new_with_relative_paths(h, valid_path.as_str(), relative_paths)
+    cgroups::Cgroup::new(h, valid_path.as_str())
 }

 impl Manager {
    pub fn new(cpath: &str) -> Result<Self> {
        let mut m = HashMap::new();
-        let mut relative_paths = HashMap::new();

        let paths = get_paths()?;
        let mounts = get_mounts()?;

-        for (key, value) in &paths {
+        for key in paths.keys() {
            let mnt = mounts.get(key);

            if mnt.is_none() {
                continue;
            }

-            let p = if value == "/" {
-                format!("{}/{}", mnt.unwrap(), cpath)
-            } else {
-                format!("{}{}/{}", mnt.unwrap(), value, cpath)
-            };
+            let p = format!("{}/{}", mnt.unwrap(), cpath);

            m.insert(key.to_string(), p);
-            relative_paths.insert(key.to_string(), value.to_string());
        }

        Ok(Self {
@@ -985,13 +955,12 @@ impl Manager {
            mounts,
            // rels: paths,
            cpath: cpath.to_string(),
-            cgroup: new_cgroup(cgroups::hierarchies::auto(), cpath, relative_paths.clone()),
-            relative_paths,
+            cgroup: new_cgroup(cgroups::hierarchies::auto(), cpath),
        })
    }

    pub fn update_cpuset_path(&self, guest_cpuset: &str, container_cpuset: &str) -> Result<()> {
-        if guest_cpuset == "" {
+        if guest_cpuset.is_empty() {
            return Ok(());
        }
        info!(sl!(), "update_cpuset_path to: {}", guest_cpuset);
@@ -1031,11 +1000,7 @@ impl Manager {
                .unwrap()
                .trim_start_matches(root_path.to_str().unwrap());
            info!(sl!(), "updating cpuset for parent path {:?}", &r_path);
-            let cg = new_cgroup(
-                cgroups::hierarchies::auto(),
-                &r_path,
-                self.relative_paths.clone(),
-            );
+            let cg = new_cgroup(cgroups::hierarchies::auto(), &r_path);
            let cpuset_controller: &CpuSetController = cg.controller_of().unwrap();
            cpuset_controller.set_cpus(guest_cpuset)?;
        }
@@ -1064,23 +1029,10 @@ impl Manager {
    }
 }

+// get the guest's online cpus.
 pub fn get_guest_cpuset() -> Result<String> {
-    // for cgroup v2
-    if cgroups::hierarchies::is_cgroup2_unified_mode() {
-        let c = fs::read_to_string("/sys/fs/cgroup/cpuset.cpus.effective")?;
-        return Ok(c);
-    }
-
-    // for cgroup v1
-    let m = get_mounts()?;
-    if m.get("cpuset").is_none() {
-        warn!(sl!(), "no cpuset cgroup!");
-        return Err(nix::Error::Sys(Errno::ENOENT).into());
-    }
-
-    let p = format!("{}/cpuset.cpus", m.get("cpuset").unwrap());
-    let c = fs::read_to_string(p.as_str())?;
-    Ok(c)
+    let c = fs::read_to_string(GUEST_CPUS_PATH)?;
+    Ok(c.trim().to_string())
 }

 // Since the OCI spec is designed for cgroup v1, in some cases
@@ -1123,20 +1075,6 @@ fn convert_memory_swap_to_v2_value(memory_swap: i64, memory: i64) -> Result<i64>
    Ok(memory_swap - memory)
 }

-// Since the OCI spec is designed for cgroup v1, in some cases
-// there is need to convert from the cgroup v1 configuration to cgroup v2
-// the formula for BlkIOWeight is y = (1 + (x - 10) * 9999 / 990)
-// convert linearly from [10-1000] to [1-10000]
-// https://github.com/opencontainers/runc/blob/a5847db387ae28c0ca4ebe4beee1a76900c86414/libcontainer/cgroups/utils.go#L382
-fn convert_blk_io_to_v2_value(blk_io_weight: Option<u16>) -> Option<u16> {
-    let v = blk_io_weight.unwrap_or(0);
-    if v != 0 {
-        return None;
-    }
-
-    Some(1 + (v - 10) * 9999 / 990 as u16)
-}
-
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/src/agent/rustjail/src/cgroups/notifier.rs
+++ b/src/agent/rustjail/src/cgroups/notifier.rs
@@ -3,16 +3,18 @@
 // SPDX-License-Identifier: Apache-2.0
 //

-use anyhow::{anyhow, Result};
+use anyhow::{anyhow, Context, Result};
 use eventfd::{eventfd, EfdFlags};
 use nix::sys::eventfd;
-use nix::sys::inotify::{AddWatchFlags, InitFlags, Inotify};
 use std::fs::{self, File};
-use std::io::Read;
 use std::os::unix::io::{AsRawFd, FromRawFd};
 use std::path::{Path, PathBuf};
-use std::sync::mpsc::{self, Receiver};
-use std::thread;
+
+use crate::pipestream::PipeStream;
+use futures::StreamExt as _;
+use inotify::{Inotify, WatchMask};
+use tokio::io::AsyncReadExt;
+use tokio::sync::mpsc::{channel, Receiver};

 // Convenience macro to obtain the scope logger
 macro_rules! sl {
@@ -21,11 +23,11 @@ macro_rules! sl {
    };
 }

-pub fn notify_oom(cid: &str, cg_dir: String) -> Result<Receiver<String>> {
+pub async fn notify_oom(cid: &str, cg_dir: String) -> Result<Receiver<String>> {
    if cgroups::hierarchies::is_cgroup2_unified_mode() {
-        return notify_on_oom_v2(cid, cg_dir);
+        return notify_on_oom_v2(cid, cg_dir).await;
    }
-    notify_on_oom(cid, cg_dir)
+    notify_on_oom(cid, cg_dir).await
 }

 // get_value_from_cgroup parse cgroup file with `Flat keyed`
@@ -52,11 +54,11 @@ fn get_value_from_cgroup(path: &PathBuf, key: &str) -> Result<i64> {

 // notify_on_oom returns channel on which you can expect event about OOM,
 // if process died without OOM this channel will be closed.
-pub fn notify_on_oom_v2(containere_id: &str, cg_dir: String) -> Result<Receiver<String>> {
-    register_memory_event_v2(containere_id, cg_dir, "memory.events", "cgroup.events")
+pub async fn notify_on_oom_v2(containere_id: &str, cg_dir: String) -> Result<Receiver<String>> {
+    register_memory_event_v2(containere_id, cg_dir, "memory.events", "cgroup.events").await
 }

-fn register_memory_event_v2(
+async fn register_memory_event_v2(
    containere_id: &str,
    cg_dir: String,
    memory_event_name: &str,
@@ -73,54 +75,54 @@ fn register_memory_event_v2(
        "register_memory_event_v2 cgroup_event_control_path: {:?}", &cgroup_event_control_path
    );

-    let fd = Inotify::init(InitFlags::empty()).unwrap();
+    let mut inotify = Inotify::init().context("Failed to initialize inotify")?;

    // watching oom kill
-    let ev_fd = fd
-        .add_watch(&event_control_path, AddWatchFlags::IN_MODIFY)
-        .unwrap();
+    let ev_wd = inotify.add_watch(&event_control_path, WatchMask::MODIFY)?;
    // Because no `unix.IN_DELETE|unix.IN_DELETE_SELF` event for cgroup file system, so watching all process exited
-    let cg_fd = fd
-        .add_watch(&cgroup_event_control_path, AddWatchFlags::IN_MODIFY)
-        .unwrap();
-    info!(sl!(), "ev_fd: {:?}", ev_fd);
-    info!(sl!(), "cg_fd: {:?}", cg_fd);
+    let cg_wd = inotify.add_watch(&cgroup_event_control_path, WatchMask::MODIFY)?;

-    let (sender, receiver) = mpsc::channel();
+    info!(sl!(), "ev_wd: {:?}", ev_wd);
+    info!(sl!(), "cg_wd: {:?}", cg_wd);
+
+    let (sender, receiver) = channel(100);
    let containere_id = containere_id.to_string();

-    thread::spawn(move || {
-        loop {
-            let events = fd.read_events().unwrap();
+    tokio::spawn(async move {
+        let mut buffer = [0; 32];
+        let mut stream = inotify
+            .event_stream(&mut buffer)
+            .expect("create inotify event stream failed");
+
+        while let Some(event_or_error) = stream.next().await {
+            let event = event_or_error.unwrap();
            info!(
                sl!(),
-                "container[{}] get events for container: {:?}", &containere_id, &events
+                "container[{}] get event for container: {:?}", &containere_id, &event
            );
+            // info!("is1: {}", event.wd == wd1);
+            info!(sl!(), "event.wd: {:?}", event.wd);

-            for event in events {
-                if event.mask & AddWatchFlags::IN_MODIFY != AddWatchFlags::IN_MODIFY {
-                    continue;
+            if event.wd == ev_wd {
+                let oom = get_value_from_cgroup(&event_control_path, "oom_kill");
+                if oom.unwrap_or(0) > 0 {
+                    let _ = sender.send(containere_id.clone()).await.map_err(|e| {
+                        error!(sl!(), "send containere_id failed, error: {:?}", e);
+                    });
+                    return;
                }
-                info!(sl!(), "event.wd: {:?}", event.wd);
-
-                if event.wd == ev_fd {
-                    let oom = get_value_from_cgroup(&event_control_path, "oom_kill");
-                    if oom.unwrap_or(0) > 0 {
-                        sender.send(containere_id.clone()).unwrap();
-                        return;
-                    }
-                } else if event.wd == cg_fd {
-                    let pids = get_value_from_cgroup(&cgroup_event_control_path, "populated");
-                    if pids.unwrap_or(-1) == 0 {
-                        return;
-                    }
+            } else if event.wd == cg_wd {
+                let pids = get_value_from_cgroup(&cgroup_event_control_path, "populated");
+                if pids.unwrap_or(-1) == 0 {
+                    return;
                }
            }
-            // When a cgroup is destroyed, an event is sent to eventfd.
-            // So if the control path is gone, return instead of notifying.
-            if !Path::new(&event_control_path).exists() {
-                return;
-            }
+        }
+
+        // When a cgroup is destroyed, an event is sent to eventfd.
+        // So if the control path is gone, return instead of notifying.
+        if !Path::new(&event_control_path).exists() {
+            return;
        }
    });

@@ -129,17 +131,17 @@ fn register_memory_event_v2(

 // notify_on_oom returns channel on which you can expect event about OOM,
 // if process died without OOM this channel will be closed.
-fn notify_on_oom(cid: &str, dir: String) -> Result<Receiver<String>> {
-    if dir == "" {
+async fn notify_on_oom(cid: &str, dir: String) -> Result<Receiver<String>> {
+    if dir.is_empty() {
        return Err(anyhow!("memory controller missing"));
    }

-    register_memory_event(cid, dir, "memory.oom_control", "")
+    register_memory_event(cid, dir, "memory.oom_control", "").await
 }

 // level is one of "low", "medium", or "critical"
-fn notify_memory_pressure(cid: &str, dir: String, level: &str) -> Result<Receiver<String>> {
-    if dir == "" {
+async fn notify_memory_pressure(cid: &str, dir: String, level: &str) -> Result<Receiver<String>> {
+    if dir.is_empty() {
        return Err(anyhow!("memory controller missing"));
    }

@@ -147,10 +149,10 @@ fn notify_memory_pressure(cid: &str, dir: String, level: &str) -> Result<Receive
        return Err(anyhow!("invalid pressure level {}", level));
    }

-    register_memory_event(cid, dir, "memory.pressure_level", level)
+    register_memory_event(cid, dir, "memory.pressure_level", level).await
 }

-fn register_memory_event(
+async fn register_memory_event(
    cid: &str,
    cg_dir: String,
    event_name: &str,
@@ -163,7 +165,7 @@ fn register_memory_event(

    let event_control_path = Path::new(&cg_dir).join("cgroup.event_control");
    let data;
-    if arg == "" {
+    if arg.is_empty() {
        data = format!("{} {}", eventfd, event_file.as_raw_fd());
    } else {
        data = format!("{} {} {}", eventfd, event_file.as_raw_fd(), arg);
@@ -171,15 +173,16 @@ fn register_memory_event(

    fs::write(&event_control_path, data)?;

-    let mut eventfd_file = unsafe { File::from_raw_fd(eventfd) };
+    let mut eventfd_stream = unsafe { PipeStream::from_raw_fd(eventfd) };

-    let (sender, receiver) = mpsc::channel();
+    let (sender, receiver) = tokio::sync::mpsc::channel(100);
    let containere_id = cid.to_string();

-    thread::spawn(move || {
+    tokio::spawn(async move {
        loop {
-            let mut buf = [0; 8];
-            match eventfd_file.read(&mut buf) {
+            let sender = sender.clone();
+            let mut buf = [0u8; 8];
+            match eventfd_stream.read(&mut buf).await {
                Err(err) => {
                    warn!(sl!(), "failed to read from eventfd: {:?}", err);
                    return;
@@ -188,7 +191,10 @@ fn register_memory_event(
                    let content = fs::read_to_string(path.clone());
                    info!(
                        sl!(),
-                        "OOM event for container: {}, content: {:?}", &containere_id, content
+                        "cgroup event for container: {}, path: {:?}, content: {:?}",
+                        &containere_id,
+                        &path,
+                        content
                    );
                }
            }
@@ -198,7 +204,10 @@ fn register_memory_event(
            if !Path::new(&event_control_path).exists() {
                return;
            }
-            sender.send(containere_id.clone()).unwrap();
+
+            let _ = sender.send(containere_id.clone()).await.map_err(|e| {
+                error!(sl!(), "send containere_id failed, error: {:?}", e);
+            });
        }
    });

--- a/src/agent/rustjail/src/container.rs
+++ b/src/agent/rustjail/src/container.rs
--- a/src/agent/rustjail/src/lib.rs
+++ b/src/agent/rustjail/src/lib.rs
@@ -40,12 +40,13 @@ pub mod capabilities;
 pub mod cgroups;
 pub mod container;
 pub mod mount;
+pub mod pipestream;
 pub mod process;
-pub mod reaper;
 pub mod specconv;
 pub mod sync;
+pub mod sync_with_async;
+pub mod utils;
 pub mod validator;
-
 // pub mod factory;
 //pub mod configs;
 // pub mod devices;
--- a/src/agent/rustjail/src/mount.rs
+++ b/src/agent/rustjail/src/mount.rs
@@ -235,7 +235,7 @@ pub fn init_rootfs(
            if m.r#type == "bind" {
                for o in &m.options {
                    if let Some(fl) = PROPAGATION.get(o.as_str()) {
-                        let dest = format!("{}{}", &rootfs, &m.destination);
+                        let dest = secure_join(rootfs, &m.destination);
                        mount(None::<&str>, dest.as_str(), None::<&str>, *fl, None::<&str>)?;
                    }
                }
@@ -677,6 +677,52 @@ fn parse_mount(m: &Mount) -> (MsFlags, String) {
    (flags, data.join(","))
 }

+// This function constructs a canonicalized path by combining the `rootfs` and `unsafe_path` elements.
+// The resulting path is guaranteed to be ("below" / "in a directory under") the `rootfs` directory.
+//
+// Parameters:
+//
+// - `rootfs` is the absolute path to the root of the container's root filesystem directory.
+// - `unsafe_path` is a path inside a container. It is unsafe since it may try to "escape" from the container's
+//    rootfs by using one or more "../" path elements, or because it is a symlink to another path.
+fn secure_join(rootfs: &str, unsafe_path: &str) -> String {
+    let mut path = PathBuf::from(format!("{}/", rootfs));
+    let unsafe_p = Path::new(&unsafe_path);
+
+    for it in unsafe_p.iter() {
+        let it_p = Path::new(&it);
+
+        // if it_p starts with "/", path.push(it) would replace path with it, so skip "/"
+        if it_p.has_root() {
+            continue;
+        };
+
+        path.push(it);
+        if let Ok(v) = path.read_link() {
+            if v.is_absolute() {
+                path = PathBuf::from(format!("{}{}", rootfs, v.to_str().unwrap().to_string()));
+            } else {
+                path.pop();
+                for it in v.iter() {
+                    path.push(it);
+                    if path.exists() {
+                        path = path.canonicalize().unwrap();
+                        if !path.starts_with(rootfs) {
+                            path = PathBuf::from(rootfs.to_string());
+                        }
+                    }
+                }
+            }
+        }
+        // skip any ".."
+        if path.ends_with("..") {
+            path.pop();
+        }
+    }
+
+    path.to_str().unwrap().to_string()
+}
+
 fn mount_from(
    cfd_log: RawFd,
    m: &Mount,
@@ -686,14 +732,14 @@ fn mount_from(
    _label: &str,
 ) -> Result<()> {
    let d = String::from(data);
-    let dest = format!("{}{}", rootfs, &m.destination);
+    let dest = secure_join(rootfs, &m.destination);

    let src = if m.r#type.as_str() == "bind" {
        let src = fs::canonicalize(m.source.as_str())?;
-        let dir = if src.is_file() {
-            Path::new(&dest).parent().unwrap()
-        } else {
+        let dir = if src.is_dir() {
            Path::new(&dest)
+        } else {
+            Path::new(&dest).parent().unwrap()
        };

        let _ = fs::create_dir_all(&dir).map_err(|e| {
@@ -706,7 +752,7 @@ fn mount_from(
        });

        // make sure file exists so we can bind over it
-        if src.is_file() {
+        if !src.is_dir() {
            let _ = OpenOptions::new().create(true).write(true).open(&dest);
        }
        src.to_str().unwrap().to_string()
@@ -970,6 +1016,10 @@ fn readonly_path(path: &str) -> Result<()> {
 mod tests {
    use super::*;
    use crate::skip_if_not_root;
+    use std::fs::create_dir;
+    use std::fs::create_dir_all;
+    use std::fs::remove_dir_all;
+    use std::os::unix::fs;
    use std::os::unix::io::AsRawFd;
    use tempfile::tempdir;

@@ -999,7 +1049,7 @@ mod tests {
        );

        let rootfs = tempdir().unwrap();
-        let ret = fs::create_dir(rootfs.path().join("dev"));
+        let ret = create_dir(rootfs.path().join("dev"));
        assert!(ret.is_ok(), "Got: {:?}", ret);

        spec.root = Some(oci::Root {
@@ -1010,8 +1060,8 @@ mod tests {
        // there is no spec.mounts, but should pass
        let ret = init_rootfs(stdout_fd, &spec, &cpath, &mounts, true);
        assert!(ret.is_ok(), "Should pass. Got: {:?}", ret);
-        let _ = fs::remove_dir_all(rootfs.path().join("dev"));
-        let _ = fs::create_dir(rootfs.path().join("dev"));
+        let _ = remove_dir_all(rootfs.path().join("dev"));
+        let _ = create_dir(rootfs.path().join("dev"));

        // Adding bad mount point to spec.mounts
        spec.mounts.push(oci::Mount {
@@ -1029,8 +1079,8 @@ mod tests {
            ret
        );
        spec.mounts.pop();
-        let _ = fs::remove_dir_all(rootfs.path().join("dev"));
-        let _ = fs::create_dir(rootfs.path().join("dev"));
+        let _ = remove_dir_all(rootfs.path().join("dev"));
+        let _ = create_dir(rootfs.path().join("dev"));

        // mounting a cgroup
        spec.mounts.push(oci::Mount {
@@ -1043,8 +1093,8 @@ mod tests {
        let ret = init_rootfs(stdout_fd, &spec, &cpath, &mounts, true);
        assert!(ret.is_ok(), "Should pass. Got: {:?}", ret);
        spec.mounts.pop();
-        let _ = fs::remove_dir_all(rootfs.path().join("dev"));
-        let _ = fs::create_dir(rootfs.path().join("dev"));
+        let _ = remove_dir_all(rootfs.path().join("dev"));
+        let _ = create_dir(rootfs.path().join("dev"));

        // mounting /dev
        spec.mounts.push(oci::Mount {
@@ -1081,11 +1131,11 @@ mod tests {
        cgroup_mounts.insert("cpu".to_string(), "cpu".to_string());
        cgroup_mounts.insert("memory".to_string(), "memory".to_string());

-        let ret = fs::create_dir_all(tempdir.path().join("cgroups"));
+        let ret = create_dir_all(tempdir.path().join("cgroups"));
        assert!(ret.is_ok(), "Should pass. Got {:?}", ret);
-        let ret = fs::create_dir_all(tempdir.path().join("cpu"));
+        let ret = create_dir_all(tempdir.path().join("cpu"));
        assert!(ret.is_ok(), "Should pass. Got {:?}", ret);
-        let ret = fs::create_dir_all(tempdir.path().join("memory"));
+        let ret = create_dir_all(tempdir.path().join("memory"));
        assert!(ret.is_ok(), "Should pass. Got {:?}", ret);

        let ret = mount_cgroups(
@@ -1233,4 +1283,89 @@ mod tests {

        assert!(check_proc_mount(&mount).is_err());
    }
+
+    #[test]
+    fn test_secure_join() {
+        #[derive(Debug)]
+        struct TestData<'a> {
+            name: &'a str,
+            rootfs: &'a str,
+            unsafe_path: &'a str,
+            symlink_path: &'a str,
+            result: &'a str,
+        }
+
+        // create a temporary directory to simulate a container rootfs with symlinks
+        let rootfs_dir = tempdir().expect("failed to create tmpdir");
+        let rootfs_path = rootfs_dir.path().to_str().unwrap();
+
+        let tests = &[
+            TestData {
+                name: "rootfs_not_exist",
+                rootfs: "/home/rootfs",
+                unsafe_path: "a/b/c",
+                symlink_path: "",
+                result: "/home/rootfs/a/b/c",
+            },
+            TestData {
+                name: "relative_path",
+                rootfs: "/home/rootfs",
+                unsafe_path: "../../../a/b/c",
+                symlink_path: "",
+                result: "/home/rootfs/a/b/c",
+            },
+            TestData {
+                name: "skip any ..",
+                rootfs: "/home/rootfs",
+                unsafe_path: "../../../a/../../b/../../c",
+                symlink_path: "",
+                result: "/home/rootfs/a/b/c",
+            },
+            TestData {
+                name: "rootfs is null",
+                rootfs: "",
+                unsafe_path: "",
+                symlink_path: "",
+                result: "/",
+            },
+            TestData {
+                name: "relative softlink beyond container rootfs",
+                rootfs: rootfs_path,
+                unsafe_path: "1",
+                symlink_path: "../../../",
+                result: rootfs_path,
+            },
+            TestData {
+                name: "abs softlink points to the non-exist directory",
+                rootfs: rootfs_path,
+                unsafe_path: "2",
+                symlink_path: "/dddd",
+                result: &format!("{}/dddd", rootfs_path).as_str().to_owned(),
+            },
+            TestData {
+                name: "abs softlink points to the root",
+                rootfs: rootfs_path,
+                unsafe_path: "3",
+                symlink_path: "/",
+                result: &format!("{}/", rootfs_path).as_str().to_owned(),
+            },
+        ];
+
+        for (i, t) in tests.iter().enumerate() {
+            // Create a string containing details of the test
+            let msg = format!("test[{}]: {:?}", i, t);
+
+            // if a symlink path is given, prepare the symlink environment first
+            if t.symlink_path != "" {
+                fs::symlink(t.symlink_path, format!("{}/{}", t.rootfs, t.unsafe_path)).unwrap();
+            }
+            let result = secure_join(t.rootfs, t.unsafe_path);
+
+            // Update the test details string with the results of the call
+            let msg = format!("{}, result: {:?}", msg, result);
+
+            // Perform the checks
+            assert!(result == t.result, msg);
+        }
+    }
 }
--- a/src/agent/rustjail/src/pipestream.rs
+++ b/src/agent/rustjail/src/pipestream.rs
@@ -0,0 +1,170 @@
+// Copyright (c) 2020 Ant Group
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+//! Async support for pipes or anything else that has a file descriptor
+
+use nix::unistd;
+use std::{
+    fmt, io,
+    io::{Read, Result, Write},
+    mem,
+    os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, RawFd},
+    pin::Pin,
+    task::{Context, Poll},
+};
+
+use futures::ready;
+use tokio::io::{unix::AsyncFd, AsyncRead, AsyncWrite, ReadBuf};
+
+fn set_nonblocking(fd: RawFd) {
+    unsafe {
+        libc::fcntl(fd, libc::F_SETFL, libc::O_NONBLOCK);
+    }
+}
+
+struct StreamFd(RawFd);
+
+impl io::Read for &StreamFd {
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        match unistd::read(self.0, buf) {
+            Ok(l) => Ok(l),
+            Err(e) => Err(e.as_errno().unwrap().into()),
+        }
+    }
+}
+
+impl io::Write for &StreamFd {
+    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+        match unistd::write(self.0, buf) {
+            Ok(l) => Ok(l),
+            Err(e) => Err(e.as_errno().unwrap().into()),
+        }
+    }
+
+    fn flush(&mut self) -> io::Result<()> {
+        Ok(())
+    }
+}
+
+impl StreamFd {
+    fn close(&mut self) -> io::Result<()> {
+        match unistd::close(self.0) {
+            Ok(()) => Ok(()),
+            Err(e) => Err(e.as_errno().unwrap().into()),
+        }
+    }
+}
+
+impl Drop for StreamFd {
+    fn drop(&mut self) {
+        self.close().ok();
+    }
+}
+
+impl AsRawFd for StreamFd {
+    fn as_raw_fd(&self) -> RawFd {
+        self.0
+    }
+}
+
+pub struct PipeStream(AsyncFd<StreamFd>);
+
+impl PipeStream {
+    pub fn new(fd: RawFd) -> Result<Self> {
+        set_nonblocking(fd);
+        Ok(Self(AsyncFd::new(StreamFd(fd))?))
+    }
+
+    pub fn shutdown(&mut self) -> io::Result<()> {
+        self.0.get_mut().close()
+    }
+
+    pub fn from_fd(fd: RawFd) -> Self {
+        unsafe { Self::from_raw_fd(fd) }
+    }
+}
+
+impl AsRawFd for PipeStream {
+    fn as_raw_fd(&self) -> RawFd {
+        self.0.as_raw_fd()
+    }
+}
+
+impl IntoRawFd for PipeStream {
+    fn into_raw_fd(self) -> RawFd {
+        let fd = self.as_raw_fd();
+        mem::forget(self);
+        fd
+    }
+}
+
+impl FromRawFd for PipeStream {
+    unsafe fn from_raw_fd(fd: RawFd) -> Self {
+        Self::new(fd).unwrap()
+    }
+}
+
+impl fmt::Debug for PipeStream {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "PipeStream({})", self.as_raw_fd())
+    }
+}
+
+impl AsyncRead for PipeStream {
+    fn poll_read(
+        self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+        buf: &mut ReadBuf<'_>,
+    ) -> Poll<Result<()>> {
+        let b;
+        unsafe {
+            b = &mut *(buf.unfilled_mut() as *mut [mem::MaybeUninit<u8>] as *mut [u8]);
+        };
+
+        loop {
+            let mut guard = ready!(self.0.poll_read_ready(cx))?;
+
+            match guard.try_io(|inner| inner.get_ref().read(b)) {
+                Ok(Ok(n)) => {
+                    unsafe {
+                        buf.assume_init(n);
+                    }
+                    buf.advance(n);
+                    return Ok(()).into();
+                }
+                Ok(Err(e)) => return Err(e).into(),
+                Err(_would_block) => {
+                    continue;
+                }
+            }
+        }
+    }
+}
+
+impl AsyncWrite for PipeStream {
+    fn poll_write(
+        self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+        buf: &[u8],
+    ) -> Poll<io::Result<usize>> {
+        loop {
+            let mut guard = ready!(self.0.poll_write_ready(cx))?;
+
+            match guard.try_io(|inner| inner.get_ref().write(buf)) {
+                Ok(result) => return Poll::Ready(result),
+                Err(_would_block) => continue,
+            }
+        }
+    }
+
+    fn poll_flush(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<io::Result<()>> {
+        Poll::Ready(Ok(()))
+    }
+
+    fn poll_shutdown(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<io::Result<()>> {
+        self.get_mut().shutdown()?;
+        Poll::Ready(Ok(()))
+    }
+}
--- a/src/agent/rustjail/src/process.rs
+++ b/src/agent/rustjail/src/process.rs
@@ -6,7 +6,7 @@
 use libc::pid_t;
 use std::fs::File;
 use std::os::unix::io::RawFd;
-use std::sync::mpsc::Sender;
+use tokio::sync::mpsc::Sender;

 use nix::fcntl::{fcntl, FcntlArg, OFlag};
 use nix::sys::signal::{self, Signal};
@@ -14,10 +14,31 @@ use nix::sys::wait::{self, WaitStatus};
 use nix::unistd::{self, Pid};
 use nix::Result;

-use crate::reaper::Epoller;
 use oci::Process as OCIProcess;
 use slog::Logger;

+use crate::pipestream::PipeStream;
+use std::collections::HashMap;
+use std::sync::Arc;
+use tokio::io::{split, ReadHalf, WriteHalf};
+use tokio::sync::Mutex;
+use tokio::sync::Notify;
+
+#[derive(Debug, PartialEq, Eq, Hash, Clone)]
+pub enum StreamType {
+    Stdin,
+    Stdout,
+    Stderr,
+    ExitPipeR,
+    TermMaster,
+    ParentStdin,
+    ParentStdout,
+    ParentStderr,
+}
+
+type Reader = Arc<Mutex<ReadHalf<PipeStream>>>;
+type Writer = Arc<Mutex<WriteHalf<PipeStream>>>;
+
 #[derive(Debug)]
 pub struct Process {
    pub exec_id: String,
@@ -41,7 +62,10 @@ pub struct Process {
    pub exit_watchers: Vec<Sender<i32>>,
    pub oci: OCIProcess,
    pub logger: Logger,
-    pub epoller: Option<Epoller>,
+    pub term_exit_notifier: Arc<Notify>,
+
+    readers: HashMap<StreamType, Reader>,
+    writers: HashMap<StreamType, Writer>,
 }

 pub trait ProcessOperations {
@@ -93,7 +117,9 @@ impl Process {
            exit_watchers: Vec::new(),
            oci: ocip.clone(),
            logger: logger.clone(),
-            epoller: None,
+            term_exit_notifier: Arc::new(Notify::new()),
+            readers: HashMap::new(),
+            writers: HashMap::new(),
        };

        info!(logger, "before create console socket!");
@@ -116,27 +142,59 @@ impl Process {
        Ok(p)
    }

-    pub fn close_epoller(&mut self) {
-        if let Some(epoller) = self.epoller.take() {
-            epoller.close();
+    pub fn notify_term_close(&mut self) {
+        let notify = self.term_exit_notifier.clone();
+        notify.notify_one();
+    }
+
+    fn get_fd(&self, stream_type: &StreamType) -> Option<RawFd> {
+        match stream_type {
+            StreamType::Stdin => self.stdin,
+            StreamType::Stdout => self.stdout,
+            StreamType::Stderr => self.stderr,
+            StreamType::ExitPipeR => self.exit_pipe_r,
+            StreamType::TermMaster => self.term_master,
+            StreamType::ParentStdin => self.parent_stdin,
+            StreamType::ParentStdout => self.parent_stdout,
+            StreamType::ParentStderr => self.parent_stderr,
        }
    }

-    pub fn create_epoller(&mut self) -> anyhow::Result<()> {
-        match self.term_master {
-            Some(term_master) => {
-                // add epoller to process
-                let epoller = Epoller::new(&self.logger, term_master)?;
-                self.epoller = Some(epoller)
-            }
-            None => {
-                info!(
-                    self.logger,
-                    "try to add epoller to a process without a term master fd"
-                );
-            }
+    fn get_stream_and_store(&mut self, stream_type: StreamType) -> Option<(Reader, Writer)> {
+        let fd = self.get_fd(&stream_type)?;
+        let stream = PipeStream::from_fd(fd);
+
+        let (reader, writer) = split(stream);
+        let reader = Arc::new(Mutex::new(reader));
+        let writer = Arc::new(Mutex::new(writer));
+
+        self.readers.insert(stream_type.clone(), reader.clone());
+        self.writers.insert(stream_type, writer.clone());
+
+        Some((reader, writer))
+    }
+
+    pub fn get_reader(&mut self, stream_type: StreamType) -> Option<Reader> {
+        if let Some(reader) = self.readers.get(&stream_type) {
+            return Some(reader.clone());
        }
-        Ok(())
+
+        let (reader, _) = self.get_stream_and_store(stream_type)?;
+        Some(reader)
+    }
+
+    pub fn get_writer(&mut self, stream_type: StreamType) -> Option<Writer> {
+        if let Some(writer) = self.writers.get(&stream_type) {
+            return Some(writer.clone());
+        }
+
+        let (_, writer) = self.get_stream_and_store(stream_type)?;
+        Some(writer)
+    }
+
+    pub fn close_stream(&mut self, stream_type: StreamType) {
+        let _ = self.readers.remove(&stream_type);
+        let _ = self.writers.remove(&stream_type);
    }
 }

@@ -195,7 +253,6 @@ mod tests {

        // -1 by default
        assert_eq!(process.pid, -1);
-        assert!(process.wait().is_err());
        // signal to every process in the process
        // group of the calling process.
        process.pid = 0;
--- a/src/agent/rustjail/src/reaper.rs
+++ b/src/agent/rustjail/src/reaper.rs
@@ -1,150 +0,0 @@
-// Copyright (c) 2020 Ant Group
-//
-// SPDX-License-Identifier: Apache-2.0
-//
-
-use nix::fcntl::OFlag;
-use slog::Logger;
-
-use nix::unistd;
-use std::os::unix::io::RawFd;
-
-use anyhow::Result;
-
-const MAX_EVENTS: usize = 2;
-
-#[derive(Debug, Clone)]
-pub struct Epoller {
-    logger: Logger,
-    epoll_fd: RawFd,
-    // rfd and wfd are a pipe's files two ends, this pipe is
-    // used to sync between the readStdio and the process exits.
-    // once the process exits, it will close one end to notify
-    // the readStdio that the process has exited and it should not
-    // wait on the process's terminal which has been inherited
-    // by it's children and hasn't exited.
-    rfd: RawFd,
-    wfd: RawFd,
-}
-
-impl Epoller {
-    pub fn new(logger: &Logger, fd: RawFd) -> Result<Epoller> {
-        let epoll_fd = epoll::create(true)?;
-        let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC)?;
-
-        let mut epoller = Self {
-            logger: logger.clone(),
-            epoll_fd,
-            rfd,
-            wfd,
-        };
-
-        epoller.add(rfd)?;
-        epoller.add(fd)?;
-
-        Ok(epoller)
-    }
-
-    pub fn close_wfd(&self) {
-        let _ = unistd::close(self.wfd);
-    }
-
-    pub fn close(&self) {
-        let _ = unistd::close(self.rfd);
-        let _ = unistd::close(self.wfd);
-        let _ = unistd::close(self.epoll_fd);
-    }
-
-    fn add(&mut self, fd: RawFd) -> Result<()> {
-        info!(self.logger, "Epoller add fd {}", fd);
-        // add creates an epoll which is used to monitor the process's pty's master and
-        // one end of its exit notify pipe. Those files will be registered with level-triggered
-        // notification.
-        epoll::ctl(
-            self.epoll_fd,
-            epoll::ControlOptions::EPOLL_CTL_ADD,
-            fd,
-            epoll::Event::new(
-                epoll::Events::EPOLLHUP
-                    | epoll::Events::EPOLLIN
-                    | epoll::Events::EPOLLERR
-                    | epoll::Events::EPOLLRDHUP,
-                fd as u64,
-            ),
-        )?;
-
-        Ok(())
-    }
-
-    // There will be three cases on the epoller once it poll:
-    // a: only pty's master get an event(other than self.rfd);
-    // b: only the pipe get an event(self.rfd);
-    // c: both of pty and pipe have event occur;
-    // for case a, it means there is output in process's terminal and what needed to do is
-    // just read the terminal and send them out; for case b, it means the process has exited
-    // and there is no data in the terminal, thus just return the "EOF" to end the io;
-    // for case c, it means the process has exited but there is some data in the terminal which
-    // hasn't been send out, thus it should send those data out first and then send "EOF" last to
-    // end the io.
-    pub fn poll(&self) -> Result<RawFd> {
-        let mut rfd = self.rfd;
-        let mut epoll_events = vec![epoll::Event::new(epoll::Events::empty(), 0); MAX_EVENTS];
-
-        loop {
-            let event_count = match epoll::wait(self.epoll_fd, -1, epoll_events.as_mut_slice()) {
-                Ok(ec) => ec,
-                Err(e) => {
-                    info!(self.logger, "loop wait err {:?}", e);
-                    // EINTR: The call was interrupted by a signal handler before either
-                    // any of the requested events occurred or the timeout expired
-                    if e.kind() == std::io::ErrorKind::Interrupted {
-                        continue;
-                    }
-                    return Err(e.into());
-                }
-            };
-
-            for event in epoll_events.iter().take(event_count) {
-                let fd = event.data as i32;
-                // fd has been assigned with one end of process's exited pipe by default, and
-                // here to check is there any event occur on process's terminal, if "yes", it
-                // should be dealt first, otherwise, it means the process has exited and there
-                // is nothing left in the process's terminal needed to be read.
-                if fd != rfd {
-                    rfd = fd;
-                    break;
-                }
-            }
-            break;
-        }
-
-        Ok(rfd)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::Epoller;
-    use nix::fcntl::OFlag;
-    use nix::unistd;
-    use std::thread;
-
-    #[test]
-    fn test_epoller_poll() {
-        let logger = slog::Logger::root(slog::Discard, o!());
-        let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC).unwrap();
-        let epoller = Epoller::new(&logger, rfd).unwrap();
-
-        let child = thread::spawn(move || {
-            let _ = unistd::write(wfd, "temporary file's content".as_bytes());
-        });
-
-        // wait write to finish
-        let _ = child.join();
-
-        let fd = epoller.poll().unwrap();
-        assert_eq!(fd, rfd, "Should get rfd");
-
-        epoller.close();
-    }
-}
--- a/src/agent/rustjail/src/sync.rs
+++ b/src/agent/rustjail/src/sync.rs
@@ -14,8 +14,8 @@ pub const SYNC_SUCCESS: i32 = 1;
 pub const SYNC_FAILED: i32 = 2;
 pub const SYNC_DATA: i32 = 3;

-const DATA_SIZE: usize = 100;
-const MSG_SIZE: usize = mem::size_of::<i32>();
+pub const DATA_SIZE: usize = 100;
+pub const MSG_SIZE: usize = mem::size_of::<i32>();

 #[macro_export]
 macro_rules! log_child {
--- a/src/agent/rustjail/src/sync_with_async.rs
+++ b/src/agent/rustjail/src/sync_with_async.rs
@@ -0,0 +1,148 @@
+// Copyright (c) 2020 Ant Group
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+//! The async version of sync module used for IPC
+
+use crate::pipestream::PipeStream;
+use anyhow::{anyhow, Result};
+use nix::errno::Errno;
+use tokio::io::{AsyncReadExt, AsyncWriteExt};
+
+use crate::sync::{DATA_SIZE, MSG_SIZE, SYNC_DATA, SYNC_FAILED, SYNC_SUCCESS};
+
+async fn write_count(pipe_w: &mut PipeStream, buf: &[u8], count: usize) -> Result<usize> {
+    let mut len = 0;
+
+    loop {
+        match pipe_w.write(&buf[len..]).await {
+            Ok(l) => {
+                len += l;
+                if len == count {
+                    break;
+                }
+            }
+
+            Err(e) => {
+                if e.raw_os_error().unwrap() != Errno::EINTR as i32 {
+                    return Err(e.into());
+                }
+            }
+        }
+    }
+
+    Ok(len)
+}
+
+async fn read_count(pipe_r: &mut PipeStream, count: usize) -> Result<Vec<u8>> {
+    let mut v: Vec<u8> = vec![0; count];
+    let mut len = 0;
+
+    loop {
+        match pipe_r.read(&mut v[len..]).await {
+            Ok(l) => {
+                len += l;
+                if len == count || l == 0 {
+                    break;
+                }
+            }
+
+            Err(e) => {
+                if e.raw_os_error().unwrap() != Errno::EINTR as i32 {
+                    return Err(e.into());
+                }
+            }
+        }
+    }
+
+    Ok(v[0..len].to_vec())
+}
+
+pub async fn read_async(pipe_r: &mut PipeStream) -> Result<Vec<u8>> {
+    let buf = read_count(pipe_r, MSG_SIZE).await?;
+    if buf.len() != MSG_SIZE {
+        return Err(anyhow!(
+            "process: {} failed to receive async message from peer: got msg length: {}, expected: {}",
+            std::process::id(),
+            buf.len(),
+            MSG_SIZE
+        ));
+    }
+    let buf_array: [u8; MSG_SIZE] = [buf[0], buf[1], buf[2], buf[3]];
+    let msg: i32 = i32::from_be_bytes(buf_array);
+    match msg {
+        SYNC_SUCCESS => Ok(Vec::new()),
+        SYNC_DATA => {
+            let buf = read_count(pipe_r, MSG_SIZE).await?;
+            let buf_array: [u8; MSG_SIZE] = [buf[0], buf[1], buf[2], buf[3]];
+            let msg_length: i32 = i32::from_be_bytes(buf_array);
+            let data_buf = read_count(pipe_r, msg_length as usize).await?;
+
+            Ok(data_buf)
+        }
+        SYNC_FAILED => {
+            let mut error_buf = vec![];
+            loop {
+                let buf = read_count(pipe_r, DATA_SIZE).await?;
+
+                error_buf.extend(&buf);
+                if DATA_SIZE == buf.len() {
+                    continue;
+                } else {
+                    break;
+                }
+            }
+
+            let error_str = match std::str::from_utf8(&error_buf) {
+                Ok(v) => String::from(v),
+                Err(e) => {
+                    return Err(
+                        anyhow!(e).context("receive error message from child process failed")
+                    );
+                }
+            };
+
+            Err(anyhow!(error_str))
+        }
+        _ => Err(anyhow!("error in receive sync message")),
+    }
+}
+
+pub async fn write_async(pipe_w: &mut PipeStream, msg_type: i32, data_str: &str) -> Result<()> {
+    let buf = msg_type.to_be_bytes();
+    let count = write_count(pipe_w, &buf, MSG_SIZE).await?;
+    if count != MSG_SIZE {
+        return Err(anyhow!("error in send sync message"));
+    }
+
+    match msg_type {
+        SYNC_FAILED => match write_count(pipe_w, data_str.as_bytes(), data_str.len()).await {
+            Ok(_) => pipe_w.shutdown()?,
+            Err(e) => {
+                pipe_w.shutdown()?;
+                return Err(anyhow!(e).context("error in send message to process"));
+            }
+        },
+        SYNC_DATA => {
+            let length: i32 = data_str.len() as i32;
+            write_count(pipe_w, &length.to_be_bytes(), MSG_SIZE)
+                .await
+                .or_else(|e| {
+                    pipe_w.shutdown()?;
+                    Err(anyhow!(e).context("error in send message to process"))
+                })?;
+
+            write_count(pipe_w, data_str.as_bytes(), data_str.len())
+                .await
+                .or_else(|e| {
+                    pipe_w.shutdown()?;
+                    Err(anyhow!(e).context("error in send message to process"))
+                })?;
+        }
+
+        _ => (),
+    };
+
+    Ok(())
+}
--- a/src/agent/rustjail/src/utils.rs
+++ b/src/agent/rustjail/src/utils.rs
@@ -0,0 +1,119 @@
+// Copyright (c) 2021 Ant Group
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+use anyhow::{anyhow, Context, Result};
+use libc::gid_t;
+use libc::uid_t;
+use std::fs::File;
+use std::io::{BufRead, BufReader};
+
+const PASSWD_FILE: &str = "/etc/passwd";
+
+// One entry (line) of a passwd-format file such as /etc/passwd
+#[derive(Debug, PartialEq, PartialOrd)]
+pub struct PasswdEntry {
+    // user name (field 1 of the colon-separated line)
+    pub name: String,
+    // user password (field 2), kept as the text found in the file
+    pub passwd: String,
+    // numeric user id (field 3)
+    pub uid: uid_t,
+    // numeric group id (field 4)
+    pub gid: gid_t,
+    // user information, i.e. the GECOS field (field 5)
+    pub gecos: String,
+    // home directory (field 6)
+    pub dir: String,
+    // user's shell (field 7)
+    pub shell: String,
+}
+
+// Look up the entry for `uid` in the passwd-format file at `path`.
+//
+// Each line is split on ':' and every field is trimmed. A line is skipped
+// when it
+//   - starts with '#',
+//   - does not have exactly seven fields, or
+//   - has a uid field that does not parse as a number.
+// The first remaining line whose uid equals `uid` is returned. A gid field
+// that does not parse is stored as 0.
+//
+// Errors: the file cannot be opened or read, or no line matches `uid`.
+fn get_entry_by_uid(uid: uid_t, path: &str) -> Result<PasswdEntry> {
+    let file = File::open(path).with_context(|| format!("open file {}", path))?;
+    let reader = BufReader::new(file);
+
+    // `lines()` drops the line terminator; the fields are trimmed below anyway.
+    for line in reader.lines() {
+        let line =
+            line.map_err(|e| anyhow!("failed to read file {} with {:?}", path, e))?;
+        if line.starts_with('#') {
+            continue;
+        }
+
+        let parts: Vec<&str> = line.split(':').map(|part| part.trim()).collect();
+        if parts.len() != 7 {
+            continue;
+        }
+
+        match parts[2].parse::<uid_t>() {
+            Ok(found) if found == uid => {
+                return Ok(PasswdEntry {
+                    name: parts[0].to_string(),
+                    passwd: parts[1].to_string(),
+                    uid: found,
+                    // Only the uid is validated; a malformed gid falls back to 0.
+                    gid: parts[3].parse().unwrap_or(0),
+                    gecos: parts[4].to_string(),
+                    dir: parts[5].to_string(),
+                    shell: parts[6].to_string(),
+                });
+            }
+            // Different uid, or a uid field that is not numeric: keep looking.
+            _ => continue,
+        }
+    }
+
+    // Running out of lines means no entry matched, which is not the same as
+    // the file being empty, so the error names the uid that was looked up.
+    Err(anyhow!("no entry for uid {} found in file {}", uid, path))
+}
+
+pub fn home_dir(uid: uid_t) -> Result<String> {
+    Ok(get_entry_by_uid(uid, PASSWD_FILE)?.dir) // home directory field of the entry
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::io::Write;
+    use tempfile::Builder;
+
+    // Lookups must skip comment and short lines and fail for an absent uid.
+    #[test]
+    fn test_get_entry_by_uid() {
+        let tmpdir = Builder::new().tempdir().unwrap();
+        let temp_passwd = format!("{}/passwd", tmpdir.path().to_str().unwrap());
+        let mut tempf = File::create(temp_passwd.as_str()).unwrap();
+        let contents = [
+            "root:x:0:0:root:/root0:/bin/bash",
+            "#root:x:1:0:root:/rootx:/bin/bash",
+            "root:x:1:0:root:/root1:/bin/bash",
+            "root:x:2:0:root:/root2:/bin/bash",
+            "root:x:3:0:root:/root3",
+            "root:x:3:0:root:/root3:/bin/bash",
+        ];
+        for line in contents.iter() {
+            writeln!(tempf, "{}", line).unwrap();
+        }
+
+        for (uid, dir) in [(0, "/root0"), (1, "/root1"), (2, "/root2"), (3, "/root3")].iter() {
+            let entry = get_entry_by_uid(*uid, temp_passwd.as_str()).unwrap();
+            assert_eq!(entry.dir.as_str(), *dir);
+        }
+
+        // No line carries uid 4, so the lookup has to report an error.
+        assert!(get_entry_by_uid(4, temp_passwd.as_str()).is_err());
+    }
+}
--- a/src/agent/rustjail/src/validator.rs
+++ b/src/agent/rustjail/src/validator.rs
@@ -78,12 +78,8 @@ fn rootfs(root: &str) -> Result<()> {
    Ok(())
 }

-fn network(_oci: &Spec) -> Result<()> {
-    Ok(())
-}
-
 fn hostname(oci: &Spec) -> Result<()> {
-    if oci.hostname.is_empty() || oci.hostname == "" {
+    if oci.hostname.is_empty() {
        return Ok(());
    }

@@ -301,7 +297,6 @@ pub fn validate(conf: &Config) -> Result<()> {
    };

    rootfs(root).context("rootfs")?;
-    network(oci).context("network")?;
    hostname(oci).context("hostname")?;
    security(oci).context("security")?;
    usernamespace(oci).context("usernamespace")?;
--- a/src/agent/src/config.rs
+++ b/src/agent/src/config.rs
@@ -10,6 +10,7 @@ use std::time;
 const DEBUG_CONSOLE_FLAG: &str = "agent.debug_console";
 const DEV_MODE_FLAG: &str = "agent.devmode";
 const LOG_LEVEL_OPTION: &str = "agent.log";
+const SERVER_ADDR_OPTION: &str = "agent.server_addr";
 const HOTPLUG_TIMOUT_OPTION: &str = "agent.hotplug_timeout";
 const DEBUG_CONSOLE_VPORT_OPTION: &str = "agent.debug_console_vport";
 const LOG_VPORT_OPTION: &str = "agent.log_vport";
@@ -26,12 +27,24 @@ const VSOCK_PORT: u16 = 1024;
 const SERVER_ADDR_ENV_VAR: &str = "KATA_AGENT_SERVER_ADDR";
 const LOG_LEVEL_ENV_VAR: &str = "KATA_AGENT_LOG_LEVEL";

-// FIXME: unused
-const TRACE_MODE_FLAG: &str = "agent.trace";
-const USE_VSOCK_FLAG: &str = "agent.use_vsock";
+const ERR_INVALID_LOG_LEVEL: &str = "invalid log level";
+const ERR_INVALID_LOG_LEVEL_PARAM: &str = "invalid log level parameter";
+const ERR_INVALID_GET_VALUE_PARAM: &str = "expected name=value";
+const ERR_INVALID_GET_VALUE_NO_NAME: &str = "name=value parameter missing name";
+const ERR_INVALID_GET_VALUE_NO_VALUE: &str = "name=value parameter missing value";
+const ERR_INVALID_LOG_LEVEL_KEY: &str = "invalid log level key name";
+
+const ERR_INVALID_HOTPLUG_TIMEOUT: &str = "invalid hotplug timeout parameter";
+const ERR_INVALID_HOTPLUG_TIMEOUT_PARAM: &str = "unable to parse hotplug timeout";
+const ERR_INVALID_HOTPLUG_TIMEOUT_KEY: &str = "invalid hotplug timeout key name";
+
+const ERR_INVALID_CONTAINER_PIPE_SIZE: &str = "invalid container pipe size parameter";
+const ERR_INVALID_CONTAINER_PIPE_SIZE_PARAM: &str = "unable to parse container pipe size";
+const ERR_INVALID_CONTAINER_PIPE_SIZE_KEY: &str = "invalid container pipe size key name";
+const ERR_INVALID_CONTAINER_PIPE_NEGATIVE: &str = "container pipe size should not be negative";

 #[derive(Debug)]
-pub struct agentConfig {
+pub struct AgentConfig {
    pub debug_console: bool,
    pub dev_mode: bool,
    pub log_level: slog::Level,
@@ -73,9 +86,9 @@ macro_rules! parse_cmdline_param {
    };
 }

-impl agentConfig {
-    pub fn new() -> agentConfig {
-        agentConfig {
+impl AgentConfig {
+    pub fn new() -> AgentConfig {
+        AgentConfig {
            debug_console: false,
            dev_mode: false,
            log_level: DEFAULT_LOG_LEVEL,
@@ -98,6 +111,12 @@ impl agentConfig {

            // parse cmdline options
            parse_cmdline_param!(param, LOG_LEVEL_OPTION, self.log_level, get_log_level);
+            parse_cmdline_param!(
+                param,
+                SERVER_ADDR_OPTION,
+                self.server_addr,
+                get_string_value
+            );

            // ensure the timeout is a positive value
            parse_cmdline_param!(
@@ -105,7 +124,7 @@ impl agentConfig {
                HOTPLUG_TIMOUT_OPTION,
                self.hotplug_timeout,
                get_hotplug_timeout,
-                |hotplugTimeout: time::Duration| hotplugTimeout.as_secs() > 0
+                |hotplug_timeout: time::Duration| hotplug_timeout.as_secs() > 0
            );

            // vsock port should be positive values
@@ -181,7 +200,7 @@ fn logrus_to_slog_level(logrus_level: &str) -> Result<slog::Level> {
        "trace" => slog::Level::Trace,

        _ => {
-            return Err(anyhow!("invalid log level"));
+            return Err(anyhow!(ERR_INVALID_LOG_LEVEL));
        }
    };

@@ -192,11 +211,11 @@ fn get_log_level(param: &str) -> Result<slog::Level> {
    let fields: Vec<&str> = param.split('=').collect();

    if fields.len() != 2 {
-        return Err(anyhow!("invalid log level parameter"));
+        return Err(anyhow!(ERR_INVALID_LOG_LEVEL_PARAM));
    }

    if fields[0] != LOG_LEVEL_OPTION {
-        Err(anyhow!("invalid log level key name"))
+        Err(anyhow!(ERR_INVALID_LOG_LEVEL_KEY))
    } else {
        Ok(logrus_to_slog_level(fields[1])?)
    }
@@ -206,17 +225,17 @@ fn get_hotplug_timeout(param: &str) -> Result<time::Duration> {
    let fields: Vec<&str> = param.split('=').collect();

    if fields.len() != 2 {
-        return Err(anyhow!("invalid hotplug timeout parameter"));
+        return Err(anyhow!(ERR_INVALID_HOTPLUG_TIMEOUT));
    }

    let key = fields[0];
    if key != HOTPLUG_TIMOUT_OPTION {
-        return Err(anyhow!("invalid hotplug timeout key name"));
+        return Err(anyhow!(ERR_INVALID_HOTPLUG_TIMEOUT_KEY));
    }

    let value = fields[1].parse::<u64>();
    if value.is_err() {
-        return Err(anyhow!("unable to parse hotplug timeout"));
+        return Err(anyhow!(ERR_INVALID_HOTPLUG_TIMEOUT_PARAM));
    }

    Ok(time::Duration::from_secs(value.unwrap()))
@@ -238,26 +257,54 @@ fn get_bool_value(param: &str) -> Result<bool> {
    })
 }

+// Return the value from a "name=value" string.
+//
+// Note:
+//
+// - Both a name *and* a non-empty value are required.
+// - A value can contain any number of equal signs.
+// - The name is not checked for being pure whitespace,
+//   although such a name should arguably be rejected.
+fn get_string_value(param: &str) -> Result<String> {
+    // Split at the first '=' only; any later '=' belongs to the value.
+    let mut fields = param.splitn(2, '=');
+    let name = fields.next().unwrap_or("");
+    let value = match fields.next() {
+        Some(value) => value,
+        None => return Err(anyhow!(ERR_INVALID_GET_VALUE_PARAM)),
+    };
+
+    if name.is_empty() {
+        return Err(anyhow!(ERR_INVALID_GET_VALUE_NO_NAME));
+    }
+
+    if value.is_empty() {
+        return Err(anyhow!(ERR_INVALID_GET_VALUE_NO_VALUE));
+    }
+
+    Ok(value.to_string())
+}
+
 fn get_container_pipe_size(param: &str) -> Result<i32> {
    let fields: Vec<&str> = param.split('=').collect();

    if fields.len() != 2 {
-        return Err(anyhow!("invalid container pipe size parameter"));
+        return Err(anyhow!(ERR_INVALID_CONTAINER_PIPE_SIZE));
    }

    let key = fields[0];
    if key != CONTAINER_PIPE_SIZE_OPTION {
-        return Err(anyhow!("invalid container pipe size key name"));
+        return Err(anyhow!(ERR_INVALID_CONTAINER_PIPE_SIZE_KEY));
    }

    let res = fields[1].parse::<i32>();
    if res.is_err() {
-        return Err(anyhow!("unable to parse container pipe size"));
+        return Err(anyhow!(ERR_INVALID_CONTAINER_PIPE_SIZE_PARAM));
    }

    let value = res.unwrap();
    if value < 0 {
-        return Err(anyhow!("container pipe size should not be negative"));
+        return Err(anyhow!(ERR_INVALID_CONTAINER_PIPE_NEGATIVE));
    }

    Ok(value)
@@ -272,19 +319,6 @@ mod tests {
    use std::time;
    use tempfile::tempdir;

-    const ERR_INVALID_LOG_LEVEL: &str = "invalid log level";
-    const ERR_INVALID_LOG_LEVEL_PARAM: &str = "invalid log level parameter";
-    const ERR_INVALID_LOG_LEVEL_KEY: &str = "invalid log level key name";
-
-    const ERR_INVALID_HOTPLUG_TIMEOUT: &str = "invalid hotplug timeout parameter";
-    const ERR_INVALID_HOTPLUG_TIMEOUT_PARAM: &str = "unable to parse hotplug timeout";
-    const ERR_INVALID_HOTPLUG_TIMEOUT_KEY: &str = "invalid hotplug timeout key name";
-
-    const ERR_INVALID_CONTAINER_PIPE_SIZE: &str = "invalid container pipe size parameter";
-    const ERR_INVALID_CONTAINER_PIPE_SIZE_PARAM: &str = "unable to parse container pipe size";
-    const ERR_INVALID_CONTAINER_PIPE_SIZE_KEY: &str = "invalid container pipe size key name";
-    const ERR_INVALID_CONTAINER_PIPE_NEGATIVE: &str = "container pipe size should not be negative";
-
    // helper function to make errors less crazy-long
    fn make_err(desc: &str) -> Error {
        anyhow!(desc.to_string())
@@ -303,19 +337,22 @@ mod tests {
                assert!(*expected_level == actual_level, $msg);
            } else {
                let expected_error = $expected_result.as_ref().unwrap_err();
-                let actual_error = $actual_result.unwrap_err();
-
                let expected_error_msg = format!("{:?}", expected_error);
-                let actual_error_msg = format!("{:?}", actual_error);

-                assert!(expected_error_msg == actual_error_msg, $msg);
+                if let Err(actual_error) = $actual_result {
+                    let actual_error_msg = format!("{:?}", actual_error);
+
+                    assert!(expected_error_msg == actual_error_msg, $msg);
+                } else {
+                    assert!(expected_error_msg == "expected error, got OK", $msg);
+                }
            }
        };
    }

    #[test]
    fn test_new() {
-        let config = agentConfig::new();
+        let config = AgentConfig::new();
        assert_eq!(config.debug_console, false);
        assert_eq!(config.dev_mode, false);
        assert_eq!(config.log_level, DEFAULT_LOG_LEVEL);
@@ -813,6 +850,61 @@ mod tests {
                server_addr: TEST_SERVER_ADDR,
                unified_cgroup_hierarchy: false,
            },
+            TestData {
+                contents: "server_addr=unix:///tmp/foo.socket",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: TEST_SERVER_ADDR,
+                unified_cgroup_hierarchy: false,
+            },
+            TestData {
+                contents: "agent.server_address=unix:///tmp/foo.socket",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: TEST_SERVER_ADDR,
+                unified_cgroup_hierarchy: false,
+            },
+            TestData {
+                contents: "agent.server_addr=unix:///tmp/foo.socket",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: "unix:///tmp/foo.socket",
+                unified_cgroup_hierarchy: false,
+            },
+            TestData {
+                contents: " agent.server_addr=unix:///tmp/foo.socket",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: "unix:///tmp/foo.socket",
+                unified_cgroup_hierarchy: false,
+            },
+            TestData {
+                contents: " agent.server_addr=unix:///tmp/foo.socket a",
+                env_vars: Vec::new(),
+                debug_console: false,
+                dev_mode: false,
+                log_level: DEFAULT_LOG_LEVEL,
+                hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
+                container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
+                server_addr: "unix:///tmp/foo.socket",
+                unified_cgroup_hierarchy: false,
+            },
        ];

        let dir = tempdir().expect("failed to create tmpdir");
@@ -822,7 +914,7 @@ mod tests {

        let filename = file_path.to_str().expect("failed to create filename");

-        let mut config = agentConfig::new();
+        let mut config = AgentConfig::new();
        let result = config.parse_cmdline(&filename.to_owned());
        assert!(result.is_err());

@@ -854,7 +946,7 @@ mod tests {
                vars_to_unset.push(name);
            }

-            let mut config = agentConfig::new();
+            let mut config = AgentConfig::new();
            assert_eq!(config.debug_console, false, "{}", msg);
            assert_eq!(config.dev_mode, false, "{}", msg);
            assert_eq!(config.unified_cgroup_hierarchy, false, "{}", msg);
@@ -1199,4 +1291,82 @@ mod tests {
            assert_result!(d.result, result, msg);
        }
    }
+
+    // Table-driven check of get_string_value(): malformed parameters must give
+    // the matching error and everything after the first '=' must be the value.
+    #[test]
+    fn test_get_string_value() {
+        #[derive(Debug)]
+        struct TestData<'a> {
+            param: &'a str,
+            result: Result<String>,
+        }
+
+        let tests = &[
+            TestData {
+                param: "",
+                result: Err(make_err(ERR_INVALID_GET_VALUE_PARAM)),
+            },
+            TestData {
+                param: "x",
+                result: Err(make_err(ERR_INVALID_GET_VALUE_PARAM)),
+            },
+            TestData {
+                param: "=",
+                result: Err(make_err(ERR_INVALID_GET_VALUE_NO_NAME)),
+            },
+            TestData {
+                param: "==",
+                result: Err(make_err(ERR_INVALID_GET_VALUE_NO_NAME)),
+            },
+            TestData {
+                param: "x=",
+                result: Err(make_err(ERR_INVALID_GET_VALUE_NO_VALUE)),
+            },
+            TestData {
+                param: "x==",
+                result: Ok("=".into()),
+            },
+            TestData {
+                param: "x===",
+                result: Ok("==".into()),
+            },
+            TestData {
+                param: "x==x",
+                result: Ok("=x".into()),
+            },
+            TestData {
+                param: "x=x",
+                result: Ok("x".into()),
+            },
+            TestData {
+                param: "x=x=",
+                result: Ok("x=".into()),
+            },
+            TestData {
+                param: "x=x=x",
+                result: Ok("x=x".into()),
+            },
+            TestData {
+                param: "foo=bar",
+                result: Ok("bar".into()),
+            },
+            TestData {
+                param: "x= =",
+                result: Ok(" =".into()),
+            },
+            TestData {
+                param: "x= = ",
+                result: Ok(" = ".into()),
+            },
+        ];
+
+        for (i, d) in tests.iter().enumerate() {
+            let msg = format!("test[{}]: {:?}", i, d);
+            let result = get_string_value(d.param);
+            let msg = format!("{}: result: {:?}", msg, result);
+
+            assert_result!(d.result, result, msg);
+        }
+    }
 }
--- a/src/agent/src/device.rs
+++ b/src/agent/src/device.rs
@@ -9,10 +9,13 @@ use std::collections::HashMap;
 use std::fs;
 use std::os::unix::fs::MetadataExt;
 use std::path::Path;
-use std::sync::{mpsc, Arc, Mutex};
+use std::str::FromStr;
+use std::sync::Arc;
+use tokio::sync::Mutex;

 use crate::linux_abi::*;
-use crate::mount::{DRIVERBLKTYPE, DRIVERMMIOBLKTYPE, DRIVERNVDIMMTYPE, DRIVERSCSITYPE};
+use crate::mount::{DRIVER_BLK_TYPE, DRIVER_MMIO_BLK_TYPE, DRIVER_NVDIMM_TYPE, DRIVER_SCSI_TYPE};
+use crate::pci;
 use crate::sandbox::Sandbox;
 use crate::{AGENT_CONFIG, GLOBAL_DEVICE_WATCHER};
 use anyhow::{anyhow, Result};
@@ -35,22 +38,6 @@ struct DevIndexEntry {

 struct DevIndex(HashMap<String, DevIndexEntry>);

-// DeviceHandler is the type of callback to be defined to handle every type of device driver.
-type DeviceHandler = fn(&Device, &mut Spec, &Arc<Mutex<Sandbox>>, &DevIndex) -> Result<()>;
-
-// DEVICEHANDLERLIST lists the supported drivers.
-#[rustfmt::skip]
-lazy_static! {
-    static ref DEVICEHANDLERLIST: HashMap<&'static str, DeviceHandler> = {
-        let mut m: HashMap<&'static str, DeviceHandler> = HashMap::new();
-        m.insert(DRIVERBLKTYPE, virtio_blk_device_handler);
-        m.insert(DRIVERMMIOBLKTYPE, virtiommio_blk_device_handler);
-        m.insert(DRIVERNVDIMMTYPE, virtio_nvdimm_device_handler);
-        m.insert(DRIVERSCSITYPE, virtio_scsi_device_handler);
-        m
-    };
-}
-
 pub fn rescan_pci_bus() -> Result<()> {
    online_device(SYSFS_PCI_BUS_RESCAN_FILE)
 }
@@ -60,64 +47,50 @@ pub fn online_device(path: &str) -> Result<()> {
    Ok(())
 }

-// get_pci_device_address fetches the complete PCI address in sysfs, based on the PCI
-// identifier provided. This should be in the format: "bridgeAddr/deviceAddr".
-// Here, bridgeAddr is the address at which the bridge is attached on the root bus,
-// while deviceAddr is the address at which the device is attached on the bridge.
-fn get_pci_device_address(pci_id: &str) -> Result<String> {
-    let tokens: Vec<&str> = pci_id.split('/').collect();
+// pcipath_to_sysfs fetches the sysfs path for a PCI path, relative to
+// the sysfs path for the PCI host bridge, based on the PCI path
+// provided.
+fn pcipath_to_sysfs(root_bus_sysfs: &str, pcipath: &pci::Path) -> Result<String> {
+    let mut bus = "0000:00".to_string();
+    let mut relpath = String::new();

-    if tokens.len() != 2 {
-        return Err(anyhow!(
-            "PCI Identifier for device should be of format [bridgeAddr/deviceAddr], got {}",
-            pci_id
-        ));
+    for i in 0..pcipath.len() {
+        let bdf = format!("{}:{}.0", bus, pcipath[i]);
+
+        relpath = format!("{}/{}", relpath, bdf);
+
+        if i == pcipath.len() - 1 {
+            // Final device need not be a bridge
+            break;
+        }
+
+        // Find out the bus exposed by bridge
+        let bridgebuspath = format!("{}{}/pci_bus", root_bus_sysfs, relpath);
+        let mut files: Vec<_> = fs::read_dir(&bridgebuspath)?.collect();
+
+        if files.len() != 1 {
+            return Err(anyhow!(
+                "Expected exactly one PCI bus in {}, got {} instead",
+                bridgebuspath,
+                files.len()
+            ));
+        }
+
+        // unwrap is safe, because of the length test above
+        let busfile = files.pop().unwrap()?;
+        bus = busfile
+            .file_name()
+            .into_string()
+            .map_err(|e| anyhow!("Bad filename under {}: {:?}", &bridgebuspath, e))?;
    }

-    let bridge_id = tokens[0];
-    let device_id = tokens[1];
-
-    // Deduce the complete bridge address based on the bridge address identifier passed
-    // and the fact that bridges are attached on the main bus with function 0.
-    let pci_bridge_addr = format!("0000:00:{}.0", bridge_id);
-
-    // Find out the bus exposed by bridge
-    let bridge_bus_path = format!("{}/{}/pci_bus/", SYSFS_PCI_BUS_PREFIX, pci_bridge_addr);
-
-    let files_slice: Vec<_> = fs::read_dir(&bridge_bus_path)
-        .unwrap()
-        .map(|res| res.unwrap().path())
-        .collect();
-    let bus_num = files_slice.len();
-
-    if bus_num != 1 {
-        return Err(anyhow!(
-            "Expected an entry for bus in {}, got {} entries instead",
-            bridge_bus_path,
-            bus_num
-        ));
-    }
-
-    let bus = files_slice[0].file_name().unwrap().to_str().unwrap();
-
-    // Device address is based on the bus of the bridge to which it is attached.
-    // We do not pass devices as multifunction, hence the trailing 0 in the address.
-    let pci_device_addr = format!("{}:{}.0", bus, device_id);
-
-    let bridge_device_pci_addr = format!("{}/{}", pci_bridge_addr, pci_device_addr);
-
-    info!(
-        sl!(),
-        "Fetched PCI address for device PCIAddr:{}\n", bridge_device_pci_addr
-    );
-
-    Ok(bridge_device_pci_addr)
+    Ok(relpath)
 }

-fn get_device_name(sandbox: &Arc<Mutex<Sandbox>>, dev_addr: &str) -> Result<String> {
+async fn get_device_name(sandbox: &Arc<Mutex<Sandbox>>, dev_addr: &str) -> Result<String> {
    // Keep the same lock order as uevent::handle_block_add_event(), otherwise it may cause deadlock.
-    let mut w = GLOBAL_DEVICE_WATCHER.lock().unwrap();
-    let sb = sandbox.lock().unwrap();
+    let mut w = GLOBAL_DEVICE_WATCHER.lock().await;
+    let sb = sandbox.lock().await;
    for (key, value) in sb.pci_device_map.iter() {
        if key.contains(dev_addr) {
            info!(sl!(), "Device {} found in pci device map", dev_addr);
@@ -131,36 +104,58 @@ fn get_device_name(sandbox: &Arc<Mutex<Sandbox>>, dev_addr: &str) -> Result<Stri
    // The key of the watchers map is the device we are interested in.
    // Note this is done inside the lock, not to miss any events from the
    // global udev listener.
-    let (tx, rx) = mpsc::channel::<String>();
-    w.insert(dev_addr.to_string(), tx);
+    let (tx, rx) = tokio::sync::oneshot::channel::<String>();
+    w.insert(dev_addr.to_string(), Some(tx));
    drop(w);

    info!(sl!(), "Waiting on channel for device notification\n");
-    let hotplug_timeout = AGENT_CONFIG.read().unwrap().hotplug_timeout;
-    let dev_name = rx.recv_timeout(hotplug_timeout).map_err(|_| {
-        GLOBAL_DEVICE_WATCHER.lock().unwrap().remove_entry(dev_addr);
-        anyhow!(
-            "Timeout reached after {:?} waiting for device {}",
-            hotplug_timeout,
-            dev_addr
-        )
-    })?;
+    let hotplug_timeout = AGENT_CONFIG.read().await.hotplug_timeout;
+
+    let dev_name = match tokio::time::timeout(hotplug_timeout, rx).await {
+        Ok(v) => v?,
+        Err(_) => {
+            let watcher = GLOBAL_DEVICE_WATCHER.clone();
+            let mut w = watcher.lock().await;
+            w.remove_entry(dev_addr);
+
+            return Err(anyhow!(
+                "Timeout reached after {:?} waiting for device {}",
+                hotplug_timeout,
+                dev_addr
+            ));
+        }
+    };

    Ok(format!("{}/{}", SYSTEM_DEV_PATH, &dev_name))
 }

-pub fn get_scsi_device_name(sandbox: &Arc<Mutex<Sandbox>>, scsi_addr: &str) -> Result<String> {
+pub async fn get_scsi_device_name(
+    sandbox: &Arc<Mutex<Sandbox>>,
+    scsi_addr: &str,
+) -> Result<String> {
    let dev_sub_path = format!("{}{}/{}", SCSI_HOST_CHANNEL, scsi_addr, SCSI_BLOCK_SUFFIX);

    scan_scsi_bus(scsi_addr)?;
-    get_device_name(sandbox, &dev_sub_path)
+    get_device_name(sandbox, &dev_sub_path).await
 }

-pub fn get_pci_device_name(sandbox: &Arc<Mutex<Sandbox>>, pci_id: &str) -> Result<String> {
-    let pci_addr = get_pci_device_address(pci_id)?;
+pub async fn get_pci_device_name(
+    sandbox: &Arc<Mutex<Sandbox>>,
+    pcipath: &pci::Path,
+) -> Result<String> {
+    let root_bus_sysfs = format!("{}{}", SYSFS_DIR, create_pci_root_bus_path());
+    let sysfs_rel_path = pcipath_to_sysfs(&root_bus_sysfs, pcipath)?;

    rescan_pci_bus()?;
-    get_device_name(sandbox, &pci_addr)
+    get_device_name(sandbox, &sysfs_rel_path).await
+}
+
+pub async fn get_pmem_device_name(
+    sandbox: &Arc<Mutex<Sandbox>>,
+    pmem_devname: &str,
+) -> Result<String> {
+    let dev_sub_path = format!("/{}/{}", SCSI_BLOCK_SUFFIX, pmem_devname);
+    get_device_name(sandbox, &dev_sub_path).await
 }

 /// Scan SCSI bus for the given SCSI address(SCSI-Id and LUN)
@@ -204,7 +199,7 @@ fn update_spec_device_list(device: &Device, spec: &mut Spec, devidx: &DevIndex)

    // If no container_path is provided, we won't be able to match and
    // update the device in the OCI spec device list. This is an error.
-    if device.container_path == "" {
+    if device.container_path.is_empty() {
        return Err(anyhow!(
            "container_path cannot empty for device {:?}",
            device
@@ -274,23 +269,21 @@ fn update_spec_device_list(device: &Device, spec: &mut Spec, devidx: &DevIndex)

 // device.Id should be the predicted device name (vda, vdb, ...)
 // device.VmPath already provides a way to send it in
-fn virtiommio_blk_device_handler(
+async fn virtiommio_blk_device_handler(
    device: &Device,
    spec: &mut Spec,
    _sandbox: &Arc<Mutex<Sandbox>>,
    devidx: &DevIndex,
 ) -> Result<()> {
-    if device.vm_path == "" {
+    if device.vm_path.is_empty() {
        return Err(anyhow!("Invalid path for virtio mmio blk device"));
    }

    update_spec_device_list(device, spec, devidx)
 }

-// device.Id should be the PCI address in the format  "bridgeAddr/deviceAddr".
-// Here, bridgeAddr is the address at which the brige is attached on the root bus,
-// while deviceAddr is the address at which the device is attached on the bridge.
-fn virtio_blk_device_handler(
+// device.Id should be a PCI path string
+async fn virtio_blk_device_handler(
    device: &Device,
    spec: &mut Spec,
    sandbox: &Arc<Mutex<Sandbox>>,
@@ -298,34 +291,36 @@ fn virtio_blk_device_handler(
 ) -> Result<()> {
    let mut dev = device.clone();

-    // When "Id (PCIAddr)" is not set, we allow to use the predicted "VmPath" passed from kata-runtime
-    // Note this is a special code path for cloud-hypervisor when BDF information is not available
-    if device.id != "" {
-        dev.vm_path = get_pci_device_name(sandbox, &device.id)?;
+    // When "Id (PCI path)" is not set, we allow to use the predicted
+    // "VmPath" passed from kata-runtime Note this is a special code
+    // path for cloud-hypervisor when BDF information is not available
+    if !device.id.is_empty() {
+        let pcipath = pci::Path::from_str(&device.id)?;
+        dev.vm_path = get_pci_device_name(sandbox, &pcipath).await?;
    }

    update_spec_device_list(&dev, spec, devidx)
 }

 // device.Id should be the SCSI address of the disk in the format "scsiID:lunID"
-fn virtio_scsi_device_handler(
+async fn virtio_scsi_device_handler(
    device: &Device,
    spec: &mut Spec,
    sandbox: &Arc<Mutex<Sandbox>>,
    devidx: &DevIndex,
 ) -> Result<()> {
    let mut dev = device.clone();
-    dev.vm_path = get_scsi_device_name(sandbox, &device.id)?;
+    dev.vm_path = get_scsi_device_name(sandbox, &device.id).await?;
    update_spec_device_list(&dev, spec, devidx)
 }

-fn virtio_nvdimm_device_handler(
+async fn virtio_nvdimm_device_handler(
    device: &Device,
    spec: &mut Spec,
    _sandbox: &Arc<Mutex<Sandbox>>,
    devidx: &DevIndex,
 ) -> Result<()> {
-    if device.vm_path == "" {
+    if device.vm_path.is_empty() {
        return Err(anyhow!("Invalid path for nvdimm device"));
    }

@@ -357,7 +352,7 @@ impl DevIndex {
    }
 }

-pub fn add_devices(
+pub async fn add_devices(
    devices: &[Device],
    spec: &mut Spec,
    sandbox: &Arc<Mutex<Sandbox>>,
@@ -365,13 +360,13 @@ pub fn add_devices(
    let devidx = DevIndex::new(spec);

    for device in devices.iter() {
-        add_device(device, spec, sandbox, &devidx)?;
+        add_device(device, spec, sandbox, &devidx).await?;
    }

    Ok(())
 }

-fn add_device(
+async fn add_device(
    device: &Device,
    spec: &mut Spec,
    sandbox: &Arc<Mutex<Sandbox>>,
@@ -381,21 +376,24 @@ fn add_device(
    info!(sl!(), "device-id: {}, device-type: {}, device-vm-path: {}, device-container-path: {}, device-options: {:?}",
          device.id, device.field_type, device.vm_path, device.container_path, device.options);

-    if device.field_type == "" {
+    if device.field_type.is_empty() {
        return Err(anyhow!("invalid type for device {:?}", device));
    }

-    if device.id == "" && device.vm_path == "" {
+    if device.id.is_empty() && device.vm_path.is_empty() {
        return Err(anyhow!("invalid ID and VM path for device {:?}", device));
    }

-    if device.container_path == "" {
+    if device.container_path.is_empty() {
        return Err(anyhow!("invalid container path for device {:?}", device));
    }

-    match DEVICEHANDLERLIST.get(device.field_type.as_str()) {
-        None => Err(anyhow!("Unknown device type {}", device.field_type)),
-        Some(dev_handler) => dev_handler(device, spec, sandbox, devidx),
+    match device.field_type.as_str() {
+        DRIVER_BLK_TYPE => virtio_blk_device_handler(device, spec, sandbox, devidx).await,
+        DRIVER_MMIO_BLK_TYPE => virtiommio_blk_device_handler(device, spec, sandbox, devidx).await,
+        DRIVER_NVDIMM_TYPE => virtio_nvdimm_device_handler(device, spec, sandbox, devidx).await,
+        DRIVER_SCSI_TYPE => virtio_scsi_device_handler(device, spec, sandbox, devidx).await,
+        _ => Err(anyhow!("Unknown device type {}", device.field_type)),
    }
 }

@@ -433,12 +431,14 @@ pub fn update_device_cgroup(spec: &mut Spec) -> Result<()> {
 mod tests {
    use super::*;
    use oci::Linux;
+    use tempfile::tempdir;

    #[test]
    fn test_update_device_cgroup() {
-        let mut spec = Spec::default();
-
-        spec.linux = Some(Linux::default());
+        let mut spec = Spec {
+            linux: Some(Linux::default()),
+            ..Default::default()
+        };

        update_device_cgroup(&mut spec).unwrap();

@@ -712,4 +712,68 @@ mod tests {
        assert_eq!(Some(host_major), specresources.devices[1].major);
        assert_eq!(Some(host_minor), specresources.devices[1].minor);
    }
+
+    #[test]
+    fn test_pcipath_to_sysfs() {
+        let testdir = tempdir().expect("failed to create tmpdir");
+        let rootbuspath = testdir.path().to_str().unwrap();
+
+        let path2 = pci::Path::from_str("02").unwrap();
+        let path23 = pci::Path::from_str("02/03").unwrap();
+        let path234 = pci::Path::from_str("02/03/04").unwrap();
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path2);
+        assert_eq!(relpath.unwrap(), "/0000:00:02.0");
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path23);
+        assert!(relpath.is_err());
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path234);
+        assert!(relpath.is_err());
+
+        // Create mock sysfs files for the device at 0000:00:02.0
+        let bridge2path = format!("{}{}", rootbuspath, "/0000:00:02.0");
+
+        fs::create_dir_all(&bridge2path).unwrap();
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path2);
+        assert_eq!(relpath.unwrap(), "/0000:00:02.0");
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path23);
+        assert!(relpath.is_err());
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path234);
+        assert!(relpath.is_err());
+
+        // Create mock sysfs files to indicate that 0000:00:02.0 is a bridge to bus 01
+        let bridge2bus = "0000:01";
+        let bus2path = format!("{}/pci_bus/{}", bridge2path, bridge2bus);
+
+        fs::create_dir_all(bus2path).unwrap();
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path2);
+        assert_eq!(relpath.unwrap(), "/0000:00:02.0");
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path23);
+        assert_eq!(relpath.unwrap(), "/0000:00:02.0/0000:01:03.0");
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path234);
+        assert!(relpath.is_err());
+
+        // Create mock sysfs files for a bridge at 0000:01:03.0 to bus 02
+        let bridge3path = format!("{}/0000:01:03.0", bridge2path);
+        let bridge3bus = "0000:02";
+        let bus3path = format!("{}/pci_bus/{}", bridge3path, bridge3bus);
+
+        fs::create_dir_all(bus3path).unwrap();
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path2);
+        assert_eq!(relpath.unwrap(), "/0000:00:02.0");
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path23);
+        assert_eq!(relpath.unwrap(), "/0000:00:02.0/0000:01:03.0");
+
+        let relpath = pcipath_to_sysfs(rootbuspath, &path234);
+        assert_eq!(relpath.unwrap(), "/0000:00:02.0/0000:01:03.0/0000:02:04.0");
+    }
 }
--- a/src/agent/src/linux_abi.rs
+++ b/src/agent/src/linux_abi.rs
@@ -9,7 +9,6 @@
 use std::fs;

 pub const SYSFS_DIR: &str = "/sys";
-pub const SYSFS_PCI_BUS_PREFIX: &str = "/sys/bus/pci/devices";
 pub const SYSFS_PCI_BUS_RESCAN_FILE: &str = "/sys/bus/pci/rescan";
 #[cfg(any(
    target_arch = "powerpc64",
@@ -25,10 +24,18 @@ pub fn create_pci_root_bus_path() -> String {
 pub fn create_pci_root_bus_path() -> String {
    let ret = String::from("/devices/platform/4010000000.pcie/pci0000:00");

+    let acpi_root_bus_path = String::from("/devices/pci0000:00");
+    let mut acpi_sysfs_dir = String::from(SYSFS_DIR);
    let mut sysfs_dir = String::from(SYSFS_DIR);
    let mut start_root_bus_path = String::from("/devices/platform/");
    let end_root_bus_path = String::from("/pci0000:00");

+    // check if there is pci bus path for acpi
+    acpi_sysfs_dir.push_str(&acpi_root_bus_path);
+    if let Ok(_) = fs::metadata(&acpi_sysfs_dir) {
+        return acpi_root_bus_path;
+    }
+
    sysfs_dir.push_str(&start_root_bus_path);
    let entries = match fs::read_dir(sysfs_dir) {
        Ok(e) => e,
@@ -58,6 +65,13 @@ pub fn create_pci_root_bus_path() -> String {
    ret
 }

+// From https://www.kernel.org/doc/Documentation/acpi/namespace.txt
+// The Linux kernel's core ACPI subsystem creates struct acpi_device
+// objects for ACPI namespace objects representing devices, power resources
+// processors, thermal zones. Those objects are exported to user space via
+// sysfs as directories in the subtree under /sys/devices/LNXSYSTM:00
+pub const ACPI_DEV_PATH: &str = "/devices/LNXSYSTM";
+
 pub const SYSFS_CPU_ONLINE_PATH: &str = "/sys/devices/system/cpu";

 pub const SYSFS_MEMORY_BLOCK_SIZE_PATH: &str = "/sys/devices/system/memory/block_size_bytes";
--- a/src/agent/src/main.rs
+++ b/src/agent/src/main.rs
@@ -3,11 +3,6 @@
 // SPDX-License-Identifier: Apache-2.0
 //

-#![allow(non_camel_case_types)]
-#![allow(unused_parens)]
-#![allow(unused_unsafe)]
-#![allow(dead_code)]
-#![allow(non_snake_case)]
 #[macro_use]
 extern crate lazy_static;
 extern crate oci;
@@ -15,19 +10,15 @@ extern crate prctl;
 extern crate prometheus;
 extern crate protocols;
 extern crate regex;
-extern crate rustjail;
 extern crate scan_fmt;
 extern crate serde_json;
-extern crate signal_hook;

 #[macro_use]
 extern crate scopeguard;

 #[macro_use]
 extern crate slog;
-extern crate netlink;

-use crate::netlink::{RtnlHandle, NETLINK_ROUTE};
 use anyhow::{anyhow, Context, Result};
 use nix::fcntl::{self, OFlag};
 use nix::fcntl::{FcntlArg, FdFlag};
@@ -35,10 +26,8 @@ use nix::libc::{STDERR_FILENO, STDIN_FILENO, STDOUT_FILENO};
 use nix::pty;
 use nix::sys::select::{select, FdSet};
 use nix::sys::socket::{self, AddressFamily, SockAddr, SockFlag, SockType};
-use nix::sys::wait::{self, WaitStatus};
+use nix::sys::wait;
 use nix::unistd::{self, close, dup, dup2, fork, setsid, ForkResult};
-use prctl::set_child_subreaper;
-use signal_hook::{iterator::Signals, SIGCHLD};
 use std::collections::HashMap;
 use std::env;
 use std::ffi::{CStr, CString, OsStr};
@@ -48,9 +37,7 @@ use std::os::unix::ffi::OsStrExt;
 use std::os::unix::fs as unixfs;
 use std::os::unix::io::AsRawFd;
 use std::path::Path;
-use std::sync::mpsc::{self, Sender};
-use std::sync::{Arc, Mutex, RwLock};
-use std::{io, thread, thread::JoinHandle};
+use std::sync::Arc;
 use unistd::Pid;

 mod config;
@@ -59,19 +46,40 @@ mod linux_abi;
 mod metrics;
 mod mount;
 mod namespace;
+mod netlink;
 mod network;
+mod pci;
 pub mod random;
 mod sandbox;
+mod signal;
 #[cfg(test)]
 mod test_utils;
 mod uevent;
+mod util;
 mod version;

 use mount::{cgroups_mount, general_mount};
 use sandbox::Sandbox;
+use signal::setup_signal_handler;
 use slog::Logger;
 use uevent::watch_uevents;

+use std::sync::Mutex as SyncMutex;
+
+use futures::future::join_all;
+use futures::StreamExt as _;
+use rustjail::pipestream::PipeStream;
+use tokio::{
+    io::AsyncWrite,
+    sync::{
+        oneshot::Sender,
+        watch::{channel, Receiver},
+        Mutex, RwLock,
+    },
+    task::JoinHandle,
+};
+use tokio_vsock::{Incoming, VsockListener, VsockStream};
+
 mod rpc;

 const NAME: &str = "kata-agent";
@@ -81,13 +89,13 @@ const CONSOLE_PATH: &str = "/dev/console";
 const DEFAULT_BUF_SIZE: usize = 8 * 1024;

 lazy_static! {
-    static ref GLOBAL_DEVICE_WATCHER: Arc<Mutex<HashMap<String, Sender<String>>>> =
+    static ref GLOBAL_DEVICE_WATCHER: Arc<Mutex<HashMap<String, Option<Sender<String>>>>> =
        Arc::new(Mutex::new(HashMap::new()));
-    static ref AGENT_CONFIG: Arc<RwLock<agentConfig>> =
-        Arc::new(RwLock::new(config::agentConfig::new()));
+    static ref AGENT_CONFIG: Arc<RwLock<AgentConfig>> =
+        Arc::new(RwLock::new(config::AgentConfig::new()));
 }

-fn announce(logger: &Logger, config: &agentConfig) {
+fn announce(logger: &Logger, config: &AgentConfig) {
    info!(logger, "announce";
    "agent-commit" => version::VERSION_COMMIT,

@@ -100,7 +108,168 @@ fn announce(logger: &Logger, config: &agentConfig) {
    );
 }

-fn main() -> Result<()> {
+fn set_fd_close_exec(fd: RawFd) -> Result<RawFd> {
+    if let Err(e) = fcntl::fcntl(fd, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC)) {
+        return Err(anyhow!("failed to set fd: {} as close-on-exec: {}", fd, e));
+    }
+    Ok(fd)
+}
+
+fn get_vsock_incoming(fd: RawFd) -> Incoming {
+    let incoming;
+    unsafe {
+        incoming = VsockListener::from_raw_fd(fd).incoming();
+    }
+    incoming
+}
+
+async fn get_vsock_stream(fd: RawFd) -> Result<VsockStream> {
+    let stream = get_vsock_incoming(fd).next().await.unwrap().unwrap();
+    set_fd_close_exec(stream.as_raw_fd())?;
+    Ok(stream)
+}
+
+// Task that reads from the logger pipe and writes to the vsock port
+// specified, or to stdout if no port is given.
+async fn create_logger_task(rfd: RawFd, vsock_port: u32, shutdown: Receiver<bool>) -> Result<()> {
+    let mut reader = PipeStream::from_fd(rfd);
+    let mut writer: Box<dyn AsyncWrite + Unpin + Send>;
+
+    if vsock_port > 0 {
+        let listenfd = socket::socket(
+            AddressFamily::Vsock,
+            SockType::Stream,
+            SockFlag::SOCK_CLOEXEC,
+            None,
+        )?;
+
+        let addr = SockAddr::new_vsock(libc::VMADDR_CID_ANY, vsock_port);
+        socket::bind(listenfd, &addr).unwrap();
+        socket::listen(listenfd, 1).unwrap();
+
+        writer = Box::new(get_vsock_stream(listenfd).await.unwrap());
+    } else {
+        writer = Box::new(tokio::io::stdout());
+    }
+
+    let _ = util::interruptable_io_copier(&mut reader, &mut writer, shutdown).await;
+
+    Ok(())
+}
+
+async fn real_main() -> std::result::Result<(), Box<dyn std::error::Error>> {
+    env::set_var("RUST_BACKTRACE", "full");
+
+    // List of tasks that need to be stopped for a clean shutdown
+    let mut tasks: Vec<JoinHandle<Result<()>>> = vec![];
+
+    lazy_static::initialize(&SHELLS);
+
+    lazy_static::initialize(&AGENT_CONFIG);
+
+    // support vsock log
+    let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC)?;
+
+    let (shutdown_tx, shutdown_rx) = channel(true);
+
+    let agent_config = AGENT_CONFIG.clone();
+
+    let init_mode = unistd::getpid() == Pid::from_raw(1);
+    if init_mode {
+        // dup a new file descriptor for this temporary logger writer,
+        // since this logger would be dropped and its writer would
+        // be closed at the end of this code block.
+        let newwfd = dup(wfd)?;
+        let writer = unsafe { File::from_raw_fd(newwfd) };
+
+        // Init a temporary logger used by the agent when running as the init
+        // process: before the base mounts are done, "/proc/cmdline" cannot be
+        // read to get the customized debug level.
+        let (logger, logger_async_guard) =
+            logging::create_logger(NAME, "agent", slog::Level::Debug, writer);
+
+        // Must mount proc fs before parsing kernel command line
+        general_mount(&logger).map_err(|e| {
+            error!(logger, "fail general mount: {}", e);
+            e
+        })?;
+
+        let mut config = agent_config.write().await;
+        config.parse_cmdline(KERNEL_CMDLINE_FILE)?;
+
+        init_agent_as_init(&logger, config.unified_cgroup_hierarchy)?;
+        drop(logger_async_guard);
+    } else {
+        // Once the cmdline is parsed and the config is set, release the write
+        // lock as soon as possible in case another task needs a read lock on
+        // it.
+        let mut config = agent_config.write().await;
+        config.parse_cmdline(KERNEL_CMDLINE_FILE)?;
+    }
+    let config = agent_config.read().await;
+
+    let log_vport = config.log_vport as u32;
+
+    let log_handle = tokio::spawn(create_logger_task(rfd, log_vport, shutdown_rx.clone()));
+
+    tasks.push(log_handle);
+
+    let writer = unsafe { File::from_raw_fd(wfd) };
+
+    // Recreate a logger with the log level obtained from "/proc/cmdline".
+    let (logger, logger_async_guard) =
+        logging::create_logger(NAME, "agent", config.log_level, writer);
+
+    announce(&logger, &config);
+
+    // This variable is required as it enables the global (and crucially static) logger,
+    // which is required to satisfy the lifetime constraints of the auto-generated gRPC code.
+    let global_logger = slog_scope::set_global_logger(logger.new(o!("subsystem" => "rpc")));
+
+    // Allow the global logger to be modified later (for shutdown)
+    global_logger.cancel_reset();
+
+    let mut ttrpc_log_guard: Result<(), log::SetLoggerError> = Ok(());
+
+    if config.log_level == slog::Level::Trace {
+        // Redirect ttrpc log calls to slog iff full debug requested
+        ttrpc_log_guard = Ok(slog_stdlog::init().map_err(|e| e)?);
+    }
+
+    // Start the sandbox and wait for its ttRPC server to end
+    start_sandbox(&logger, &config, init_mode, &mut tasks, shutdown_rx.clone()).await?;
+
+    // Install a NOP logger for the remainder of the shutdown sequence
+    // to ensure any log calls made by local crates using the scope logger
+    // don't fail.
+    let global_logger_guard2 =
+        slog_scope::set_global_logger(slog::Logger::root(slog::Discard, o!()));
+    global_logger_guard2.cancel_reset();
+
+    drop(logger_async_guard);
+
+    drop(ttrpc_log_guard);
+
+    // Trigger a controlled shutdown
+    shutdown_tx
+        .send(true)
+        .map_err(|e| anyhow!(e).context("failed to request shutdown"))?;
+
+    // Wait for all tasks to finish
+    let results = join_all(tasks).await;
+
+    for result in results {
+        if let Err(e) = result {
+            return Err(anyhow!(e).into());
+        }
+    }
+
+    eprintln!("{} shutdown complete", NAME);
+
+    Ok(())
+}
+
+fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let args: Vec<String> = env::args().collect();

    if args.len() == 2 && args[1] == "--version" {
@@ -120,243 +289,78 @@ fn main() -> Result<()> {
        exit(0);
    }

-    env::set_var("RUST_BACKTRACE", "full");
+    let rt = tokio::runtime::Builder::new_multi_thread()
+        .enable_all()
+        .build()?;

-    lazy_static::initialize(&SHELLS);
-
-    lazy_static::initialize(&AGENT_CONFIG);
-
-    // support vsock log
-    let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC)?;
-
-    let agentConfig = AGENT_CONFIG.clone();
-
-    let init_mode = unistd::getpid() == Pid::from_raw(1);
-    if init_mode {
-        // dup a new file descriptor for this temporary logger writer,
-        // since this logger would be dropped and it's writer would
-        // be closed out of this code block.
-        let newwfd = dup(wfd)?;
-        let writer = unsafe { File::from_raw_fd(newwfd) };
-
-        // Init a temporary logger used by init agent as init process
-        // since before do the base mount, it wouldn't access "/proc/cmdline"
-        // to get the customzied debug level.
-        let logger = logging::create_logger(NAME, "agent", slog::Level::Debug, writer);
-
-        // Must mount proc fs before parsing kernel command line
-        general_mount(&logger).map_err(|e| {
-            error!(logger, "fail general mount: {}", e);
-            e
-        })?;
-
-        let mut config = agentConfig.write().unwrap();
-        config.parse_cmdline(KERNEL_CMDLINE_FILE)?;
-
-        init_agent_as_init(&logger, config.unified_cgroup_hierarchy)?;
-    } else {
-        // once parsed cmdline and set the config, release the write lock
-        // as soon as possible in case other thread would get read lock on
-        // it.
-        let mut config = agentConfig.write().unwrap();
-        config.parse_cmdline(KERNEL_CMDLINE_FILE)?;
-    }
-    let config = agentConfig.read().unwrap();
-
-    let log_vport = config.log_vport as u32;
-    let log_handle = thread::spawn(move || -> Result<()> {
-        let mut reader = unsafe { File::from_raw_fd(rfd) };
-        if log_vport > 0 {
-            let listenfd = socket::socket(
-                AddressFamily::Vsock,
-                SockType::Stream,
-                SockFlag::SOCK_CLOEXEC,
-                None,
-            )?;
-            let addr = SockAddr::new_vsock(libc::VMADDR_CID_ANY, log_vport);
-            socket::bind(listenfd, &addr)?;
-            socket::listen(listenfd, 1)?;
-            let datafd = socket::accept4(listenfd, SockFlag::SOCK_CLOEXEC)?;
-            let mut log_writer = unsafe { File::from_raw_fd(datafd) };
-            let _ = io::copy(&mut reader, &mut log_writer)?;
-            let _ = unistd::close(listenfd);
-            let _ = unistd::close(datafd);
-        }
-        // copy log to stdout
-        let mut stdout_writer = io::stdout();
-        let _ = io::copy(&mut reader, &mut stdout_writer)?;
-        Ok(())
-    });
-
-    let writer = unsafe { File::from_raw_fd(wfd) };
-    // Recreate a logger with the log level get from "/proc/cmdline".
-    let logger = logging::create_logger(NAME, "agent", config.log_level, writer);
-
-    announce(&logger, &config);
-
-    // This "unused" variable is required as it enables the global (and crucially static) logger,
-    // which is required to satisfy the the lifetime constraints of the auto-generated gRPC code.
-    let _guard = slog_scope::set_global_logger(logger.new(o!("subsystem" => "rpc")));
-
-    let mut _log_guard: Result<(), log::SetLoggerError> = Ok(());
-
-    if config.log_level == slog::Level::Trace {
-        // Redirect ttrpc log calls to slog iff full debug requested
-        _log_guard = Ok(slog_stdlog::init().map_err(|e| e)?);
-    }
-
-    start_sandbox(&logger, &config, init_mode)?;
-
-    let _ = log_handle.join();
-
-    Ok(())
+    rt.block_on(real_main())
 }

-fn start_sandbox(logger: &Logger, config: &agentConfig, init_mode: bool) -> Result<()> {
+async fn start_sandbox(
+    logger: &Logger,
+    config: &AgentConfig,
+    init_mode: bool,
+    tasks: &mut Vec<JoinHandle<Result<()>>>,
+    shutdown: Receiver<bool>,
+) -> Result<()> {
    let shells = SHELLS.clone();
    let debug_console_vport = config.debug_console_vport as u32;

-    let mut shell_handle: Option<JoinHandle<()>> = None;
-    if config.debug_console {
+    let shell_handle = if config.debug_console {
        let thread_logger = logger.clone();
+        let shells = shells.lock().unwrap().to_vec();

-        let builder = thread::Builder::new();
-
-        let handle = builder.spawn(move || {
-            let shells = shells.lock().unwrap();
-            let result = setup_debug_console(&thread_logger, shells.to_vec(), debug_console_vport);
+        let handle = tokio::task::spawn_blocking(move || {
+            let result = setup_debug_console(&thread_logger, shells, debug_console_vport);
            if result.is_err() {
                // Report error, but don't fail
                warn!(thread_logger, "failed to setup debug console";
                    "error" => format!("{}", result.unwrap_err()));
            }
-        })?;
+        });

-        shell_handle = Some(handle);
-    }
+        Some(handle)
+    } else {
+        None
+    };

    // Initialize unique sandbox structure.
-    let mut s = Sandbox::new(&logger).context("Failed to create sandbox")?;
-
+    let s = Sandbox::new(&logger).context("Failed to create sandbox")?;
    if init_mode {
-        let mut rtnl = RtnlHandle::new(NETLINK_ROUTE, 0).unwrap();
-        rtnl.handle_localhost()?;
-
-        s.rtnl = Some(rtnl);
+        s.rtnl.handle_localhost().await?;
    }

    let sandbox = Arc::new(Mutex::new(s));

-    setup_signal_handler(&logger, sandbox.clone()).unwrap();
-    watch_uevents(sandbox.clone());
+    let signal_handler_task = tokio::spawn(setup_signal_handler(
+        logger.clone(),
+        sandbox.clone(),
+        shutdown.clone(),
+    ));

-    let (tx, rx) = mpsc::channel::<i32>();
-    sandbox.lock().unwrap().sender = Some(tx);
+    tasks.push(signal_handler_task);
+
+    let uevents_handler_task = tokio::spawn(watch_uevents(sandbox.clone(), shutdown.clone()));
+
+    tasks.push(uevents_handler_task);
+
+    let (tx, rx) = tokio::sync::oneshot::channel();
+    sandbox.lock().await.sender = Some(tx);

    // vsock:///dev/vsock, port
-    let mut server = rpc::start(sandbox, config.server_addr.as_str());
+    let mut server = rpc::start(sandbox.clone(), config.server_addr.as_str());
+    server.start().await?;

-    let _ = server.start().unwrap();
-
-    let _ = rx.recv()?;
-
-    server.shutdown();
+    let _ = rx.await?;
+    server.shutdown().await?;

    if let Some(handle) = shell_handle {
-        handle.join().map_err(|e| anyhow!("{:?}", e))?;
+        handle.await.map_err(|e| anyhow!("{:?}", e))?;
    }

    Ok(())
 }

-use nix::sys::wait::WaitPidFlag;
-
-fn setup_signal_handler(logger: &Logger, sandbox: Arc<Mutex<Sandbox>>) -> Result<()> {
-    let logger = logger.new(o!("subsystem" => "signals"));
-
-    set_child_subreaper(true)
-        .map_err(|err| anyhow!(err).context("failed to setup agent as a child subreaper"))?;
-
-    let signals = Signals::new(&[SIGCHLD])?;
-
-    thread::spawn(move || {
-        'outer: for sig in signals.forever() {
-            info!(logger, "received signal"; "signal" => sig);
-
-            // sevral signals can be combined together
-            // as one. So loop around to reap all
-            // exited children
-            'inner: loop {
-                let wait_status = match wait::waitpid(
-                    Some(Pid::from_raw(-1)),
-                    Some(WaitPidFlag::WNOHANG | WaitPidFlag::__WALL),
-                ) {
-                    Ok(s) => {
-                        if s == WaitStatus::StillAlive {
-                            continue 'outer;
-                        }
-                        s
-                    }
-                    Err(e) => {
-                        info!(
-                            logger,
-                            "waitpid reaper failed";
-                            "error" => e.as_errno().unwrap().desc()
-                        );
-                        continue 'outer;
-                    }
-                };
-                info!(logger, "wait_status"; "wait_status result" => format!("{:?}", wait_status));
-
-                let pid = wait_status.pid();
-                if let Some(pid) = pid {
-                    let raw_pid = pid.as_raw();
-                    let child_pid = format!("{}", raw_pid);
-
-                    let logger = logger.new(o!("child-pid" => child_pid));
-
-                    let mut sandbox = sandbox.lock().unwrap();
-                    let process = sandbox.find_process(raw_pid);
-                    if process.is_none() {
-                        info!(logger, "child exited unexpectedly");
-                        continue 'inner;
-                    }
-
-                    let mut p = process.unwrap();
-
-                    if p.exit_pipe_w.is_none() {
-                        error!(logger, "the process's exit_pipe_w isn't set");
-                        continue 'inner;
-                    }
-                    let pipe_write = p.exit_pipe_w.unwrap();
-                    let ret: i32;
-
-                    match wait_status {
-                        WaitStatus::Exited(_, c) => ret = c,
-                        WaitStatus::Signaled(_, sig, _) => ret = sig as i32,
-                        _ => {
-                            info!(logger, "got wrong status for process";
-                                  "child-status" => format!("{:?}", wait_status));
-                            continue 'inner;
-                        }
-                    }
-
-                    p.exit_code = ret;
-                    let _ = unistd::close(pipe_write);
-
-                    if let Some(ref poller) = p.epoller {
-                        info!(logger, "close epoller");
-                        // close the socket file to notify readStdio to close terminal specifically
-                        // in case this process's terminal has been inherited by its children.
-                        poller.close_wfd()
-                    }
-                }
-            }
-        }
-    });
-    Ok(())
-}
-
 // init_agent_as_init will do the initializations such as setting up the rootfs
 // when this agent has been run as the init process.
 fn init_agent_as_init(logger: &Logger, unified_cgroup_hierarchy: bool) -> Result<()> {
@@ -374,7 +378,7 @@ fn init_agent_as_init(logger: &Logger, unified_cgroup_hierarchy: bool) -> Result
    unistd::setsid()?;

    unsafe {
-        libc::ioctl(io::stdin().as_raw_fd(), libc::TIOCSCTTY, 1);
+        libc::ioctl(std::io::stdin().as_raw_fd(), libc::TIOCSCTTY, 1);
    }

    env::set_var("PATH", "/bin:/sbin/:/usr/bin/:/usr/sbin/");
@@ -405,7 +409,7 @@ fn sethostname(hostname: &OsStr) -> Result<()> {
 }

 lazy_static! {
-    static ref SHELLS: Arc<Mutex<Vec<String>>> = {
+    static ref SHELLS: Arc<SyncMutex<Vec<String>>> = {
        let mut v = Vec::new();

        if !cfg!(test) {
@@ -413,32 +417,21 @@ lazy_static! {
            v.push("/bin/sh".to_string());
        }

-        Arc::new(Mutex::new(v))
+        Arc::new(SyncMutex::new(v))
    };
 }

-// pub static mut LOG_LEVEL: ;
-// pub static mut TRACE_MODE: ;
-
-use crate::config::agentConfig;
+use crate::config::AgentConfig;
 use nix::sys::stat::Mode;
 use std::os::unix::io::{FromRawFd, RawFd};
 use std::path::PathBuf;
 use std::process::exit;

 fn setup_debug_console(logger: &Logger, shells: Vec<String>, port: u32) -> Result<()> {
-    let mut shell: &str = "";
-    for sh in shells.iter() {
-        let binary = PathBuf::from(sh);
-        if binary.exists() {
-            shell = sh;
-            break;
-        }
-    }
-
-    if shell == "" {
-        return Err(anyhow!("no shell found to launch debug console"));
-    }
+    let shell = shells
+        .iter()
+        .find(|sh| PathBuf::from(sh).exists())
+        .ok_or_else(|| anyhow!("no shell found to launch debug console"))?;

    if port > 0 {
        let listenfd = socket::socket(
@@ -479,7 +472,7 @@ fn setup_debug_console(logger: &Logger, shells: Vec<String>, port: u32) -> Resul
    };
 }

-fn io_copy<R: ?Sized, W: ?Sized>(reader: &mut R, writer: &mut W) -> io::Result<u64>
+fn io_copy<R: ?Sized, W: ?Sized>(reader: &mut R, writer: &mut W) -> std::io::Result<u64>
 where
    R: Read,
    W: Write,
@@ -542,10 +535,10 @@ fn run_debug_console_shell(logger: &Logger, shell: &str, socket_fd: RawFd) -> Re
            let debug_shell_logger = logger.clone();

            // channel that used to sync between thread and main process
-            let (tx, rx) = mpsc::channel::<i32>();
+            let (tx, rx) = std::sync::mpsc::channel::<i32>();

            // start a thread to do IO copy between socket and pseduo.master
-            thread::spawn(move || {
+            std::thread::spawn(move || {
                let mut master_reader = unsafe { File::from_raw_fd(master_fd) };
                let mut master_writer = unsafe { File::from_raw_fd(master_fd) };
                let mut socket_reader = unsafe { File::from_raw_fd(socket_fd) };
--- a/src/agent/src/metrics.rs
+++ b/src/agent/src/metrics.rs
@@ -187,9 +187,9 @@ fn update_guest_metrics() {
            info!(sl!(), "failed to get guest KernelStats: {:?}", err);
        }
        Ok(kernel_stats) => {
-            set_gauge_vec_CPU_time(&GUEST_CPU_TIME, "total", &kernel_stats.total);
+            set_gauge_vec_cpu_time(&GUEST_CPU_TIME, "total", &kernel_stats.total);
            for (i, cpu_time) in kernel_stats.cpu_time.iter().enumerate() {
-                set_gauge_vec_CPU_time(&GUEST_CPU_TIME, format!("{}", i).as_str(), &cpu_time);
+                set_gauge_vec_cpu_time(&GUEST_CPU_TIME, format!("{}", i).as_str(), &cpu_time);
            }
        }
    }
@@ -332,7 +332,7 @@ fn set_gauge_vec_meminfo(gv: &prometheus::GaugeVec, meminfo: &procfs::Meminfo) {
        .set(meminfo.k_reclaimable.unwrap_or(0) as f64);
 }

-fn set_gauge_vec_CPU_time(gv: &prometheus::GaugeVec, cpu: &str, cpu_time: &procfs::CpuTime) {
+fn set_gauge_vec_cpu_time(gv: &prometheus::GaugeVec, cpu: &str, cpu_time: &procfs::CpuTime) {
    gv.with_label_values(&[cpu, "user"])
        .set(cpu_time.user as f64);
    gv.with_label_values(&[cpu, "nice"])
--- a/src/agent/src/mount.rs
+++ b/src/agent/src/mount.rs
@@ -7,12 +7,13 @@ use std::collections::HashMap;
 use std::ffi::CString;
 use std::fs;
 use std::io;
-use std::iter::FromIterator;
 use std::os::unix::fs::PermissionsExt;

 use std::path::Path;
 use std::ptr::null;
-use std::sync::{Arc, Mutex};
+use std::str::FromStr;
+use std::sync::Arc;
+use tokio::sync::Mutex;

 use libc::{c_void, mount};
 use nix::mount::{self, MsFlags};
@@ -21,23 +22,28 @@ use regex::Regex;
 use std::fs::File;
 use std::io::{BufRead, BufReader};

-use crate::device::{get_pci_device_name, get_scsi_device_name, online_device};
+use crate::device::{
+    get_pci_device_name, get_pmem_device_name, get_scsi_device_name, online_device,
+};
 use crate::linux_abi::*;
+use crate::pci;
 use crate::protocols::agent::Storage;
 use crate::Sandbox;
 use anyhow::{anyhow, Context, Result};
 use slog::Logger;

-pub const DRIVER9PTYPE: &str = "9p";
-pub const DRIVERVIRTIOFSTYPE: &str = "virtio-fs";
-pub const DRIVERBLKTYPE: &str = "blk";
-pub const DRIVERMMIOBLKTYPE: &str = "mmioblk";
-pub const DRIVERSCSITYPE: &str = "scsi";
-pub const DRIVERNVDIMMTYPE: &str = "nvdimm";
-pub const DRIVEREPHEMERALTYPE: &str = "ephemeral";
-pub const DRIVERLOCALTYPE: &str = "local";
+pub const DRIVER_9P_TYPE: &str = "9p";
+pub const DRIVER_VIRTIOFS_TYPE: &str = "virtio-fs";
+pub const DRIVER_BLK_TYPE: &str = "blk";
+pub const DRIVER_MMIO_BLK_TYPE: &str = "mmioblk";
+pub const DRIVER_SCSI_TYPE: &str = "scsi";
+pub const DRIVER_NVDIMM_TYPE: &str = "nvdimm";
+pub const DRIVER_EPHEMERAL_TYPE: &str = "ephemeral";
+pub const DRIVER_LOCAL_TYPE: &str = "local";

-pub const TYPEROOTFS: &str = "rootfs";
+pub const TYPE_ROOTFS: &str = "rootfs";
+
+pub const MOUNT_GUEST_TAG: &str = "kataShared";

 #[rustfmt::skip]
 lazy_static! {
@@ -81,7 +87,7 @@ lazy_static! {
 }

 #[derive(Debug, PartialEq)]
-pub struct INIT_MOUNT {
+pub struct InitMount {
    fstype: &'static str,
    src: &'static str,
    dest: &'static str,
@@ -111,42 +117,26 @@ lazy_static!{

 #[rustfmt::skip]
 lazy_static! {
-    pub static ref INIT_ROOTFS_MOUNTS: Vec<INIT_MOUNT> = vec![
-        INIT_MOUNT{fstype: "proc", src: "proc", dest: "/proc", options: vec!["nosuid", "nodev", "noexec"]},
-        INIT_MOUNT{fstype: "sysfs", src: "sysfs", dest: "/sys", options: vec!["nosuid", "nodev", "noexec"]},
-        INIT_MOUNT{fstype: "devtmpfs", src: "dev", dest: "/dev", options: vec!["nosuid"]},
-        INIT_MOUNT{fstype: "tmpfs", src: "tmpfs", dest: "/dev/shm", options: vec!["nosuid", "nodev"]},
-        INIT_MOUNT{fstype: "devpts", src: "devpts", dest: "/dev/pts", options: vec!["nosuid", "noexec"]},
-        INIT_MOUNT{fstype: "tmpfs", src: "tmpfs", dest: "/run", options: vec!["nosuid", "nodev"]},
+    pub static ref INIT_ROOTFS_MOUNTS: Vec<InitMount> = vec![
+        InitMount{fstype: "proc", src: "proc", dest: "/proc", options: vec!["nosuid", "nodev", "noexec"]},
+        InitMount{fstype: "sysfs", src: "sysfs", dest: "/sys", options: vec!["nosuid", "nodev", "noexec"]},
+        InitMount{fstype: "devtmpfs", src: "dev", dest: "/dev", options: vec!["nosuid"]},
+        InitMount{fstype: "tmpfs", src: "tmpfs", dest: "/dev/shm", options: vec!["nosuid", "nodev"]},
+        InitMount{fstype: "devpts", src: "devpts", dest: "/dev/pts", options: vec!["nosuid", "noexec"]},
+        InitMount{fstype: "tmpfs", src: "tmpfs", dest: "/run", options: vec!["nosuid", "nodev"]},
    ];
 }

-// StorageHandler is the type of callback to be defined to handle every
-// type of storage driver.
-type StorageHandler = fn(&Logger, &Storage, Arc<Mutex<Sandbox>>) -> Result<String>;
-
-// STORAGEHANDLERLIST lists the supported drivers.
-#[rustfmt::skip]
-lazy_static! {
-    pub static ref STORAGEHANDLERLIST: HashMap<&'static str, StorageHandler> = {
-    	let mut m = HashMap::new();
-    let blk: StorageHandler = virtio_blk_storage_handler;
-        m.insert(DRIVERBLKTYPE, blk);
-	let p9: StorageHandler= virtio9p_storage_handler;
-        m.insert(DRIVER9PTYPE, p9);
-	let virtiofs: StorageHandler = virtiofs_storage_handler;
-        m.insert(DRIVERVIRTIOFSTYPE, virtiofs);
-    let ephemeral: StorageHandler = ephemeral_storage_handler;
-        m.insert(DRIVEREPHEMERALTYPE, ephemeral);
-    let virtiommio: StorageHandler = virtiommio_blk_storage_handler;
-        m.insert(DRIVERMMIOBLKTYPE, virtiommio);
-    let local: StorageHandler = local_storage_handler;
-        m.insert(DRIVERLOCALTYPE, local);
-    let scsi: StorageHandler = virtio_scsi_storage_handler;
-        m.insert(DRIVERSCSITYPE, scsi);
-        m
-    };
-}
+pub const STORAGE_HANDLER_LIST: [&str; 8] = [
+    DRIVER_BLK_TYPE,
+    DRIVER_9P_TYPE,
+    DRIVER_VIRTIOFS_TYPE,
+    DRIVER_EPHEMERAL_TYPE,
+    DRIVER_MMIO_BLK_TYPE,
+    DRIVER_LOCAL_TYPE,
+    DRIVER_SCSI_TYPE,
+    DRIVER_NVDIMM_TYPE,
+];

 #[derive(Debug, Clone)]
 pub struct BareMount<'a> {
@@ -238,12 +228,12 @@ impl<'a> BareMount<'a> {
    }
 }

-fn ephemeral_storage_handler(
+async fn ephemeral_storage_handler(
    logger: &Logger,
    storage: &Storage,
    sandbox: Arc<Mutex<Sandbox>>,
 ) -> Result<String> {
-    let mut sb = sandbox.lock().unwrap();
+    let mut sb = sandbox.lock().await;
    let new_storage = sb.set_sandbox_storage(&storage.mount_point);

    if !new_storage {
@@ -256,12 +246,12 @@ fn ephemeral_storage_handler(
    Ok("".to_string())
 }

-fn local_storage_handler(
+async fn local_storage_handler(
    _logger: &Logger,
    storage: &Storage,
    sandbox: Arc<Mutex<Sandbox>>,
 ) -> Result<String> {
-    let mut sb = sandbox.lock().unwrap();
+    let mut sb = sandbox.lock().await;
    let new_storage = sb.set_sandbox_storage(&storage.mount_point);

    if !new_storage {
@@ -289,7 +279,7 @@ fn local_storage_handler(
    Ok("".to_string())
 }

-fn virtio9p_storage_handler(
+async fn virtio9p_storage_handler(
    logger: &Logger,
    storage: &Storage,
    _sandbox: Arc<Mutex<Sandbox>>,
@@ -298,7 +288,7 @@ fn virtio9p_storage_handler(
 }

 // virtiommio_blk_storage_handler handles the storage for mmio blk driver.
-fn virtiommio_blk_storage_handler(
+async fn virtiommio_blk_storage_handler(
    logger: &Logger,
    storage: &Storage,
    _sandbox: Arc<Mutex<Sandbox>>,
@@ -308,7 +298,7 @@ fn virtiommio_blk_storage_handler(
 }

 // virtiofs_storage_handler handles the storage for virtio-fs.
-fn virtiofs_storage_handler(
+async fn virtiofs_storage_handler(
    logger: &Logger,
    storage: &Storage,
    _sandbox: Arc<Mutex<Sandbox>>,
@@ -317,14 +307,14 @@ fn virtiofs_storage_handler(
 }

 // virtio_blk_storage_handler handles the storage for blk driver.
-fn virtio_blk_storage_handler(
+async fn virtio_blk_storage_handler(
    logger: &Logger,
    storage: &Storage,
    sandbox: Arc<Mutex<Sandbox>>,
 ) -> Result<String> {
    let mut storage = storage.clone();
-    // If hot-plugged, get the device node path based on the PCI address else
-    // use the virt path provided in Storage Source
+    // If hot-plugged, get the device node path based on the PCI path
+    // otherwise use the virt path provided in Storage Source
    if storage.source.starts_with("/dev") {
        let metadata = fs::metadata(&storage.source)
            .context(format!("get metadata on file {:?}", &storage.source))?;
@@ -334,7 +324,8 @@ fn virtio_blk_storage_handler(
            return Err(anyhow!("Invalid device {}", &storage.source));
        }
    } else {
-        let dev_path = get_pci_device_name(&sandbox, &storage.source)?;
+        let pcipath = pci::Path::from_str(&storage.source)?;
+        let dev_path = get_pci_device_name(&sandbox, &pcipath).await?;
        storage.source = dev_path;
    }

@@ -342,7 +333,7 @@ fn virtio_blk_storage_handler(
 }

 // virtio_scsi_storage_handler handles the storage for scsi driver.
-fn virtio_scsi_storage_handler(
+async fn virtio_scsi_storage_handler(
    logger: &Logger,
    storage: &Storage,
    sandbox: Arc<Mutex<Sandbox>>,
@@ -350,7 +341,7 @@ fn virtio_scsi_storage_handler(
    let mut storage = storage.clone();

    // Retrieve the device path from SCSI address.
-    let dev_path = get_scsi_device_name(&sandbox, &storage.source)?;
+    let dev_path = get_scsi_device_name(&sandbox, &storage.source).await?;
    storage.source = dev_path;

    common_storage_handler(logger, &storage)
@@ -363,12 +354,46 @@ fn common_storage_handler(logger: &Logger, storage: &Storage) -> Result<String>
    mount_storage(logger, storage).and(Ok(mount_point))
 }

+// nvdimm_storage_handler handles the storage for NVDIMM driver.
+async fn nvdimm_storage_handler(
+    logger: &Logger,
+    storage: &Storage,
+    sandbox: Arc<Mutex<Sandbox>>,
+) -> Result<String> {
+    let mut storage = storage.clone();
+    // The storage source must be a path under /dev/; strip that prefix to
+    // obtain the pmem device name used for the lookup below.
+    let pmem_devname = match storage.source.strip_prefix("/dev/") {
+        Some(dev) => dev,
+        None => {
+            return Err(anyhow!(
+                "Storage source '{}' must start with /dev/",
+                storage.source
+            ))
+        }
+    };
+
+    // Retrieve the device path from the pmem device name.
+    let dev_path = get_pmem_device_name(&sandbox, pmem_devname).await?;
+    storage.source = dev_path;
+
+    common_storage_handler(logger, &storage)
+}
+
 // mount_storage performs the mount described by the storage structure.
 fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
    let logger = logger.new(o!("subsystem" => "mount"));

+    // Before mounting the shared filesystem, check whether the destination is already a
+    // mount point. If so, skip the mount. This allows the sharedfs to be mounted
+    // automatically in the guest before the agent service starts.
+    if storage.source == MOUNT_GUEST_TAG && is_mounted(&storage.mount_point)? {
+        warn!(logger, "kataShared already mounted, ignoring...");
+        return Ok(());
+    }
+
    match storage.fstype.as_str() {
-        DRIVER9PTYPE | DRIVERVIRTIOFSTYPE => {
+        DRIVER_9P_TYPE | DRIVER_VIRTIOFS_TYPE => {
            let dest_path = Path::new(storage.mount_point.as_str());
            if !dest_path.exists() {
                fs::create_dir_all(dest_path).context("Create mount destination failed")?;
@@ -380,7 +405,7 @@ fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
    }

    let options_vec = storage.options.to_vec();
-    let options_vec = Vec::from_iter(options_vec.iter().map(String::as_str));
+    let options_vec = options_vec.iter().map(String::as_str).collect();
    let (flags, options) = parse_mount_flags_and_options(options_vec);

    info!(logger, "mounting storage";
@@ -402,6 +427,24 @@ fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
    bare_mount.mount()
 }

+/// Looks for a `mount_point` entry in /proc/mounts.
+fn is_mounted(mount_point: &str) -> Result<bool> {
+    let mount_point = mount_point.trim_end_matches('/');
+    let found = fs::metadata(mount_point).is_ok()
+        // Look through /proc/mounts and check whether the mount exists.
+        && fs::read_to_string("/proc/mounts")?
+            .lines()
+            .any(|line| {
+                // The 2nd column reveals the mount point.
+                line.split_whitespace()
+                    .nth(1)
+                    .map(|target| mount_point.eq(target))
+                    .unwrap_or(false)
+            });
+
+    Ok(found)
+}
+
 fn parse_mount_flags_and_options(options_vec: Vec<&str>) -> (MsFlags, String) {
    let mut flags = MsFlags::empty();
    let mut options: String = "".to_string();
@@ -430,7 +473,7 @@ fn parse_mount_flags_and_options(options_vec: Vec<&str>) -> (MsFlags, String) {
 // associated operations such as waiting for the device to show up, and mount
 // it to a specific location, according to the type of handler chosen, and for
 // each storage.
-pub fn add_storages(
+pub async fn add_storages(
    logger: Logger,
    storages: Vec<Storage>,
    sandbox: Arc<Mutex<Sandbox>>,
@@ -443,17 +486,33 @@ pub fn add_storages(
            "subsystem" => "storage",
            "storage-type" => handler_name.to_owned()));

-        let handler = STORAGEHANDLERLIST
-            .get(&handler_name.as_str())
-            .ok_or_else(|| {
-                anyhow!(
+        let res = match handler_name.as_str() {
+            DRIVER_BLK_TYPE => virtio_blk_storage_handler(&logger, &storage, sandbox.clone()).await,
+            DRIVER_9P_TYPE => virtio9p_storage_handler(&logger, &storage, sandbox.clone()).await,
+            DRIVER_VIRTIOFS_TYPE => {
+                virtiofs_storage_handler(&logger, &storage, sandbox.clone()).await
+            }
+            DRIVER_EPHEMERAL_TYPE => {
+                ephemeral_storage_handler(&logger, &storage, sandbox.clone()).await
+            }
+            DRIVER_MMIO_BLK_TYPE => {
+                virtiommio_blk_storage_handler(&logger, &storage, sandbox.clone()).await
+            }
+            DRIVER_LOCAL_TYPE => local_storage_handler(&logger, &storage, sandbox.clone()).await,
+            DRIVER_SCSI_TYPE => {
+                virtio_scsi_storage_handler(&logger, &storage, sandbox.clone()).await
+            }
+            DRIVER_NVDIMM_TYPE => nvdimm_storage_handler(&logger, &storage, sandbox.clone()).await,
+            _ => {
+                return Err(anyhow!(
                    "Failed to find the storage handler {}",
                    storage.driver.to_owned()
-                )
-            })?;
+                ));
+            }
+        };

        // Todo need to rollback the mounted storage if err met.
-        let mount_point = handler(&logger, &storage, sandbox.clone())?;
+        let mount_point = res?;

        if !mount_point.is_empty() {
            mount_list.push(mount_point);
@@ -463,7 +522,7 @@ pub fn add_storages(
    Ok(mount_list)
 }

-fn mount_to_rootfs(logger: &Logger, m: &INIT_MOUNT) -> Result<()> {
+fn mount_to_rootfs(logger: &Logger, m: &InitMount) -> Result<()> {
    let options_vec: Vec<&str> = m.options.clone();

    let (flags, options) = parse_mount_flags_and_options(options_vec);
@@ -506,7 +565,7 @@ pub fn get_mount_fs_type(mount_point: &str) -> Result<String> {
 // get_mount_fs_type_from_file returns the FS type corresponding to the passed mount point and
 // any error ecountered.
 pub fn get_mount_fs_type_from_file(mount_file: &str, mount_point: &str) -> Result<String> {
-    if mount_point == "" {
+    if mount_point.is_empty() {
        return Err(anyhow!("Invalid mount point {}", mount_point));
    }

@@ -539,11 +598,11 @@ pub fn get_cgroup_mounts(
    logger: &Logger,
    cg_path: &str,
    unified_cgroup_hierarchy: bool,
-) -> Result<Vec<INIT_MOUNT>> {
+) -> Result<Vec<InitMount>> {
    // cgroup v2
    // https://github.com/kata-containers/agent/blob/8c9bbadcd448c9a67690fbe11a860aaacc69813c/agent.go#L1249
    if unified_cgroup_hierarchy {
-        return Ok(vec![INIT_MOUNT {
+        return Ok(vec![InitMount {
            fstype: "cgroup2",
            src: "cgroup2",
            dest: "/sys/fs/cgroup",
@@ -555,7 +614,7 @@ pub fn get_cgroup_mounts(
    let reader = BufReader::new(file);

    let mut has_device_cgroup = false;
-    let mut cg_mounts: Vec<INIT_MOUNT> = vec![INIT_MOUNT {
+    let mut cg_mounts: Vec<InitMount> = vec![InitMount {
        fstype: "tmpfs",
        src: "tmpfs",
        dest: SYSFS_CGROUPPATH,
@@ -591,7 +650,7 @@ pub fn get_cgroup_mounts(
            }
        }

-        if fields[0] == "" {
+        if fields[0].is_empty() {
            continue;
        }

@@ -601,7 +660,7 @@ pub fn get_cgroup_mounts(

        if let Some(value) = CGROUPS.get(&fields[0]) {
            let key = CGROUPS.keys().find(|&&f| f == fields[0]).unwrap();
-            cg_mounts.push(INIT_MOUNT {
+            cg_mounts.push(InitMount {
                fstype: "cgroup",
                src: "cgroup",
                dest: *value,
@@ -615,7 +674,7 @@ pub fn get_cgroup_mounts(
        return Ok(Vec::new());
    }

-    cg_mounts.push(INIT_MOUNT {
+    cg_mounts.push(InitMount {
        fstype: "tmpfs",
        src: "tmpfs",
        dest: SYSFS_CGROUPPATH,
@@ -798,7 +857,7 @@ mod tests {
            let src_filename: String;
            let dest_filename: String;

-            if d.src != "" {
+            if !d.src.is_empty() {
                src = dir.path().join(d.src.to_string());
                src_filename = src
                    .to_str()
@@ -808,7 +867,7 @@ mod tests {
                src_filename = "".to_owned();
            }

-            if d.dest != "" {
+            if !d.dest.is_empty() {
                dest = dir.path().join(d.dest.to_string());
                dest_filename = dest
                    .to_str()
@@ -820,7 +879,7 @@ mod tests {

            // Create the mount directories
            for d in [src_filename.clone(), dest_filename.clone()].iter() {
-                if d == "" {
+                if d.is_empty() {
                    continue;
                }

@@ -840,7 +899,7 @@ mod tests {

            let msg = format!("{}: result: {:?}", msg, result);

-            if d.error_contains == "" {
+            if d.error_contains.is_empty() {
                assert!(result.is_ok(), msg);

                // Cleanup
@@ -865,6 +924,14 @@ mod tests {
        }
    }

+    #[test]
+    fn test_is_mounted() {
+        assert!(is_mounted("/proc").unwrap());
+        assert!(!is_mounted("").unwrap());
+        assert!(!is_mounted("!").unwrap());
+        assert!(!is_mounted("/not_existing_path").unwrap());
+    }
+
    #[test]
    fn test_remove_mounts() {
        skip_if_not_root!();
@@ -958,7 +1025,7 @@ mod tests {

            let msg = format!("{}: result: {:?}", msg, result);

-            if d.error_contains == "" {
+            if d.error_contains.is_empty() {
                assert!(result.is_ok(), msg);
                continue;
            }
@@ -1066,7 +1133,7 @@ mod tests {
            // add more details if an assertion fails
            let msg = format!("{}: result: {:?}", msg, result);

-            if d.error_contains == "" {
+            if d.error_contains.is_empty() {
                let fs_type = result.unwrap();

                assert!(d.fs_type == fs_type, msg);
@@ -1113,21 +1180,21 @@ mod tests {
        let drain = slog::Discard;
        let logger = slog::Logger::root(drain, o!());

-        let first_mount = INIT_MOUNT {
+        let first_mount = InitMount {
            fstype: "tmpfs",
            src: "tmpfs",
            dest: SYSFS_CGROUPPATH,
            options: vec!["nosuid", "nodev", "noexec", "mode=755"],
        };

-        let last_mount = INIT_MOUNT {
+        let last_mount = InitMount {
            fstype: "tmpfs",
            src: "tmpfs",
            dest: SYSFS_CGROUPPATH,
            options: vec!["remount", "ro", "nosuid", "nodev", "noexec", "mode=755"],
        };

-        let cg_devices_mount = INIT_MOUNT {
+        let cg_devices_mount = InitMount {
            fstype: "cgroup",
            src: "cgroup",
            dest: "/sys/fs/cgroup/devices",
@@ -1223,7 +1290,7 @@ mod tests {
            let result = get_cgroup_mounts(&logger, filename, false);
            let msg = format!("{}: result: {:?}", msg, result);

-            if d.error_contains != "" {
+            if !d.error_contains.is_empty() {
                assert!(result.is_err(), msg);

                let error_msg = format!("{}", result.unwrap_err());
--- a/src/agent/src/namespace.rs
+++ b/src/agent/src/namespace.rs
@@ -11,7 +11,6 @@ use std::fmt;
 use std::fs;
 use std::fs::File;
 use std::path::{Path, PathBuf};
-use std::thread::{self};

 use crate::mount::{BareMount, FLAGS};
 use slog::Logger;
@@ -58,7 +57,7 @@ impl Namespace {

    pub fn get_uts(mut self, hostname: &str) -> Self {
        self.ns_type = NamespaceType::UTS;
-        if hostname != "" {
+        if !hostname.is_empty() {
            self.hostname = Some(String::from(hostname));
        }
        self
@@ -69,6 +68,7 @@ impl Namespace {
        self
    }

+    #[allow(dead_code)]
    pub fn set_root_dir(mut self, dir: &str) -> Self {
        self.persistent_ns_dir = dir.to_string();
        self
@@ -76,7 +76,7 @@ impl Namespace {

    // setup creates persistent namespace without switching to it.
    // Note, pid namespaces cannot be persisted.
-    pub fn setup(mut self) -> Result<Self> {
+    pub async fn setup(mut self) -> Result<Self> {
        fs::create_dir_all(&self.persistent_ns_dir)?;

        let ns_path = PathBuf::from(&self.persistent_ns_dir);
@@ -93,45 +93,51 @@ impl Namespace {
        self.path = new_ns_path.clone().into_os_string().into_string().unwrap();
        let hostname = self.hostname.clone();

-        let new_thread = thread::spawn(move || -> Result<()> {
-            let origin_ns_path = get_current_thread_ns_path(&ns_type.get());
+        let new_thread = tokio::spawn(async move {
+            if let Err(err) = || -> Result<()> {
+                let origin_ns_path = get_current_thread_ns_path(&ns_type.get());

-            File::open(Path::new(&origin_ns_path))?;
+                File::open(Path::new(&origin_ns_path))?;

-            // Create a new netns on the current thread.
-            let cf = ns_type.get_flags();
+                // Create a new namespace of the requested type on the current thread.
+                let cf = ns_type.get_flags();

-            unshare(cf)?;
+                unshare(cf)?;

-            if ns_type == NamespaceType::UTS && hostname.is_some() {
-                nix::unistd::sethostname(hostname.unwrap())?;
+                if ns_type == NamespaceType::UTS && hostname.is_some() {
+                    nix::unistd::sethostname(hostname.unwrap())?;
+                }
+                // Bind mount the new namespace from the current thread onto the mount point to persist it.
+                let source: &str = origin_ns_path.as_str();
+                let destination: &str = new_ns_path.as_path().to_str().unwrap_or("none");
+
+                let mut flags = MsFlags::empty();
+
+                if let Some(x) = FLAGS.get("rbind") {
+                    let (_, f) = *x;
+                    flags |= f;
+                };
+
+                let bare_mount = BareMount::new(source, destination, "none", flags, "", &logger);
+                bare_mount.mount().map_err(|e| {
+                    anyhow!(
+                        "Failed to mount {} to {} with err:{:?}",
+                        source,
+                        destination,
+                        e
+                    )
+                })?;
+
+                Ok(())
+            }() {
+                return Err(err);
            }
-            // Bind mount the new namespace from the current thread onto the mount point to persist it.
-            let source: &str = origin_ns_path.as_str();
-            let destination: &str = new_ns_path.as_path().to_str().unwrap_or("none");
-
-            let mut flags = MsFlags::empty();
-
-            if let Some(x) = FLAGS.get("rbind") {
-                let (_, f) = *x;
-                flags |= f;
-            };
-
-            let bare_mount = BareMount::new(source, destination, "none", flags, "", &logger);
-            bare_mount.mount().map_err(|e| {
-                anyhow!(
-                    "Failed to mount {} to {} with err:{:?}",
-                    source,
-                    destination,
-                    e
-                )
-            })?;

            Ok(())
        });

        new_thread
-            .join()
+            .await
            .map_err(|e| anyhow!("Failed to join thread {:?}!", e))??;

        Ok(self)
@@ -185,8 +191,8 @@ mod tests {
    use nix::sched::CloneFlags;
    use tempfile::Builder;

-    #[test]
-    fn test_setup_persistent_ns() {
+    #[tokio::test]
+    async fn test_setup_persistent_ns() {
        skip_if_not_root!();
        // Create dummy logger and temp folder.
        let logger = slog::Logger::root(slog::Discard, o!());
@@ -195,7 +201,8 @@ mod tests {
        let ns_ipc = Namespace::new(&logger)
            .get_ipc()
            .set_root_dir(tmpdir.path().to_str().unwrap())
-            .setup();
+            .setup()
+            .await;

        assert!(ns_ipc.is_ok());
        assert!(remove_mounts(&[ns_ipc.unwrap().path]).is_ok());
@@ -206,7 +213,8 @@ mod tests {
        let ns_uts = Namespace::new(&logger)
            .get_uts("test_hostname")
            .set_root_dir(tmpdir.path().to_str().unwrap())
-            .setup();
+            .setup()
+            .await;

        assert!(ns_uts.is_ok());
        assert!(remove_mounts(&[ns_uts.unwrap().path]).is_ok());
@@ -218,7 +226,8 @@ mod tests {
        let ns_pid = Namespace::new(&logger)
            .get_pid()
            .set_root_dir(tmpdir.path().to_str().unwrap())
-            .setup();
+            .setup()
+            .await;

        assert!(ns_pid.is_err());
    }
--- a/src/agent/src/netlink.rs
+++ b/src/agent/src/netlink.rs
--- a/src/agent/src/network.rs
+++ b/src/agent/src/network.rs
@@ -139,10 +139,10 @@ mod tests {
        assert_eq!(true, content.is_ok());
        let content = content.unwrap();

-        let expected_DNS: Vec<&str> = content.split('\n').collect();
+        let expected_dns: Vec<&str> = content.split('\n').collect();

        // assert the data are the same as /run/kata-containers/sandbox/resolv.conf
-        assert_eq!(dns, expected_DNS);
+        assert_eq!(dns, expected_dns);

        // umount /etc/resolv.conf
        let _ = mount::umount(dst_filename);
--- a/src/agent/src/pci.rs
+++ b/src/agent/src/pci.rs
@@ -0,0 +1,168 @@
+// Copyright Red Hat.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+use std::convert::TryInto;
+use std::fmt;
+use std::ops::Deref;
+use std::str::FromStr;
+
+use anyhow::anyhow;
+
+// The PCI spec reserves 5 bits for slot number (a.k.a. device
+// number), giving slots 0..31
+const SLOT_BITS: u8 = 5;
+const SLOT_MAX: u8 = (1 << SLOT_BITS) - 1;
+
+// Represents a PCI function's slot number (a.k.a. device number),
+// giving its location on a single bus
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub struct Slot(u8);
+
+impl Slot {
+    pub fn new<T: TryInto<u8> + fmt::Display + Copy>(v: T) -> anyhow::Result<Self> {
+        if let Ok(v8) = v.try_into() {
+            if v8 <= SLOT_MAX {
+                return Ok(Slot(v8));
+            }
+        }
+        Err(anyhow!(
+            "PCI slot {} should be in range [0..{:#x}]",
+            v,
+            SLOT_MAX
+        ))
+    }
+}
+
+impl FromStr for Slot {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> anyhow::Result<Self> {
+        let v = isize::from_str_radix(s, 16)?;
+        Slot::new(v)
+    }
+}
+
+impl fmt::Display for Slot {
+    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+        write!(f, "{:02x}", self.0)
+    }
+}
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct Path(Vec<Slot>);
+
+impl Path {
+    pub fn new(slots: Vec<Slot>) -> anyhow::Result<Self> {
+        if slots.is_empty() {
+            return Err(anyhow!("PCI path must have at least one element"));
+        }
+        Ok(Path(slots))
+    }
+}
+
+// Let Path be treated as a slice of Slots
+impl Deref for Path {
+    type Target = [Slot];
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl fmt::Display for Path {
+    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+        let sslots: Vec<String> = self
+            .0
+            .iter()
+            .map(std::string::ToString::to_string)
+            .collect();
+        write!(f, "{}", sslots.join("/"))
+    }
+}
+
+impl FromStr for Path {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> anyhow::Result<Self> {
+        let rslots: anyhow::Result<Vec<Slot>> = s.split('/').map(Slot::from_str).collect();
+        Path::new(rslots?)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::pci::{Path, Slot};
+    use std::str::FromStr;
+
+    #[test]
+    fn test_slot() {
+        // Valid slots
+        let slot = Slot::new(0x00).unwrap();
+        assert_eq!(format!("{}", slot), "00");
+
+        let slot = Slot::from_str("00").unwrap();
+        assert_eq!(format!("{}", slot), "00");
+
+        let slot = Slot::new(31).unwrap();
+        let slot2 = Slot::from_str("1f").unwrap();
+        assert_eq!(slot, slot2);
+
+        // Bad slots
+        let slot = Slot::new(-1);
+        assert!(slot.is_err());
+
+        let slot = Slot::new(32);
+        assert!(slot.is_err());
+
+        let slot = Slot::from_str("20");
+        assert!(slot.is_err());
+
+        let slot = Slot::from_str("xy");
+        assert!(slot.is_err());
+
+        let slot = Slot::from_str("00/");
+        assert!(slot.is_err());
+
+        let slot = Slot::from_str("");
+        assert!(slot.is_err());
+    }
+
+    #[test]
+    fn test_path() {
+        let slot3 = Slot::new(0x03).unwrap();
+        let slot4 = Slot::new(0x04).unwrap();
+        let slot5 = Slot::new(0x05).unwrap();
+
+        // Valid paths
+        let pcipath = Path::new(vec![slot3]).unwrap();
+        assert_eq!(format!("{}", pcipath), "03");
+        let pcipath2 = Path::from_str("03").unwrap();
+        assert_eq!(pcipath, pcipath2);
+        assert_eq!(pcipath.len(), 1);
+        assert_eq!(pcipath[0], slot3);
+
+        let pcipath = Path::new(vec![slot3, slot4]).unwrap();
+        assert_eq!(format!("{}", pcipath), "03/04");
+        let pcipath2 = Path::from_str("03/04").unwrap();
+        assert_eq!(pcipath, pcipath2);
+        assert_eq!(pcipath.len(), 2);
+        assert_eq!(pcipath[0], slot3);
+        assert_eq!(pcipath[1], slot4);
+
+        let pcipath = Path::new(vec![slot3, slot4, slot5]).unwrap();
+        assert_eq!(format!("{}", pcipath), "03/04/05");
+        let pcipath2 = Path::from_str("03/04/05").unwrap();
+        assert_eq!(pcipath, pcipath2);
+        assert_eq!(pcipath.len(), 3);
+        assert_eq!(pcipath[0], slot3);
+        assert_eq!(pcipath[1], slot4);
+        assert_eq!(pcipath[2], slot5);
+
+        // Bad paths
+        assert!(Path::new(vec!()).is_err());
+        assert!(Path::from_str("20").is_err());
+        assert!(Path::from_str("//").is_err());
+        assert!(Path::from_str("xyz").is_err());
+    }
+}
--- a/src/agent/src/rpc.rs
+++ b/src/agent/src/rpc.rs
--- a/src/agent/src/sandbox.rs
+++ b/src/agent/src/sandbox.rs
@@ -4,12 +4,12 @@
 //

 use crate::linux_abi::*;
-use crate::mount::{get_mount_fs_type, remove_mounts, TYPEROOTFS};
+use crate::mount::{get_mount_fs_type, remove_mounts, TYPE_ROOTFS};
 use crate::namespace::Namespace;
+use crate::netlink::Handle;
 use crate::network::Network;
 use anyhow::{anyhow, Context, Result};
 use libc::pid_t;
-use netlink::{RtnlHandle, NETLINK_ROUTE};
 use oci::{Hook, Hooks};
 use protocols::agent::OnlineCPUMemRequest;
 use regex::Regex;
@@ -22,9 +22,10 @@ use std::collections::HashMap;
 use std::fs;
 use std::os::unix::fs::PermissionsExt;
 use std::path::Path;
-use std::sync::mpsc::{self, Receiver, Sender};
-use std::sync::{Arc, Mutex};
+use std::sync::Arc;
 use std::{thread, time};
+use tokio::sync::mpsc::{channel, Receiver, Sender};
+use tokio::sync::Mutex;

 #[derive(Debug)]
 pub struct Sandbox {
@@ -42,18 +43,18 @@ pub struct Sandbox {
    pub storages: HashMap<String, u32>,
    pub running: bool,
    pub no_pivot_root: bool,
-    pub sender: Option<Sender<i32>>,
-    pub rtnl: Option<RtnlHandle>,
+    pub sender: Option<tokio::sync::oneshot::Sender<i32>>,
+    pub rtnl: Handle,
    pub hooks: Option<Hooks>,
    pub event_rx: Arc<Mutex<Receiver<String>>>,
-    pub event_tx: Sender<String>,
+    pub event_tx: Option<Sender<String>>,
 }

 impl Sandbox {
    pub fn new(logger: &Logger) -> Result<Self> {
        let fs_type = get_mount_fs_type("/")?;
        let logger = logger.new(o!("subsystem" => "sandbox"));
-        let (tx, rx) = mpsc::channel::<String>();
+        let (tx, rx) = channel::<String>(100);
        let event_rx = Arc::new(Mutex::new(rx));

        Ok(Sandbox {
@@ -70,12 +71,12 @@ impl Sandbox {
            sandbox_pidns: None,
            storages: HashMap::new(),
            running: false,
-            no_pivot_root: fs_type.eq(TYPEROOTFS),
+            no_pivot_root: fs_type.eq(TYPE_ROOTFS),
            sender: None,
-            rtnl: Some(RtnlHandle::new(NETLINK_ROUTE, 0).unwrap()),
+            rtnl: Handle::new()?,
            hooks: None,
            event_rx,
-            event_tx: tx,
+            event_tx: Some(tx),
        })
    }

@@ -149,25 +150,19 @@ impl Sandbox {
        Ok(())
    }

-    pub fn is_running(&self) -> bool {
-        self.running
-    }
-
-    pub fn set_hostname(&mut self, hostname: String) {
-        self.hostname = hostname;
-    }
-
-    pub fn setup_shared_namespaces(&mut self) -> Result<bool> {
+    pub async fn setup_shared_namespaces(&mut self) -> Result<bool> {
        // Set up shared IPC namespace
        self.shared_ipcns = Namespace::new(&self.logger)
            .get_ipc()
            .setup()
+            .await
            .context("Failed to setup persistent IPC namespace")?;

        // // Set up shared UTS namespace
        self.shared_utsns = Namespace::new(&self.logger)
            .get_uts(self.hostname.as_str())
            .setup()
+            .await
            .context("Failed to setup persistent UTS namespace")?;

        Ok(true)
@@ -214,9 +209,9 @@ impl Sandbox {
        None
    }

-    pub fn destroy(&mut self) -> Result<()> {
+    pub async fn destroy(&mut self) -> Result<()> {
        for ctr in self.containers.values_mut() {
-            ctr.destroy()?;
+            ctr.destroy().await?;
        }
        Ok(())
    }
@@ -315,15 +310,32 @@ impl Sandbox {
        Ok(hooks)
    }

-    pub fn run_oom_event_monitor(&self, rx: Receiver<String>, container_id: String) {
-        let tx = self.event_tx.clone();
+    pub async fn run_oom_event_monitor(&self, mut rx: Receiver<String>, container_id: String) {
        let logger = self.logger.clone();

-        thread::spawn(move || {
-            for event in rx {
+        if self.event_tx.is_none() {
+            error!(
+                logger,
+                "sandbox.event_tx not found in run_oom_event_monitor"
+            );
+            return;
+        }
+
+        let tx = self.event_tx.as_ref().unwrap().clone();
+
+        tokio::spawn(async move {
+            loop {
+                let event = rx.recv().await;
+                // None means the container has exited,
+                // and sender in OOM notifier is dropped.
+                if event.is_none() {
+                    return;
+                }
                info!(logger, "got an OOM event {:?}", event);
+
                let _ = tx
                    .send(container_id.clone())
+                    .await
                    .map_err(|e| error!(logger, "failed to send message: {:?}", e));
            }
        });
@@ -383,7 +395,7 @@ fn online_cpus(logger: &Logger, num: i32) -> Result<i32> {
            logger,
            SYSFS_CPU_ONLINE_PATH,
            r"cpu[0-9]+",
-            (num - onlined_count),
+            num - onlined_count,
        );
        if r.is_err() {
            return r;
@@ -428,8 +440,8 @@ mod tests {
        baremount.mount()
    }

-    #[test]
-    fn set_sandbox_storage() {
+    #[tokio::test]
+    async fn set_sandbox_storage() {
        let logger = slog::Logger::root(slog::Discard, o!());
        let mut s = Sandbox::new(&logger).unwrap();

@@ -462,8 +474,8 @@ mod tests {
        );
    }

-    #[test]
-    fn remove_sandbox_storage() {
+    #[tokio::test]
+    async fn remove_sandbox_storage() {
        skip_if_not_root!();

        let logger = slog::Logger::root(slog::Discard, o!());
@@ -518,9 +530,9 @@ mod tests {
        assert!(s.remove_sandbox_storage(destdir_path).is_ok());
    }

-    #[test]
+    #[tokio::test]
    #[allow(unused_assignments)]
-    fn unset_and_remove_sandbox_storage() {
+    async fn unset_and_remove_sandbox_storage() {
        skip_if_not_root!();

        let logger = slog::Logger::root(slog::Discard, o!());
@@ -570,8 +582,8 @@ mod tests {
        assert!(s.unset_and_remove_sandbox_storage(&other_dir_str).is_err());
    }

-    #[test]
-    fn unset_sandbox_storage() {
+    #[tokio::test]
+    async fn unset_sandbox_storage() {
        let logger = slog::Logger::root(slog::Discard, o!());
        let mut s = Sandbox::new(&logger).unwrap();

@@ -624,13 +636,16 @@ mod tests {
    }

    fn create_dummy_opts() -> CreateOpts {
-        let mut root = Root::default();
-        root.path = String::from("/");
+        let root = Root {
+            path: String::from("/"),
+            ..Default::default()
+        };

-        let linux = Linux::default();
-        let mut spec = Spec::default();
-        spec.root = Some(root);
-        spec.linux = Some(linux);
+        let spec = Spec {
+            linux: Some(Linux::default()),
+            root: Some(root),
+            ..Default::default()
+        };

        CreateOpts {
            cgroup_name: "".to_string(),
@@ -653,8 +668,8 @@ mod tests {
        .unwrap()
    }

-    #[test]
-    fn get_container_entry_exist() {
+    #[tokio::test]
+    async fn get_container_entry_exist() {
        skip_if_not_root!();
        let logger = slog::Logger::root(slog::Discard, o!());
        let mut s = Sandbox::new(&logger).unwrap();
@@ -666,8 +681,8 @@ mod tests {
        assert!(cnt.is_some());
    }

-    #[test]
-    fn get_container_no_entry() {
+    #[tokio::test]
+    async fn get_container_no_entry() {
        let logger = slog::Logger::root(slog::Discard, o!());
        let mut s = Sandbox::new(&logger).unwrap();

@@ -675,8 +690,8 @@ mod tests {
        assert!(cnt.is_none());
    }

-    #[test]
-    fn add_and_get_container() {
+    #[tokio::test]
+    async fn add_and_get_container() {
        skip_if_not_root!();
        let logger = slog::Logger::root(slog::Discard, o!());
        let mut s = Sandbox::new(&logger).unwrap();
@@ -685,8 +700,9 @@ mod tests {
        s.add_container(linux_container);
        assert!(s.get_container("some_id").is_some());
    }
-    #[test]
-    fn update_shared_pidns() {
+
+    #[tokio::test]
+    async fn update_shared_pidns() {
        skip_if_not_root!();
        let logger = slog::Logger::root(slog::Discard, o!());
        let mut s = Sandbox::new(&logger).unwrap();
@@ -702,8 +718,9 @@ mod tests {
        let ns_path = format!("/proc/{}/ns/pid", test_pid);
        assert_eq!(s.sandbox_pidns.unwrap().path, ns_path);
    }
-    #[test]
-    fn add_guest_hooks() {
+
+    #[tokio::test]
+    async fn add_guest_hooks() {
        let logger = slog::Logger::root(slog::Discard, o!());
        let mut s = Sandbox::new(&logger).unwrap();
        let tmpdir = Builder::new().tempdir().unwrap();
@@ -725,30 +742,11 @@ mod tests {
        assert!(s.hooks.as_ref().unwrap().poststop.is_empty());
    }

-    #[test]
-    pub fn test_sandbox_is_running() {
+    #[tokio::test]
+    async fn test_sandbox_set_destroy() {
        let logger = slog::Logger::root(slog::Discard, o!());
        let mut s = Sandbox::new(&logger).unwrap();
-        s.running = true;
-        assert!(s.is_running());
-        s.running = false;
-        assert!(!s.is_running());
-    }
-
-    #[test]
-    fn test_sandbox_set_hostname() {
-        let logger = slog::Logger::root(slog::Discard, o!());
-        let mut s = Sandbox::new(&logger).unwrap();
-        let hostname = "abc123";
-        s.set_hostname(hostname.to_string());
-        assert_eq!(s.hostname, hostname);
-    }
-
-    #[test]
-    fn test_sandbox_set_destroy() {
-        let logger = slog::Logger::root(slog::Discard, o!());
-        let mut s = Sandbox::new(&logger).unwrap();
-        let ret = s.destroy();
+        let ret = s.destroy().await;
        assert!(ret.is_ok());
    }
 }
--- a/src/agent/src/signal.rs
+++ b/src/agent/src/signal.rs
@@ -0,0 +1,159 @@
+// Copyright (c) 2019-2020 Ant Financial
+// Copyright (c) 2020 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use crate::sandbox::Sandbox;
+use anyhow::{anyhow, Result};
+use nix::sys::wait::WaitPidFlag;
+use nix::sys::wait::{self, WaitStatus};
+use nix::unistd;
+use prctl::set_child_subreaper;
+use slog::{error, info, o, Logger};
+use std::sync::Arc;
+use tokio::select;
+use tokio::signal::unix::{signal, SignalKind};
+use tokio::sync::watch::Receiver;
+use tokio::sync::Mutex;
+use unistd::Pid;
+
+async fn handle_sigchild(logger: Logger, sandbox: Arc<Mutex<Sandbox>>) -> Result<()> {
+    info!(logger, "handling signal"; "signal" => "SIGCHLD");
+
+    loop {
+        let result = wait::waitpid(
+            Some(Pid::from_raw(-1)),
+            Some(WaitPidFlag::WNOHANG | WaitPidFlag::__WALL),
+        );
+
+        let wait_status = match result {
+            Ok(s) => {
+                if s == WaitStatus::StillAlive {
+                    return Ok(());
+                }
+                s
+            }
+            Err(e) => return Err(anyhow!(e).context("waitpid reaper failed")),
+        };
+
+        info!(logger, "wait_status"; "wait_status result" => format!("{:?}", wait_status));
+
+        if let Some(pid) = wait_status.pid() {
+            let raw_pid = pid.as_raw();
+            let child_pid = format!("{}", raw_pid);
+
+            let logger = logger.new(o!("child-pid" => child_pid));
+
+            let sandbox_ref = sandbox.clone();
+            let mut sandbox = sandbox_ref.lock().await;
+
+            let process = sandbox.find_process(raw_pid);
+            if process.is_none() {
+                info!(logger, "child exited unexpectedly");
+                continue;
+            }
+
+            let mut p = process.unwrap();
+
+            if p.exit_pipe_w.is_none() {
+                info!(logger, "process exit pipe not set");
+                continue;
+            }
+
+            let pipe_write = p.exit_pipe_w.unwrap();
+            let ret: i32;
+
+            match wait_status {
+                WaitStatus::Exited(_, c) => ret = c,
+                WaitStatus::Signaled(_, sig, _) => ret = sig as i32,
+                _ => {
+                    info!(logger, "got wrong status for process";
+                                  "child-status" => format!("{:?}", wait_status));
+                    continue;
+                }
+            }
+
+            p.exit_code = ret;
+            let _ = unistd::close(pipe_write);
+
+            info!(logger, "notify term to close");
+            // close the socket file to notify readStdio to close terminal specifically
+            // in case this process's terminal has been inherited by its children.
+            p.notify_term_close();
+        }
+    }
+}
+
+pub async fn setup_signal_handler(
+    logger: Logger,
+    sandbox: Arc<Mutex<Sandbox>>,
+    mut shutdown: Receiver<bool>,
+) -> Result<()> {
+    let logger = logger.new(o!("subsystem" => "signals"));
+
+    set_child_subreaper(true)
+        .map_err(|err| anyhow!(err).context("failed to setup agent as a child subreaper"))?;
+
+    let mut sigchild_stream = signal(SignalKind::child())?;
+
+    loop {
+        select! {
+            _ = shutdown.changed() => {
+                info!(logger, "got shutdown request");
+                break;
+            }
+
+            _ = sigchild_stream.recv() => {
+                let result = handle_sigchild(logger.clone(), sandbox.clone()).await;
+
+                match result {
+                    Ok(()) => (),
+                    Err(e) => {
+                        // Log errors, but don't abort - just wait for more signals!
+                        error!(logger, "failed to handle signal"; "error" => format!("{:?}", e));
+                    }
+                }
+            }
+        }
+    }
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tokio::pin;
+    use tokio::sync::watch::channel;
+    use tokio::time::Duration;
+
+    #[tokio::test]
+    async fn test_setup_signal_handler() {
+        let logger = slog::Logger::root(slog::Discard, o!());
+        let s = Sandbox::new(&logger).unwrap();
+
+        let sandbox = Arc::new(Mutex::new(s));
+
+        let (tx, rx) = channel(true);
+
+        let handle = tokio::spawn(setup_signal_handler(logger, sandbox, rx));
+
+        let timeout = tokio::time::sleep(Duration::from_secs(1));
+        pin!(timeout);
+
+        tx.send(true).expect("failed to request shutdown");
+
+        loop {
+            select! {
+                _ = handle => {
+                    println!("INFO: task completed");
+                    break;
+                },
+                _ = &mut timeout => {
+                    panic!("signal thread failed to stop");
+                }
+            }
+        }
+    }
+}
--- a/src/agent/src/uevent.rs
+++ b/src/agent/src/uevent.rs
@@ -7,10 +7,16 @@ use crate::device::online_device;
 use crate::linux_abi::*;
 use crate::sandbox::Sandbox;
 use crate::GLOBAL_DEVICE_WATCHER;
-use netlink::{RtnlHandle, NETLINK_UEVENT};
 use slog::Logger;
-use std::sync::{Arc, Mutex};
-use std::thread;
+
+use anyhow::Result;
+use netlink_sys::{protocols, SocketAddr, TokioSocket};
+use nix::errno::Errno;
+use std::os::unix::io::FromRawFd;
+use std::sync::Arc;
+use tokio::select;
+use tokio::sync::watch::Receiver;
+use tokio::sync::Mutex;

 #[derive(Debug, Default)]
 struct Uevent {
@@ -51,16 +57,20 @@ impl Uevent {
        let pci_root_bus_path = create_pci_root_bus_path();
        self.action == U_EVENT_ACTION_ADD
            && self.subsystem == "block"
-            && self.devpath.starts_with(&pci_root_bus_path)
-            && self.devname != ""
+            && {
+                self.devpath.starts_with(pci_root_bus_path.as_str())
+                    || self.devpath.starts_with(ACPI_DEV_PATH) // NVDIMM/PMEM devices
+            }
+            && !self.devname.is_empty()
    }

-    fn handle_block_add_event(&self, sandbox: &Arc<Mutex<Sandbox>>) {
+    async fn handle_block_add_event(&self, sandbox: &Arc<Mutex<Sandbox>>) {
        let pci_root_bus_path = create_pci_root_bus_path();

        // Keep the same lock order as device::get_device_name(), otherwise it may cause deadlock.
-        let mut w = GLOBAL_DEVICE_WATCHER.lock().unwrap();
-        let mut sb = sandbox.lock().unwrap();
+        let watcher = GLOBAL_DEVICE_WATCHER.clone();
+        let mut w = watcher.lock().await;
+        let mut sb = sandbox.lock().await;

        // Add the device node name to the pci device map.
        sb.pci_device_map
@@ -70,20 +80,28 @@ impl Uevent {
        // Close the channel after watcher has been notified.
        let devpath = self.devpath.clone();
        let empties: Vec<_> = w
-            .iter()
+            .iter_mut()
            .filter(|(dev_addr, _)| {
                let pci_p = format!("{}/{}", pci_root_bus_path, *dev_addr);

                // blk block device
                devpath.starts_with(pci_p.as_str()) ||
-                    // scsi block device
-                    {
-                        (*dev_addr).ends_with(SCSI_BLOCK_SUFFIX) &&
-                            devpath.contains(*dev_addr)
-                    }
+                // scsi block device
+                {
+                    (*dev_addr).ends_with(SCSI_BLOCK_SUFFIX) &&
+                        devpath.contains(*dev_addr)
+                } ||
+                // nvdimm/pmem device
+                {
+                    let pmem_suffix = format!("/{}/{}", SCSI_BLOCK_SUFFIX, self.devname);
+                    devpath.starts_with(ACPI_DEV_PATH) &&
+                        devpath.ends_with(pmem_suffix.as_str()) &&
+                        dev_addr.ends_with(pmem_suffix.as_str())
+                }
            })
            .map(|(k, sender)| {
                let devname = self.devname.clone();
+                let sender = sender.take().unwrap();
                let _ = sender.send(devname);
                k.clone()
            })
@@ -95,9 +113,9 @@ impl Uevent {
        }
    }

-    fn process(&self, logger: &Logger, sandbox: &Arc<Mutex<Sandbox>>) {
+    async fn process(&self, logger: &Logger, sandbox: &Arc<Mutex<Sandbox>>) {
        if self.is_block_add_event() {
-            return self.handle_block_add_event(sandbox);
+            return self.handle_block_add_event(sandbox).await;
        } else if self.action == U_EVENT_ACTION_ADD {
            let online_path = format!("{}/{}/online", SYSFS_DIR, &self.devpath);
            // It's a memory hot-add event.
@@ -117,34 +135,67 @@ impl Uevent {
    }
 }

-pub fn watch_uevents(sandbox: Arc<Mutex<Sandbox>>) {
-    thread::spawn(move || {
-        let rtnl = RtnlHandle::new(NETLINK_UEVENT, 1).unwrap();
-        let logger = sandbox
-            .lock()
-            .unwrap()
-            .logger
-            .new(o!("subsystem" => "uevent"));
+pub async fn watch_uevents(
+    sandbox: Arc<Mutex<Sandbox>>,
+    mut shutdown: Receiver<bool>,
+) -> Result<()> {
+    let sref = sandbox.clone();
+    let s = sref.lock().await;
+    let logger = s.logger.new(o!("subsystem" => "uevent"));

-        loop {
-            match rtnl.recv_message() {
-                Err(e) => {
-                    error!(logger, "receive uevent message failed"; "error" => format!("{}", e))
-                }
-                Ok(data) => {
-                    let text = String::from_utf8(data);
-                    match text {
-                        Err(e) => {
-                            error!(logger, "failed to convert bytes to text"; "error" => format!("{}", e))
+    // Unlock the sandbox to allow a successful shutdown
+    drop(s);
+
+    info!(logger, "starting uevents handler");
+
+    let mut socket;
+
+    unsafe {
+        let fd = libc::socket(
+            libc::AF_NETLINK,
+            libc::SOCK_DGRAM | libc::SOCK_CLOEXEC,
+            protocols::NETLINK_KOBJECT_UEVENT as libc::c_int,
+        );
+        socket = TokioSocket::from_raw_fd(fd);
+    }
+
+    socket.bind(&SocketAddr::new(0, 1))?;
+
+    loop {
+        select! {
+            _ = shutdown.changed() => {
+                info!(logger, "got shutdown request");
+                break;
+            }
+            result = socket.recv_from_full() => {
+                match result {
+                    Err(e) => {
+                        error!(logger, "failed to receive uevent"; "error" => format!("{}", e))
+                    }
+                    Ok((buf, addr)) => {
+                        if addr.port_number() != 0 {
+                            // not our netlink message
+                            let err_msg = format!("{:?}", nix::Error::Sys(Errno::EBADMSG));
+                            error!(logger, "receive uevent message failed"; "error" => err_msg);
+                            continue;
                        }
-                        Ok(text) => {
-                            let event = Uevent::new(&text);
-                            info!(logger, "got uevent message"; "event" => format!("{:?}", event));
-                            event.process(&logger, &sandbox);
+
+                        let text = String::from_utf8(buf);
+                        match text {
+                            Err(e) => {
+                                error!(logger, "failed to convert bytes to text"; "error" => format!("{}", e))
+                            }
+                            Ok(text) => {
+                                let event = Uevent::new(&text);
+                                info!(logger, "got uevent message"; "event" => format!("{:?}", event));
+                                event.process(&logger, &sandbox).await;
+                            }
                        }
                    }
                }
            }
        }
-    });
+    }
+
+    Ok(())
 }
--- a/src/agent/src/util.rs
+++ b/src/agent/src/util.rs
@@ -0,0 +1,342 @@
+// Copyright (c) 2021 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use std::io;
+use std::io::ErrorKind;
+use tokio::io::{AsyncReadExt, AsyncWriteExt};
+use tokio::sync::watch::Receiver;
+
+// Size of I/O read buffer
+const BUF_SIZE: usize = 8192;
+
+// Interruptable I/O copy using readers and writers
+// (an interruptable version of "io::copy()").
+pub async fn interruptable_io_copier<R: Sized, W: Sized>(
+    mut reader: R,
+    mut writer: W,
+    mut shutdown: Receiver<bool>,
+) -> io::Result<u64>
+where
+    R: tokio::io::AsyncRead + Unpin,
+    W: tokio::io::AsyncWrite + Unpin,
+{
+    let mut total_bytes: u64 = 0;
+
+    let mut buf: [u8; BUF_SIZE] = [0; BUF_SIZE];
+
+    loop {
+        tokio::select! {
+            _ = shutdown.changed() => {
+                eprintln!("INFO: interruptable_io_copier: got shutdown request");
+                break;
+            },
+
+            result = reader.read(&mut buf) => {
+                let bytes = match result {
+                    Ok(0) => return Ok(total_bytes),
+                    Ok(len) => len,
+                    Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
+                    Err(e) => return Err(e),
+                };
+
+                total_bytes += bytes as u64;
+
+                // Actually copy the data ;)
+                writer.write_all(&buf[..bytes]).await?;
+            },
+        };
+    }
+
+    Ok(total_bytes)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::io;
+    use std::io::Cursor;
+    use std::io::Write;
+    use std::pin::Pin;
+    use std::sync::{Arc, Mutex};
+    use std::task::{Context, Poll, Poll::Ready};
+    use tokio::pin;
+    use tokio::select;
+    use tokio::sync::watch::channel;
+    use tokio::task::JoinError;
+    use tokio::time::Duration;
+
+    #[derive(Debug, Default, Clone)]
+    struct BufWriter {
+        data: Arc<Mutex<Vec<u8>>>,
+        slow_write: bool,
+        write_delay: Duration,
+    }
+
+    impl BufWriter {
+        fn new() -> Self {
+            BufWriter {
+                data: Arc::new(Mutex::new(Vec::<u8>::new())),
+                slow_write: false,
+                write_delay: Duration::new(0, 0),
+            }
+        }
+
+        fn write_vec(&mut self, buf: &[u8]) -> io::Result<usize> {
+            let vec_ref = self.data.clone();
+
+            let mut vec_locked = vec_ref.lock();
+
+            let mut v = vec_locked.as_deref_mut().unwrap();
+
+            if self.write_delay.as_nanos() > 0 {
+                std::thread::sleep(self.write_delay);
+            }
+
+            std::io::Write::write(&mut v, buf)
+        }
+    }
+
+    impl Write for BufWriter {
+        fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+            self.write_vec(buf)
+        }
+
+        fn flush(&mut self) -> io::Result<()> {
+            let vec_ref = self.data.clone();
+
+            let mut vec_locked = vec_ref.lock();
+
+            let v = vec_locked.as_deref_mut().unwrap();
+
+            std::io::Write::flush(v)
+        }
+    }
+
+    impl tokio::io::AsyncWrite for BufWriter {
+        fn poll_write(
+            mut self: Pin<&mut Self>,
+            _cx: &mut Context<'_>,
+            buf: &[u8],
+        ) -> Poll<Result<usize, io::Error>> {
+            let result = self.write_vec(buf);
+
+            Ready(result)
+        }
+
+        fn poll_flush(
+            self: Pin<&mut Self>,
+            _cx: &mut Context<'_>,
+        ) -> Poll<Result<(), std::io::Error>> {
+            // NOP
+            Ready(Ok(()))
+        }
+
+        fn poll_shutdown(
+            self: Pin<&mut Self>,
+            _cx: &mut Context<'_>,
+        ) -> Poll<Result<(), std::io::Error>> {
+            // NOP
+            Ready(Ok(()))
+        }
+    }
+
+    impl ToString for BufWriter {
+        fn to_string(&self) -> String {
+            let data_ref = self.data.clone();
+            let output = data_ref.lock().unwrap();
+            let s = (*output).clone();
+
+            String::from_utf8(s).unwrap()
+        }
+    }
+
+    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+    async fn test_interruptable_io_copier_reader() {
+        #[derive(Debug)]
+        struct TestData {
+            reader_value: String,
+            result: io::Result<u64>,
+        }
+
+        let tests = &[
+            TestData {
+                reader_value: "".into(),
+                result: Ok(0),
+            },
+            TestData {
+                reader_value: "a".into(),
+                result: Ok(1),
+            },
+            TestData {
+                reader_value: "foo".into(),
+                result: Ok(3),
+            },
+            TestData {
+                reader_value: "b".repeat(BUF_SIZE - 1),
+                result: Ok((BUF_SIZE - 1) as u64),
+            },
+            TestData {
+                reader_value: "c".repeat(BUF_SIZE),
+                result: Ok((BUF_SIZE) as u64),
+            },
+            TestData {
+                reader_value: "d".repeat(BUF_SIZE + 1),
+                result: Ok((BUF_SIZE + 1) as u64),
+            },
+            TestData {
+                reader_value: "e".repeat((2 * BUF_SIZE) - 1),
+                result: Ok(((2 * BUF_SIZE) - 1) as u64),
+            },
+            TestData {
+                reader_value: "f".repeat(2 * BUF_SIZE),
+                result: Ok((2 * BUF_SIZE) as u64),
+            },
+            TestData {
+                reader_value: "g".repeat((2 * BUF_SIZE) + 1),
+                result: Ok(((2 * BUF_SIZE) + 1) as u64),
+            },
+        ];
+
+        for (i, d) in tests.iter().enumerate() {
+            // Create a string containing details of the test
+            let msg = format!("test[{}]: {:?}", i, d);
+
+            let (tx, rx) = channel(true);
+            let reader = Cursor::new(d.reader_value.clone());
+            let writer = BufWriter::new();
+
+            // XXX: Pass a copy of the writer to the copier to allow the
+            // result of the write operation to be checked below.
+            let handle = tokio::spawn(interruptable_io_copier(reader, writer.clone(), rx));
+
+            // Allow time for the task to be spawned.
+            tokio::time::sleep(Duration::from_secs(1)).await;
+
+            let timeout = tokio::time::sleep(Duration::from_secs(1));
+            pin!(timeout);
+
+            // Since the readers only supply a small number of bytes, the
+            // copier will quickly read zero bytes (EOF) and return, ending
+            // the task and dropping the Receiver.
+            assert!(tx.is_closed(), "{}", msg);
+
+            let spawn_result: std::result::Result<
+                std::result::Result<u64, std::io::Error>,
+                JoinError,
+            >;
+
+            let result: std::result::Result<u64, std::io::Error>;
+
+            select! {
+                res = handle => spawn_result = res,
+                _ = &mut timeout => panic!("timed out"),
+            }
+
+            assert!(spawn_result.is_ok());
+
+            result = spawn_result.unwrap();
+
+            assert!(result.is_ok());
+
+            let byte_count = result.unwrap() as usize;
+            assert_eq!(byte_count, d.reader_value.len(), "{}", msg);
+
+            let value = writer.to_string();
+            assert_eq!(value, d.reader_value, "{}", msg);
+        }
+    }
+
+    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+    async fn test_interruptable_io_copier_eof() {
+        // Create an async reader that always returns EOF
+        let reader = tokio::io::empty();
+
+        let (tx, rx) = channel(true);
+        let writer = BufWriter::new();
+
+        let handle = tokio::spawn(interruptable_io_copier(reader, writer.clone(), rx));
+
+        // Allow time for the task to be spawned.
+        tokio::time::sleep(Duration::from_secs(1)).await;
+
+        let timeout = tokio::time::sleep(Duration::from_secs(1));
+        pin!(timeout);
+
+        assert!(tx.is_closed());
+
+        let spawn_result: std::result::Result<std::result::Result<u64, std::io::Error>, JoinError>;
+
+        let result: std::result::Result<u64, std::io::Error>;
+
+        select! {
+            res = handle => spawn_result = res,
+            _ = &mut timeout => panic!("timed out"),
+        }
+
+        assert!(spawn_result.is_ok());
+
+        result = spawn_result.unwrap();
+
+        assert!(result.is_ok());
+
+        let byte_count = result.unwrap();
+        assert_eq!(byte_count, 0);
+    }
+
+    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+    async fn test_interruptable_io_copier_shutdown() {
+        // Create an async reader that creates an infinite stream of bytes
+        // (which allows us to interrupt it, since we know it is always busy ;)
+        const REPEAT_CHAR: u8 = b'r';
+
+        let reader = tokio::io::repeat(REPEAT_CHAR);
+
+        let (tx, rx) = channel(true);
+        let writer = BufWriter::new();
+
+        let handle = tokio::spawn(interruptable_io_copier(reader, writer.clone(), rx));
+
+        // Allow time for the task to be spawned.
+        tokio::time::sleep(Duration::from_secs(1)).await;
+
+        let timeout = tokio::time::sleep(Duration::from_secs(1));
+        pin!(timeout);
+
+        assert!(!tx.is_closed());
+
+        tx.send(true).expect("failed to request shutdown");
+
+        let spawn_result: std::result::Result<std::result::Result<u64, std::io::Error>, JoinError>;
+
+        let result: std::result::Result<u64, std::io::Error>;
+
+        select! {
+            res = handle => spawn_result = res,
+            _ = &mut timeout => panic!("timed out"),
+        }
+
+        assert!(spawn_result.is_ok());
+
+        result = spawn_result.unwrap();
+
+        assert!(result.is_ok());
+
+        let byte_count = result.unwrap();
+
+        let value = writer.to_string();
+
+        let writer_byte_count = value.len() as u64;
+
+        assert_eq!(byte_count, writer_byte_count);
+
+        // Remove the char used as a payload. If anything else remains,
+        // something went wrong.
+        let mut remainder = value;
+
+        remainder.retain(|c| c != REPEAT_CHAR as char);
+
+        assert_eq!(remainder.len(), 0);
+    }
+}
--- a/src/agent/src/version.rs.in
+++ b/src/agent/src/version.rs.in
@@ -7,6 +7,8 @@
 // WARNING: This file is auto-generated - DO NOT EDIT!
 //

+#![allow(dead_code)]
+
 pub const AGENT_VERSION: &str = "@AGENT_VERSION@";
 pub const API_VERSION: &str = "@API_VERSION@";
 pub const VERSION_COMMIT: &str = "@VERSION_COMMIT@";
--- a/src/runtime/.gitignore
+++ b/src/runtime/.gitignore
@@ -8,9 +8,7 @@ coverage.html
 /cli/config/configuration-acrn.toml
 /cli/config/configuration-clh.toml
 /cli/config/configuration-fc.toml
-/cli/config/configuration-nemu.toml
 /cli/config/configuration-qemu.toml
-/cli/config/configuration-qemu-virtiofs.toml
 /cli/config/configuration-clh.toml
 /cli/config-generated.go
 /cli/containerd-shim-kata-v2/config-generated.go
--- a/src/runtime/Makefile
+++ b/src/runtime/Makefile
@@ -4,14 +4,6 @@
 # SPDX-License-Identifier: Apache-2.0
 #

-distro := $(shell \
-for file in /etc/os-release /usr/lib/os-release; do \
-    if [ -e $$file ]; then \
-        grep ^ID= $$file|cut -d= -f2-|tr -d '"'; \
-        break; \
-    fi \
-done)
-
 SKIP_GO_VERSION_CHECK=
 include golang.mk

@@ -57,7 +49,6 @@ BINLIBEXECLIST :=
 BIN_PREFIX = $(PROJECT_TYPE)
 PROJECT_DIR = $(PROJECT_TAG)
 IMAGENAME = $(PROJECT_TAG).img
-INITRDNAME = $(PROJECT_TAG)-initrd.img

 TARGET = $(BIN_PREFIX)-runtime
 TARGET_OUTPUT = $(CURDIR)/$(TARGET)
@@ -91,7 +82,6 @@ SHAREDIR := $(PREFIX)/share
 DEFAULTSDIR := $(SHAREDIR)/defaults

 COLLECT_SCRIPT = data/kata-collect-data.sh
-COLLECT_SCRIPT_SRC = $(COLLECT_SCRIPT).in

 # @RUNTIME_NAME@ should be replaced with the target in generated files
 RUNTIME_NAME = $(TARGET)
@@ -111,13 +101,11 @@ BASH_COMPLETIONS := data/completions/bash/kata-runtime
 BASH_COMPLETIONSDIR := $(SHAREDIR)/bash-completion/completions

 PKGDATADIR := $(PREFIXDEPS)/share/$(PROJECT_DIR)
-PKGLIBDIR := $(LOCALSTATEDIR)/lib/$(PROJECT_DIR)
 PKGRUNDIR := $(LOCALSTATEDIR)/run/$(PROJECT_DIR)
 PKGLIBEXECDIR := $(LIBEXECDIR)/$(PROJECT_DIR)

 KERNELDIR := $(PKGDATADIR)

-INITRDPATH := $(PKGDATADIR)/$(INITRDNAME)
 IMAGEPATH := $(PKGDATADIR)/$(IMAGENAME)
 FIRMWAREPATH :=

@@ -126,7 +114,6 @@ CONFIG_FILE = configuration.toml

 HYPERVISOR_ACRN = acrn
 HYPERVISOR_FC = firecracker
-JAILER_FC = jailer
 HYPERVISOR_QEMU = qemu
 HYPERVISOR_CLH = cloud-hypervisor

@@ -140,7 +127,6 @@ QEMUPATH := $(QEMUBINDIR)/$(QEMUCMD)
 QEMUVALIDHYPERVISORPATHS := [\"$(QEMUPATH)\"]

 QEMUVIRTIOFSPATH := $(QEMUBINDIR)/$(QEMUVIRTIOFSCMD)
-QEMUVALIDVIRTIOFSPATHS := [\"$(QEMUVIRTIOFSPATH)\"]

 CLHPATH := $(CLHBINDIR)/$(CLHCMD)
 CLHVALIDHYPERVISORPATHS := [\"$(CLHPATH)\"]
@@ -155,9 +141,6 @@ ACRNVALIDHYPERVISORPATHS := [\"$(ACRNPATH)\"]
 ACRNCTLPATH := $(ACRNBINDIR)/$(ACRNCTLCMD)
 ACRNVALIDCTLPATHS := [\"$(ACRNCTLPATH)\"]

-SHIMCMD := $(BIN_PREFIX)-shim
-SHIMPATH := $(PKGLIBEXECDIR)/$(SHIMCMD)
-
 NETMONCMD := $(BIN_PREFIX)-netmon
 NETMONPATH := $(PKGLIBEXECDIR)/$(NETMONCMD)

@@ -185,7 +168,6 @@ DEFAULTEXPFEATURES := []
 DEFENTROPYSOURCE := /dev/urandom

 DEFDISABLEBLOCK := false
-DEFSHAREDFS := virtio-9p
 DEFSHAREDFS_QEMU_VIRTIOFS := virtio-fs
 DEFVIRTIOFSDAEMON := $(LIBEXECDIR)/kata-qemu/virtiofsd
 DEFVALIDVIRTIOFSDAEMONPATHS := [\"$(DEFVIRTIOFSDAEMON)\"]
@@ -200,23 +182,18 @@ DEFVIRTIOFSCACHE ?= auto
 # Make sure you quote args.
 DEFVIRTIOFSEXTRAARGS ?= [\"--thread-pool-size=1\"]
 DEFENABLEIOTHREADS := false
-DEFENABLEMEMPREALLOC := false
-DEFENABLEHUGEPAGES := false
 DEFENABLEVHOSTUSERSTORE := false
 DEFVHOSTUSERSTOREPATH := $(PKGRUNDIR)/vhost-user
 DEFVALIDVHOSTUSERSTOREPATHS := [\"$(DEFVHOSTUSERSTOREPATH)\"]
 DEFFILEMEMBACKEND := ""
 DEFVALIDFILEMEMBACKENDS := [\"$(DEFFILEMEMBACKEND)\"]
-DEFENABLESWAP := false
-DEFENABLEDEBUG := false
-DEFDISABLENESTINGCHECKS := false
 DEFMSIZE9P := 8192
-DEFHOTPLUGVFIOONROOTBUS := false
-DEFPCIEROOTPORT := 0

 # Default cgroup model
 DEFSANDBOXCGROUPONLY ?= false

+DEFBINDMOUNTS := []
+
 # Features
 FEATURE_SELINUX ?= check

@@ -283,10 +260,9 @@ ifneq (,$(CLHCMD))

    # CLH-specific options (all should be suffixed by "_CLH")
    # currently, huge pages are required for virtiofsd support
-    DEFENABLEHUGEPAGES_CLH := true
    DEFNETWORKMODEL_CLH := tcfilter
    KERNELTYPE_CLH = uncompressed
-    KERNEL_NAME_CLH = $(call MAKE_KERNEL_VIRTIOFS_NAME,$(KERNELTYPE_CLH))
+    KERNEL_NAME_CLH = $(call MAKE_KERNEL_NAME,$(KERNELTYPE_CLH))
    KERNELPATH_CLH = $(KERNELDIR)/$(KERNEL_NAME_CLH)
 endif

@@ -408,8 +384,6 @@ USER_VARS += FCVALIDJAILERPATHS
 USER_VARS += SYSCONFIG
 USER_VARS += IMAGENAME
 USER_VARS += IMAGEPATH
-USER_VARS += INITRDNAME
-USER_VARS += INITRDPATH
 USER_VARS += MACHINETYPE
 USER_VARS += KERNELDIR
 USER_VARS += KERNELTYPE
@@ -429,7 +403,6 @@ USER_VARS += KERNELPARAMS
 USER_VARS += LIBEXECDIR
 USER_VARS += LOCALSTATEDIR
 USER_VARS += PKGDATADIR
-USER_VARS += PKGLIBDIR
 USER_VARS += PKGLIBEXECDIR
 USER_VARS += PKGRUNDIR
 USER_VARS += PREFIX
@@ -447,10 +420,8 @@ USER_VARS += QEMUPATH
 USER_VARS += QEMUVALIDHYPERVISORPATHS
 USER_VARS += QEMUVIRTIOFSCMD
 USER_VARS += QEMUVIRTIOFSPATH
-USER_VARS += QEMUVALIDVIRTIOFSPATHS
 USER_VARS += RUNTIME_NAME
 USER_VARS += SHAREDIR
-USER_VARS += SHIMPATH
 USER_VARS += SYSCONFDIR
 USER_VARS += DEFVCPUS
 USER_VARS += DEFMAXVCPUS
@@ -468,7 +439,6 @@ USER_VARS += DEFDISABLEBLOCK
 USER_VARS += DEFBLOCKSTORAGEDRIVER_ACRN
 USER_VARS += DEFBLOCKSTORAGEDRIVER_FC
 USER_VARS += DEFBLOCKSTORAGEDRIVER_QEMU
-USER_VARS += DEFSHAREDFS
 USER_VARS += DEFSHAREDFS_QEMU_VIRTIOFS
 USER_VARS += DEFVIRTIOFSDAEMON
 USER_VARS += DEFVALIDVIRTIOFSDAEMONPATHS
@@ -477,21 +447,15 @@ USER_VARS += DEFVIRTIOFSCACHE
 USER_VARS += DEFVIRTIOFSEXTRAARGS
 USER_VARS += DEFENABLEANNOTATIONS
 USER_VARS += DEFENABLEIOTHREADS
-USER_VARS += DEFENABLEMEMPREALLOC
-USER_VARS += DEFENABLEHUGEPAGES
 USER_VARS += DEFENABLEVHOSTUSERSTORE
 USER_VARS += DEFVHOSTUSERSTOREPATH
 USER_VARS += DEFVALIDVHOSTUSERSTOREPATHS
 USER_VARS += DEFFILEMEMBACKEND
 USER_VARS += DEFVALIDFILEMEMBACKENDS
-USER_VARS += DEFENABLESWAP
-USER_VARS += DEFENABLEDEBUG
-USER_VARS += DEFDISABLENESTINGCHECKS
 USER_VARS += DEFMSIZE9P
-USER_VARS += DEFHOTPLUGVFIOONROOTBUS
-USER_VARS += DEFPCIEROOTPORT
 USER_VARS += DEFENTROPYSOURCE
 USER_VARS += DEFSANDBOXCGROUPONLY
+USER_VARS += DEFBINDMOUNTS
 USER_VARS += FEATURE_SELINUX
 USER_VARS += BUILDFLAGS

@@ -605,8 +569,8 @@ $(SHIMV2_OUTPUT): $(SOURCES) $(GENERATED_FILES) $(MAKEFILE_LIST)
 	$(QUIET_BUILD)(cd $(SHIMV2_DIR)/ && go build $(KATA_LDFLAGS) $(BUILDFLAGS) -o $@ .)

 $(MONITOR_OUTPUT): $(SOURCES) $(GENERATED_FILES) $(MAKEFILE_LIST) .git-commit
-	$(QUIET_BUILD)(cd $(MONITOR_DIR)/ && go build \
-		--ldflags "-X main.GitCommit=$(shell cat .git-commit)" -o $@ .)
+	$(QUIET_BUILD)(cd $(MONITOR_DIR)/ && CGO_ENABLED=0 go build \
+		--ldflags "-X main.GitCommit=$(shell cat .git-commit)" $(BUILDFLAGS) -buildmode=exe -o $@ .)

 .PHONY: \
 	check \
@@ -633,13 +597,19 @@ generate-config: $(CONFIGS)

 check: check-go-static

-test: go-test
+test: install-hook go-test
+
+install-hook:
+	make -C virtcontainers hook
+ifeq ($(shell id -u), 0)
+	echo "installing mock hook"
+	make -C virtcontainers install
+endif

 go-test: $(GENERATED_FILES)
 	go test -v -mod=vendor ./...

 check-go-static:
-	$(QUIET_CHECK)../../ci/static-checks.sh
 	$(QUIET_CHECK)../../ci/go-no-os-exit.sh ./cli
 	$(QUIET_CHECK)../../ci/go-no-os-exit.sh ./virtcontainers

@@ -696,7 +666,8 @@ show-usage: show-header
 	@printf "• Additional targets:\n"
 	@printf "\n"
 	@printf "\tbuild                      : standard build (build everything).\n"
-	@printf "\tcheck                      : run tests.\n"
+	@printf "\ttest                       : run tests.\n"
+	@printf "\tcheck                      : run code checks.\n"
 	@printf "\tclean                      : remove built files.\n"
 	@printf "\tcontainerd-shim-v2         : only build containerd shim v2.\n"
 	@printf "\tcoverage                   : run coverage tests.\n"
--- a/src/runtime/README.md
+++ b/src/runtime/README.md
@@ -129,14 +129,14 @@ The below command lists the full paths to the configuration files that the
 runtime attempts to load. The first path that exists will be used:

 ```bash
-$ kata-runtime --kata-show-default-config-paths
+$ kata-runtime --show-default-config-paths
 ```

 Aside from the built-in locations, it is possible to specify the path to a
-custom configuration file using the `--kata-config` option:
+custom configuration file using the `--config` option:

 ```bash
-$ kata-runtime --kata-config=/some/where/configuration.toml ...
+$ kata-runtime --config=/some/where/configuration.toml ...
 ```

 The runtime will log the full path to the configuration file it is using. See
--- a/src/runtime/arch/amd64-options.mk
+++ b/src/runtime/arch/amd64-options.mk
@@ -12,9 +12,6 @@ CPUFEATURES := pmu=off

 QEMUCMD := qemu-system-x86_64

-# Qemu experimental with virtiofs
-QEMUVIRTIOFSCMD := qemu-virtiofs-system-x86_64
-
 # Firecracker binary name
 FCCMD := firecracker
 # Firecracker's jailer binary name
--- a/src/runtime/cli/config-generated.go.in
+++ b/src/runtime/cli/config-generated.go.in
@@ -9,10 +9,6 @@
 // by the tests.
 package main

-import (
-	"fmt"
-)
-
 // name is the name of the runtime
 const name = "@RUNTIME_NAME@"

@@ -36,10 +32,6 @@ var commit = "@COMMIT@"
 // version is the runtime version.
 var version = "@VERSION@"

-// project-specific option names
-var configFilePathOption = fmt.Sprintf("%s-config", projectPrefix)
-var showConfigPathsOption = fmt.Sprintf("%s-show-default-config-paths", projectPrefix)
-
 // Default config file used by stateless systems.
 var defaultRuntimeConfiguration = "@CONFIG_PATH@"

--- a/src/runtime/cli/config/configuration-acrn.toml.in
+++ b/src/runtime/cli/config/configuration-acrn.toml.in
@@ -113,7 +113,7 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_ACRN@"
 # lexicographical order, to the lifecycle of the guest container.
 # Hooks are executed in the runtime namespace of the guest. See the official documentation:
 # https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
-# Warnings will be logged if any error is encountered will scanning for hooks,
+# Warnings will be logged if any error is encountered while scanning for hooks,
 # but it will not abort container execution.
 #guest_hook_path = "/usr/share/oci/hooks"

@@ -208,6 +208,16 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # (default: disabled)
 #enable_tracing = true

+# Set the full url to the Jaeger HTTP Thrift collector.
+# The default if not set will be "http://localhost:14268/api/traces"
+#jaeger_endpoint = ""
+
+# Sets the username to be used if basic auth is required for Jaeger.
+#jaeger_user = ""
+
+# Sets the password to be used if basic auth is required for Jaeger.
+#jaeger_password = ""
+
 # If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
 # This option may have some potential impacts to your host. It should only be used when you know what you're doing.
 # `disable_new_netns` conflicts with `enable_netmon`
--- a/src/runtime/cli/config/configuration-clh.toml.in
+++ b/src/runtime/cli/config/configuration-clh.toml.in
@@ -115,6 +115,23 @@ block_device_driver = "virtio-blk"
 # Default false
 #enable_debug = true

+# Path to OCI hook binaries in the *guest rootfs*.
+# This does not affect host-side hooks which must instead be added to
+# the OCI spec passed to the runtime.
+#
+# You can create a rootfs with hooks by customizing the osbuilder scripts:
+# https://github.com/kata-containers/osbuilder
+#
+# Hooks must be stored in a subdirectory of guest_hook_path according to their
+# hook type, i.e. "guest_hook_path/{prestart,poststart,poststop}".
+# The agent will scan these directories for executable files and add them, in
+# lexicographical order, to the lifecycle of the guest container.
+# Hooks are executed in the runtime namespace of the guest. See the official documentation:
+# https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
+# Warnings will be logged if any error is encountered while scanning for hooks,
+# but it will not abort container execution.
+#guest_hook_path = "/usr/share/oci/hooks"
+#
 [agent.@PROJECT_TYPE@]
 # If enabled, make the agent display debug-level messages.
 # (default: disabled)
@@ -207,6 +224,16 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # (default: disabled)
 #enable_tracing = true

+# Set the full url to the Jaeger HTTP Thrift collector.
+# The default if not set will be "http://localhost:14268/api/traces"
+#jaeger_endpoint = ""
+
+# Sets the username to be used if basic auth is required for Jaeger.
+#jaeger_user = ""
+
+# Sets the password to be used if basic auth is required for Jaeger.
+#jaeger_password = ""
+
 # If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
 # This option may have some potential impacts to your host. It should only be used when you know what you're doing.
 # `disable_new_netns` conflicts with `enable_netmon`
@@ -225,6 +252,12 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # See: https://godoc.org/github.com/kata-containers/runtime/virtcontainers#ContainerType
 sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@

+# If specified, sandbox_bind_mounts identifies host paths to be mounted (ro) into the sandbox's shared path.
+# This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
+# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
+# These will not be exposed to the container workloads, and are only provided for potential guest services.
+sandbox_bind_mounts=@DEFBINDMOUNTS@
+
 # Enabled experimental feature list, format: ["a", "b"].
 # Experimental features are features not stable enough for production,
 # they may break compatibility, and are prepared for a big version bump.
--- a/src/runtime/cli/config/configuration-fc.toml.in
+++ b/src/runtime/cli/config/configuration-fc.toml.in
@@ -333,6 +333,16 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # (default: disabled)
 #enable_tracing = true

+# Set the full url to the Jaeger HTTP Thrift collector.
+# The default if not set will be "http://localhost:14268/api/traces"
+#jaeger_endpoint = ""
+
+# Sets the username to be used if basic auth is required for Jaeger.
+#jaeger_user = ""
+
+# Sets the password to be used if basic auth is required for Jaeger.
+#jaeger_password = ""
+
 # If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
 # This option may have some potential impacts to your host. It should only be used when you know what you're doing.
 # `disable_new_netns` conflicts with `enable_netmon`
--- a/src/runtime/cli/config/configuration-qemu.toml.in
+++ b/src/runtime/cli/config/configuration-qemu.toml.in
@@ -241,6 +241,10 @@ valid_file_mem_backends = @DEFVALIDFILEMEMBACKENDS@
 # The behaviour is undefined if mem_prealloc is also set to true
 #enable_swap = true

+# -pflash can add image files to the VM. Its arguments should be in the format
+# of ["/path/to/flash0.img", "/path/to/flash1.img"]
+pflashes = []
+
 # This option changes the default hypervisor and kernel parameters
 # to enable debug output where available.
 #
@@ -305,7 +309,7 @@ valid_file_mem_backends = @DEFVALIDFILEMEMBACKENDS@
 # lexicographical order, to the lifecycle of the guest container.
 # Hooks are executed in the runtime namespace of the guest. See the official documentation:
 # https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
-# Warnings will be logged if any error is encountered will scanning for hooks,
+# Warnings will be logged if any error is encountered while scanning for hooks,
 # but it will not abort container execution.
 #guest_hook_path = "/usr/share/oci/hooks"
 #
@@ -479,6 +483,16 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # (default: disabled)
 #enable_tracing = true

+# Set the full url to the Jaeger HTTP Thrift collector.
+# The default if not set will be "http://localhost:14268/api/traces"
+#jaeger_endpoint = ""
+
+# Sets the username to be used if basic auth is required for Jaeger.
+#jaeger_user = ""
+
+# Sets the password to be used if basic auth is required for Jaeger.
+#jaeger_password = ""
+
 # If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
 # This option may have some potential impacts to your host. It should only be used when you know what you're doing.
 # `disable_new_netns` conflicts with `enable_netmon`
@@ -497,6 +511,12 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
 # See: https://godoc.org/github.com/kata-containers/runtime/virtcontainers#ContainerType
 sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@

+# If specified, sandbox_bind_mounts identifies host paths to be mounted (ro) into the sandbox's shared path.
+# This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
+# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
+# These will not be exposed to the container workloads, and are only provided for potential guest services.
+sandbox_bind_mounts=@DEFBINDMOUNTS@
+
 # Enabled experimental feature list, format: ["a", "b"].
 # Experimental features are features not stable enough for production,
 # they may break compatibility, and are prepared for a big version bump.
--- a/src/runtime/cli/factory.go
+++ b/src/runtime/cli/factory.go
@@ -73,7 +73,7 @@ func (s *cacheServer) GetBaseVM(ctx context.Context, empty *types.Empty) (*pb.Gr
 		return nil, errors.Wrapf(err, "failed to GetBaseVM")
 	}

-	return vm.ToGrpc(config)
+	return vm.ToGrpc(ctx, config)
 }

 func (s *cacheServer) quit() {
--- a/src/runtime/cli/kata-check.go
+++ b/src/runtime/cli/kata-check.go
@@ -63,7 +63,6 @@ const (
 	moduleParamDir        = "parameters"
 	successMessageCapable = "System is capable of running " + project
 	successMessageCreate  = "System can currently create " + project
-	successMessageVersion = "Version consistency of " + project + " is verified"
 	failMessage           = "System is not capable of running " + project
 	kernelPropertyCorrect = "Kernel property value correct"

@@ -396,9 +395,9 @@ EXAMPLES:
 		}

 		span, _ := katautils.Trace(ctx, "check")
-		defer span.Finish()
+		defer span.End()

-		if context.Bool("no-network-checks") == false && os.Getenv(noNetworkEnvVar) == "" {
+		if !context.Bool("no-network-checks") && os.Getenv(noNetworkEnvVar) == "" {
 			cmd := RelCmdCheck

 			if context.Bool("only-list-releases") {
--- a/src/runtime/cli/kata-env.go
+++ b/src/runtime/cli/kata-env.go
@@ -454,7 +454,7 @@ var kataEnvCLICommand = cli.Command{
 		}

 		span, _ := katautils.Trace(ctx, "kata-env")
-		defer span.Finish()
+		defer span.End()

 		return handleSettings(defaultOutputFile, context)
 	},
--- a/src/runtime/cli/kata-exec.go
+++ b/src/runtime/cli/kata-exec.go
@@ -1,4 +1,5 @@
 // Copyright (c) 2017-2019 Intel Corporation
+// Copyright (c) 2020 Ant Group
 //
 // SPDX-License-Identifier: Apache-2.0
 //
@@ -13,16 +14,19 @@ import (
 	"net/http"
 	"net/url"
 	"os"
+	"path/filepath"
 	"strings"

 	"sync"
 	"time"

 	"github.com/containerd/console"
+	kataMonitor "github.com/kata-containers/kata-containers/src/runtime/pkg/kata-monitor"
 	"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
 	clientUtils "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/client"
 	"github.com/pkg/errors"
 	"github.com/urfave/cli"
+	"go.opentelemetry.io/otel/label"
 )

 const (
@@ -34,10 +38,10 @@ const (

 	subCommandName = "exec"
 	// command-line parameters name
-	paramKataMonitorAddr                     = "kata-monitor-addr"
+	paramRuntimeNamespace                    = "runtime-namespace"
 	paramDebugConsolePort                    = "kata-debug-port"
 	defaultKernelParamDebugConsoleVPortValue = 1026
-	defaultParamKataMonitorAddr              = "http://localhost:8090"
+	defaultRuntimeNamespace                  = "k8s.io"
 )

 var (
@@ -54,12 +58,12 @@ var kataExecCLICommand = cli.Command{
 	Usage: "Enter into guest by debug console",
 	Flags: []cli.Flag{
 		cli.StringFlag{
-			Name:  paramKataMonitorAddr,
-			Usage: "Kata monitor listen address.",
+			Name:  paramRuntimeNamespace,
+			Usage: "Namespace that containerd or CRI-O are using for containers. (Default: k8s.io, only works for containerd)",
 		},
 		cli.Uint64Flag{
 			Name:  paramDebugConsolePort,
-			Usage: "Port that debug console is listening on.",
+			Usage: "Port that debug console is listening on. (Default: 1026)",
 		},
 	},
 	Action: func(context *cli.Context) error {
@@ -68,24 +72,29 @@ var kataExecCLICommand = cli.Command{
 			return err
 		}
 		span, _ := katautils.Trace(ctx, subCommandName)
-		defer span.Finish()
+		defer span.End()

-		endPoint := context.String(paramKataMonitorAddr)
-		if endPoint == "" {
-			endPoint = defaultParamKataMonitorAddr
+		namespace := context.String(paramRuntimeNamespace)
+		if namespace == "" {
+			namespace = defaultRuntimeNamespace
 		}
+		span.SetAttributes(label.Key("namespace").String(namespace))

 		port := context.Uint64(paramDebugConsolePort)
 		if port == 0 {
 			port = defaultKernelParamDebugConsoleVPortValue
 		}
+		span.SetAttributes(label.Key("port").Uint64(port))

 		sandboxID := context.Args().Get(0)
-		if sandboxID == "" {
-			return fmt.Errorf("SandboxID not found")
+
+		if err := katautils.VerifyContainerID(sandboxID); err != nil {
+			return err
 		}

-		conn, err := getConn(endPoint, sandboxID, port)
+		span.SetAttributes(label.Key("sandbox").String(sandboxID))
+
+		conn, err := getConn(namespace, sandboxID, port)
 		if err != nil {
 			return err
 		}
@@ -168,15 +177,20 @@ func (s *iostream) Read(data []byte) (n int, err error) {
 	return s.conn.Read(data)
 }

-func getConn(endPoint, sandboxID string, port uint64) (net.Conn, error) {
-	shimURL := fmt.Sprintf("%s/agent-url?sandbox=%s", endPoint, sandboxID)
-	resp, err := http.Get(shimURL)
+func getConn(namespace, sandboxID string, port uint64) (net.Conn, error) {
+	socketAddr := filepath.Join(string(filepath.Separator), "containerd-shim", namespace, sandboxID, "shim-monitor.sock")
+	client, err := kataMonitor.BuildUnixSocketClient(socketAddr, defaultTimeout)
+	if err != nil {
+		return nil, err
+	}
+
+	resp, err := client.Get("http://shim/agent-url")
 	if err != nil {
 		return nil, err
 	}

 	if resp.StatusCode != http.StatusOK {
-		return nil, fmt.Errorf("Failed to get %s: %d", shimURL, resp.StatusCode)
+		return nil, fmt.Errorf("Failed to get %s: %d", socketAddr, resp.StatusCode)
 	}

 	defer resp.Body.Close()
--- a/src/runtime/cli/kata-monitor/main.go
+++ b/src/runtime/cli/kata-monitor/main.go
@@ -48,9 +48,9 @@ var versionTemplate = `{{.AppName}}
 `

 func printVersion(ver versionInfo) {
-	t, err := template.New("version").Parse(versionTemplate)
+	t, _ := template.New("version").Parse(versionTemplate)

-	if err = t.Execute(os.Stdout, ver); err != nil {
+	if err := t.Execute(os.Stdout, ver); err != nil {
 		panic(err)
 	}
 }
--- a/src/runtime/cli/main.go
+++ b/src/runtime/cli/main.go
@@ -25,9 +25,11 @@ import (
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/oci"
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless"
 	specs "github.com/opencontainers/runtime-spec/specs-go"
-	opentracing "github.com/opentracing/opentracing-go"
 	"github.com/sirupsen/logrus"
 	"github.com/urfave/cli"
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/label"
+	otelTrace "go.opentelemetry.io/otel/trace"
 )

 // specConfig is the name of the file holding the containers configuration
@@ -62,9 +64,6 @@ var originalLoggerLevel = logrus.WarnLevel

 var debug = false

-// if true, coredump when an internal error occurs or a fatal signal is received
-var crashOnError = false
-
 // concrete virtcontainer implementation
 var virtcontainersImpl = &vc.VCImpl{}

@@ -84,7 +83,7 @@ var defaultErrorFile = os.Stderr
 // runtimeFlags is the list of supported global command-line flags
 var runtimeFlags = []cli.Flag{
 	cli.StringFlag{
-		Name:  configFilePathOption,
+		Name:  "config, kata-config",
 		Usage: project + " config file path",
 	},
 	cli.StringFlag{
@@ -108,7 +107,7 @@ var runtimeFlags = []cli.Flag{
 		Usage: "ignore cgroup permission errors ('true', 'false', or 'auto')",
 	},
 	cli.BoolFlag{
-		Name:  showConfigPathsOption,
+		Name:  "show-default-config-paths, kata-show-default-config-paths",
 		Usage: "show config file paths that will be checked for (in order)",
 	},
 	cli.BoolFlag{
@@ -211,16 +210,16 @@ func setupSignalHandler(ctx context.Context) {
 // setExternalLoggers registers the specified logger with the external
 // packages which accept a logger to handle their own logging.
 func setExternalLoggers(ctx context.Context, logger *logrus.Entry) {
-	var span opentracing.Span
+	var span otelTrace.Span

 	// Only create a new span if a root span already exists. This is
 	// required to ensure that this function will not disrupt the root
 	// span logic by creating a span before the proper root span has been
 	// created.

-	if opentracing.SpanFromContext(ctx) != nil {
+	if otelTrace.SpanFromContext(ctx) != nil {
 		span, ctx = katautils.Trace(ctx, "setExternalLoggers")
-		defer span.Finish()
+		defer span.End()
 	}

 	// Set virtcontainers logger.
@@ -245,6 +244,7 @@ func beforeSubcommands(c *cli.Context) error {
 	var configFile string
 	var runtimeConfig oci.RuntimeConfig
 	var err error
+	var traceFlushFunc func()

 	katautils.SetConfigOptions(name, defaultRuntimeConfiguration, defaultSysConfRuntimeConfiguration)

@@ -319,13 +319,12 @@ func beforeSubcommands(c *cli.Context) error {
 		}
 	}

-	configFile, runtimeConfig, err = katautils.LoadConfiguration(c.GlobalString(configFilePathOption), ignoreConfigLogs, false)
+	configFile, runtimeConfig, err = katautils.LoadConfiguration(c.GlobalString("kata-config"), ignoreConfigLogs, false)
 	if err != nil {
 		fatal(err)
 	}
 	if !subCmdIsCheckCmd {
 		debug = runtimeConfig.Debug
-		crashOnError = runtimeConfig.Debug

 		if traceRootSpan != "" {
 			// Create the tracer.
@@ -334,10 +333,11 @@ func beforeSubcommands(c *cli.Context) error {
 			// This delays collection of trace data slightly but benefits the user by
 			// ensuring the first span is the name of the sub-command being
 			// invoked from the command-line.
-			err = setupTracing(c, traceRootSpan)
+			traceFlushFunc, err = setupTracing(c, traceRootSpan, &runtimeConfig)
 			if err != nil {
 				return err
 			}
+			defer traceFlushFunc()
 		}
 	}

@@ -366,7 +366,7 @@ func beforeSubcommands(c *cli.Context) error {
 // handleShowConfig determines if the user wishes to see the configuration
 // paths. If so, it will display them and then exit.
 func handleShowConfig(context *cli.Context) {
-	if context.GlobalBool(showConfigPathsOption) {
+	if context.GlobalBool("show-default-config-paths") {
 		files := katautils.GetDefaultConfigFilePaths()

 		for _, file := range files {
@@ -377,10 +377,15 @@ func handleShowConfig(context *cli.Context) {
 	}
 }

-func setupTracing(context *cli.Context, rootSpanName string) error {
-	tracer, err := katautils.CreateTracer(name)
+func setupTracing(context *cli.Context, rootSpanName string, config *oci.RuntimeConfig) (func(), error) {
+	flush, err := katautils.CreateTracer(name, config)
 	if err != nil {
-		fatal(err)
+		return nil, err
+	}
+
+	ctx, err := cliContextToContext(context)
+	if err != nil {
+		return nil, err
 	}

 	// Create the root span now that the sub-command name is
@@ -390,23 +395,16 @@ func setupTracing(context *cli.Context, rootSpanName string) error {
 	// before the subcommand handler is called. As such, we cannot
 	// "Finish()" the span here - that is handled in the .After
 	// function.
-	span := tracer.StartSpan(rootSpanName)
+	tracer := otel.Tracer("kata")
+	newCtx, span := tracer.Start(ctx, rootSpanName)

-	ctx, err := cliContextToContext(context)
-	if err != nil {
-		return err
-	}
-
-	span.SetTag("subsystem", "runtime")
-
-	// Associate the root span with the context
-	ctx = opentracing.ContextWithSpan(ctx, span)
+	span.SetAttributes(label.Key("subsystem").String("runtime"))

 	// Add tracer to metadata and update the context
 	context.App.Metadata["tracer"] = tracer
-	context.App.Metadata["context"] = ctx
+	context.App.Metadata["context"] = newCtx

-	return nil
+	return flush, nil
 }

 // add supported experimental features in context
--- a/src/runtime/cli/main_test.go
+++ b/src/runtime/cli/main_test.go
@@ -8,7 +8,6 @@ package main
 import (
 	"bytes"
 	"context"
-	"encoding/json"
 	"errors"
 	"flag"
 	"fmt"
@@ -29,10 +28,9 @@ import (
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/compatoci"
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/oci"
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/vcmock"
-	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/stretchr/testify/assert"
-	jaeger "github.com/uber/jaeger-client-go"
+
 	"github.com/urfave/cli"
 )

@@ -44,10 +42,8 @@ const (
 	// small docker image used to create root filesystems from
 	testDockerImage = "busybox"

-	testSandboxID   = "99999999-9999-9999-99999999999999999"
-	testContainerID = "1"
-	testBundle      = "bundle"
-	testConsole     = "/dev/pts/999"
+	testBundle  = "bundle"
+	testConsole = "/dev/pts/999"
 )

 var (
@@ -387,44 +383,6 @@ func makeOCIBundle(bundleDir string) error {
 	return nil
 }

-func writeOCIConfigFile(spec specs.Spec, configPath string) error {
-	if configPath == "" {
-		return errors.New("BUG: need config file path")
-	}
-
-	bytes, err := json.MarshalIndent(spec, "", "\t")
-	if err != nil {
-		return err
-	}
-
-	return ioutil.WriteFile(configPath, bytes, testFileMode)
-}
-
-func newSingleContainerStatus(containerID string, containerState types.ContainerState, annotations map[string]string, spec *specs.Spec) vc.ContainerStatus {
-	return vc.ContainerStatus{
-		ID:          containerID,
-		State:       containerState,
-		Annotations: annotations,
-		Spec:        spec,
-	}
-}
-
-func execCLICommandFunc(assertHandler *assert.Assertions, cliCommand cli.Command, set *flag.FlagSet, expectedErr bool) {
-	ctx := createCLIContext(set)
-	ctx.App.Name = "foo"
-
-	fn, ok := cliCommand.Action.(func(context *cli.Context) error)
-	assertHandler.True(ok)
-
-	err := fn(ctx)
-
-	if expectedErr {
-		assertHandler.Error(err)
-	} else {
-		assertHandler.Nil(err)
-	}
-}
-
 func createCLIContextWithApp(flagSet *flag.FlagSet, app *cli.App) *cli.Context {
 	ctx := cli.NewContext(app, flagSet, nil)

@@ -435,7 +393,6 @@ func createCLIContextWithApp(flagSet *flag.FlagSet, app *cli.App) *cli.Context {

 	// add standard entries
 	ctx.App.Metadata["context"] = context.Background()
-	ctx.App.Metadata["tracer"] = &jaeger.Tracer{}

 	return ctx
 }
@@ -671,7 +628,7 @@ func TestMainBeforeSubCommandsShowCCConfigPaths(t *testing.T) {
 	defer os.RemoveAll(tmpdir)

 	set := flag.NewFlagSet("", 0)
-	set.Bool("kata-show-default-config-paths", true, "")
+	set.Bool("show-default-config-paths", true, "")

 	ctx := createCLIContext(set)

--- a/src/runtime/cli/release_test.go
+++ b/src/runtime/cli/release_test.go
@@ -458,7 +458,7 @@ func TestGetNewReleaseType(t *testing.T) {
 	}

 	data := []testData{
-		// Check build metadata (ignored for version comparisions)
+		// Check build metadata (ignored for version comparisons)
 		{"2.0.0+build", "2.0.0", true, ""},
 		{"2.0.0+build-1", "2.0.0+build-2", true, ""},
 		{"1.12.0+build", "1.12.0", true, ""},
--- a/src/runtime/cli/utils.go
+++ b/src/runtime/cli/utils.go
@@ -189,21 +189,3 @@ func constructVersionInfo(version string) VersionInfo {
 	}

 }
-
-func versionEqual(a VersionInfo, b VersionInfo) bool {
-	av, err := semver.Make(a.Semver)
-	if err != nil {
-		return false
-	}
-
-	bv, err := semver.Make(b.Semver)
-	if err != nil {
-		return false
-	}
-
-	if av.Major == bv.Major && av.Minor == bv.Minor && av.Patch == bv.Patch {
-		return true
-	}
-
-	return false
-}
--- a/src/runtime/cli/utils_arch_base.go
+++ b/src/runtime/cli/utils_arch_base.go
@@ -1,10 +0,0 @@
-// +build !s390x
-//
-// SPDX-License-Identifier: Apache-2.0
-//
-
-package main
-
-func archConvertStatFs(cgroupFsType int) int64 {
-	return int64(cgroupFsType)
-}
--- a/src/runtime/cli/utils_s390x.go
+++ b/src/runtime/cli/utils_s390x.go
@@ -1,10 +0,0 @@
-// Copyright (c) 2018 IBM
-//
-// SPDX-License-Identifier: Apache-2.0
-//
-
-package main
-
-func archConvertStatFs(cgroupFsType int) uint32 {
-	return uint32(cgroupFsType)
-}
--- a/src/runtime/cli/version.go
+++ b/src/runtime/cli/version.go
@@ -20,7 +20,7 @@ var versionCLICommand = cli.Command{
 		}

 		span, _ := katautils.Trace(ctx, "version")
-		defer span.Finish()
+		defer span.End()

 		cli.VersionPrinter(context)
 		return nil
--- a/src/runtime/containerd-shim-v2/create.go
+++ b/src/runtime/containerd-shim-v2/create.go
@@ -19,6 +19,7 @@ import (
 	"github.com/containerd/typeurl"
 	"github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/pkg/errors"
+	otelTrace "go.opentelemetry.io/otel/trace"

 	// only register the proto type
 	_ "github.com/containerd/containerd/runtime/linux/runctypes"
@@ -59,11 +60,29 @@ func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest) (*con
 			return nil, fmt.Errorf("cannot create another sandbox in sandbox: %s", s.sandbox.ID())
 		}

-		_, err := loadRuntimeConfig(s, r, ociSpec.Annotations)
+		s.config, err = loadRuntimeConfig(s, r, ociSpec.Annotations)
 		if err != nil {
 			return nil, err
 		}

+		// create tracer
+		// This is the earliest location we can create the tracer because we must wait
+		// until the runtime config is loaded
+		_, err = katautils.CreateTracer("kata", s.config)
+		if err != nil {
+			return nil, err
+		}
+
+		// create root span
+		var rootSpan otelTrace.Span
+		rootSpan, s.rootCtx = trace(s.ctx, "root span")
+		defer rootSpan.End()
+
+		// create span
+		var span otelTrace.Span
+		span, s.ctx = trace(s.rootCtx, "create")
+		defer span.End()
+
 		if rootFs.Mounted, err = checkAndMount(s, r); err != nil {
 			return nil, err
 		}
@@ -87,9 +106,19 @@ func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest) (*con
 			return nil, err
 		}
 		s.sandbox = sandbox
+		pid, err := s.sandbox.GetHypervisorPid()
+		if err != nil {
+			return nil, err
+		}
+		s.hpid = uint32(pid)
+
 		go s.startManagementServer(ctx, ociSpec)

 	case vc.PodContainer:
+		var span otelTrace.Span
+		span, ctx = trace(s.ctx, "create")
+		defer span.End()
+
 		if s.sandbox == nil {
 			return nil, fmt.Errorf("BUG: Cannot start the container, since the sandbox hasn't been created")
 		}
--- a/src/runtime/containerd-shim-v2/delete.go
+++ b/src/runtime/containerd-shim-v2/delete.go
@@ -17,19 +17,21 @@ import (
 func deleteContainer(ctx context.Context, s *service, c *container) error {
 	if !c.cType.IsSandbox() {
 		if c.status != task.StatusStopped {
-			if _, err := s.sandbox.StopContainer(c.id, false); err != nil && !isNotFound(err) {
+			if _, err := s.sandbox.StopContainer(ctx, c.id, false); err != nil && !isNotFound(err) {
 				return err
 			}
 		}

-		if _, err := s.sandbox.DeleteContainer(c.id); err != nil && !isNotFound(err) {
+		if _, err := s.sandbox.DeleteContainer(ctx, c.id); err != nil && !isNotFound(err) {
 			return err
 		}
 	}

 	// Run post-stop OCI hooks.
 	if err := katautils.PostStopHooks(ctx, *c.spec, s.sandbox.ID(), c.bundle); err != nil {
-		return err
+		// log warning and continue, as defined in oci runtime spec
+		// https://github.com/opencontainers/runtime-spec/blob/master/runtime.md#lifecycle
+		shimLog.WithError(err).Warn("Failed to run post-stop hooks")
 	}

 	if c.mounted {
--- a/src/runtime/containerd-shim-v2/exec.go
+++ b/src/runtime/containerd-shim-v2/exec.go
@@ -90,6 +90,10 @@ func newExec(c *container, stdin, stdout, stderr string, terminal bool, jspec *g
 		height = uint32(spec.ConsoleSize.Height)
 		width = uint32(spec.ConsoleSize.Width)
 	}
+	var extraGroups []string
+	for _, g := range spec.User.AdditionalGids {
+		extraGroups = append(extraGroups, fmt.Sprintf("%d", g))
+	}

 	tty := &tty{
 		stdin:    stdin,
@@ -101,14 +105,15 @@ func newExec(c *container, stdin, stdout, stderr string, terminal bool, jspec *g
 	}

 	cmds := &types.Cmd{
-		Args:            spec.Args,
-		Envs:            getEnvs(spec.Env),
-		User:            fmt.Sprintf("%d", spec.User.UID),
-		PrimaryGroup:    fmt.Sprintf("%d", spec.User.GID),
-		WorkDir:         spec.Cwd,
-		Interactive:     terminal,
-		Detach:          !terminal,
-		NoNewPrivileges: spec.NoNewPrivileges,
+		Args:                spec.Args,
+		Envs:                getEnvs(spec.Env),
+		User:                fmt.Sprintf("%d", spec.User.UID),
+		PrimaryGroup:        fmt.Sprintf("%d", spec.User.GID),
+		SupplementaryGroups: extraGroups,
+		WorkDir:             spec.Cwd,
+		Interactive:         terminal,
+		Detach:              !terminal,
+		NoNewPrivileges:     spec.NoNewPrivileges,
 	}

 	exec := &exec{
--- a/src/runtime/containerd-shim-v2/metrics.go
+++ b/src/runtime/containerd-shim-v2/metrics.go
@@ -6,6 +6,8 @@
 package containerdshim

 import (
+	"context"
+
 	"github.com/containerd/cgroups"
 	"github.com/containerd/typeurl"

@@ -13,8 +15,8 @@ import (
 	vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
 )

-func marshalMetrics(s *service, containerID string) (*google_protobuf.Any, error) {
-	stats, err := s.sandbox.StatsContainer(containerID)
+func marshalMetrics(ctx context.Context, s *service, containerID string) (*google_protobuf.Any, error) {
+	stats, err := s.sandbox.StatsContainer(ctx, containerID)
 	if err != nil {
 		return nil, err
 	}
--- a/src/runtime/containerd-shim-v2/metrics_test.go
+++ b/src/runtime/containerd-shim-v2/metrics_test.go
@@ -7,6 +7,7 @@
 package containerdshim

 import (
+	"context"
 	"testing"

 	"github.com/containerd/cgroups"
@@ -50,7 +51,7 @@ func TestStatNetworkMetric(t *testing.T) {
 		sandbox.StatsContainerFunc = nil
 	}()

-	resp, err := sandbox.StatsContainer(testContainerID)
+	resp, err := sandbox.StatsContainer(context.Background(), testContainerID)
 	assert.NoError(err)

 	metrics := statsToMetrics(&resp)
--- a/src/runtime/containerd-shim-v2/service.go
+++ b/src/runtime/containerd-shim-v2/service.go
@@ -25,9 +25,11 @@ import (
 	"github.com/containerd/typeurl"
 	ptypes "github.com/gogo/protobuf/types"
 	"github.com/opencontainers/runtime-spec/specs-go"
-	opentracing "github.com/opentracing/opentracing-go"
 	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/label"
+	otelTrace "go.opentelemetry.io/otel/trace"
 	"golang.org/x/sys/unix"

 	"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
@@ -60,7 +62,10 @@ var (
 var vci vc.VC = &vc.VCImpl{}

 // shimLog is logger for shim package
-var shimLog = logrus.WithField("source", "containerd-kata-shim-v2")
+var shimLog = logrus.WithFields(logrus.Fields{
+	"source": "containerd-kata-shim-v2",
+	"name":   "containerd-shim-v2",
+})

 // New returns a new shim service that can be used via GRPC
 func New(ctx context.Context, id string, publisher events.Publisher) (cdshim.Shim, error) {
@@ -79,21 +84,6 @@ func New(ctx context.Context, id string, publisher events.Publisher) (cdshim.Shi
 	vci.SetLogger(ctx, shimLog)
 	katautils.SetLogger(ctx, shimLog, shimLog.Logger.Level)

-	// load runtime config so that tracing can start if enabled
-	_, runtimeConfig, err := katautils.LoadConfiguration("", false, true)
-	if err != nil {
-		return nil, err
-	}
-
-	// create tracer
-	_, err = katautils.CreateTracer("kata")
-	if err != nil {
-		return nil, err
-	}
-	// create span
-	span, ctx := trace(ctx, "New")
-	defer span.Finish()
-
 	ctx, cancel := context.WithCancel(ctx)

 	s := &service{
@@ -101,7 +91,6 @@ func New(ctx context.Context, id string, publisher events.Publisher) (cdshim.Shi
 		pid:        uint32(os.Getpid()),
 		ctx:        ctx,
 		containers: make(map[string]*container),
-		config:     &runtimeConfig,
 		events:     make(chan interface{}, chSize),
 		ec:         make(chan exit, bufferSize),
 		cancel:     cancel,
@@ -109,7 +98,7 @@ func New(ctx context.Context, id string, publisher events.Publisher) (cdshim.Shi

 	go s.processExits()

-	go s.forward(publisher)
+	go s.forward(ctx, publisher)

 	return s, nil
 }
@@ -127,12 +116,16 @@ type service struct {
 	mu          sync.Mutex
 	eventSendMu sync.Mutex

-	// pid Since this shimv2 cannot get the container processes pid from VM,
-	// thus for the returned values needed pid, just return this shim's
+	// hpid is the hypervisor's pid. Since this shimv2 cannot get the container
+	// processes' pids from the VM, returned values that need a pid use the hypervisor's
 	// pid directly.
+	hpid uint32
+
+	// shim's pid
 	pid uint32

 	ctx        context.Context
+	rootCtx    context.Context // root context for tracing
 	sandbox    vc.VCSandbox
 	containers map[string]*container
 	config     *oci.RuntimeConfig
@@ -184,13 +177,6 @@ func newCommand(ctx context.Context, containerdBinary, id, containerdAddress str
 // StartShim willl start a kata shimv2 daemon which will implemented the
 // ShimV2 APIs such as create/start/update etc containers.
 func (s *service) StartShim(ctx context.Context, id, containerdBinary, containerdAddress string) (string, error) {
-	// Stop tracing here since a new tracer will be created the next time New()
-	// is called again after StartShim()
-	defer katautils.StopTracing(s.ctx)
-
-	span, _ := trace(s.ctx, "StartShim")
-	defer span.Finish()
-
 	bundlePath, err := os.Getwd()
 	if err != nil {
 		return "", err
@@ -248,9 +234,9 @@ func (s *service) StartShim(ctx context.Context, id, containerdBinary, container
 	return address, nil
 }

-func (s *service) forward(publisher events.Publisher) {
+func (s *service) forward(ctx context.Context, publisher events.Publisher) {
 	for e := range s.events {
-		ctx, cancel := context.WithTimeout(s.ctx, timeOut)
+		ctx, cancel := context.WithTimeout(ctx, timeOut)
 		err := publisher.Publish(ctx, getTopic(e), e)
 		cancel()
 		if err != nil {
@@ -302,21 +288,21 @@ func getTopic(e interface{}) string {
 	return cdruntime.TaskUnknownTopic
 }

-func trace(ctx context.Context, name string) (opentracing.Span, context.Context) {
+func trace(ctx context.Context, name string) (otelTrace.Span, context.Context) {
 	if ctx == nil {
 		logrus.WithField("type", "bug").Error("trace called before context set")
 		ctx = context.Background()
 	}
-
-	span, ctx := opentracing.StartSpanFromContext(ctx, name)
-	span.SetTag("source", "runtime")
+	tracer := otel.Tracer("kata")
+	ctx, span := tracer.Start(ctx, name)
+	span.SetAttributes([]label.KeyValue{label.Key("source").String("runtime"), label.Key("package").String("containerdshim")}...)

 	return span, ctx
 }

 func (s *service) Cleanup(ctx context.Context) (_ *taskAPI.DeleteResponse, err error) {
-	span, _ := trace(s.ctx, "Cleanup")
-	defer span.Finish()
+	span, spanCtx := trace(s.rootCtx, "Cleanup")
+	defer span.End()

 	//Since the binary cleanup will return the DeleteResponse from stdout to
 	//containerd, thus we must make sure there is no any outputs in stdout except
@@ -349,7 +335,7 @@ func (s *service) Cleanup(ctx context.Context) (_ *taskAPI.DeleteResponse, err e

 	switch containerType {
 	case vc.PodSandbox:
-		err = cleanupContainer(ctx, s.id, s.id, path)
+		err = cleanupContainer(spanCtx, s.id, s.id, path)
 		if err != nil {
 			return nil, err
 		}
@@ -359,7 +345,7 @@ func (s *service) Cleanup(ctx context.Context) (_ *taskAPI.DeleteResponse, err e
 			return nil, err
 		}

-		err = cleanupContainer(ctx, sandboxID, s.id, path)
+		err = cleanupContainer(spanCtx, sandboxID, s.id, path)
 		if err != nil {
 			return nil, err
 		}
@@ -373,9 +359,6 @@ func (s *service) Cleanup(ctx context.Context) (_ *taskAPI.DeleteResponse, err e

 // Create a new sandbox or container with the underlying OCI runtime
 func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ *taskAPI.CreateTaskResponse, err error) {
-	span, _ := trace(s.ctx, "Create")
-	defer span.Finish()
-
 	start := time.Now()
 	defer func() {
 		err = toGRPC(err)
@@ -385,6 +368,10 @@ func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ *
 	s.mu.Lock()
 	defer s.mu.Unlock()

+	if err := katautils.VerifyContainerID(r.ID); err != nil {
+		return nil, err
+	}
+
 	type Result struct {
 		container *container
 		err       error
@@ -418,19 +405,19 @@ func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ *
 				Terminal: r.Terminal,
 			},
 			Checkpoint: r.Checkpoint,
-			Pid:        s.pid,
+			Pid:        s.hpid,
 		})

 		return &taskAPI.CreateTaskResponse{
-			Pid: s.pid,
+			Pid: s.hpid,
 		}, nil
 	}
 }

 // Start a process
 func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (_ *taskAPI.StartResponse, err error) {
-	span, _ := trace(s.ctx, "Start")
-	defer span.Finish()
+	span, spanCtx := trace(s.rootCtx, "Start")
+	defer span.End()

 	start := time.Now()
 	defer func() {
@@ -452,36 +439,36 @@ func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (_ *taskAP

 	//start a container
 	if r.ExecID == "" {
-		err = startContainer(ctx, s, c)
+		err = startContainer(spanCtx, s, c)
 		if err != nil {
 			return nil, errdefs.ToGRPC(err)
 		}
 		s.send(&eventstypes.TaskStart{
 			ContainerID: c.id,
-			Pid:         s.pid,
+			Pid:         s.hpid,
 		})
 	} else {
 		//start an exec
-		_, err = startExec(ctx, s, r.ID, r.ExecID)
+		_, err = startExec(spanCtx, s, r.ID, r.ExecID)
 		if err != nil {
 			return nil, errdefs.ToGRPC(err)
 		}
 		s.send(&eventstypes.TaskExecStarted{
 			ContainerID: c.id,
 			ExecID:      r.ExecID,
-			Pid:         s.pid,
+			Pid:         s.hpid,
 		})
 	}

 	return &taskAPI.StartResponse{
-		Pid: s.pid,
+		Pid: s.hpid,
 	}, nil
 }

 // Delete the initial process and container
 func (s *service) Delete(ctx context.Context, r *taskAPI.DeleteRequest) (_ *taskAPI.DeleteResponse, err error) {
-	span, _ := trace(s.ctx, "Delete")
-	defer span.Finish()
+	span, spanCtx := trace(s.rootCtx, "Delete")
+	defer span.End()

 	start := time.Now()
 	defer func() {
@@ -498,13 +485,13 @@ func (s *service) Delete(ctx context.Context, r *taskAPI.DeleteRequest) (_ *task
 	}

 	if r.ExecID == "" {
-		if err = deleteContainer(ctx, s, c); err != nil {
+		if err = deleteContainer(spanCtx, s, c); err != nil {
 			return nil, err
 		}

 		s.send(&eventstypes.TaskDelete{
 			ContainerID: c.id,
-			Pid:         s.pid,
+			Pid:         s.hpid,
 			ExitStatus:  c.exit,
 			ExitedAt:    c.exitTime,
 		})
@@ -512,7 +499,7 @@ func (s *service) Delete(ctx context.Context, r *taskAPI.DeleteRequest) (_ *task
 		return &taskAPI.DeleteResponse{
 			ExitStatus: c.exit,
 			ExitedAt:   c.exitTime,
-			Pid:        s.pid,
+			Pid:        s.hpid,
 		}, nil
 	}
 	//deal with the exec case
@@ -526,14 +513,14 @@ func (s *service) Delete(ctx context.Context, r *taskAPI.DeleteRequest) (_ *task
 	return &taskAPI.DeleteResponse{
 		ExitStatus: uint32(execs.exitCode),
 		ExitedAt:   execs.exitTime,
-		Pid:        s.pid,
+		Pid:        s.hpid,
 	}, nil
 }

 // Exec an additional process inside the container
 func (s *service) Exec(ctx context.Context, r *taskAPI.ExecProcessRequest) (_ *ptypes.Empty, err error) {
-	span, _ := trace(s.ctx, "Exec")
-	defer span.Finish()
+	span, _ := trace(s.rootCtx, "Exec")
+	defer span.End()

 	start := time.Now()
 	defer func() {
@@ -570,8 +557,8 @@ func (s *service) Exec(ctx context.Context, r *taskAPI.ExecProcessRequest) (_ *p

 // ResizePty of a process
 func (s *service) ResizePty(ctx context.Context, r *taskAPI.ResizePtyRequest) (_ *ptypes.Empty, err error) {
-	span, _ := trace(s.ctx, "ResizePty")
-	defer span.Finish()
+	span, spanCtx := trace(s.rootCtx, "ResizePty")
+	defer span.End()

 	start := time.Now()
 	defer func() {
@@ -599,7 +586,7 @@ func (s *service) ResizePty(ctx context.Context, r *taskAPI.ResizePtyRequest) (_
 		processID = execs.id

 	}
-	err = s.sandbox.WinsizeProcess(c.id, processID, r.Height, r.Width)
+	err = s.sandbox.WinsizeProcess(spanCtx, c.id, processID, r.Height, r.Width)
 	if err != nil {
 		return nil, err
 	}
@@ -609,8 +596,8 @@ func (s *service) ResizePty(ctx context.Context, r *taskAPI.ResizePtyRequest) (_

 // State returns runtime state information for a process
 func (s *service) State(ctx context.Context, r *taskAPI.StateRequest) (_ *taskAPI.StateResponse, err error) {
-	span, _ := trace(s.ctx, "State")
-	defer span.Finish()
+	span, _ := trace(s.rootCtx, "State")
+	defer span.End()

 	start := time.Now()
 	defer func() {
@@ -630,7 +617,7 @@ func (s *service) State(ctx context.Context, r *taskAPI.StateRequest) (_ *taskAP
 		return &taskAPI.StateResponse{
 			ID:         c.id,
 			Bundle:     c.bundle,
-			Pid:        s.pid,
+			Pid:        s.hpid,
 			Status:     c.status,
 			Stdin:      c.stdin,
 			Stdout:     c.stdout,
@@ -649,7 +636,7 @@ func (s *service) State(ctx context.Context, r *taskAPI.StateRequest) (_ *taskAP
 	return &taskAPI.StateResponse{
 		ID:         execs.id,
 		Bundle:     c.bundle,
-		Pid:        s.pid,
+		Pid:        s.hpid,
 		Status:     execs.status,
 		Stdin:      execs.tty.stdin,
 		Stdout:     execs.tty.stdout,
@@ -661,8 +648,8 @@ func (s *service) State(ctx context.Context, r *taskAPI.StateRequest) (_ *taskAP

 // Pause the container
 func (s *service) Pause(ctx context.Context, r *taskAPI.PauseRequest) (_ *ptypes.Empty, err error) {
-	span, _ := trace(s.ctx, "Pause")
-	defer span.Finish()
+	span, spanCtx := trace(s.rootCtx, "Pause")
+	defer span.End()

 	start := time.Now()
 	defer func() {
@@ -680,7 +667,7 @@ func (s *service) Pause(ctx context.Context, r *taskAPI.PauseRequest) (_ *ptypes

 	c.status = task.StatusPausing

-	err = s.sandbox.PauseContainer(r.ID)
+	err = s.sandbox.PauseContainer(spanCtx, r.ID)
 	if err == nil {
 		c.status = task.StatusPaused
 		s.send(&eventstypes.TaskPaused{
@@ -700,8 +687,8 @@ func (s *service) Pause(ctx context.Context, r *taskAPI.PauseRequest) (_ *ptypes

 // Resume the container
 func (s *service) Resume(ctx context.Context, r *taskAPI.ResumeRequest) (_ *ptypes.Empty, err error) {
-	span, _ := trace(s.ctx, "Resume")
-	defer span.Finish()
+	span, spanCtx := trace(s.rootCtx, "Resume")
+	defer span.End()

 	start := time.Now()
 	defer func() {
@@ -717,7 +704,7 @@ func (s *service) Resume(ctx context.Context, r *taskAPI.ResumeRequest) (_ *ptyp
 		return nil, err
 	}

-	err = s.sandbox.ResumeContainer(c.id)
+	err = s.sandbox.ResumeContainer(spanCtx, c.id)
 	if err == nil {
 		c.status = task.StatusRunning
 		s.send(&eventstypes.TaskResumed{
@@ -737,8 +724,8 @@ func (s *service) Resume(ctx context.Context, r *taskAPI.ResumeRequest) (_ *ptyp

 // Kill a process with the provided signal
 func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (_ *ptypes.Empty, err error) {
-	span, _ := trace(s.ctx, "Kill")
-	defer span.Finish()
+	span, spanCtx := trace(s.rootCtx, "Kill")
+	defer span.End()

 	start := time.Now()
 	defer func() {
@@ -791,15 +778,15 @@ func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (_ *ptypes.E
 		return empty, nil
 	}

-	return empty, s.sandbox.SignalProcess(c.id, processID, signum, r.All)
+	return empty, s.sandbox.SignalProcess(spanCtx, c.id, processID, signum, r.All)
 }

 // Pids returns all pids inside the container
 // Since for kata, it cannot get the process's pid from VM,
 // thus only return the Shim's pid directly.
 func (s *service) Pids(ctx context.Context, r *taskAPI.PidsRequest) (_ *taskAPI.PidsResponse, err error) {
-	span, _ := trace(s.ctx, "Pids")
-	defer span.Finish()
+	span, _ := trace(s.rootCtx, "Pids")
+	defer span.End()

 	var processes []*task.ProcessInfo

@@ -810,7 +797,7 @@ func (s *service) Pids(ctx context.Context, r *taskAPI.PidsRequest) (_ *taskAPI.
 	}()

 	pInfo := task.ProcessInfo{
-		Pid: s.pid,
+		Pid: s.hpid,
 	}
 	processes = append(processes, &pInfo)

@@ -821,8 +808,8 @@ func (s *service) Pids(ctx context.Context, r *taskAPI.PidsRequest) (_ *taskAPI.

 // CloseIO of a process
 func (s *service) CloseIO(ctx context.Context, r *taskAPI.CloseIORequest) (_ *ptypes.Empty, err error) {
-	span, _ := trace(s.ctx, "CloseIO")
-	defer span.Finish()
+	span, _ := trace(s.rootCtx, "CloseIO")
+	defer span.End()

 	start := time.Now()
 	defer func() {
@@ -862,8 +849,8 @@ func (s *service) CloseIO(ctx context.Context, r *taskAPI.CloseIORequest) (_ *pt

 // Checkpoint the container
 func (s *service) Checkpoint(ctx context.Context, r *taskAPI.CheckpointTaskRequest) (_ *ptypes.Empty, err error) {
-	span, _ := trace(s.ctx, "Checkpoint")
-	defer span.Finish()
+	span, _ := trace(s.rootCtx, "Checkpoint")
+	defer span.End()

 	start := time.Now()
 	defer func() {
@@ -876,8 +863,8 @@ func (s *service) Checkpoint(ctx context.Context, r *taskAPI.CheckpointTaskReque

 // Connect returns shim information such as the shim's pid
 func (s *service) Connect(ctx context.Context, r *taskAPI.ConnectRequest) (_ *taskAPI.ConnectResponse, err error) {
-	span, _ := trace(s.ctx, "Connect")
-	defer span.Finish()
+	span, _ := trace(s.rootCtx, "Connect")
+	defer span.End()

 	start := time.Now()
 	defer func() {
@@ -891,12 +878,12 @@ func (s *service) Connect(ctx context.Context, r *taskAPI.ConnectRequest) (_ *ta
 	return &taskAPI.ConnectResponse{
 		ShimPid: s.pid,
 		//Since kata cannot get the container's pid in VM, thus only return the shim's pid.
-		TaskPid: s.pid,
+		TaskPid: s.hpid,
 	}, nil
 }

 func (s *service) Shutdown(ctx context.Context, r *taskAPI.ShutdownRequest) (_ *ptypes.Empty, err error) {
-	span, _ := trace(s.ctx, "Shutdown")
+	span, _ := trace(s.rootCtx, "Shutdown")

 	start := time.Now()
 	defer func() {
@@ -911,7 +898,7 @@ func (s *service) Shutdown(ctx context.Context, r *taskAPI.ShutdownRequest) (_ *
 	}
 	s.mu.Unlock()

-	span.Finish()
+	span.End()
 	katautils.StopTracing(s.ctx)

 	s.cancel()
@@ -924,8 +911,8 @@ func (s *service) Shutdown(ctx context.Context, r *taskAPI.ShutdownRequest) (_ *
 }

 func (s *service) Stats(ctx context.Context, r *taskAPI.StatsRequest) (_ *taskAPI.StatsResponse, err error) {
-	span, _ := trace(s.ctx, "Stats")
-	defer span.Finish()
+	span, spanCtx := trace(s.rootCtx, "Stats")
+	defer span.End()

 	start := time.Now()
 	defer func() {
@@ -941,7 +928,7 @@ func (s *service) Stats(ctx context.Context, r *taskAPI.StatsRequest) (_ *taskAP
 		return nil, err
 	}

-	data, err := marshalMetrics(s, c.id)
+	data, err := marshalMetrics(spanCtx, s, c.id)
 	if err != nil {
 		return nil, err
 	}
@@ -953,8 +940,8 @@ func (s *service) Stats(ctx context.Context, r *taskAPI.StatsRequest) (_ *taskAP

 // Update a running container
 func (s *service) Update(ctx context.Context, r *taskAPI.UpdateTaskRequest) (_ *ptypes.Empty, err error) {
-	span, _ := trace(s.ctx, "Update")
-	defer span.Finish()
+	span, spanCtx := trace(s.rootCtx, "Update")
+	defer span.End()

 	start := time.Now()
 	defer func() {
@@ -975,7 +962,7 @@ func (s *service) Update(ctx context.Context, r *taskAPI.UpdateTaskRequest) (_ *
 		return nil, errdefs.ToGRPCf(errdefs.ErrInvalidArgument, "Invalid resources type for %s", s.id)
 	}

-	err = s.sandbox.UpdateContainer(r.ID, *resources)
+	err = s.sandbox.UpdateContainer(spanCtx, r.ID, *resources)
 	if err != nil {
 		return nil, errdefs.ToGRPC(err)
 	}
@@ -985,8 +972,8 @@ func (s *service) Update(ctx context.Context, r *taskAPI.UpdateTaskRequest) (_ *

 // Wait for a process to exit
 func (s *service) Wait(ctx context.Context, r *taskAPI.WaitRequest) (_ *taskAPI.WaitResponse, err error) {
-	span, _ := trace(s.ctx, "Wait")
-	defer span.Finish()
+	span, _ := trace(s.rootCtx, "Wait")
+	defer span.End()

 	var ret uint32

--- a/Show More
+++ b/Show More