Merge pull request #2257 from fidencio/wip/runtime-option-fix

shim-v2: Be compatible with the old runtime options
2026-02-22 14:54:23 +00:00 · 2021-07-19 08:05:01 +02:00 · 2021-07-18 00:07:57 +02:00 · 2021-07-15 00:59:45 +02:00 · 2021-07-15 00:03:11 +02:00 · 2021-07-14 21:56:53 +02:00
1958 changed files with 180767 additions and 48795 deletions
--- a/.github/workflows/require-pr-porting-labels.yaml
+++ b/.github/workflows/require-pr-porting-labels.yaml
@@ -12,6 +12,9 @@ on:
      - reopened
      - labeled
      - unlabeled
+   pull_request:
+     branches:
+      - main

 jobs:
  check-pr-porting-labels:
--- a/.github/workflows/snap-release.yaml
+++ b/.github/workflows/snap-release.yaml
@@ -9,6 +9,8 @@ jobs:
    steps:
      - name: Check out Git repository
        uses: actions/checkout@v2
+        with:
+          fetch-depth: 0

      - name: Install Snapcraft
        uses: samuelmeuli/action-snapcraft@v1
@@ -33,5 +35,5 @@ jobs:
          snap_file="kata-containers_${snap_version}_amd64.snap"
          # Upload the snap if it exists
          if [ -f ${snap_file} ]; then
-            snapcraft upload --release=candidate ${snap_file}
+            snapcraft upload --release=stable ${snap_file}
          fi
--- a/.github/workflows/snap.yaml
+++ b/.github/workflows/snap.yaml
@@ -6,6 +6,8 @@ jobs:
    steps:
      - name: Check out
        uses: actions/checkout@v2
+        with:
+          fetch-depth: 0

      - name: Install Snapcraft
        uses: samuelmeuli/action-snapcraft@v1
--- a/.github/workflows/static-checks.yaml
+++ b/.github/workflows/static-checks.yaml
@@ -1,10 +1,18 @@
-on: ["pull_request"]
+on:
+  pull_request:
+    types:
+      - opened
+      - reopened
+      - labeled
+      - unlabeled
+
 name: Static checks
 jobs:
  test:
+    if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
    strategy:
      matrix:
-        go-version: [1.13.x, 1.14.x, 1.15.x]
+        go-version: [1.15.x, 1.16.x]
        os: [ubuntu-20.04]
    runs-on: ${{ matrix.os }}
    env:
@@ -13,7 +21,7 @@ jobs:
      TRAVIS_PULL_REQUEST_BRANCH: ${{ github.head_ref }}
      TRAVIS_PULL_REQUEST_SHA : ${{ github.event.pull_request.head.sha }}
      RUST_BACKTRACE: "1"
-      target_branch: ${TRAVIS_BRANCH}
+      target_branch: ${{ github.base_ref }}
    steps:
    - name: Install Go
      uses: actions/setup-go@v2
@@ -51,6 +59,10 @@ jobs:
        PATH=$PATH:"$HOME/.cargo/bin"
        rustup target add x86_64-unknown-linux-musl
        rustup component add rustfmt clippy
+    # Check whether the vendored code is up-to-date & working as the first thing
+    - name: Check vendored code
+      run: |
+        cd ${GOPATH}/src/github.com/${{ github.repository }} && make vendor
    # Must build before static checks as we depend on some generated code in runtime and agent
    - name: Build
      run: |
--- a/2
+++ b/2
@@ -15,7 +15,7 @@ TOOLS =

 TOOLS += agent-ctl

-STANDARD_TARGETS = build check clean install test
+STANDARD_TARGETS = build check clean install test vendor

 include utils.mk

--- a/2
+++ b/2
@@ -1 +1 @@
-2.1.0-rc0
+2.2.0-alpha1
--- a/ci/install_yq.sh
+++ b/ci/install_yq.sh
@@ -18,7 +18,9 @@ function install_yq() {
 	GOPATH=${GOPATH:-${HOME}/go}
 	local yq_path="${GOPATH}/bin/yq"
 	local yq_pkg="github.com/mikefarah/yq"
-	[ -x  "${GOPATH}/bin/yq" ] && return
+	local yq_version=3.4.1
+
+	[ -x  "${GOPATH}/bin/yq" ] && [ "`${GOPATH}/bin/yq --version`"X == "yq version ${yq_version}"X ] && return

 	read -r -a sysInfo <<< "$(uname -sm)"

@@ -56,8 +58,6 @@ function install_yq() {
 		die "Please install curl"
 	fi

-	local yq_version=3.4.1
-
 	## NOTE: ${var,,} => gives lowercase value of var
 	local yq_url="https://${yq_pkg}/releases/download/${yq_version}/yq_${goos,,}_${goarch}"
 	curl -o "${yq_path}" -LSsf "${yq_url}"
--- a/ci/lib.sh
+++ b/ci/lib.sh
@@ -5,7 +5,7 @@

 export tests_repo="${tests_repo:-github.com/kata-containers/tests}"
 export tests_repo_dir="$GOPATH/src/$tests_repo"
-export branch="${branch:-main}"
+export branch="${target_branch:-main}"

 # Clones the tests repository and checkout to the branch pointed out by
 # the global $branch variable.
--- a/docs/Developer-Guide.md
+++ b/docs/Developer-Guide.md
@@ -37,6 +37,7 @@
  - [Set up a debug console](#set-up-a-debug-console)
    - [Simple debug console setup](#simple-debug-console-setup)
      - [Enable agent debug console](#enable-agent-debug-console)
+      - [Start `kata-monitor` - ONLY NEEDED FOR 2.0.x](#start-kata-monitor---only-needed-for-20x)
      - [Connect to debug console](#connect-to-debug-console)
    - [Traditional debug console setup](#traditional-debug-console-setup)
      - [Create a custom image containing a shell](#create-a-custom-image-containing-a-shell)
@@ -304,7 +305,7 @@ You MUST choose one of `alpine`, `centos`, `clearlinux`, `debian`, `euleros`, `f
 > - You should only do this step if you are testing with the latest version of the agent.

 ```
-$ sudo install -o root -g root -m 0550 -t ${ROOTFS_DIR}/bin ../../../src/agent/target/x86_64-unknown-linux-musl/release/kata-agent
+$ sudo install -o root -g root -m 0550 -t ${ROOTFS_DIR}/usr/bin ../../../src/agent/target/x86_64-unknown-linux-musl/release/kata-agent
 $ sudo install -o root -g root -m 0440 ../../../src/agent/kata-agent.service ${ROOTFS_DIR}/usr/lib/systemd/system/
 $ sudo install -o root -g root -m 0440 ../../../src/agent/kata-containers.target ${ROOTFS_DIR}/usr/lib/systemd/system/
 ```
@@ -353,12 +354,13 @@ You MUST choose one of `alpine`, `centos`, `clearlinux`, `euleros`, and `fedora`
 >
 > - Check the [compatibility matrix](../tools/osbuilder/README.md#platform-distro-compatibility-matrix) before creating rootfs.

-Optionally, add your custom agent binary to the rootfs with the following, `LIBC` default is `musl`, if `ARCH` is `ppc64le`, should set the `LIBC=gnu` and `ARCH=powerpc64le`:
+Optionally, add your custom agent binary to the rootfs with the following commands. The default `$LIBC` used
+is `musl`, but on ppc64le and s390x, `gnu` should be used. Also, Rust refers to ppc64le as `powerpc64le`:
 ```
-$ export ARCH=$(shell uname -m)
-$ [ ${ARCH} == "ppc64le" ] && export LIBC=gnu || export LIBC=musl
+$ export ARCH=$(uname -m)
+$ [ ${ARCH} == "ppc64le" ] || [ ${ARCH} == "s390x" ] && export LIBC=gnu || export LIBC=musl
 $ [ ${ARCH} == "ppc64le" ] && export ARCH=powerpc64le
-$ sudo install -o root -g root -m 0550 -T ../../../src/agent/target/$(ARCH)-unknown-linux-$(LIBC)/release/kata-agent ${ROOTFS_DIR}/sbin/init
+$ sudo install -o root -g root -m 0550 -T ../../../src/agent/target/${ARCH}-unknown-linux-${LIBC}/release/kata-agent ${ROOTFS_DIR}/sbin/init
 ```

 ### Build an initrd image
@@ -393,14 +395,40 @@ You may choose to manually build your VMM/hypervisor.
 Kata Containers makes use of upstream QEMU branch. The exact version
 and repository utilized can be found by looking at the [versions file](../versions.yaml).

-Kata often utilizes patches for not-yet-upstream fixes for components,
-including QEMU. These can be found in the [packaging/QEMU directory](../tools/packaging/qemu/patches)
+Find the correct version of QEMU from the versions file:
+```
+$ source ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/scripts/lib.sh
+$ qemu_version=$(get_from_kata_deps "assets.hypervisor.qemu.version")
+$ echo ${qemu_version}
+```
+Get source from the matching branch of QEMU:
+```
+$ go get -d github.com/qemu/qemu
+$ cd ${GOPATH}/src/github.com/qemu/qemu
+$ git checkout ${qemu_version}
+$ your_qemu_directory=${GOPATH}/src/github.com/qemu/qemu
+```
+
+There are scripts to manage the build and packaging of QEMU. For the examples below, set your
+environment as:
+```
+$ go get -d github.com/kata-containers/kata-containers
+$ packaging_dir="${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging"
+```
+
+Kata often utilizes patches for not-yet-upstream and/or backported fixes for components,
+including QEMU. These can be found in the [packaging/QEMU directory](../tools/packaging/qemu/patches),
+and it's *recommended* that you apply them. For example, suppose that you are going to build QEMU
+version 5.2.0, do:
+```
+$ cd $your_qemu_directory
+$ $packaging_dir/scripts/apply_patches.sh $packaging_dir/qemu/patches/5.2.x/
+```

 To build utilizing the same options as Kata, you should make use of the `configure-hypervisor.sh` script. For example:
 ```
-$ go get -d github.com/kata-containers/kata-containers/tools/packaging
 $ cd $your_qemu_directory
-$ ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/scripts/configure-hypervisor.sh kata-qemu > kata.cfg
+$ $packaging_dir/scripts/configure-hypervisor.sh kata-qemu > kata.cfg
 $ eval ./configure "$(cat kata.cfg)"
 $ make -j $(nproc)
 $ sudo -E make install
@@ -475,6 +503,16 @@ debug_console_enabled = true

 This will pass `agent.debug_console agent.debug_console_vport=1026` to agent as kernel parameters, and sandboxes created using this parameters will start a shell in guest if new connection is accept from VSOCK.

+#### Start `kata-monitor` - ONLY NEEDED FOR 2.0.x
+
+For Kata Containers `2.0.x` releases, the `kata-runtime exec` command depends on the `kata-monitor` process running, in order to get the sandbox's `vsock` address to connect to. Thus, first start the `kata-monitor` process.
+
+```
+$ sudo kata-monitor
+```
+
+`kata-monitor` will serve at `localhost:8090` by default.
+
 #### Connect to debug console

 Command `kata-runtime exec` is used to connect to the debug console.
@@ -619,7 +657,7 @@ VMM solution.

 In case of cloud-hypervisor, connect to the `vsock` as shown:
 ```
-$ sudo su -c 'cd /var/run/vc/vm/{sandbox_id}/root/ && socat stdin unix-connect:clh.sock'
+$ sudo su -c 'cd /var/run/vc/vm/${sandbox_id}/root/ && socat stdin unix-connect:clh.sock'
 CONNECT 1026
 ```

@@ -627,7 +665,7 @@ CONNECT 1026

 For firecracker, connect to the `hvsock` as shown:
 ```
-$ sudo su -c 'cd /var/run/vc/firecracker/{sandbox_id}/root/ && socat stdin unix-connect:kata.hvsock'
+$ sudo su -c 'cd /var/run/vc/firecracker/${sandbox_id}/root/ && socat stdin unix-connect:kata.hvsock'
 CONNECT 1026
 ```

@@ -636,7 +674,7 @@ CONNECT 1026

 For QEMU, connect to the `vsock` as shown:
 ```
-$ sudo su -c 'cd /var/run/vc/vm/{sandbox_id} && socat "stdin,raw,echo=0,escape=0x11" "unix-connect:console.sock"
+$ sudo su -c 'cd /var/run/vc/vm/${sandbox_id} && socat "stdin,raw,echo=0,escape=0x11" "unix-connect:console.sock"'
 ```

 To disconnect from the virtual machine, type `CONTROL+q` (hold down the
--- a/docs/Documentation-Requirements.md
+++ b/docs/Documentation-Requirements.md
@@ -25,7 +25,7 @@ All documents must:
 - Have a `.md` file extension.
 - Include a TOC (table of contents) at the top of the document with links to
  all heading sections. We recommend using the
-  [`kata-check-markdown`](https://github.com/kata-containers/tests/tree/master/cmd/check-markdown)
+  [`kata-check-markdown`](https://github.com/kata-containers/tests/tree/main/cmd/check-markdown)
  tool to generate the TOC.
 - Be linked to from another document in the same repository.

@@ -118,7 +118,7 @@ This section lists requirements for displaying commands and command output.
 The requirements must be adhered to since documentation containing code blocks
 is validated by the CI system, which executes the command blocks with the help
 of the
-[doc-to-script](https://github.com/kata-containers/tests/tree/master/.ci/kata-doc-to-script.sh)
+[doc-to-script](https://github.com/kata-containers/tests/tree/main/.ci/kata-doc-to-script.sh)
 utility.

 - If a document includes commands the user should run, they **MUST** be shown
@@ -202,7 +202,7 @@ and compare them with standard tools (e.g. `diff(1)`).

 Since this project uses a number of terms not found in conventional
 dictionaries, we have a
-[spell checking tool](https://github.com/kata-containers/tests/tree/master/cmd/check-spelling)
+[spell checking tool](https://github.com/kata-containers/tests/tree/main/cmd/check-spelling)
 that checks both dictionary words and the additional terms we use.

 Run the spell checking tool on your document before raising a PR to ensure it
--- a/docs/Limitations.md
+++ b/docs/Limitations.md
@@ -28,7 +28,7 @@
 * [Appendices](#appendices)
    * [The constraints challenge](#the-constraints-challenge)

---
+***

 # Overview

@@ -94,7 +94,9 @@ This section lists items that might be possible to fix.
 ### checkpoint and restore

 The runtime does not provide `checkpoint` and `restore` commands. There
-are discussions about using VM save and restore to give [`criu`](https://github.com/checkpoint-restore/criu)-like functionality, which might provide a solution.
+are discussions about using VM save and restore to give us a
+[`criu`](https://github.com/checkpoint-restore/criu)-like functionality,
+which might provide a solution.

 Note that the OCI standard does not specify `checkpoint` and `restore`
 commands.
--- a/docs/Release-Process.md
+++ b/docs/Release-Process.md
@@ -29,6 +29,7 @@

 ## Release Process

+
 ### Bump all Kata repositories

  Bump the repositories using a script in the Kata packaging repo, where:
@@ -41,6 +42,23 @@
  $ ./update-repository-version.sh -p "$NEW_VERSION" "$BRANCH"
  ```

+### Point tests repository to stable branch
+
+  If you create a new stable branch, i.e. if your release changes a major or minor version number (not a patch release), then
+  you should modify the `tests` repository to point to that newly created stable branch and not the `main` branch.
+  The objective is that changes in the CI on the main branch will not impact the stable branch.
+
+  In the test directory, change references to the main branch in:
+  * `README.md`
+  * `versions.yaml`
+  * `cmd/github-labels/labels.yaml.in`
+  * `cmd/pmemctl/pmemctl.sh`
+  * `.ci/lib.sh`
+  * `.ci/static-checks.sh`
+
+  See the commits in [the corresponding PR for stable-2.1](https://github.com/kata-containers/tests/pull/3504) for an example of the changes.
+
+
 ### Merge all bump version Pull requests

  - The above step will create a GitHub pull request in the Kata projects. Trigger the CI using `/test` command on each bump Pull request.
@@ -50,7 +68,7 @@
 ### Tag all Kata repositories

  Once all the pull requests to bump versions in all Kata repositories are merged,
-  tag all the repositories as shown below.  
+  tag all the repositories as shown below.
  ```
  $ cd ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/release
  $ git checkout  <kata-branch-to-release>
@@ -60,7 +78,7 @@

 ### Check Git-hub Actions

-  We make use of [GitHub actions](https://github.com/features/actions) in this [file](https://github.com/kata-containers/kata-containers/blob/master/.github/workflows/main.yaml) in the `kata-containers/kata-containers` repository to build and upload release artifacts. This action is auto triggered with the above step when a new tag is pushed to the `kata-containers/kata-conatiners` repository.
+  We make use of [GitHub actions](https://github.com/features/actions) in this [file](https://github.com/kata-containers/kata-containers/blob/main/.github/workflows/main.yaml) in the `kata-containers/kata-containers` repository to build and upload release artifacts. This action is auto triggered with the above step when a new tag is pushed to the `kata-containers/kata-containers` repository.

  Check the [actions status page](https://github.com/kata-containers/kata-containers/actions) to verify all steps in the actions workflow have completed successfully. On success, a static tarball containing Kata release artifacts will be uploaded to the [Release page](https://github.com/kata-containers/kata-containers/releases).

--- a/docs/Stable-Branch-Strategy.md
+++ b/docs/Stable-Branch-Strategy.md
@@ -32,16 +32,16 @@ provides additional information regarding release `99.123.77` in the previous ex
  changing the existing behavior*.

 - When `MAJOR` increases, the new release adds **new features, bug fixes, or
-  both** and which *changes the behavior from the previous release* (incompatible with previous releases).
+  both** and which **changes the behavior from the previous release** (incompatible with previous releases).

  A major release will also likely require a change of the container manager version used, 
-for example Docker\*. Please refer to the release notes for further details.
+for example Containerd or CRI-O. Please refer to the release notes for further details.

 ## Release Strategy

 Any new features added since the last release will be available in the next minor
 release. These will include bug fixes as well. To facilitate a stable user environment, 
-Kata provides stable branch-based releases and a master branch release.
+Kata provides stable branch-based releases and a main branch release.

 ## Stable branch patch criteria

@@ -49,9 +49,10 @@ No new features should be introduced to stable branches.  This is intended to li
 providing only bug and security fixes.

 ## Branch Management
-Kata Containers will maintain two stable release branches in addition to the master branch.
-Once a new MAJOR or MINOR release is created from master, a new stable branch is created for
-the prior MAJOR or MINOR release and the older stable branch is no longer maintained. End of
+Kata Containers will maintain **one** stable release branch, in addition to the main branch, for
+each active major release.
+Once a new MAJOR or MINOR release is created from main, a new stable branch is created for
+the prior MAJOR or MINOR release and the previous stable branch is no longer maintained. End of
 maintenance for a branch is announced on the Kata Containers mailing list.  Users can determine
 the version currently installed by running `kata-runtime kata-env`. It is recommended to use the
 latest stable branch available.
@@ -61,59 +62,59 @@ A couple of examples follow to help clarify this process.
 ### New bug fix introduced

 A bug fix is submitted against the runtime which does not introduce new inter-component dependencies.
-This fix is applied to both the master and stable branches, and there is no need to create a new
+This fix is applied to both the main and stable branches, and there is no need to create a new
 stable branch.

 | Branch | Original version | New version |
 |--|--|--|
-| `master` | `1.3.0-rc0` | `1.3.0-rc1` |
-| `stable-1.2` | `1.2.0` | `1.2.1` |
-| `stable-1.1` | `1.1.2` | `1.1.3` |
+| `main` | `2.3.0-rc0` | `2.3.0-rc1` |
+| `stable-2.2` | `2.2.0` | `2.2.1` |
+| `stable-2.1` | (unmaintained) | (unmaintained) |


 ### New release made feature or change adding new inter-component dependency

 A new feature is introduced, which adds a new inter-component dependency. In this case a new stable
-branch is created (stable-1.3) starting from master and the older stable branch (stable-1.1)
+branch is created (stable-2.3) starting from main and the previous stable branch (stable-2.2)
 is dropped from maintenance.


 | Branch | Original version | New version |
 |--|--|--|
-| `master` | `1.3.0-rc1` | `1.3.0` |
-| `stable-1.3` | N/A| `1.3.0` |
-| `stable-1.2` | `1.2.1` | `1.2.2` |
-| `stable-1.1` | `1.1.3` | (unmaintained) |
+| `main` | `2.3.0-rc1` | `2.3.0` |
+| `stable-2.3` | N/A| `2.3.0` |
+| `stable-2.2` | `2.2.1` | (unmaintained) |
+| `stable-2.1` | (unmaintained) | (unmaintained) |

-Note, the stable-1.1 branch will still exist with tag 1.1.3, but under current plans it is
-not maintained further. The next tag applied to master will be 1.4.0-alpha0. We would then
+Note, the stable-2.2 branch will still exist with tag 2.2.1, but under current plans it is
+not maintained further. The next tag applied to main will be 2.4.0-alpha0. We would then
 create a couple of alpha releases gathering features targeted for that particular release (in
-this case 1.4.0), followed by a release candidate. The release candidate marks a feature freeze.
+this case 2.4.0), followed by a release candidate. The release candidate marks a feature freeze.
 A new stable branch is created for the release candidate. Only bug fixes and any security issues
-are added to the branch going forward until release 1.4.0 is made.
+are added to the branch going forward until release 2.4.0 is made.
   
 ## Backporting Process 

-Development that occurs against the master branch and applicable code commits should also be submitted
+Development that occurs against the main branch and applicable code commits should also be submitted
 against the stable branches. Some guidelines for this process follow::
  1. Only bug and security fixes which do not introduce inter-component dependencies are
 candidates for stable branches. These PRs should be marked with "bug" in GitHub.
-  2. Once a PR is created against master which meets requirement of (1), a comparable one
+  2. Once a PR is created against main which meets requirement of (1), a comparable one
 should also be submitted against the stable branches. It is the responsibility of the submitter
 to apply their pull request against stable, and it is the responsibility of the
 reviewers to help identify stable-candidate pull requests.
 
 ## Continuous Integration Testing

-The test repository is forked to create stable branches from master. Full CI
-runs on each stable and master PR using its respective tests repository branch.
+The test repository is forked to create stable branches from main. Full CI
+runs on each stable and main PR using its respective tests repository branch.

 ### An alternative method for CI testing:

-Ideally, the continuous integration infrastructure will run the same test suite on both master
+Ideally, the continuous integration infrastructure will run the same test suite on both main
 and the stable branches.  When tests are modified or new feature tests are introduced, explicit
 logic should exist within the testing CI to make sure only applicable tests are executed against
-stable and master. While this is not in place currently, it should be considered in the long term.
+stable and main. While this is not in place currently, it should be considered in the long term.

 ## Release Management

@@ -121,7 +122,7 @@ stable and master. While this is not in place currently, it should be considered

 Releases are made every three weeks, which include a GitHub release as
 well as binary packages. These patch releases are made for both stable branches, and a "release candidate"
-for the next `MAJOR` or `MINOR` is created from master. If there are no changes across all the repositories, no
+for the next `MAJOR` or `MINOR` is created from main. If there are no changes across all the repositories, no
 release is created and an announcement is made on the developer mailing list to highlight this.
 If a release is being made, each repository is tagged for this release, regardless
 of whether changes are introduced. The release schedule can be seen on the
@@ -142,10 +143,10 @@ maturity, we have increased the cadence from six weeks to twelve weeks. The rele
 ### Compatibility
 Kata guarantees compatibility between components that are within one minor release of each other. 
 
-This is critical for dependencies which cross between host (runtime, shim, proxy) and
+This is critical for dependencies which cross between host (shimv2 runtime) and
 the guest (hypervisor, rootfs and agent).  For example, consider a cluster with a long-running
-deployment, workload-never-dies, all on Kata version 1.1.3 components. If the operator updates
-the Kata components to the next new minor release (i.e. 1.2.0), we need to guarantee that the 1.2.0
-runtime still communicates with 1.1.3 agent within workload-never-dies.
+deployment, workload-never-dies, all on Kata version 2.1.3 components. If the operator updates
+the Kata components to the next new minor release (i.e. 2.2.0), we need to guarantee that the 2.2.0
+shimv2 runtime still communicates with 2.1.3 agent within workload-never-dies.

 Handling live-update is out of the scope of this document. See this [`kata-runtime` issue](https://github.com/kata-containers/runtime/issues/492) for details.
--- a/docs/design/README.md
+++ b/docs/design/README.md
@@ -8,4 +8,5 @@ Kata Containers design documents:
 - [VSocks](VSocks.md)
 - [VCPU handling](vcpu-handling.md)
 - [Host cgroups](host-cgroups.md)
+- [`Inotify` support](inotify.md)
 - [Metrics(Kata 2.0)](kata-2-0-metrics.md)
--- a/docs/design/arch-images/inotify-workaround.png
+++ b/docs/design/arch-images/inotify-workaround.png
--- a/docs/design/inotify.md
+++ b/docs/design/inotify.md
@@ -0,0 +1,30 @@
+# Kata Containers support for `inotify`
+
+## Background on `inotify` usage
+
+A common pattern in Kubernetes is to watch for changes to files/directories passed in as `ConfigMaps`
+or `Secrets`. Sidecars normally use `inotify` to watch for changes and then signal the primary container to reload
+the updated configuration. Kata Containers typically will pass these host files into the guest using `virtiofs`, which
+does not support `inotify` today. While we work to enable this use case in `virtiofs`, we introduced a workaround in Kata Containers.
+This document describes how Kata Containers implements this workaround.
+
+### Detecting a `watchable` mount
+
+Kubernetes creates `secrets` and `ConfigMap` mounts at very specific locations on the host filesystem. For container mounts,
+the `Kata Containers` runtime will check the source of the mount to identify these special cases. For these use cases, only a single file
+or very few would typically need to be watched. To avoid excessive overheads in making a mount watchable,
+we enforce a limit of eight files per mount. If a `secret` or `ConfigMap` mount contains more than 8 files, it will not be
+considered watchable. We similarly enforce a limit of 1 MB per mount to be considered watchable. Non-watchable mounts will
+continue to propagate changes from the mount on the host to the container workload, but these updates will not trigger an
+`inotify` event.
+
+If at any point a mount grows beyond the eight file or 1MB limit, it will no longer be `watchable`.
+
+### Presenting a `watchable` mount to the workload
+
+For mounts that are considered `watchable`, inside the guest, the `kata-agent` will poll the mount presented from
+the host through `virtiofs` and copy any changed files to a `tmpfs` mount that is presented to the container. In this way,
+for `watchable` mounts, Kata will do the polling on behalf of the workload and existing workloads needn't change their usage
+of `inotify`.
+
+![drawing](arch-images/inotify-workaround.png)
--- a/docs/how-to/README.md
+++ b/docs/how-to/README.md
@@ -37,3 +37,4 @@
 - [How to use Kata Containers with `virtio-mem`](how-to-use-virtio-mem-with-kata.md)
 - [How to set sandbox Kata Containers configurations with pod annotations](how-to-set-sandbox-config-kata.md)
 - [How to monitor Kata Containers in K8s](how-to-set-prometheus-in-k8s.md)
+- [How to use hotplug memory on arm64 in Kata Containers](how-to-hotplug-memory-arm64.md)
--- a/docs/how-to/how-to-hotplug-memory-arm64.md
+++ b/docs/how-to/how-to-hotplug-memory-arm64.md
@@ -0,0 +1,32 @@
+# How to use memory hotplug feature in Kata Containers on arm64
+
+- [Introduction](#introduction)
+- [Install UEFI ROM](#install-uefi-rom)
+- [Run for test](#run-for-test)
+
+## Introduction
+
+Memory hotplug is a key feature for containers to allocate memory dynamically in deployment.
+As Kata Containers is based on VMs, this feature needs support from both the VMM and the guest kernel. Luckily, it is fully supported by the current default versions of QEMU and the guest kernel used by Kata on arm64. For other VMMs, e.g. Cloud Hypervisor, the enablement work is in progress. Apart from the VMM and guest kernel, memory hotplug also depends on ACPI, which in turn depends on firmware. On x86, you can boot a VM using QEMU with ACPI enabled directly, because it boots up with firmware implicitly. For arm64, however, you need to specify the firmware explicitly. That is to say, if you are already able to run a normal Kata Container on arm64, the only extra step is to install the UEFI ROM before using the memory hotplug feature.
+
+## Install UEFI ROM
+
+We provide a helper script to install the UEFI ROM. If you have installed Kata normally on your host, you just need to run the script as follows:
+
+```bash
+$ pushd $GOPATH/src/github.com/kata-containers/tests
+$ sudo .ci/aarch64/install_rom_aarch64.sh
+$ popd
+```
+
+## Run for test
+
+Let's test whether memory hotplug is ready for Kata after installing the UEFI ROM. Make sure containerd is ready to run Kata before testing.
+
+```bash
+$ sudo ctr image pull docker.io/library/ubuntu:latest
+$ sudo ctr run --runtime io.containerd.run.kata.v2 -t --rm docker.io/library/ubuntu:latest hello sh -c "free -h"
+$ sudo ctr run --runtime io.containerd.run.kata.v2 -t --memory-limit 536870912 --rm docker.io/library/ubuntu:latest hello sh -c "free -h"
+```
+
+Compare the results of the two tests. If the latter reports 0.5G more memory than the former, memory hotplug is working. Congratulations!
--- a/docs/how-to/how-to-set-sandbox-config-kata.md
+++ b/docs/how-to/how-to-set-sandbox-config-kata.md
@@ -26,6 +26,7 @@ There are several kinds of Kata configurations and they are listed below.
 | `io.katacontainers.config.runtime.disable_new_netns` | `boolean` | determines if a new netns is created for the hypervisor process |
 | `io.katacontainers.config.runtime.internetworking_model` | string| determines how the VM should be connected to the container network interface. Valid values are `macvtap`, `tcfilter` and `none` |
 | `io.katacontainers.config.runtime.sandbox_cgroup_only`| `boolean` | determines if Kata processes are managed only in sandbox cgroup |
+| `io.katacontainers.config.runtime.enable_pprof` | `boolean` | enables Golang `pprof` for `containerd-shim-kata-v2` process |

 ## Agent Options
 | Key | Value Type | Comments |
@@ -78,7 +79,7 @@ There are several kinds of Kata configurations and they are listed below.
 | `io.katacontainers.config.hypervisor.kernel` | string | the kernel used to boot the container VM |
 | `io.katacontainers.config.hypervisor.machine_accelerators` | string | machine specific accelerators for the hypervisor |
 | `io.katacontainers.config.hypervisor.machine_type` | string | the type of machine being emulated by the hypervisor |
-| `io.katacontainers.config.hypervisor.memory_offset` | uint32| the memory space used for `nvdimm` device by the hypervisor |
+| `io.katacontainers.config.hypervisor.memory_offset` | uint64| the memory space used for `nvdimm` device by the hypervisor |
 | `io.katacontainers.config.hypervisor.memory_slots` | uint32| the memory slots assigned to the VM by the hypervisor |
 | `io.katacontainers.config.hypervisor.msize_9p` | uint32 | the `msize` for 9p shares |
 | `io.katacontainers.config.hypervisor.path` | string | the hypervisor that will run the container VM |
--- a/docs/how-to/how-to-use-sysctls-with-kata.md
+++ b/docs/how-to/how-to-use-sysctls-with-kata.md
@@ -16,11 +16,10 @@ To get a complete list of kernel parameters, run:
 $ sudo sysctl -a
 ```

-Both Docker and Kubernetes provide mechanisms for setting namespaced sysctls. 
-Namespaced sysctls can be set per pod in the case of Kubernetes or per container
-in case of Docker.
+Kubernetes provides mechanisms for setting namespaced sysctls.
+Namespaced sysctls can be set per pod in the case of Kubernetes.
 The following sysctls are known to be namespaced and can be set with 
-Docker and Kubernetes:
+Kubernetes:

 - `kernel.shm*`
 - `kernel.msg*`
@@ -30,31 +29,10 @@ Docker and Kubernetes:

 ### Namespaced Sysctls:

-Kata Containers supports setting namespaced sysctls with Docker and Kubernetes.
+Kata Containers supports setting namespaced sysctls with Kubernetes.
 All namespaced sysctls can be set in the same way as regular Linux based
 containers, the difference being, in the case of Kata they are set inside the guest.

-#### Setting Namespaced Sysctls with Docker:
-
-```
-$ sudo docker run --runtime=kata-runtime -it alpine cat /proc/sys/fs/mqueue/queues_max
-256
-$ sudo docker run --runtime=kata-runtime --sysctl fs.mqueue.queues_max=512 -it alpine cat /proc/sys/fs/mqueue/queues_max
-512
-```
-
-... and:
-
-```
-$ sudo docker run --runtime=kata-runtime -it alpine cat /proc/sys/kernel/shmmax
-18446744073692774399
-$ sudo docker run --runtime=kata-runtime --sysctl kernel.shmmax=1024 -it alpine cat /proc/sys/kernel/shmmax
-1024
-```
-
-For additional documentation on setting sysctls with Docker please refer to [Docker-sysctl-doc](https://docs.docker.com/engine/reference/commandline/run/#configure-namespaced-kernel-parameters-sysctls-at-runtime).
-
-
 #### Setting Namespaced Sysctls with Kubernetes:

 Kubernetes considers certain sysctls as safe and others as unsafe. For detailed
@@ -100,7 +78,7 @@ spec:

 ### Non-Namespaced Sysctls:

-Docker and Kubernetes disallow sysctls without a namespace.
+Kubernetes disallows sysctls without a namespace.
 The recommendation is to set them directly on the host or use a privileged
 container in the case of Kubernetes.

--- a/docs/how-to/how-to-use-virtio-fs-with-kata.md
+++ b/docs/how-to/how-to-use-virtio-fs-with-kata.md
@@ -9,4 +9,4 @@ Container deployments utilize explicit or implicit file sharing between host fil

 As of the 2.0 release of Kata Containers, [virtio-fs](https://virtio-fs.gitlab.io/) is the default filesystem sharing mechanism.

-virtio-fs support works out of the box for `cloud-hypervisor` and `qemu`, when Kata Containers is deployed using `kata-deploy`. Learn more about `kata-deploy` and how to use `kata-deploy` in Kubernetes [here](https://github.com/kata-containers/packaging/tree/master/kata-deploy#kubernetes-quick-start).
+virtio-fs support works out of the box for `cloud-hypervisor` and `qemu`, when Kata Containers is deployed using `kata-deploy`. Learn more about `kata-deploy` and how to use `kata-deploy` in Kubernetes [here](https://github.com/kata-containers/kata-containers/tree/main/tools/packaging/kata-deploy#kubernetes-quick-start).
--- a/docs/how-to/how-to-use-virtio-mem-with-kata.md
+++ b/docs/how-to/how-to-use-virtio-mem-with-kata.md
@@ -13,26 +13,23 @@ Kata Containers with `virtio-mem` supports memory resize.

 ## Requisites

-Kata Containers with `virtio-mem` requires Linux and the QEMU that support `virtio-mem`.
-The Linux kernel and QEMU upstream version still not support `virtio-mem`.  @davidhildenbrand is working on them.
-Please use following unofficial version of the Linux kernel and QEMU that support `virtio-mem` with Kata Containers.
+Kata Containers only supports `virtio-mem` with QEMU.
+Install and setup Kata Containers as shown [here](../install/README.md).

-The Linux kernel is at https://github.com/davidhildenbrand/linux/tree/virtio-mem-rfc-v4.
-The Linux kernel config that can work with Kata Containers is at https://gist.github.com/teawater/016194ee84748c768745a163d08b0fb9.
-
-The QEMU is at https://github.com/teawater/qemu/tree/kata-virtio-mem. (The original source is at https://github.com/davidhildenbrand/qemu/tree/virtio-mem.  Its base version of QEMU cannot work with Kata Containers.  So merge the commit of `virtio-mem` to upstream QEMU.)
-
-Set Linux and the QEMU that support `virtio-mem` with following line in the Kata Containers QEMU configuration `configuration-qemu.toml`:
-```toml
-[hypervisor.qemu]
-path = "qemu-dir"
-kernel = "vmlinux-dir"
+### With x86_64
+The x86_64 Kata Linux kernel is built with the `virtio-mem` config option enabled.
+Enable `virtio-mem` as follows:
+```
+$ sudo sed -i -e 's/^#enable_virtio_mem.*$/enable_virtio_mem = true/g' /etc/kata-containers/configuration.toml
 ```

-Enable `virtio-mem` with following line in the Kata Containers configuration:
-```toml
-enable_virtio_mem = true
+### With other architectures
+The Kata Linux kernel for other architectures is built without the `virtio-mem` config option.
+You can enable the `virtio-mem` config option as follows:
 ```
+CONFIG_VIRTIO_MEM=y
+```
+Then you can build and install the guest kernel image as shown [here](../../tools/packaging/kernel/README.md#build-kata-containers-kernel).

 ## Run a Kata Container utilizing `virtio-mem`

@@ -41,13 +38,35 @@ Use following command to enable memory overcommitment of a Linux kernel.  Becaus
 $ echo 1 | sudo tee /proc/sys/vm/overcommit_memory
 ```

-Use following command start a Kata Container.
+Use following command to start a Kata Container.
 ```
-$ docker run --rm -it --runtime=kata --name test busybox
+$ pod_yaml=pod.yaml
+$ container_yaml=container.yaml
+$ image="quay.io/prometheus/busybox:latest"
+$ cat << EOF > "${pod_yaml}"
+metadata:
+  name: busybox-sandbox1
+EOF
+$ cat << EOF > "${container_yaml}"
+metadata:
+  name: busybox-killed-vmm
+image:
+  image: "$image"
+command:
+- top
+EOF
+$ sudo crictl pull $image
+$ podid=$(sudo crictl runp $pod_yaml)
+$ cid=$(sudo crictl create $podid $container_yaml $pod_yaml)
+$ sudo crictl start $cid
 ```

-Use following command set the memory size of test to default_memory + 512m.
+Use the following command to set the container memory limit to 2g and the memory size of the VM to its default_memory + 2g.
 ```
-$ docker update -m 512m --memory-swap -1 test
+$ sudo crictl update --memory $((2*1024*1024*1024)) $cid
 ```

+Use the following command to set the container memory limit to 1g and the memory size of the VM to its default_memory + 1g.
+```
+$ sudo crictl update --memory $((1*1024*1024*1024)) $cid
+```
--- a/docs/how-to/run-kata-with-k8s.md
+++ b/docs/how-to/run-kata-with-k8s.md
@@ -171,10 +171,10 @@ $ sudo systemctl daemon-reload
 $ sudo systemctl restart kubelet

 # If using CRI-O
-$ sudo kubeadm init --skip-preflight-checks --cri-socket /var/run/crio/crio.sock --pod-network-cidr=10.244.0.0/16
+$ sudo kubeadm init --ignore-preflight-errors=all --cri-socket /var/run/crio/crio.sock --pod-network-cidr=10.244.0.0/16

 # If using CRI-containerd
-$ sudo kubeadm init --skip-preflight-checks --cri-socket /run/containerd/containerd.sock --pod-network-cidr=10.244.0.0/16
+$ sudo kubeadm init --ignore-preflight-errors=all --cri-socket /run/containerd/containerd.sock --pod-network-cidr=10.244.0.0/16

 $ export KUBECONFIG=/etc/kubernetes/admin.conf
 ```
--- a/docs/install/minikube-installation-guide.md
+++ b/docs/install/minikube-installation-guide.md
@@ -18,7 +18,7 @@
 cluster locally. It creates a single node Kubernetes stack in a local VM.

 [Kata Containers](https://github.com/kata-containers) can be installed into a Minikube cluster using
-[`kata-deploy`](https://github.com/kata-containers/packaging/tree/master/kata-deploy).
+[`kata-deploy`](https://github.com/kata-containers/kata-containers/tree/main/tools/packaging/kata-deploy).

 This document details the pre-requisites, installation steps, and how to check
 the installation has been successful.
@@ -135,7 +135,7 @@ $ kubectl apply -f kata-deploy/base/kata-deploy.yaml
 This installs the Kata Containers components into `/opt/kata` inside the Minikube node. It can take
 a few minutes for the operation to complete. You can check the installation has worked by checking
 the status of the `kata-deploy` pod, which will be executing
-[this script](https://github.com/kata-containers/packaging/blob/master/kata-deploy/scripts/kata-deploy.sh),
+[this script](https://github.com/kata-containers/kata-containers/blob/main/tools/packaging/kata-deploy/scripts/kata-deploy.sh),
 and will be executing a `sleep infinity` once it has successfully completed its work.
 You can accomplish this by running the following:

--- a/docs/install/snap-installation-guide.md
+++ b/docs/install/snap-installation-guide.md
@@ -14,7 +14,7 @@ Kata Containers can be installed in any Linux distribution that supports
 Run the following command to install **Kata Containers**:

 ```sh
-$ sudo snap install kata-containers --candidate --classic
+$ sudo snap install kata-containers --stable --classic
 ```

 ## Configure Kata Containers
--- a/docs/use-cases/Intel-GPU-passthrough-and-Kata.md
+++ b/docs/use-cases/Intel-GPU-passthrough-and-Kata.md
@@ -65,8 +65,8 @@ configuration in the Kata `configuration.toml` file as shown below.
 $ sudo sed -i -e 's/^# *\(hotplug_vfio_on_root_bus\).*=.*$/\1 = true/g' /usr/share/defaults/kata-containers/configuration.toml
 ```

-Make sure you are using the `pc` machine type by verifying `machine_type = "pc"` is
-set in the `configuration.toml`.
+Make sure you are using the `q35` machine type by verifying `machine_type = "q35"` is
+set in the `configuration.toml`. Make sure `pcie_root_port` is set to a positive value.

 ## Build Kata Containers kernel with GPU support

--- a/docs/use-cases/Nvidia-GPU-passthrough-and-Kata.md
+++ b/docs/use-cases/Nvidia-GPU-passthrough-and-Kata.md
@@ -75,13 +75,6 @@ To use non-large BARs devices (for example, Nvidia Tesla T4), you need Kata vers
 Follow the [Kata Containers setup instructions](../install/README.md)
 to install the latest version of Kata.

-The following configuration in the Kata `configuration.toml` file as shown below can work:
-```
-machine_type = "pc"
-
-hotplug_vfio_on_root_bus = true
-```
-
 To use large BARs devices (for example, Nvidia Tesla P100), you need Kata version 1.11.0 or above.

 The following configuration in the Kata `configuration.toml` file as shown below can work:
--- a/docs/use-cases/using-Intel-QAT-and-kata.md
+++ b/docs/use-cases/using-Intel-QAT-and-kata.md
@@ -74,7 +74,7 @@ Make sure to check [`01.org`](https://01.org/intel-quickassist-technology) for
 the latest driver.

 ```bash
-$ export QAT_DRIVER_VER=qat1.7.l.4.12.0-00011.tar.gz
+$ export QAT_DRIVER_VER=qat1.7.l.4.14.0-00031.tar.gz
 $ export QAT_DRIVER_URL=https://downloadmirror.intel.com/30178/eng/${QAT_DRIVER_VER}
 $ export QAT_CONF_LOCATION=~/QAT_conf
 $ export QAT_DOCKERFILE=https://raw.githubusercontent.com/intel/intel-device-plugins-for-kubernetes/master/demo/openssl-qat-engine/Dockerfile
@@ -402,7 +402,7 @@ different hypervisor, different install method for Kata, or a different
 Intel® QAT chipset then the command will need to be modified. 

 > **Note: The following was tested with 
-[containerd v1.3.9](https://github.com/containerd/containerd/releases/tag/v1.3.9).**
+[containerd v1.4.6](https://github.com/containerd/containerd/releases/tag/v1.4.6).**

 ```bash
 $ config_file="/opt/kata/share/defaults/kata-containers/configuration-qemu.toml"
--- a/snap/snapcraft.yaml
+++ b/snap/snapcraft.yaml
@@ -80,6 +80,8 @@ parts:
      - uidmap
      - gnupg2
    override-build: |
+      [ "$(uname -m)" = "ppc64le" ] || [ "$(uname -m)" = "s390x" ] && sudo apt-get --no-install-recommends install -y protobuf-compiler
+
      yq=${SNAPCRAFT_STAGE}/yq

      # set GOPATH
@@ -88,6 +90,7 @@ parts:

      export GOROOT=${SNAPCRAFT_STAGE}
      export PATH="${GOROOT}/bin:${PATH}"
+      export GO111MODULE="auto"

      http_proxy=${http_proxy:-""}
      https_proxy=${https_proxy:-""}
@@ -117,9 +120,13 @@ parts:
      export USE_DOCKER=1
      export DEBUG=1
      case "$(uname -m)" in
-        aarch64|ppc64le|s390x)
+        aarch64)
          sudo -E PATH=$PATH make initrd DISTRO=alpine
        ;;
+        ppc64le|s390x)
+          # Cannot use alpine on ppc64le/s390x because it would require a musl agent
+          sudo -E PATH=$PATH make initrd DISTRO=ubuntu
+        ;;
        x86_64)
          # In some build systems it's impossible to build a rootfs image, try with the initrd image
          sudo -E PATH=$PATH make image DISTRO=clearlinux || sudo -E PATH=$PATH make initrd DISTRO=alpine
@@ -141,6 +148,7 @@ parts:
      export GOPATH=${SNAPCRAFT_STAGE}/gopath
      export GOROOT=${SNAPCRAFT_STAGE}
      export PATH="${GOROOT}/bin:${PATH}"
+      export GO111MODULE="auto"
      kata_dir=${GOPATH}/src/github.com/${SNAPCRAFT_PROJECT_NAME}/${SNAPCRAFT_PROJECT_NAME}

      cd ${kata_dir}/src/runtime
@@ -180,7 +188,10 @@ parts:
      - bison
      - flex
    override-build: |
+      [ "$(uname -m)" = "s390x" ] && sudo apt-get --no-install-recommends install -y libssl-dev
+
      export GOPATH=${SNAPCRAFT_STAGE}/gopath
+      export GO111MODULE="auto"
      kata_dir=${GOPATH}/src/github.com/${SNAPCRAFT_PROJECT_NAME}/${SNAPCRAFT_PROJECT_NAME}

      cd ${kata_dir}/tools/packaging/kernel
@@ -202,8 +213,10 @@ parts:
      ln -sf ${vmlinuz_name} ${kata_kernel_dir}/vmlinuz.container

      # Install raw kernel
+      vmlinux_path=vmlinux
+      [ "$(uname -m)" = "s390x" ] && vmlinux_path=arch/s390/boot/compressed/vmlinux
      vmlinux_name=vmlinux-${kernel_suffix}
-      cp vmlinux ${kata_kernel_dir}/${vmlinux_name}
+      cp ${vmlinux_path} ${kata_kernel_dir}/${vmlinux_name}
      ln -sf ${vmlinux_name} ${kata_kernel_dir}/vmlinux.container

  qemu:
@@ -227,11 +240,13 @@ parts:
      - libblkid-dev
      - libffi-dev
      - libmount-dev
+      - libseccomp-dev
      - libselinux1-dev
      - ninja-build
    override-build: |
      yq=${SNAPCRAFT_STAGE}/yq
      export GOPATH=${SNAPCRAFT_STAGE}/gopath
+      export GO111MODULE="auto"
      kata_dir=${GOPATH}/src/github.com/${SNAPCRAFT_PROJECT_NAME}/${SNAPCRAFT_PROJECT_NAME}

      versions_file="${kata_dir}/versions.yaml"
@@ -267,7 +282,7 @@ parts:
      ${kata_dir}/tools/packaging/scripts/apply_patches.sh "${patches_version_dir}"

      # Only x86_64 supports libpmem
-      [ "$(uname -m)" = "x86_64" ] && sudo apt-get --no-install-recommends install -y apt-utils ca-certificates libpmem-dev libseccomp-dev
+      [ "$(uname -m)" = "x86_64" ] && sudo apt-get --no-install-recommends install -y apt-utils ca-certificates libpmem-dev

      configure_hypervisor=${kata_dir}/tools/packaging/scripts/configure-hypervisor.sh
      chmod +x ${configure_hypervisor}
--- a/src/agent/.gitignore
+++ b/src/agent/.gitignore
@@ -1 +1,2 @@
 tarpaulin-report.html
+vendor/
--- a/src/agent/Cargo.lock
+++ b/src/agent/Cargo.lock
--- a/src/agent/Cargo.toml
+++ b/src/agent/Cargo.toml
@@ -13,20 +13,25 @@ lazy_static = "1.3.0"
 ttrpc = { version = "0.5.0", features = ["async", "protobuf-codec"], default-features = false }
 protobuf = "=2.14.0"
 libc = "0.2.58"
-nix = "0.17.0"
-prctl = "1.0.0"
+nix = "0.21.0"
+capctl = "0.2.0"
 serde_json = "1.0.39"
 scan_fmt = "0.2.3"
 scopeguard = "1.0.0"
 regex = "1"

+# Async helpers
 async-trait = "0.1.42"
-tokio = { version = "1.2.0", features = ["rt", "rt-multi-thread", "sync", "macros", "io-util", "time", "signal", "io-std", "process", "fs"] }
+async-recursion = "0.3.2"
 futures = "0.3.12"
-netlink-sys = { version = "0.6.0", features = ["tokio_socket",]}
-tokio-vsock = "0.3.0"
-rtnetlink = "0.7.0"
-netlink-packet-utils = "0.4.0"
+
+# Async runtime
+tokio = { version = "1.2.0", features = ["full"] }
+tokio-vsock = "0.3.1"
+
+netlink-sys = { version = "0.7.0", features = ["tokio_socket",]}
+rtnetlink = "0.8.0"
+netlink-packet-utils = "0.4.1"
 ipnetwork = "0.17.0"

 # slog:
@@ -40,13 +45,21 @@ slog-scope = "4.1.2"
 slog-stdlog = "4.0.0"
 log = "0.4.11"

-# for testing
-tempfile = "3.1.0"
 prometheus = { version = "0.9.0", features = ["process"] }
 procfs = "0.7.9"
 anyhow = "1.0.32"
 cgroups = { package = "cgroups-rs", version = "0.2.5" }

+# Tracing
+tracing = "0.1.26"
+tracing-subscriber = "0.2.18"
+tracing-opentelemetry = "0.13.0"
+opentelemetry = "0.14.0"
+vsock-exporter = { path = "vsock-exporter" }
+
+[dev-dependencies]
+tempfile = "3.1.0"
+
 [workspace]
 members = [
    "oci",
--- a/src/agent/Makefile
+++ b/src/agent/Makefile
@@ -27,40 +27,7 @@ COMMIT_MSG = $(if $(COMMIT),$(COMMIT),unknown)
 # Exported to allow cargo to see it
 export VERSION_COMMIT := $(if $(COMMIT),$(VERSION)-$(COMMIT),$(VERSION))

-##VAR BUILD_TYPE=release|debug type of rust build
-BUILD_TYPE = release
-
-##VAR ARCH=arch target to build (format: uname -m)
-ARCH = $(shell uname -m)
-##VAR LIBC=musl|gnu
-LIBC ?= musl
-ifneq ($(LIBC),musl)
-    ifeq ($(LIBC),gnu)
-        override LIBC = gnu
-    else
-        $(error "ERROR: A non supported LIBC value was passed. Supported values are musl and gnu")
-    endif
-endif
-
-ifeq ($(ARCH), ppc64le)
-    override ARCH = powerpc64le
-    override LIBC = gnu
-    $(warning "WARNING: powerpc64le-unknown-linux-musl target is unavailable")
-endif
-
-ifeq ($(ARCH), s390x)
-    override LIBC = gnu
-    $(warning "WARNING: s390x-unknown-linux-musl target is unavailable")
-endif
-
-
-EXTRA_RUSTFLAGS :=
-ifeq ($(ARCH), aarch64)
-    override EXTRA_RUSTFLAGS = -C link-arg=-lgcc
-    $(warning "WARNING: aarch64-musl needs extra symbols from libgcc")
-endif
-
-TRIPLE = $(ARCH)-unknown-linux-$(LIBC)
+include ../../utils.mk

 TARGET_PATH = target/$(TRIPLE)/$(BUILD_TYPE)/$(TARGET)

@@ -154,6 +121,10 @@ clean:
 	@rm -f $(GENERATED_FILES)
 	@rm -f tarpaulin-report.html

+vendor:
+	@cargo vendor
+
+
 #TARGET test: run cargo tests
 test:
 	@cargo test --all --target $(TRIPLE)
@@ -223,7 +194,8 @@ codecov-html: check_tarpaulin
 	help \
 	show-header \
 	show-summary \
-	optimize
+	optimize \
+	vendor

 ##TARGET generate-protocols: generate/update grpc agent protocols
 generate-protocols:
--- a/src/agent/kata-agent.service.in
+++ b/src/agent/kata-agent.service.in
@@ -15,7 +15,7 @@ Wants=kata-containers.target
 StandardOutput=tty
 Type=simple
 ExecStart=@BINDIR@/@AGENT_NAME@
-LimitNOFILE=infinity
+LimitNOFILE=1048576
 # ExecStop is required for static agent tracing; in all other scenarios
 # the runtime handles shutting down the VM.
 ExecStop=/bin/sync ; /usr/bin/systemctl --force poweroff
--- a/src/agent/rustjail/Cargo.toml
+++ b/src/agent/rustjail/Cargo.toml
@@ -11,9 +11,9 @@ serde_derive = "1.0.91"
 oci = { path = "../oci" }
 protocols = { path ="../protocols" }
 caps = "0.5.0"
-nix = "0.17.0"
+nix = "0.21.0"
 scopeguard = "1.0.0"
-prctl = "1.0.0"
+capctl = "0.2.0"
 lazy_static = "1.3.0"
 libc = "0.2.58"
 protobuf = "=2.14.0"
@@ -24,7 +24,6 @@ regex = "1.1"
 path-absolutize = "1.2.0"
 anyhow = "1.0.32"
 cgroups = { package = "cgroups-rs", version = "0.2.5" }
-tempfile = "3.1.0"
 rlimit = "0.5.3"

 tokio = { version = "1.2.0", features = ["sync", "io-util", "process", "time", "macros"] }
@@ -34,3 +33,4 @@ inotify = "0.9.2"

 [dev-dependencies]
 serial_test = "0.5.0"
+tempfile = "3.1.0"
--- a/src/agent/rustjail/src/cgroups/fs/mod.rs
+++ b/src/agent/rustjail/src/cgroups/fs/mod.rs
@@ -349,14 +349,34 @@ fn set_memory_resources(cg: &cgroups::Cgroup, memory: &LinuxMemory, update: bool
        mem_controller.set_kmem_limit(-1)?;
    }

-    set_resource!(mem_controller, set_limit, memory, limit);
-    set_resource!(mem_controller, set_soft_limit, memory, reservation);
-    set_resource!(mem_controller, set_kmem_limit, memory, kernel);
-    set_resource!(mem_controller, set_tcp_limit, memory, kernel_tcp);
+    // If the memory update is set to -1 we should also
+    // set swap to -1, it means unlimited memory.
+    let mut swap = memory.swap.unwrap_or(0);
+    if memory.limit == Some(-1) {
+        swap = -1;
+    }

-    if let Some(swap) = memory.swap {
-        // set memory swap
-        let swap = if cg.v2() {
+    if memory.limit.is_some() && swap != 0 {
+        let memstat = get_memory_stats(cg)
+            .into_option()
+            .ok_or_else(|| anyhow!("failed to get the cgroup memory stats"))?;
+        let memusage = memstat.get_usage();
+
+        // When updating the memory limit, the kernel checks the current memory limit
+        // against the new swap setting. If the current memory limit is larger than
+        // the new swap, set the limit first, otherwise the kernel would complain and
+        // refuse the update; on the other hand, if the current memory limit is smaller
+        // than the new swap, set the swap first and then set the memory limit.
+        if swap == -1 || memusage.get_limit() < swap as u64 {
+            mem_controller.set_memswap_limit(swap)?;
+            set_resource!(mem_controller, set_limit, memory, limit);
+        } else {
+            set_resource!(mem_controller, set_limit, memory, limit);
+            mem_controller.set_memswap_limit(swap)?;
+        }
+    } else {
+        set_resource!(mem_controller, set_limit, memory, limit);
+        swap = if cg.v2() {
            convert_memory_swap_to_v2_value(swap, memory.limit.unwrap_or(0))?
        } else {
            swap
@@ -366,6 +386,10 @@ fn set_memory_resources(cg: &cgroups::Cgroup, memory: &LinuxMemory, update: bool
        }
    }

+    set_resource!(mem_controller, set_soft_limit, memory, reservation);
+    set_resource!(mem_controller, set_kmem_limit, memory, kernel);
+    set_resource!(mem_controller, set_tcp_limit, memory, kernel_tcp);
+
    if let Some(swappiness) = memory.swappiness {
        if (0..=100).contains(&swappiness) {
            mem_controller.set_swappiness(swappiness as u64)?;
@@ -899,12 +923,12 @@ pub fn get_mounts() -> Result<HashMap<String, String>> {
    let paths = get_paths()?;

    for l in fs::read_to_string(MOUNTS)?.lines() {
-        let p: Vec<&str> = l.split(" - ").collect();
+        let p: Vec<&str> = l.splitn(2, " - ").collect();
        let pre: Vec<&str> = p[0].split(' ').collect();
        let post: Vec<&str> = p[1].split(' ').collect();

        if post.len() != 3 {
-            warn!(sl!(), "mountinfo corrupted!");
+            warn!(sl!(), "can't parse {} line {:?}", MOUNTS, l);
            continue;
        }

--- a/src/agent/rustjail/src/cgroups/notifier.rs
+++ b/src/agent/rustjail/src/cgroups/notifier.rs
@@ -139,19 +139,6 @@ async fn notify_on_oom(cid: &str, dir: String) -> Result<Receiver<String>> {
    register_memory_event(cid, dir, "memory.oom_control", "").await
 }

-// level is one of "low", "medium", or "critical"
-async fn notify_memory_pressure(cid: &str, dir: String, level: &str) -> Result<Receiver<String>> {
-    if dir.is_empty() {
-        return Err(anyhow!("memory controller missing"));
-    }
-
-    if level != "low" && level != "medium" && level != "critical" {
-        return Err(anyhow!("invalid pressure level {}", level));
-    }
-
-    register_memory_event(cid, dir, "memory.pressure_level", level).await
-}
-
 async fn register_memory_event(
    cid: &str,
    cg_dir: String,
--- a/src/agent/rustjail/src/configs/device.rs
+++ b/src/agent/rustjail/src/configs/device.rs
@@ -1,56 +0,0 @@
-// Copyright (c) 2019 Ant Financial
-//
-// SPDX-License-Identifier: Apache-2.0
-//
-
-use libc::*;
-use serde;
-#[macro_use]
-use serde_derive;
-use serde_json;
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Device {
-    #[serde(default)]
-    r#type: char,
-    #[serde(default)]
-    path: String,
-    #[serde(default)]
-    major: i64,
-    #[serde(default)]
-    minor: i64,
-    #[serde(default)]
-    permissions: String,
-    #[serde(default)]
-    file_mode: mode_t,
-    #[serde(default)]
-    uid: i32,
-    #[serde(default)]
-    gid: i32,
-    #[serde(default)]
-    allow: bool,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct BlockIODevice {
-    #[serde(default)]
-    major: i64,
-    #[serde(default)]
-    minor: i64,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct WeightDevice {
-    block: BlockIODevice,
-    #[serde(default)]
-    weight: u16,
-    #[serde(default, rename = "leafWeight")]
-    leaf_weight: u16,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct ThrottleDevice {
-    block: BlockIODevice,
-    #[serde(default)]
-    rate: u64,
-}
--- a/src/agent/rustjail/src/configs/mod.rs
+++ b/src/agent/rustjail/src/configs/mod.rs
@@ -1,372 +0,0 @@
-// Copyright (c) 2019 Ant Financial
-//
-// SPDX-License-Identifier: Apache-2.0
-//
-
-use serde;
-#[macro_use]
-use serde_derive;
-use serde_json;
-
-use protocols::oci::State as OCIState;
-
-use std::collections::HashMap;
-use std::fmt;
-use std::path::PathBuf;
-use std::time::Duration;
-
-use nix::unistd;
-
-use self::device::{Device, ThrottleDevice, WeightDevice};
-use self::namespaces::Namespaces;
-use crate::specconv::CreateOpts;
-
-pub mod device;
-pub mod namespaces;
-pub mod validator;
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Rlimit {
-    #[serde(default)]
-    r#type: i32,
-    #[serde(default)]
-    hard: i32,
-    #[serde(default)]
-    soft: i32,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct IDMap {
-    #[serde(default)]
-    container_id: i32,
-    #[serde(default)]
-    host_id: i32,
-    #[serde(default)]
-    size: i32,
-}
-
-type Action = i32;
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Seccomp {
-    #[serde(default)]
-    default_action: Action,
-    #[serde(default)]
-    architectures: Vec<String>,
-    #[serde(default)]
-    flags: Vec<String>,
-    #[serde(default)]
-    syscalls: Vec<Syscall>,
-}
-
-type Operator = i32;
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Arg {
-    #[serde(default)]
-    index: u32,
-    #[serde(default)]
-    value: u64,
-    #[serde(default)]
-    value_two: u64,
-    #[serde(default)]
-    op: Operator,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Syscall {
-    #[serde(default, skip_serializing_if = "String::is_empty")]
-    names: String,
-    #[serde(default)]
-    action: Action,
-    #[serde(default, rename = "errnoRet")]
-    errno_ret: u32,
-    #[serde(default, skip_serializing_if = "Vec::is_empty")]
-    args: Vec<Arg>,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Config<'a> {
-    #[serde(default)]
-    no_pivot_root: bool,
-    #[serde(default)]
-    parent_death_signal: i32,
-    #[serde(default)]
-    rootfs: String,
-    #[serde(default)]
-    readonlyfs: bool,
-    #[serde(default, rename = "rootPropagation")]
-    root_propagation: i32,
-    #[serde(default)]
-    mounts: Vec<Mount>,
-    #[serde(default)]
-    devices: Vec<Device>,
-    #[serde(default)]
-    mount_label: String,
-    #[serde(default)]
-    hostname: String,
-    #[serde(default)]
-    namespaces: Namespaces,
-    #[serde(default)]
-    capabilities: Option<Capabilities>,
-    #[serde(default)]
-    networks: Vec<Network>,
-    #[serde(default)]
-    routes: Vec<Route>,
-    #[serde(default)]
-    cgroups: Option<Cgroup<'a>>,
-    #[serde(default, skip_serializing_if = "String::is_empty")]
-    apparmor_profile: String,
-    #[serde(default, skip_serializing_if = "String::is_empty")]
-    process_label: String,
-    #[serde(default, skip_serializing_if = "Vec::is_empty")]
-    rlimits: Vec<Rlimit>,
-    #[serde(default)]
-    oom_score_adj: Option<i32>,
-    #[serde(default)]
-    uid_mappings: Vec<IDMap>,
-    #[serde(default)]
-    gid_mappings: Vec<IDMap>,
-    #[serde(default)]
-    mask_paths: Vec<String>,
-    #[serde(default)]
-    readonly_paths: Vec<String>,
-    #[serde(default)]
-    sysctl: HashMap<String, String>,
-    #[serde(default)]
-    seccomp: Option<Seccomp>,
-    #[serde(default)]
-    no_new_privileges: bool,
-    hooks: Option<Hooks>,
-    #[serde(default)]
-    version: String,
-    #[serde(default)]
-    labels: Vec<String>,
-    #[serde(default)]
-    no_new_keyring: bool,
-    #[serde(default)]
-    intel_rdt: Option<IntelRdt>,
-    #[serde(default)]
-    rootless_euid: bool,
-    #[serde(default)]
-    rootless_cgroups: bool,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Hooks {
-    prestart: Vec<Box<Hook>>,
-    poststart: Vec<Box<Hook>>,
-    poststop: Vec<Box<Hook>>,
-}
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Capabilities {
-    bounding: Vec<String>,
-    effective: Vec<String>,
-    inheritable: Vec<String>,
-    permitted: Vec<String>,
-    ambient: Vec<String>,
-}
-
-pub trait Hook {
-    fn run(&self, state: &OCIState) -> Result<()>;
-}
-
-pub struct FuncHook {
-    // run: fn(&OCIState) -> Result<()>,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Command {
-    #[serde(default)]
-    path: String,
-    #[serde(default)]
-    args: Vec<String>,
-    #[serde(default)]
-    env: Vec<String>,
-    #[serde(default)]
-    dir: String,
-    #[serde(default)]
-    timeout: Duration,
-}
-
-pub struct CommandHook {
-    command: Command,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Mount {
-    #[serde(default)]
-    source: String,
-    #[serde(default)]
-    destination: String,
-    #[serde(default)]
-    device: String,
-    #[serde(default)]
-    flags: i32,
-    #[serde(default)]
-    propagation_flags: Vec<i32>,
-    #[serde(default)]
-    data: String,
-    #[serde(default)]
-    relabel: String,
-    #[serde(default)]
-    extensions: i32,
-    #[serde(default)]
-    premount_cmds: Vec<Command>,
-    #[serde(default)]
-    postmount_cmds: Vec<Command>,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct HugepageLimit {
-    #[serde(default)]
-    page_size: String,
-    #[serde(default)]
-    limit: u64,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct IntelRdt {
-    #[serde(default, skip_serializing_if = "String::is_empty")]
-    l3_cache_schema: String,
-    #[serde(
-        default,
-        rename = "memBwSchema",
-        skip_serializing_if = "String::is_empty"
-    )]
-    mem_bw_schema: String,
-}
-
-pub type FreezerState = String;
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Cgroup<'a> {
-    #[serde(default, skip_serializing_if = "String::is_empty")]
-    name: String,
-    #[serde(default, skip_serializing_if = "String::is_empty")]
-    parent: String,
-    #[serde(default)]
-    path: String,
-    #[serde(default)]
-    scope_prefix: String,
-    paths: HashMap<String, String>,
-    resource: &'a Resources<'a>,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Resources<'a> {
-    #[serde(default)]
-    allow_all_devices: bool,
-    #[serde(default, skip_serializing_if = "Vec::is_empty")]
-    allowed_devices: Vec<&'a Device>,
-    #[serde(default, skip_serializing_if = "Vec::is_empty")]
-    denied_devices: Vec<&'a Device>,
-    #[serde(default)]
-    devices: Vec<&'a Device>,
-    #[serde(default)]
-    memory: i64,
-    #[serde(default)]
-    memory_reservation: i64,
-    #[serde(default)]
-    memory_swap: i64,
-    #[serde(default)]
-    kernel_memory: i64,
-    #[serde(default)]
-    kernel_memory_tcp: i64,
-    #[serde(default)]
-    cpu_shares: u64,
-    #[serde(default)]
-    cpu_quota: i64,
-    #[serde(default)]
-    cpu_period: u64,
-    #[serde(default)]
-    cpu_rt_quota: i64,
-    #[serde(default)]
-    cpu_rt_period: u64,
-    #[serde(default)]
-    cpuset_cpus: String,
-    #[serde(default)]
-    cpuset_mems: String,
-    #[serde(default)]
-    pids_limit: i64,
-    #[serde(default)]
-    blkio_weight: u64,
-    #[serde(default)]
-    blkio_leaf_weight: u64,
-    #[serde(default)]
-    blkio_weight_device: Vec<&'a WeightDevice>,
-    #[serde(default)]
-    blkio_throttle_read_bps_device: Vec<&'a ThrottleDevice>,
-    #[serde(default)]
-    blkio_throttle_write_bps_device: Vec<&'a ThrottleDevice>,
-    #[serde(default)]
-    blkio_throttle_read_iops_device: Vec<&'a ThrottleDevice>,
-    #[serde(default)]
-    blkio_throttle_write_iops_device: Vec<&'a ThrottleDevice>,
-    #[serde(default)]
-    freezer: FreezerState,
-    #[serde(default)]
-    hugetlb_limit: Vec<&'a HugepageLimit>,
-    #[serde(default)]
-    oom_kill_disable: bool,
-    #[serde(default)]
-    memory_swapiness: u64,
-    #[serde(default)]
-    net_prio_ifpriomap: Vec<&'a IfPrioMap>,
-    #[serde(default)]
-    net_cls_classid_u: u32,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Network {
-    #[serde(default)]
-    r#type: String,
-    #[serde(default)]
-    name: String,
-    #[serde(default)]
-    bridge: String,
-    #[serde(default)]
-    mac_address: String,
-    #[serde(default)]
-    address: String,
-    #[serde(default)]
-    gateway: String,
-    #[serde(default)]
-    ipv6_address: String,
-    #[serde(default)]
-    ipv6_gateway: String,
-    #[serde(default)]
-    mtu: i32,
-    #[serde(default)]
-    txqueuelen: i32,
-    #[serde(default)]
-    host_interface_name: String,
-    #[serde(default)]
-    hairpin_mode: bool,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Route {
-    #[serde(default)]
-    destination: String,
-    #[serde(default)]
-    source: String,
-    #[serde(default)]
-    gateway: String,
-    #[serde(default)]
-    interface_name: String,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct IfPrioMap {
-    #[serde(default)]
-    interface: String,
-    #[serde(default)]
-    priority: i32,
-}
-
-impl IfPrioMap {
-    fn cgroup_string(&self) -> String {
-        format!("{} {}", self.interface, self.priority)
-    }
-}
--- a/src/agent/rustjail/src/configs/namespaces.rs
+++ b/src/agent/rustjail/src/configs/namespaces.rs
@@ -1,46 +0,0 @@
-// Copyright (c) 2019 Ant Financial
-//
-// SPDX-License-Identifier: Apache-2.0
-//
-
-use serde;
-#[macro_use]
-use serde_derive;
-use serde_json;
-
-use std::collections::HashMap;
-#[macro_use]
-use lazy_static;
-
-pub type NamespaceType = String;
-pub type Namespaces = Vec<Namespace>;
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Namespace {
-    #[serde(default)]
-    r#type: NamespaceType,
-    #[serde(default)]
-    path: String,
-}
-
-pub const NEWNET: &'static str = "NEWNET";
-pub const NEWPID: &'static str = "NEWPID";
-pub const NEWNS: &'static str = "NEWNS";
-pub const NEWUTS: &'static str = "NEWUTS";
-pub const NEWUSER: &'static str = "NEWUSER";
-pub const NEWCGROUP: &'static str = "NEWCGROUP";
-pub const NEWIPC: &'static str = "NEWIPC";
-
-lazy_static! {
-    static ref TYPETONAME: HashMap<&'static str, &'static str> = {
-        let mut m = HashMap::new();
-        m.insert("pid", "pid");
-        m.insert("network", "net");
-        m.insert("mount", "mnt");
-        m.insert("user", "user");
-        m.insert("uts", "uts");
-        m.insert("ipc", "ipc");
-        m.insert("cgroup", "cgroup");
-        m
-    };
-}
--- a/src/agent/rustjail/src/configs/validator.rs
+++ b/src/agent/rustjail/src/configs/validator.rs
@@ -1,23 +0,0 @@
-// Copyright (c) 2019 Ant Financial
-//
-// SPDX-License-Identifier: Apache-2.0
-//
-
-use crate::configs::Config;
-use std::io::Result;
-
-pub trait Validator {
-    fn validate(&self, config: &Config) -> Result<()> {
-        Ok(())
-    }
-}
-
-pub struct ConfigValidator {}
-
-impl Validator for ConfigValidator {}
-
-impl ConfigValidator {
-    fn new() -> Self {
-        ConfigValidator {}
-    }
-}
--- a/src/agent/rustjail/src/container.rs
+++ b/src/agent/rustjail/src/container.rs
@@ -8,7 +8,7 @@ use libc::pid_t;
 use oci::{ContainerState, LinuxDevice, LinuxIdMapping};
 use oci::{Hook, Linux, LinuxNamespace, LinuxResources, Spec};
 use std::clone::Clone;
-use std::ffi::{CStr, CString};
+use std::ffi::CString;
 use std::fmt::Display;
 use std::fs;
 use std::os::unix::io::RawFd;
@@ -48,6 +48,7 @@ use oci::State as OCIState;
 use std::collections::HashMap;
 use std::os::unix::io::FromRawFd;
 use std::str::FromStr;
+use std::sync::Arc;

 use slog::{info, o, Logger};

@@ -57,13 +58,11 @@ use crate::sync_with_async::{read_async, write_async};
 use async_trait::async_trait;
 use rlimit::{setrlimit, Resource, Rlim};
 use tokio::io::AsyncBufReadExt;
+use tokio::sync::Mutex;

 use crate::utils;

-const STATE_FILENAME: &str = "state.json";
 const EXEC_FIFO_FILENAME: &str = "exec.fifo";
-const VER_MARKER: &str = "1.2.5";
-const PID_NS_PATH: &str = "/proc/self/ns/pid";

 const INIT: &str = "INIT";
 const NO_PIVOT: &str = "NO_PIVOT";
@@ -92,10 +91,6 @@ impl ContainerStatus {
        self.cur_status
    }

-    fn pre_status(&self) -> ContainerState {
-        self.pre_status
-    }
-
    fn transition(&mut self, to: ContainerState) {
        self.pre_status = self.status();
        self.cur_status = to;
@@ -106,6 +101,9 @@ pub type Config = CreateOpts;
 type NamespaceType = String;

 lazy_static! {
+    // This lock ensures the child exit signal is received by the right receiver.
+    pub static ref WAIT_PID_LOCKER: Arc<Mutex<bool>> = Arc::new(Mutex::new(false));
+
    static ref NAMESPACES: HashMap<&'static str, CloneFlags> = {
        let mut m = HashMap::new();
        m.insert("user", CloneFlags::CLONE_NEWUSER);
@@ -341,7 +339,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
        Err(_e) => sched::unshare(CloneFlags::CLONE_NEWPID)?,
    }

-    match fork() {
+    match unsafe { fork() } {
        Ok(ForkResult::Parent { child, .. }) => {
            log_child!(
                cfd_log,
@@ -464,7 +462,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
    // Ref: https://github.com/opencontainers/runc/commit/50a19c6ff828c58e5dab13830bd3dacde268afe5
    //
    if !nses.is_empty() {
-        prctl::set_dumpable(false)
+        capctl::prctl::set_dumpable(false)
            .map_err(|e| anyhow!(e).context("set process non-dumpable failed"))?;
    }

@@ -540,7 +538,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
        // notify parent to run prestart hooks
        write_sync(cwfd, SYNC_SUCCESS, "")?;
        // wait parent run prestart hooks
-        let _ = read_sync(crfd)?;
+        read_sync(crfd)?;
    }

    if mount_fd != -1 {
@@ -597,7 +595,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {

    // NoNewPeiviledges, Drop capabilities
    if oci_process.no_new_privileges {
-        prctl::set_no_new_privileges(true).map_err(|_| anyhow!("cannot set no new privileges"))?;
+        capctl::prctl::set_no_new_privs().map_err(|_| anyhow!("cannot set no new privileges"))?;
    }

    if oci_process.capabilities.is_some() {
@@ -607,8 +605,6 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {

    if init {
        // notify parent to run poststart hooks
-        // cfd is closed when return from join_namespaces
-        // should retunr cfile instead of cfd?
        write_sync(cwfd, SYNC_SUCCESS, "")?;
    }

@@ -817,7 +813,7 @@ impl BaseContainer for LinuxContainer {
            if stat::stat(fifo_file.as_str()).is_ok() {
                return Err(anyhow!("exec fifo exists"));
            }
-            unistd::mkfifo(fifo_file.as_str(), Mode::from_bits(0o622).unwrap())?;
+            unistd::mkfifo(fifo_file.as_str(), Mode::from_bits(0o644).unwrap())?;

            fifofd = fcntl::open(
                fifo_file.as_str(),
@@ -1076,9 +1072,8 @@ fn do_exec(args: &[String]) -> ! {
        .iter()
        .map(|s| CString::new(s.to_string()).unwrap_or_default())
        .collect();
-    let a: Vec<&CStr> = sa.iter().map(|s| s.as_c_str()).collect();

-    let _ = unistd::execvp(p.as_c_str(), a.as_slice()).map_err(|e| match e {
+    let _ = unistd::execvp(p.as_c_str(), &sa).map_err(|e| match e {
        nix::Error::Sys(errno) => {
            std::process::exit(errno as i32);
        }
@@ -1251,7 +1246,7 @@ async fn join_namespaces(

    if p.init {
        info!(logger, "notify child parent ready to run prestart hook!");
-        let _ = read_async(pipe_r).await?;
+        read_async(pipe_r).await?;

        info!(logger, "get ready to run prestart hook!");

@@ -1311,7 +1306,7 @@ fn write_mappings(logger: &Logger, path: &str, maps: &[LinuxIdMapping]) -> Resul

 fn setid(uid: Uid, gid: Gid) -> Result<()> {
    // set uid/gid
-    prctl::set_keep_capabilities(true)
+    capctl::prctl::set_keepcaps(true)
        .map_err(|e| anyhow!(e).context("set keep capabilities returned"))?;

    {
@@ -1325,7 +1320,7 @@ fn setid(uid: Uid, gid: Gid) -> Result<()> {
        capabilities::reset_effective()?;
    }

-    prctl::set_keep_capabilities(false)
+    capctl::prctl::set_keepcaps(false)
        .map_err(|e| anyhow!(e).context("set keep capabilities returned"))?;

    Ok(())
@@ -1399,18 +1394,8 @@ impl LinuxContainer {
            logger: logger.new(o!("module" => "rustjail", "subsystem" => "container", "cid" => id)),
        })
    }
-
-    fn load<T: Into<String>>(_id: T, _base: T) -> Result<Self> {
-        Err(anyhow!("not supported"))
-    }
 }

-// Handle the differing rlimit types for different targets
-#[cfg(target_env = "musl")]
-type RlimitsType = libc::c_int;
-#[cfg(target_env = "gnu")]
-type RlimitsType = libc::__rlimit_resource_t;
-
 fn setgroups(grps: &[libc::gid_t]) -> Result<()> {
    let ret = unsafe { libc::setgroups(grps.len(), grps.as_ptr() as *const libc::gid_t) };
    Errno::result(ret).map(drop)?;
@@ -1467,6 +1452,8 @@ async fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> {
        })
        .collect();

+    // Hold the wait lock so the child's exit signal is not reaped by the global reaper.
+    let _wait_locker = WAIT_PID_LOCKER.lock().await;
    let mut child = tokio::process::Command::new(path)
        .args(args.iter())
        .envs(env.iter())
@@ -1550,6 +1537,7 @@ mod tests {
    use std::os::unix::fs::MetadataExt;
    use std::os::unix::io::AsRawFd;
    use tempfile::tempdir;
+    use tokio::process::Command;

    macro_rules! sl {
        () => {
@@ -1557,12 +1545,27 @@ mod tests {
        };
    }

+    async fn which(cmd: &str) -> String {
+        let output: std::process::Output = Command::new("which")
+            .arg(cmd)
+            .output()
+            .await
+            .expect("which command failed to run");
+
+        match String::from_utf8(output.stdout) {
+            Ok(v) => v.trim_end_matches('\n').to_string(),
+            Err(e) => panic!("Invalid UTF-8 sequence: {}", e),
+        }
+    }
+
    #[tokio::test]
    async fn test_execute_hook() {
+        let xargs = which("xargs").await;
+
        execute_hook(
            &slog_scope::logger(),
            &Hook {
-                path: "/usr/bin/xargs".to_string(),
+                path: xargs,
                args: vec![],
                env: vec![],
                timeout: None,
@@ -1582,10 +1585,12 @@ mod tests {

    #[tokio::test]
    async fn test_execute_hook_with_timeout() {
+        let sleep = which("sleep").await;
+
        let res = execute_hook(
            &slog_scope::logger(),
            &Hook {
-                path: "/usr/bin/sleep".to_string(),
+                path: sleep,
                args: vec!["2".to_string()],
                env: vec![],
                timeout: Some(1),
@@ -1622,7 +1627,7 @@ mod tests {
            let pre_status = status.status();
            status.transition(*s);

-            assert_eq!(pre_status, status.pre_status());
+            assert_eq!(pre_status, status.pre_status);
        }
    }

--- a/src/agent/rustjail/src/lib.rs
+++ b/src/agent/rustjail/src/lib.rs
@@ -3,15 +3,7 @@
 // SPDX-License-Identifier: Apache-2.0
 //

-// #![allow(unused_attributes)]
-// #![allow(unused_imports)]
-// #![allow(unused_variables)]
-// #![allow(unused_mut)]
-#![allow(dead_code)]
-// #![allow(deprecated)]
-// #![allow(unused_must_use)]
 #![allow(non_upper_case_globals)]
-// #![allow(unused_comparisons)]
 #[macro_use]
 #[cfg(test)]
 extern crate serial_test;
@@ -23,7 +15,7 @@ extern crate caps;
 extern crate protocols;
 #[macro_use]
 extern crate scopeguard;
-extern crate prctl;
+extern crate capctl;
 #[macro_use]
 extern crate lazy_static;
 extern crate libc;
@@ -47,16 +39,6 @@ pub mod sync;
 pub mod sync_with_async;
 pub mod utils;
 pub mod validator;
-// pub mod factory;
-//pub mod configs;
-// pub mod devices;
-// pub mod init;
-// pub mod rootfs;
-// pub mod capabilities;
-// pub mod console;
-// pub mod stats;
-// pub mod user;
-//pub mod intelrdt;

 use std::collections::HashMap;

@@ -474,10 +456,6 @@ fn linux_grpc_to_oci(l: &grpc::Linux) -> oci::Linux {
    }
 }

-fn linux_oci_to_grpc(_l: &oci::Linux) -> grpc::Linux {
-    grpc::Linux::default()
-}
-
 pub fn grpc_to_oci(grpc: &grpc::Spec) -> oci::Spec {
    // process
    let process = if grpc.Process.is_some() {
@@ -533,7 +511,6 @@ pub fn grpc_to_oci(grpc: &grpc::Spec) -> oci::Spec {

 #[cfg(test)]
 mod tests {
-    #[allow(unused_macros)]
    #[macro_export]
    macro_rules! skip_if_not_root {
        () => {
--- a/src/agent/rustjail/src/mount.rs
+++ b/src/agent/rustjail/src/mount.rs
@@ -62,58 +62,56 @@ const PROC_SUPER_MAGIC: libc::c_uint = 0x00009fa0;
 lazy_static! {
    static ref PROPAGATION: HashMap<&'static str, MsFlags> = {
        let mut m = HashMap::new();
-        m.insert("shared", MsFlags::MS_SHARED);
-        m.insert("rshared", MsFlags::MS_SHARED | MsFlags::MS_REC);
        m.insert("private", MsFlags::MS_PRIVATE);
        m.insert("rprivate", MsFlags::MS_PRIVATE | MsFlags::MS_REC);
-        m.insert("slave", MsFlags::MS_SLAVE);
+        m.insert("rshared", MsFlags::MS_SHARED | MsFlags::MS_REC);
        m.insert("rslave", MsFlags::MS_SLAVE | MsFlags::MS_REC);
+        m.insert("runbindable", MsFlags::MS_UNBINDABLE | MsFlags::MS_REC);
+        m.insert("shared", MsFlags::MS_SHARED);
+        m.insert("slave", MsFlags::MS_SLAVE);
+        m.insert("unbindable", MsFlags::MS_UNBINDABLE);
        m
    };
    static ref OPTIONS: HashMap<&'static str, (bool, MsFlags)> = {
        let mut m = HashMap::new();
+        m.insert("acl", (false, MsFlags::MS_POSIXACL));
+        m.insert("async", (true, MsFlags::MS_SYNCHRONOUS));
+        m.insert("atime", (true, MsFlags::MS_NOATIME));
+        m.insert("bind", (false, MsFlags::MS_BIND));
        m.insert("defaults", (false, MsFlags::empty()));
+        m.insert("dev", (true, MsFlags::MS_NODEV));
+        m.insert("diratime", (true, MsFlags::MS_NODIRATIME));
+        m.insert("dirsync", (false, MsFlags::MS_DIRSYNC));
+        m.insert("exec", (true, MsFlags::MS_NOEXEC));
+        m.insert("iversion", (false, MsFlags::MS_I_VERSION));
+        m.insert("lazytime", (false, MsFlags::MS_LAZYTIME));
+        m.insert("loud", (true, MsFlags::MS_SILENT));
+        m.insert("mand", (false, MsFlags::MS_MANDLOCK));
+        m.insert("noacl", (true, MsFlags::MS_POSIXACL));
+        m.insert("noatime", (false, MsFlags::MS_NOATIME));
+        m.insert("nodev", (false, MsFlags::MS_NODEV));
+        m.insert("nodiratime", (false, MsFlags::MS_NODIRATIME));
+        m.insert("noexec", (false, MsFlags::MS_NOEXEC));
+        m.insert("noiversion", (true, MsFlags::MS_I_VERSION));
+        m.insert("nolazytime", (true, MsFlags::MS_LAZYTIME));
+        m.insert("nomand", (true, MsFlags::MS_MANDLOCK));
+        m.insert("norelatime", (true, MsFlags::MS_RELATIME));
+        m.insert("nostrictatime", (true, MsFlags::MS_STRICTATIME));
+        m.insert("nosuid", (false, MsFlags::MS_NOSUID));
+        m.insert("rbind", (false, MsFlags::MS_BIND | MsFlags::MS_REC));
+        m.insert("relatime", (false, MsFlags::MS_RELATIME));
+        m.insert("remount", (false, MsFlags::MS_REMOUNT));
        m.insert("ro", (false, MsFlags::MS_RDONLY));
        m.insert("rw", (true, MsFlags::MS_RDONLY));
-        m.insert("suid", (true, MsFlags::MS_NOSUID));
-        m.insert("nosuid", (false, MsFlags::MS_NOSUID));
-        m.insert("dev", (true, MsFlags::MS_NODEV));
-        m.insert("nodev", (false, MsFlags::MS_NODEV));
-        m.insert("exec", (true, MsFlags::MS_NOEXEC));
-        m.insert("noexec", (false, MsFlags::MS_NOEXEC));
-        m.insert("sync", (false, MsFlags::MS_SYNCHRONOUS));
-        m.insert("async", (true, MsFlags::MS_SYNCHRONOUS));
-        m.insert("dirsync", (false, MsFlags::MS_DIRSYNC));
-        m.insert("remount", (false, MsFlags::MS_REMOUNT));
-        m.insert("mand", (false, MsFlags::MS_MANDLOCK));
-        m.insert("nomand", (true, MsFlags::MS_MANDLOCK));
-        m.insert("atime", (true, MsFlags::MS_NOATIME));
-        m.insert("noatime", (false, MsFlags::MS_NOATIME));
-        m.insert("diratime", (true, MsFlags::MS_NODIRATIME));
-        m.insert("nodiratime", (false, MsFlags::MS_NODIRATIME));
-        m.insert("bind", (false, MsFlags::MS_BIND));
-        m.insert("rbind", (false, MsFlags::MS_BIND | MsFlags::MS_REC));
-        m.insert("unbindable", (false, MsFlags::MS_UNBINDABLE));
-        m.insert(
-            "runbindable",
-            (false, MsFlags::MS_UNBINDABLE | MsFlags::MS_REC),
-        );
-        m.insert("private", (false, MsFlags::MS_PRIVATE));
-        m.insert("rprivate", (false, MsFlags::MS_PRIVATE | MsFlags::MS_REC));
-        m.insert("shared", (false, MsFlags::MS_SHARED));
-        m.insert("rshared", (false, MsFlags::MS_SHARED | MsFlags::MS_REC));
-        m.insert("slave", (false, MsFlags::MS_SLAVE));
-        m.insert("rslave", (false, MsFlags::MS_SLAVE | MsFlags::MS_REC));
-        m.insert("relatime", (false, MsFlags::MS_RELATIME));
-        m.insert("norelatime", (true, MsFlags::MS_RELATIME));
+        m.insert("silent", (false, MsFlags::MS_SILENT));
        m.insert("strictatime", (false, MsFlags::MS_STRICTATIME));
-        m.insert("nostrictatime", (true, MsFlags::MS_STRICTATIME));
+        m.insert("suid", (true, MsFlags::MS_NOSUID));
+        m.insert("sync", (false, MsFlags::MS_SYNCHRONOUS));
        m
    };
 }

 #[inline(always)]
-#[allow(unused_variables)]
 pub fn mount<
    P1: ?Sized + NixPath,
    P2: ?Sized + NixPath,
@@ -133,7 +131,6 @@ pub fn mount<
 }

 #[inline(always)]
-#[allow(unused_variables)]
 pub fn umount2<P: ?Sized + NixPath>(
    target: &P,
    flags: MntFlags,
@@ -192,7 +189,7 @@ pub fn init_rootfs(

    let mut bind_mount_dev = false;
    for m in &spec.mounts {
-        let (mut flags, data) = parse_mount(&m);
+        let (mut flags, pgflags, data) = parse_mount(&m);
        if !m.destination.starts_with('/') || m.destination.contains("..") {
            return Err(anyhow!(
                "the mount destination {} is invalid",
@@ -234,13 +231,15 @@ pub fn init_rootfs(
            // effective.
            // first check that we have non-default options required before attempting a
            // remount
-            if m.r#type == "bind" {
-                for o in &m.options {
-                    if let Some(fl) = PROPAGATION.get(o.as_str()) {
-                        let dest = secure_join(rootfs, &m.destination);
-                        mount(None::<&str>, dest.as_str(), None::<&str>, *fl, None::<&str>)?;
-                    }
-                }
+            if m.r#type == "bind" && !pgflags.is_empty() {
+                let dest = secure_join(rootfs, &m.destination);
+                mount(
+                    None::<&str>,
+                    dest.as_str(),
+                    None::<&str>,
+                    pgflags,
+                    None::<&str>,
+                )?;
            }
        }
    }
@@ -451,7 +450,6 @@ fn mount_cgroups(
    Ok(())
 }

-#[allow(unused_variables)]
 fn pivot_root<P1: ?Sized + NixPath, P2: ?Sized + NixPath>(
    new_root: &P1,
    put_old: &P2,
@@ -584,7 +582,6 @@ fn parse_mount_table() -> Result<Vec<Info>> {
 }

 #[inline(always)]
-#[allow(unused_variables)]
 fn chroot<P: ?Sized + NixPath>(path: &P) -> Result<(), nix::Error> {
    #[cfg(not(test))]
    return unistd::chroot(path);
@@ -657,26 +654,27 @@ pub fn ms_move_root(rootfs: &str) -> Result<bool> {
    Ok(true)
 }

-fn parse_mount(m: &Mount) -> (MsFlags, String) {
+fn parse_mount(m: &Mount) -> (MsFlags, MsFlags, String) {
    let mut flags = MsFlags::empty();
+    let mut pgflags = MsFlags::empty();
    let mut data = Vec::new();

    for o in &m.options {
-        match OPTIONS.get(o.as_str()) {
-            Some(v) => {
-                let (clear, fl) = *v;
-                if clear {
-                    flags &= !fl;
-                } else {
-                    flags |= fl;
-                }
+        if let Some(v) = OPTIONS.get(o.as_str()) {
+            let (clear, fl) = *v;
+            if clear {
+                flags &= !fl;
+            } else {
+                flags |= fl;
            }
-
-            None => data.push(o.clone()),
+        } else if let Some(fl) = PROPAGATION.get(o.as_str()) {
+            pgflags |= *fl;
+        } else {
+            data.push(o.clone());
        }
    }

-    (flags, data.join(","))
+    (flags, pgflags, data.join(","))
 }

 // This function constructs a canonicalized path by combining the `rootfs` and `unsafe_path` elements.
@@ -922,7 +920,7 @@ pub fn finish_rootfs(cfd_log: RawFd, spec: &Spec) -> Result<()> {

    for m in spec.mounts.iter() {
        if m.destination == "/dev" {
-            let (flags, _) = parse_mount(m);
+            let (flags, _, _) = parse_mount(m);
            if flags.contains(MsFlags::MS_RDONLY) {
                mount(
                    Some("/dev"),
--- a/src/agent/rustjail/src/validator.rs
+++ b/src/agent/rustjail/src/validator.rs
@@ -28,16 +28,6 @@ fn contain_namespace(nses: &[LinuxNamespace], key: &str) -> bool {
    false
 }

-fn get_namespace_path(nses: &[LinuxNamespace], key: &str) -> Result<String> {
-    for ns in nses {
-        if ns.r#type.as_str() == key {
-            return Ok(ns.path.clone());
-        }
-    }
-
-    Err(einval())
-}
-
 fn rootfs(root: &str) -> Result<()> {
    let path = PathBuf::from(root);
    // not absolute path or not exists
@@ -166,31 +156,6 @@ lazy_static! {
    };
 }

-fn check_host_ns(path: &str) -> Result<()> {
-    let cpath = PathBuf::from(path);
-    let hpath = PathBuf::from("/proc/self/ns/net");
-
-    let real_hpath = hpath
-        .read_link()
-        .context(format!("read link {:?}", hpath))?;
-    let meta = cpath
-        .symlink_metadata()
-        .context(format!("symlink metadata {:?}", cpath))?;
-    let file_type = meta.file_type();
-
-    if !file_type.is_symlink() {
-        return Ok(());
-    }
-    let real_cpath = cpath
-        .read_link()
-        .context(format!("read link {:?}", cpath))?;
-    if real_cpath == real_hpath {
-        return Err(einval());
-    }
-
-    Ok(())
-}
-
 fn sysctl(oci: &Spec) -> Result<()> {
    let linux = get_linux(oci)?;

@@ -334,19 +299,6 @@ mod tests {
        assert_eq!(contain_namespace(&namespaces, ""), false);
        assert_eq!(contain_namespace(&namespaces, "Net"), false);
        assert_eq!(contain_namespace(&namespaces, "ipc"), false);
-
-        assert_eq!(
-            get_namespace_path(&namespaces, "net").unwrap(),
-            "/sys/cgroups/net"
-        );
-        assert_eq!(
-            get_namespace_path(&namespaces, "uts").unwrap(),
-            "/sys/cgroups/uts"
-        );
-
-        get_namespace_path(&namespaces, "").unwrap_err();
-        get_namespace_path(&namespaces, "Uts").unwrap_err();
-        get_namespace_path(&namespaces, "ipc").unwrap_err();
    }

    #[test]
@@ -528,12 +480,6 @@ mod tests {
        rootless_euid(&spec).unwrap();
    }

-    #[test]
-    fn test_check_host_ns() {
-        check_host_ns("/proc/self/ns/net").unwrap_err();
-        check_host_ns("/proc/sys/net/ipv4/tcp_sack").unwrap();
-    }
-
    #[test]
    fn test_sysctl() {
        let mut spec = Spec::default();
--- a/src/agent/src/ccw.rs
+++ b/src/agent/src/ccw.rs
@@ -0,0 +1,158 @@
+// Copyright (c) IBM Corp. 2021
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use std::fmt;
+use std::str::FromStr;
+
+use anyhow::anyhow;
+
+// CCW bus IDs follow the format <xx>.<d>.<xxxx> [1, p. 11], where
+//   - <xx> is the channel subsystem ID, which is always 0 from the guest side, but different from
+//     the host side, e.g. 0xfe for virtio-*-ccw [1, p. 435],
+//   - <d> is the subchannel set ID, which ranges from 0-3 [2], and
+//   - <xxxx> is the device number (0000-ffff; leading zeroes can be omitted,
+//      e.g. 3 instead of 0003).
+// [1] https://www.ibm.com/docs/en/linuxonibm/pdf/lku4dd04.pdf
+// [2] https://qemu.readthedocs.io/en/latest/system/s390x/css.html
+
+// Maximum subchannel set ID
+const SUBCHANNEL_SET_MAX: u8 = 3;
+
+/// CCW device. From the guest side, the first field is always 0 and can therefore be omitted.
+#[derive(Copy, Clone, Debug)]
+pub struct Device {
+    subchannel_set_id: u8,
+    device_number: u16,
+}
+
+impl Device {
+    /// Creates a device from a subchannel set ID and a device number.
+    ///
+    /// Returns an error when `subchannel_set_id` is greater than `SUBCHANNEL_SET_MAX`.
+    pub fn new(subchannel_set_id: u8, device_number: u16) -> anyhow::Result<Self> {
+        if subchannel_set_id > SUBCHANNEL_SET_MAX {
+            return Err(anyhow!(
+                "Subchannel set ID {:?} should be in range [0..{}]",
+                subchannel_set_id,
+                SUBCHANNEL_SET_MAX
+            ));
+        }
+
+        Ok(Device {
+            subchannel_set_id,
+            device_number,
+        })
+    }
+}
+
+impl FromStr for Device {
+    type Err = anyhow::Error;
+
+    /// Parses a guest-side bus ID of the form `0.<d>.<xxxx>`.
+    ///
+    /// `<d>` is read as a decimal number and `<xxxx>` as a hexadecimal one; both must
+    /// consist of digits only. The parsed values are then validated by `Device::new`.
+    fn from_str(s: &str) -> anyhow::Result<Self> {
+        let split: Vec<&str> = s.split('.').collect();
+        if split.len() != 3 {
+            return Err(anyhow!(
+                "Wrong bus format. It needs to be in the form 0.<d>.<xxxx>, got {:?}",
+                s
+            ));
+        }
+
+        if split[0] != "0" {
+            return Err(anyhow!(
+                "Wrong bus format. First digit needs to be 0, but is {:?}",
+                split[0]
+            ));
+        }
+
+        // The integer parsers accept a leading '+', which is not part of the bus ID format,
+        // so additionally require every character to be a digit.
+        let subchannel_set_id = match split[1].parse::<u8>() {
+            Ok(id) if split[1].bytes().all(|b| b.is_ascii_digit()) => id,
+            _ => {
+                return Err(anyhow!(
+                    "Wrong bus format. Second digit needs to be 0-3, but is {:?}",
+                    split[1]
+                ))
+            }
+        };
+
+        let device_number = match u16::from_str_radix(split[2], 16) {
+            Ok(id) if split[2].bytes().all(|b| b.is_ascii_hexdigit()) => id,
+            _ => {
+                return Err(anyhow!(
+                    "Wrong bus format. Third digit needs to be 0-ffff, but is {:?}",
+                    split[2]
+                ))
+            }
+        };
+
+        Device::new(subchannel_set_id, device_number)
+    }
+}
+
+impl fmt::Display for Device {
+    /// Formats the device as a guest-side bus ID `0.<d>.<xxxx>`, with the device number
+    /// zero-padded to four lowercase hexadecimal digits.
+    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+        write!(f, "0.{}.{:04x}", self.subchannel_set_id, self.device_number)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::ccw::Device;
+    use std::str::FromStr;
+
+    #[test]
+    fn test_new_device() {
+        // Valid devices
+        let device = Device::new(0, 0).unwrap();
+        assert_eq!(format!("{}", device), "0.0.0000");
+
+        let device = Device::new(3, 0xffff).unwrap();
+        assert_eq!(format!("{}", device), "0.3.ffff");
+
+        // Invalid device
+        let device = Device::new(4, 0);
+        assert!(device.is_err());
+    }
+
+    #[test]
+    fn test_device_from_str() {
+        // Valid devices
+        let device = Device::from_str("0.0.0").unwrap();
+        assert_eq!(format!("{}", device), "0.0.0000");
+
+        let device = Device::from_str("0.0.0000").unwrap();
+        assert_eq!(format!("{}", device), "0.0.0000");
+
+        let device = Device::from_str("0.3.ffff").unwrap();
+        assert_eq!(format!("{}", device), "0.3.ffff");
+
+        // Invalid devices
+        let device = Device::from_str("0.0");
+        assert!(device.is_err());
+
+        let device = Device::from_str("1.0.0");
+        assert!(device.is_err());
+
+        let device = Device::from_str("0.not_a_subchannel_set_id.0");
+        assert!(device.is_err());
+
+        let device = Device::from_str("0.0.not_a_device_number");
+        assert!(device.is_err());
+
+        // Signs are accepted by the integer parsers but are not valid in a bus ID
+        let device = Device::from_str("0.+1.0");
+        assert!(device.is_err());
+
+        let device = Device::from_str("0.0.+ff");
+        assert!(device.is_err());
+    }
+}
--- a/src/agent/src/config.rs
+++ b/src/agent/src/config.rs
--- a/src/agent/src/console.rs
+++ b/src/agent/src/console.rs
@@ -145,9 +145,10 @@ fn run_in_child(slave_fd: libc::c_int, shell: String) -> Result<()> {
    }

    let cmd = CString::new(shell).unwrap();
+    let args: Vec<CString> = Vec::new();

    // run shell
-    let _ = unistd::execvp(cmd.as_c_str(), &[]).map_err(|e| match e {
+    let _ = unistd::execvp(cmd.as_c_str(), &args).map_err(|e| match e {
        nix::Error::Sys(errno) => {
            std::process::exit(errno as i32);
        }
@@ -205,7 +206,7 @@ async fn run_debug_console_vsock<T: AsyncRead + AsyncWrite>(

    let slave_fd = pseudo.slave;

-    match fork() {
+    match unsafe { fork() } {
        Ok(ForkResult::Child) => run_in_child(slave_fd, shell),
        Ok(ForkResult::Parent { child: child_pid }) => {
            run_in_parent(logger.clone(), stream, pseudo, child_pid).await
--- a/src/agent/src/device.rs
+++ b/src/agent/src/device.rs
@@ -14,14 +14,20 @@ use std::str::FromStr;
 use std::sync::Arc;
 use tokio::sync::Mutex;

+#[cfg(target_arch = "s390x")]
+use crate::ccw;
 use crate::linux_abi::*;
-use crate::mount::{DRIVER_BLK_TYPE, DRIVER_MMIO_BLK_TYPE, DRIVER_NVDIMM_TYPE, DRIVER_SCSI_TYPE};
+use crate::mount::{
+    DRIVER_BLK_CCW_TYPE, DRIVER_BLK_TYPE, DRIVER_MMIO_BLK_TYPE, DRIVER_NVDIMM_TYPE,
+    DRIVER_SCSI_TYPE,
+};
 use crate::pci;
 use crate::sandbox::Sandbox;
 use crate::uevent::{wait_for_uevent, Uevent, UeventMatcher};
 use anyhow::{anyhow, Result};
 use oci::{LinuxDeviceCgroup, LinuxResources, Spec};
 use protocols::agent::Device;
+use tracing::instrument;

 // Convenience macro to obtain the scope logger
 macro_rules! sl {
@@ -32,17 +38,21 @@ macro_rules! sl {

 const VM_ROOTFS: &str = "/";

+#[derive(Debug)]
 struct DevIndexEntry {
    idx: usize,
    residx: Vec<usize>,
 }

+#[derive(Debug)]
 struct DevIndex(HashMap<String, DevIndexEntry>);

+#[instrument]
 pub fn rescan_pci_bus() -> Result<()> {
    online_device(SYSFS_PCI_BUS_RESCAN_FILE)
 }

+#[instrument]
 pub fn online_device(path: &str) -> Result<()> {
    fs::write(path, "1")?;
    Ok(())
@@ -51,6 +61,7 @@ pub fn online_device(path: &str) -> Result<()> {
 // pcipath_to_sysfs fetches the sysfs path for a PCI path, relative to
 // the sysfs path for the PCI host bridge, based on the PCI path
 // provided.
+#[instrument]
 fn pcipath_to_sysfs(root_bus_sysfs: &str, pcipath: &pci::Path) -> Result<String> {
    let mut bus = "0000:00".to_string();
    let mut relpath = String::new();
@@ -109,6 +120,7 @@ impl UeventMatcher for ScsiBlockMatcher {
    }
 }

+#[instrument]
 pub async fn get_scsi_device_name(
    sandbox: &Arc<Mutex<Sandbox>>,
    scsi_addr: &str,
@@ -141,6 +153,7 @@ impl UeventMatcher for VirtioBlkPciMatcher {
    }
 }

+#[instrument]
 pub async fn get_virtio_blk_pci_device_name(
    sandbox: &Arc<Mutex<Sandbox>>,
    pcipath: &pci::Path,
@@ -155,6 +168,55 @@ pub async fn get_virtio_blk_pci_device_name(
     Ok(format!("{}/{}", SYSTEM_DEV_PATH, &uev.devname))
 }
 
+// Uevent matcher for the block device that sits behind a given virtio-blk CCW device.
+#[cfg(target_arch = "s390x")]
+#[derive(Debug)]
+struct VirtioBlkCCWMatcher {
+    rex: Regex,
+}
+
+#[cfg(target_arch = "s390x")]
+impl VirtioBlkCCWMatcher {
+    // Builds a matcher for devpaths that start with
+    // <root_bus_path>/0.<d>.<xxxx>/<device>/virtio<N>/block/
+    fn new(root_bus_path: &str, device: &ccw::Device) -> Self {
+        // Both interpolated parts are literal text, not patterns: escape them so that, e.g.,
+        // the dots of the bus ID only match dots and the pattern always compiles.
+        let re = format!(
+            r"^{}/0\.[0-3]\.[0-9a-f]{{1,4}}/{}/virtio[0-9]+/block/",
+            regex::escape(root_bus_path), regex::escape(&device.to_string())
+        );
+        VirtioBlkCCWMatcher {
+            rex: Regex::new(&re).unwrap(),
+        }
+    }
+}
+
+#[cfg(target_arch = "s390x")]
+impl UeventMatcher for VirtioBlkCCWMatcher {
+    // Accepts "add" uevents whose devpath matches the pattern and that carry a device name.
+    fn is_match(&self, uev: &Uevent) -> bool {
+        uev.action == "add" && self.rex.is_match(&uev.devpath) && !uev.devname.is_empty()
+    }
+}
+
+// Waits for the uevent selected by a VirtioBlkCCWMatcher for `device` and returns the uevent's
+// device name joined onto SYSTEM_DEV_PATH. Fails if the resulting path is not valid UTF-8.
+#[cfg(target_arch = "s390x")]
+#[instrument]
+pub async fn get_virtio_blk_ccw_device_name(
+    sandbox: &Arc<Mutex<Sandbox>>,
+    device: &ccw::Device,
+) -> Result<String> {
+    let matcher = VirtioBlkCCWMatcher::new(&create_ccw_root_bus_path(), device);
+    let uev = wait_for_uevent(sandbox, matcher).await?;
+    let devname = uev.devname;
+    match Path::new(SYSTEM_DEV_PATH).join(&devname).to_str() {
+        Some(path) => Ok(String::from(path)),
+        None => Err(anyhow!("CCW device name {} is not valid UTF-8", &devname)),
+    }
+}
+
 #[derive(Debug)]
 struct PmemBlockMatcher {
     suffix: String,
@@ -177,6 +231,7 @@ impl UeventMatcher for PmemBlockMatcher {
    }
 }

+#[instrument]
 pub async fn wait_for_pmem_device(sandbox: &Arc<Mutex<Sandbox>>, devpath: &str) -> Result<()> {
    let devname = match devpath.strip_prefix("/dev/") {
        Some(dev) => dev,
@@ -201,6 +256,7 @@ pub async fn wait_for_pmem_device(sandbox: &Arc<Mutex<Sandbox>>, devpath: &str)
 }

 /// Scan SCSI bus for the given SCSI address(SCSI-Id and LUN)
+#[instrument]
 fn scan_scsi_bus(scsi_addr: &str) -> Result<()> {
    let tokens: Vec<&str> = scsi_addr.split(':').collect();
    if tokens.len() != 2 {
@@ -235,6 +291,7 @@ fn scan_scsi_bus(scsi_addr: &str) -> Result<()> {
 // the same device in the list of devices provided through the OCI spec.
 // This is needed to update information about minor/major numbers that cannot
 // be predicted from the caller.
+#[instrument]
 fn update_spec_device_list(device: &Device, spec: &mut Spec, devidx: &DevIndex) -> Result<()> {
    let major_id: c_uint;
    let minor_id: c_uint;
@@ -311,6 +368,7 @@ fn update_spec_device_list(device: &Device, spec: &mut Spec, devidx: &DevIndex)

 // device.Id should be the predicted device name (vda, vdb, ...)
 // device.VmPath already provides a way to send it in
+#[instrument]
 async fn virtiommio_blk_device_handler(
    device: &Device,
    spec: &mut Spec,
@@ -325,6 +383,7 @@ async fn virtiommio_blk_device_handler(
 }

 // device.Id should be a PCI path string
+#[instrument]
 async fn virtio_blk_device_handler(
    device: &Device,
    spec: &mut Spec,
@@ -339,7 +398,34 @@ async fn virtio_blk_device_handler(
    update_spec_device_list(&dev, spec, devidx)
 }

+// device.id should be a CCW path string. Only s390x resolves it; other targets get an error stub.
+#[cfg(target_arch = "s390x")]
+#[instrument]
+async fn virtio_blk_ccw_device_handler(
+    device: &Device,
+    spec: &mut Spec,
+    sandbox: &Arc<Mutex<Sandbox>>,
+    devidx: &DevIndex,
+) -> Result<()> {
+    let ccw_addr = ccw::Device::from_str(&device.id)?;
+    let mut resolved = device.clone();
+    resolved.vm_path = get_virtio_blk_ccw_device_name(sandbox, &ccw_addr).await?;
+    update_spec_device_list(&resolved, spec, devidx)
+}
+
+#[cfg(not(target_arch = "s390x"))]
+#[instrument]
+async fn virtio_blk_ccw_device_handler(
+    _: &Device,
+    _: &mut Spec,
+    _: &Arc<Mutex<Sandbox>>,
+    _: &DevIndex,
+) -> Result<()> {
+    Err(anyhow!("CCW is only supported on s390x"))
+}
+
 // device.Id should be the SCSI address of the disk in the format "scsiID:lunID"
+#[instrument]
 async fn virtio_scsi_device_handler(
    device: &Device,
    spec: &mut Spec,
@@ -351,6 +437,7 @@ async fn virtio_scsi_device_handler(
    update_spec_device_list(&dev, spec, devidx)
 }

+#[instrument]
 async fn virtio_nvdimm_device_handler(
    device: &Device,
    spec: &mut Spec,
@@ -389,6 +476,7 @@ impl DevIndex {
    }
 }

+#[instrument]
 pub async fn add_devices(
    devices: &[Device],
    spec: &mut Spec,
@@ -403,6 +491,7 @@ pub async fn add_devices(
    Ok(())
 }

+#[instrument]
 async fn add_device(
    device: &Device,
    spec: &mut Spec,
@@ -427,6 +516,7 @@ async fn add_device(

    match device.field_type.as_str() {
        DRIVER_BLK_TYPE => virtio_blk_device_handler(device, spec, sandbox, devidx).await,
+        DRIVER_BLK_CCW_TYPE => virtio_blk_ccw_device_handler(device, spec, sandbox, devidx).await,
        DRIVER_MMIO_BLK_TYPE => virtiommio_blk_device_handler(device, spec, sandbox, devidx).await,
        DRIVER_NVDIMM_TYPE => virtio_nvdimm_device_handler(device, spec, sandbox, devidx).await,
        DRIVER_SCSI_TYPE => virtio_scsi_device_handler(device, spec, sandbox, devidx).await,
@@ -437,6 +527,7 @@ async fn add_device(
 // update_device_cgroup update the device cgroup for container
 // to not allow access to the guest root partition. This prevents
 // the container from being able to access the VM rootfs.
+#[instrument]
 pub fn update_device_cgroup(spec: &mut Spec) -> Result<()> {
    let meta = fs::metadata(VM_ROOTFS)?;
    let rdev = meta.dev();
@@ -888,6 +979,66 @@ mod tests {
        assert!(!matcher_a.is_match(&uev_b));
    }

+    #[cfg(target_arch = "s390x")]
+    #[tokio::test]
+    async fn test_virtio_blk_ccw_matcher() {
+        let root_bus = create_ccw_root_bus_path();
+        let subsystem = "block";
+        let devname = "vda";
+        let relpath = "0.0.0002";
+        // Build an "add" uevent for block device `vda` whose devpath embeds `relpath`.
+        let mut uev = crate::uevent::Uevent::default();
+        uev.action = crate::linux_abi::U_EVENT_ACTION_ADD.to_string();
+        uev.subsystem = subsystem.to_string();
+        uev.devname = devname.to_string();
+        uev.devpath = format!(
+            "{}/0.0.0001/{}/virtio1/{}/{}",
+            root_bus, relpath, subsystem, devname
+        );
+
+        // Valid path: the devpath carries the same id (`relpath`) the matcher is built from.
+        let device = ccw::Device::from_str(relpath).unwrap();
+        let matcher = VirtioBlkCCWMatcher::new(&root_bus, &device);
+        assert!(matcher.is_match(&uev));
+
+        // Invalid path: the second id component is 0.0.0003 instead of `relpath`.
+        uev.devpath = format!(
+            "{}/0.0.0001/0.0.0003/virtio1/{}/{}",
+            root_bus, subsystem, devname
+        );
+        assert!(!matcher.is_match(&uev));
+        // Invalid path: the root bus prefix is missing.
+        uev.devpath = format!("0.0.0001/{}/virtio1/{}/{}", relpath, subsystem, devname);
+        assert!(!matcher.is_match(&uev));
+        // Invalid path: `virtio` has no numeric suffix.
+        uev.devpath = format!(
+            "{}/0.0.0001/{}/virtio/{}/{}",
+            root_bus, relpath, subsystem, devname
+        );
+        assert!(!matcher.is_match(&uev));
+        // Invalid path: stops at the virtio node, without subsystem and devname components.
+        uev.devpath = format!("{}/0.0.0001/{}/virtio1", root_bus, relpath);
+        assert!(!matcher.is_match(&uev));
+        // Invalid path: the first id component is 1.0.0001 rather than 0.0.0001.
+        uev.devpath = format!(
+            "{}/1.0.0001/{}/virtio1/{}/{}",
+            root_bus, relpath, subsystem, devname
+        );
+        assert!(!matcher.is_match(&uev));
+        // Invalid path: the first id component is 0.4.0001 rather than 0.0.0001.
+        uev.devpath = format!(
+            "{}/0.4.0001/{}/virtio1/{}/{}",
+            root_bus, relpath, subsystem, devname
+        );
+        assert!(!matcher.is_match(&uev));
+        // Invalid path: the first id component is 0.0.10000, with five digits in its last field.
+        uev.devpath = format!(
+            "{}/0.0.10000/{}/virtio1/{}/{}",
+            root_bus, relpath, subsystem, devname
+        );
+        assert!(!matcher.is_match(&uev));
+    }
+
    #[tokio::test]
    async fn test_scsi_block_matcher() {
        let root_bus = create_pci_root_bus_path();
--- a/src/agent/src/linux_abi.rs
+++ b/src/agent/src/linux_abi.rs
@@ -65,6 +65,10 @@ pub fn create_pci_root_bus_path() -> String {
    ret
 }

+#[cfg(target_arch = "s390x")]
+pub fn create_ccw_root_bus_path() -> String {
+    "/devices/css0".to_owned()
+}
 // From https://www.kernel.org/doc/Documentation/acpi/namespace.txt
 // The Linux kernel's core ACPI subsystem creates struct acpi_device
 // objects for ACPI namespace objects representing devices, power resources
--- a/src/agent/src/main.rs
+++ b/src/agent/src/main.rs
@@ -5,8 +5,8 @@

 #[macro_use]
 extern crate lazy_static;
+extern crate capctl;
 extern crate oci;
-extern crate prctl;
 extern crate prometheus;
 extern crate protocols;
 extern crate regex;
@@ -32,7 +32,10 @@ use std::os::unix::io::AsRawFd;
 use std::path::Path;
 use std::process::exit;
 use std::sync::Arc;
+use tracing::{instrument, span};

+#[cfg(target_arch = "s390x")]
+mod ccw;
 mod config;
 mod console;
 mod device;
@@ -51,11 +54,12 @@ mod test_utils;
 mod uevent;
 mod util;
 mod version;
+mod watcher;

 use mount::{cgroups_mount, general_mount};
 use sandbox::Sandbox;
 use signal::setup_signal_handler;
-use slog::Logger;
+use slog::{error, info, o, warn, Logger};
 use uevent::watch_uevents;

 use futures::future::join_all;
@@ -70,6 +74,7 @@ use tokio::{
 };

 mod rpc;
+mod tracer;

 const NAME: &str = "kata-agent";
 const KERNEL_CMDLINE_FILE: &str = "/proc/cmdline";
@@ -79,6 +84,7 @@ lazy_static! {
        Arc::new(RwLock::new(config::AgentConfig::new()));
 }

+#[instrument]
 fn announce(logger: &Logger, config: &AgentConfig) {
    info!(logger, "announce";
    "agent-commit" => version::VERSION_COMMIT,
@@ -199,6 +205,17 @@ async fn real_main() -> std::result::Result<(), Box<dyn std::error::Error>> {
        ttrpc_log_guard = Ok(slog_stdlog::init().map_err(|e| e)?);
    }

+    if config.tracing != tracer::TraceType::Disabled {
+        let _ = tracer::setup_tracing(NAME, &logger, &config)?;
+    }
+
+    let root = span!(tracing::Level::TRACE, "root-span", work_units = 2);
+
+    // XXX: Start the root trace transaction.
+    //
+    // XXX: Note that *ALL* spans need to start after this point!!
+    let _enter = root.enter();
+
    // Start the sandbox and wait for its ttRPC server to end
    start_sandbox(&logger, &config, init_mode, &mut tasks, shutdown_rx.clone()).await?;

@@ -227,6 +244,10 @@ async fn real_main() -> std::result::Result<(), Box<dyn std::error::Error>> {
        }
    }

+    if config.tracing != tracer::TraceType::Disabled {
+        tracer::end_tracing();
+    }
+
    eprintln!("{} shutdown complete", NAME);

    Ok(())
@@ -248,6 +269,7 @@ fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
    }

    if args.len() == 2 && args[1] == "init" {
+        reset_sigpipe();
        rustjail::container::init_child();
        exit(0);
    }
@@ -259,6 +281,7 @@ fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
    rt.block_on(real_main())
 }

+#[instrument]
 async fn start_sandbox(
    logger: &Logger,
    config: &AgentConfig,
@@ -305,7 +328,7 @@ async fn start_sandbox(
    let mut server = rpc::start(sandbox.clone(), config.server_addr.as_str());
    server.start().await?;

-    let _ = rx.await?;
+    rx.await?;
    server.shutdown().await?;

    Ok(())
@@ -345,6 +368,7 @@ fn init_agent_as_init(logger: &Logger, unified_cgroup_hierarchy: bool) -> Result
    Ok(())
 }

+#[instrument]
 fn sethostname(hostname: &OsStr) -> Result<()> {
    let size = hostname.len() as usize;

@@ -358,5 +382,16 @@ fn sethostname(hostname: &OsStr) -> Result<()> {
    }
 }

+// reset_sigpipe restores the default SIGPIPE disposition (SIG_DFL).
+// The Rust standard library suppresses the default SIGPIPE behavior, see
+// https://github.com/rust-lang/rust/pull/13158, and a child process inherits that
+// setting from its parent. Re-enabling the default is a workaround for
+// https://github.com/kata-containers/kata-containers/issues/1887.
+fn reset_sigpipe() {
+    unsafe {
+        libc::signal(libc::SIGPIPE, libc::SIG_DFL);
+    }
+}
+
 use crate::config::AgentConfig;
 use std::os::unix::io::{FromRawFd, RawFd};
--- a/src/agent/src/metrics.rs
+++ b/src/agent/src/metrics.rs
@@ -8,6 +8,7 @@ extern crate procfs;
 use prometheus::{Encoder, Gauge, GaugeVec, IntCounter, TextEncoder};

 use anyhow::Result;
+use tracing::instrument;

 const NAMESPACE_KATA_AGENT: &str = "kata_agent";
 const NAMESPACE_KATA_GUEST: &str = "kata_guest";
@@ -68,6 +69,7 @@ lazy_static! {
    prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"meminfo").as_ref() , "Statistics about memory usage in the system.", &["item"]).unwrap();
 }

+#[instrument]
 pub fn get_metrics(_: &protocols::agent::GetMetricsRequest) -> Result<String> {
    AGENT_SCRAPE_COUNT.inc();

@@ -87,6 +89,7 @@ pub fn get_metrics(_: &protocols::agent::GetMetricsRequest) -> Result<String> {
    Ok(String::from_utf8(buffer).unwrap())
 }

+#[instrument]
 fn update_agent_metrics() {
    let me = procfs::process::Process::myself();

@@ -136,6 +139,7 @@ fn update_agent_metrics() {
    }
 }

+#[instrument]
 fn update_guest_metrics() {
    // try get load and task info
    match procfs::LoadAverage::new() {
@@ -218,6 +222,7 @@ fn update_guest_metrics() {
    }
 }

+#[instrument]
 fn set_gauge_vec_meminfo(gv: &prometheus::GaugeVec, meminfo: &procfs::Meminfo) {
    gv.with_label_values(&["mem_total"])
        .set(meminfo.mem_total as f64);
@@ -332,6 +337,7 @@ fn set_gauge_vec_meminfo(gv: &prometheus::GaugeVec, meminfo: &procfs::Meminfo) {
        .set(meminfo.k_reclaimable.unwrap_or(0) as f64);
 }

+#[instrument]
 fn set_gauge_vec_cpu_time(gv: &prometheus::GaugeVec, cpu: &str, cpu_time: &procfs::CpuTime) {
    gv.with_label_values(&[cpu, "user"])
        .set(cpu_time.user as f64);
@@ -355,6 +361,7 @@ fn set_gauge_vec_cpu_time(gv: &prometheus::GaugeVec, cpu: &str, cpu_time: &procf
        .set(cpu_time.guest_nice.unwrap_or(0.0) as f64);
 }

+#[instrument]
 fn set_gauge_vec_diskstat(gv: &prometheus::GaugeVec, diskstat: &procfs::DiskStat) {
    gv.with_label_values(&[diskstat.name.as_str(), "reads"])
        .set(diskstat.reads as f64);
@@ -393,6 +400,7 @@ fn set_gauge_vec_diskstat(gv: &prometheus::GaugeVec, diskstat: &procfs::DiskStat
 }

 // set_gauge_vec_netdev set gauge for NetDevLine
+#[instrument]
 fn set_gauge_vec_netdev(gv: &prometheus::GaugeVec, status: &procfs::net::DeviceStatus) {
    gv.with_label_values(&[status.name.as_str(), "recv_bytes"])
        .set(status.recv_bytes as f64);
@@ -429,6 +437,7 @@ fn set_gauge_vec_netdev(gv: &prometheus::GaugeVec, status: &procfs::net::DeviceS
 }

 // set_gauge_vec_proc_status set gauge for ProcStatus
+#[instrument]
 fn set_gauge_vec_proc_status(gv: &prometheus::GaugeVec, status: &procfs::process::Status) {
    gv.with_label_values(&["vmpeak"])
        .set(status.vmpeak.unwrap_or(0) as f64);
@@ -469,6 +478,7 @@ fn set_gauge_vec_proc_status(gv: &prometheus::GaugeVec, status: &procfs::process
 }

 // set_gauge_vec_proc_io set gauge for ProcIO
+#[instrument]
 fn set_gauge_vec_proc_io(gv: &prometheus::GaugeVec, io_stat: &procfs::process::Io) {
    gv.with_label_values(&["rchar"]).set(io_stat.rchar as f64);
    gv.with_label_values(&["wchar"]).set(io_stat.wchar as f64);
@@ -483,6 +493,7 @@ fn set_gauge_vec_proc_io(gv: &prometheus::GaugeVec, io_stat: &procfs::process::I
 }

 // set_gauge_vec_proc_stat set gauge for ProcStat
+#[instrument]
 fn set_gauge_vec_proc_stat(gv: &prometheus::GaugeVec, stat: &procfs::process::Stat) {
    gv.with_label_values(&["utime"]).set(stat.utime as f64);
    gv.with_label_values(&["stime"]).set(stat.stime as f64);
--- a/src/agent/src/mount.rs
+++ b/src/agent/src/mount.rs
@@ -6,13 +6,16 @@
 use std::collections::HashMap;
 use std::ffi::CString;
 use std::fs;
+use std::fs::File;
 use std::io;
+use std::io::{BufRead, BufReader};
+use std::iter;
 use std::os::unix::fs::{MetadataExt, PermissionsExt};
-
 use std::path::Path;
 use std::ptr::null;
 use std::str::FromStr;
 use std::sync::Arc;
+
 use tokio::sync::Mutex;

 use libc::{c_void, mount};
@@ -20,8 +23,6 @@ use nix::mount::{self, MsFlags};
 use nix::unistd::Gid;

 use regex::Regex;
-use std::fs::File;
-use std::io::{BufRead, BufReader};

 use crate::device::{
    get_scsi_device_name, get_virtio_blk_pci_device_name, online_device, wait_for_pmem_device,
@@ -30,17 +31,22 @@ use crate::linux_abi::*;
 use crate::pci;
 use crate::protocols::agent::Storage;
 use crate::Sandbox;
+#[cfg(target_arch = "s390x")]
+use crate::{ccw, device::get_virtio_blk_ccw_device_name};
 use anyhow::{anyhow, Context, Result};
 use slog::Logger;
+use tracing::instrument;

 pub const DRIVER_9P_TYPE: &str = "9p";
 pub const DRIVER_VIRTIOFS_TYPE: &str = "virtio-fs";
 pub const DRIVER_BLK_TYPE: &str = "blk";
+pub const DRIVER_BLK_CCW_TYPE: &str = "blk-ccw";
 pub const DRIVER_MMIO_BLK_TYPE: &str = "mmioblk";
 pub const DRIVER_SCSI_TYPE: &str = "scsi";
 pub const DRIVER_NVDIMM_TYPE: &str = "nvdimm";
 pub const DRIVER_EPHEMERAL_TYPE: &str = "ephemeral";
 pub const DRIVER_LOCAL_TYPE: &str = "local";
+pub const DRIVER_WATCHABLE_BIND_TYPE: &str = "watchable-bind";

 pub const TYPE_ROOTFS: &str = "rootfs";

@@ -131,7 +137,7 @@ lazy_static! {
    ];
 }

-pub const STORAGE_HANDLER_LIST: [&str; 8] = [
+pub const STORAGE_HANDLER_LIST: &[&str] = &[
    DRIVER_BLK_TYPE,
    DRIVER_9P_TYPE,
    DRIVER_VIRTIOFS_TYPE,
@@ -140,6 +146,7 @@ pub const STORAGE_HANDLER_LIST: [&str; 8] = [
    DRIVER_LOCAL_TYPE,
    DRIVER_SCSI_TYPE,
    DRIVER_NVDIMM_TYPE,
+    DRIVER_WATCHABLE_BIND_TYPE,
 ];

 #[derive(Debug, Clone)]
@@ -156,6 +163,7 @@ pub struct BareMount<'a> {
 // * evaluate all symlinks
 // * ensure the source exists
 impl<'a> BareMount<'a> {
+    #[instrument]
    pub fn new(
        s: &'a str,
        d: &'a str,
@@ -174,6 +182,7 @@ impl<'a> BareMount<'a> {
        }
    }

+    #[instrument]
    pub fn mount(&self) -> Result<()> {
        let source;
        let dest;
@@ -232,6 +241,7 @@ impl<'a> BareMount<'a> {
    }
 }

+#[instrument]
 async fn ephemeral_storage_handler(
    logger: &Logger,
    storage: &Storage,
@@ -278,6 +288,7 @@ async fn ephemeral_storage_handler(
    Ok("".to_string())
 }

+#[instrument]
 async fn local_storage_handler(
    _logger: &Logger,
    storage: &Storage,
@@ -324,6 +335,7 @@ async fn local_storage_handler(
    Ok("".to_string())
 }

+#[instrument]
 async fn virtio9p_storage_handler(
    logger: &Logger,
    storage: &Storage,
@@ -333,6 +345,7 @@ async fn virtio9p_storage_handler(
 }

 // virtiommio_blk_storage_handler handles the storage for mmio blk driver.
+#[instrument]
 async fn virtiommio_blk_storage_handler(
    logger: &Logger,
    storage: &Storage,
@@ -343,6 +356,7 @@ async fn virtiommio_blk_storage_handler(
 }

 // virtiofs_storage_handler handles the storage for virtio-fs.
+#[instrument]
 async fn virtiofs_storage_handler(
    logger: &Logger,
    storage: &Storage,
@@ -352,6 +366,7 @@ async fn virtiofs_storage_handler(
 }

 // virtio_blk_storage_handler handles the storage for blk driver.
+#[instrument]
 async fn virtio_blk_storage_handler(
    logger: &Logger,
    storage: &Storage,
@@ -377,7 +392,33 @@ async fn virtio_blk_storage_handler(
    common_storage_handler(logger, &storage)
 }

-// virtio_scsi_storage_handler handles the storage for scsi driver.
+// Handles blk-ccw storage (s390x): resolves the CCW device named by storage.source first.
+#[cfg(target_arch = "s390x")]
+#[instrument]
+async fn virtio_blk_ccw_storage_handler(
+    logger: &Logger,
+    storage: &Storage,
+    sandbox: Arc<Mutex<Sandbox>>,
+) -> Result<String> {
+    let ccw_device = ccw::Device::from_str(&storage.source)?;
+    let mut resolved = storage.clone();
+    resolved.source = get_virtio_blk_ccw_device_name(&sandbox, &ccw_device).await?;
+    common_storage_handler(logger, &resolved)
+}
+
+// On every other architecture blk-ccw storage is rejected with an error.
+#[cfg(not(target_arch = "s390x"))]
+#[instrument]
+async fn virtio_blk_ccw_storage_handler(
+    _: &Logger,
+    _: &Storage,
+    _: Arc<Mutex<Sandbox>>,
+) -> Result<String> {
+    Err(anyhow!("CCW is only supported on s390x"))
+}
+
+// virtio_scsi_storage_handler handles the storage for scsi driver.
+#[instrument]
 async fn virtio_scsi_storage_handler(
    logger: &Logger,
    storage: &Storage,
@@ -392,6 +433,7 @@ async fn virtio_scsi_storage_handler(
    common_storage_handler(logger, &storage)
 }

+#[instrument]
 fn common_storage_handler(logger: &Logger, storage: &Storage) -> Result<String> {
    // Mount the storage device.
    let mount_point = storage.mount_point.to_string();
@@ -400,6 +442,7 @@ fn common_storage_handler(logger: &Logger, storage: &Storage) -> Result<String>
 }

 // nvdimm_storage_handler handles the storage for NVDIMM driver.
+#[instrument]
 async fn nvdimm_storage_handler(
    logger: &Logger,
    storage: &Storage,
@@ -413,7 +456,22 @@ async fn nvdimm_storage_handler(
    common_storage_handler(logger, &storage)
 }

+// Registers a watchable-bind storage with the sandbox's bind watcher, passing the
+// sandbox's own id as the container id. The sandbox lock is held for the whole call.
+async fn bind_watcher_storage_handler(
+    logger: &Logger,
+    storage: &Storage,
+    sandbox: Arc<Mutex<Sandbox>>,
+) -> Result<()> {
+    let mut guard = sandbox.lock().await;
+    let id = guard.id.clone();
+    let watched = iter::once(storage.clone());
+
+    guard.bind_watcher.add_container(id, watched, logger).await
+}
+
 // mount_storage performs the mount described by the storage structure.
+#[instrument]
 fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
    let logger = logger.new(o!("subsystem" => "mount"));

@@ -464,7 +522,8 @@ fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
 }

 /// Looks for `mount_point` entry in the /proc/mounts.
-fn is_mounted(mount_point: &str) -> Result<bool> {
+#[instrument]
+pub fn is_mounted(mount_point: &str) -> Result<bool> {
    let mount_point = mount_point.trim_end_matches('/');
    let found = fs::metadata(mount_point).is_ok()
        // Looks through /proc/mounts and check if the mount exists
@@ -481,6 +540,7 @@ fn is_mounted(mount_point: &str) -> Result<bool> {
    Ok(found)
 }

+#[instrument]
 fn parse_mount_flags_and_options(options_vec: Vec<&str>) -> (MsFlags, String) {
    let mut flags = MsFlags::empty();
    let mut options: String = "".to_string();
@@ -509,6 +569,7 @@ fn parse_mount_flags_and_options(options_vec: Vec<&str>) -> (MsFlags, String) {
 // associated operations such as waiting for the device to show up, and mount
 // it to a specific location, according to the type of handler chosen, and for
 // each storage.
+#[instrument]
 pub async fn add_storages(
    logger: Logger,
    storages: Vec<Storage>,
@@ -524,6 +585,9 @@ pub async fn add_storages(

        let res = match handler_name.as_str() {
            DRIVER_BLK_TYPE => virtio_blk_storage_handler(&logger, &storage, sandbox.clone()).await,
+            DRIVER_BLK_CCW_TYPE => {
+                virtio_blk_ccw_storage_handler(&logger, &storage, sandbox.clone()).await
+            }
            DRIVER_9P_TYPE => virtio9p_storage_handler(&logger, &storage, sandbox.clone()).await,
            DRIVER_VIRTIOFS_TYPE => {
                virtiofs_storage_handler(&logger, &storage, sandbox.clone()).await
@@ -539,6 +603,11 @@ pub async fn add_storages(
                virtio_scsi_storage_handler(&logger, &storage, sandbox.clone()).await
            }
            DRIVER_NVDIMM_TYPE => nvdimm_storage_handler(&logger, &storage, sandbox.clone()).await,
+            DRIVER_WATCHABLE_BIND_TYPE => {
+                bind_watcher_storage_handler(&logger, &storage, sandbox.clone()).await?;
+                // Don't register watch mounts, they're handled separately by the watcher.
+                Ok(String::new())
+            }
            _ => {
                return Err(anyhow!(
                    "Failed to find the storage handler {}",
@@ -558,6 +627,7 @@ pub async fn add_storages(
    Ok(mount_list)
 }

+#[instrument]
 fn mount_to_rootfs(logger: &Logger, m: &InitMount) -> Result<()> {
    let options_vec: Vec<&str> = m.options.clone();

@@ -583,6 +653,7 @@ fn mount_to_rootfs(logger: &Logger, m: &InitMount) -> Result<()> {
    Ok(())
 }

+#[instrument]
 pub fn general_mount(logger: &Logger) -> Result<()> {
    let logger = logger.new(o!("subsystem" => "mount"));

@@ -600,6 +671,7 @@ pub fn get_mount_fs_type(mount_point: &str) -> Result<String> {

 // get_mount_fs_type_from_file returns the FS type corresponding to the passed mount point and
 // any error ecountered.
+#[instrument]
 pub fn get_mount_fs_type_from_file(mount_file: &str, mount_point: &str) -> Result<String> {
    if mount_point.is_empty() {
        return Err(anyhow!("Invalid mount point {}", mount_point));
@@ -630,6 +702,7 @@ pub fn get_mount_fs_type_from_file(mount_file: &str, mount_point: &str) -> Resul
    ))
 }

+#[instrument]
 pub fn get_cgroup_mounts(
    logger: &Logger,
    cg_path: &str,
@@ -720,6 +793,7 @@ pub fn get_cgroup_mounts(
    Ok(cg_mounts)
 }

+#[instrument]
 pub fn cgroups_mount(logger: &Logger, unified_cgroup_hierarchy: bool) -> Result<()> {
    let logger = logger.new(o!("subsystem" => "mount"));

@@ -735,6 +809,7 @@ pub fn cgroups_mount(logger: &Logger, unified_cgroup_hierarchy: bool) -> Result<
    Ok(())
 }

+#[instrument]
 pub fn remove_mounts(mounts: &[String]) -> Result<()> {
    for m in mounts.iter() {
        mount::umount(m.as_str()).context(format!("failed to umount {:?}", m))?;
@@ -744,6 +819,7 @@ pub fn remove_mounts(mounts: &[String]) -> Result<()> {

 // ensure_destination_exists will recursively create a given mountpoint. If directories
 // are created, their permissions are initialized to mountPerm(0755)
+#[instrument]
 fn ensure_destination_exists(destination: &str, fs_type: &str) -> Result<()> {
    let d = Path::new(destination);
    if !d.exists() {
@@ -764,6 +840,7 @@ fn ensure_destination_exists(destination: &str, fs_type: &str) -> Result<()> {
    Ok(())
 }

+#[instrument]
 fn parse_options(option_list: Vec<String>) -> HashMap<String, String> {
    let mut options = HashMap::new();
    for opt in option_list.iter() {
--- a/src/agent/src/namespace.rs
+++ b/src/agent/src/namespace.rs
@@ -11,6 +11,7 @@ use std::fmt;
 use std::fs;
 use std::fs::File;
 use std::path::{Path, PathBuf};
+use tracing::instrument;

 use crate::mount::{BareMount, FLAGS};
 use slog::Logger;
@@ -20,6 +21,7 @@ pub const NSTYPEIPC: &str = "ipc";
 pub const NSTYPEUTS: &str = "uts";
 pub const NSTYPEPID: &str = "pid";

+#[instrument]
 pub fn get_current_thread_ns_path(ns_type: &str) -> String {
    format!(
        "/proc/{}/task/{}/ns/{}",
@@ -40,6 +42,7 @@ pub struct Namespace {
 }

 impl Namespace {
+    #[instrument]
    pub fn new(logger: &Logger) -> Self {
        Namespace {
            logger: logger.clone(),
@@ -50,11 +53,13 @@ impl Namespace {
        }
    }

+    #[instrument]
    pub fn get_ipc(mut self) -> Self {
        self.ns_type = NamespaceType::Ipc;
        self
    }

+    #[instrument]
    pub fn get_uts(mut self, hostname: &str) -> Self {
        self.ns_type = NamespaceType::Uts;
        if !hostname.is_empty() {
@@ -63,6 +68,7 @@ impl Namespace {
        self
    }

+    #[instrument]
    pub fn get_pid(mut self) -> Self {
        self.ns_type = NamespaceType::Pid;
        self
@@ -76,6 +82,7 @@ impl Namespace {

    // setup creates persistent namespace without switching to it.
    // Note, pid namespaces cannot be persisted.
+    #[instrument]
    pub async fn setup(mut self) -> Result<Self> {
        fs::create_dir_all(&self.persistent_ns_dir)?;

--- a/src/agent/src/random.rs
+++ b/src/agent/src/random.rs
@@ -9,6 +9,7 @@ use nix::fcntl::{self, OFlag};
 use nix::sys::stat::Mode;
 use std::fs;
 use std::os::unix::io::{AsRawFd, FromRawFd};
+use tracing::instrument;

 pub const RNGDEV: &str = "/dev/random";
 pub const RNDADDTOENTCNT: libc::c_int = 0x40045201;
@@ -20,6 +21,7 @@ type IoctlRequestType = libc::c_int;
 #[cfg(target_env = "gnu")]
 type IoctlRequestType = libc::c_ulong;

+#[instrument]
 pub fn reseed_rng(data: &[u8]) -> Result<()> {
    let len = data.len() as libc::c_long;
    fs::write(RNGDEV, data)?;
@@ -37,10 +39,10 @@ pub fn reseed_rng(data: &[u8]) -> Result<()> {
            &len as *const libc::c_long,
        )
    };
-    let _ = Errno::result(ret).map(drop)?;
+    Errno::result(ret).map(drop)?;

    let ret = unsafe { libc::ioctl(f.as_raw_fd(), RNDRESEEDRNG as IoctlRequestType, 0) };
-    let _ = Errno::result(ret).map(drop)?;
+    Errno::result(ret).map(drop)?;

    Ok(())
 }
--- a/src/agent/src/rpc.rs
+++ b/src/agent/src/rpc.rs
@@ -51,6 +51,14 @@ use crate::sandbox::Sandbox;
 use crate::version::{AGENT_VERSION, API_VERSION};
 use crate::AGENT_CONFIG;

+use crate::trace_rpc_call;
+use crate::tracer::extract_carrier_from_ttrpc;
+use opentelemetry::global;
+use tracing::span;
+use tracing_opentelemetry::OpenTelemetrySpanExt;
+
+use tracing::instrument;
+
 use libc::{self, c_ushort, pid_t, winsize, TIOCSWINSZ};
 use std::convert::TryFrom;
 use std::fs;
@@ -74,7 +82,7 @@ macro_rules! sl {
    };
 }

-#[derive(Clone)]
+#[derive(Clone, Debug)]
 pub struct AgentService {
    sandbox: Arc<Mutex<Sandbox>>,
 }
@@ -97,13 +105,14 @@ fn verify_cid(id: &str) -> Result<()> {
 }

 impl AgentService {
+    #[instrument]
    async fn do_create_container(
        &self,
        req: protocols::agent::CreateContainerRequest,
    ) -> Result<()> {
        let cid = req.container_id.clone();

-        let _ = verify_cid(&cid)?;
+        verify_cid(&cid)?;

        let mut oci_spec = req.OCI.clone();
        let use_sandbox_pidns = req.get_sandbox_pidns();
@@ -196,6 +205,7 @@ impl AgentService {
        Ok(())
    }

+    #[instrument]
    async fn do_start_container(&self, req: protocols::agent::StartContainerRequest) -> Result<()> {
        let cid = req.container_id;

@@ -221,6 +231,7 @@ impl AgentService {
        Ok(())
    }

+    #[instrument]
    async fn do_remove_container(
        &self,
        req: protocols::agent::RemoveContainerRequest,
@@ -253,11 +264,14 @@ impl AgentService {
        if req.timeout == 0 {
            let s = Arc::clone(&self.sandbox);
            let mut sandbox = s.lock().await;
-            let ctr = sandbox
-                .get_container(&cid)
-                .ok_or_else(|| anyhow!("Invalid container id"))?;

-            ctr.destroy().await?;
+            sandbox.bind_watcher.remove_container(&cid).await;
+
+            sandbox
+                .get_container(&cid)
+                .ok_or_else(|| anyhow!("Invalid container id"))?
+                .destroy()
+                .await?;

            remove_container_resources(&mut sandbox)?;

@@ -273,6 +287,7 @@ impl AgentService {
            let mut sandbox = s.lock().await;
            if let Some(ctr) = sandbox.get_container(&cid2) {
                ctr.destroy().await.unwrap();
+                sandbox.bind_watcher.remove_container(&cid2).await;
                tx.send(1).unwrap();
            };
        });
@@ -298,6 +313,7 @@ impl AgentService {
        Ok(())
    }

+    #[instrument]
    async fn do_exec_process(&self, req: protocols::agent::ExecProcessRequest) -> Result<()> {
        let cid = req.container_id.clone();
        let exec_id = req.exec_id.clone();
@@ -326,6 +342,7 @@ impl AgentService {
        Ok(())
    }

+    #[instrument]
    async fn do_signal_process(&self, req: protocols::agent::SignalProcessRequest) -> Result<()> {
        let cid = req.container_id.clone();
        let eid = req.exec_id.clone();
@@ -360,6 +377,7 @@ impl AgentService {
        Ok(())
    }

+    #[instrument]
    async fn do_wait_process(
        &self,
        req: protocols::agent::WaitProcessRequest,
@@ -509,9 +527,10 @@ impl AgentService {
 impl protocols::agent_ttrpc::AgentService for AgentService {
    async fn create_container(
        &self,
-        _ctx: &TtrpcContext,
+        ctx: &TtrpcContext,
        req: protocols::agent::CreateContainerRequest,
    ) -> ttrpc::Result<Empty> {
+        trace_rpc_call!(ctx, "create_container", req);
        match self.do_create_container(req).await {
            Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
            Ok(_) => Ok(Empty::new()),
@@ -520,9 +539,10 @@ impl protocols::agent_ttrpc::AgentService for AgentService {

    async fn start_container(
        &self,
-        _ctx: &TtrpcContext,
+        ctx: &TtrpcContext,
        req: protocols::agent::StartContainerRequest,
    ) -> ttrpc::Result<Empty> {
+        trace_rpc_call!(ctx, "start_container", req);
        match self.do_start_container(req).await {
            Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
            Ok(_) => Ok(Empty::new()),
@@ -531,9 +551,10 @@ impl protocols::agent_ttrpc::AgentService for AgentService {

    async fn remove_container(
        &self,
-        _ctx: &TtrpcContext,
+        ctx: &TtrpcContext,
        req: protocols::agent::RemoveContainerRequest,
    ) -> ttrpc::Result<Empty> {
+        trace_rpc_call!(ctx, "remove_container", req);
        match self.do_remove_container(req).await {
            Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
            Ok(_) => Ok(Empty::new()),
@@ -542,9 +563,10 @@ impl protocols::agent_ttrpc::AgentService for AgentService {

    async fn exec_process(
        &self,
-        _ctx: &TtrpcContext,
+        ctx: &TtrpcContext,
        req: protocols::agent::ExecProcessRequest,
    ) -> ttrpc::Result<Empty> {
+        trace_rpc_call!(ctx, "exec_process", req);
        match self.do_exec_process(req).await {
            Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
            Ok(_) => Ok(Empty::new()),
@@ -553,9 +575,10 @@ impl protocols::agent_ttrpc::AgentService for AgentService {

    async fn signal_process(
        &self,
-        _ctx: &TtrpcContext,
+        ctx: &TtrpcContext,
        req: protocols::agent::SignalProcessRequest,
    ) -> ttrpc::Result<Empty> {
+        trace_rpc_call!(ctx, "signal_process", req);
        match self.do_signal_process(req).await {
            Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
            Ok(_) => Ok(Empty::new()),
@@ -564,9 +587,10 @@ impl protocols::agent_ttrpc::AgentService for AgentService {

    async fn wait_process(
        &self,
-        _ctx: &TtrpcContext,
+        ctx: &TtrpcContext,
        req: protocols::agent::WaitProcessRequest,
    ) -> ttrpc::Result<WaitProcessResponse> {
+        trace_rpc_call!(ctx, "wait_process", req);
        self.do_wait_process(req)
            .await
            .map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))
@@ -574,9 +598,10 @@ impl protocols::agent_ttrpc::AgentService for AgentService {

    async fn update_container(
        &self,
-        _ctx: &TtrpcContext,
+        ctx: &TtrpcContext,
        req: protocols::agent::UpdateContainerRequest,
    ) -> ttrpc::Result<Empty> {
+        trace_rpc_call!(ctx, "update_container", req);
        let cid = req.container_id.clone();
        let res = req.resources;

@@ -608,9 +633,10 @@ impl protocols::agent_ttrpc::AgentService for AgentService {

    async fn stats_container(
        &self,
-        _ctx: &TtrpcContext,
+        ctx: &TtrpcContext,
        req: protocols::agent::StatsContainerRequest,
    ) -> ttrpc::Result<StatsContainerResponse> {
+        trace_rpc_call!(ctx, "stats_container", req);
        let cid = req.container_id;
        let s = Arc::clone(&self.sandbox);
        let mut sandbox = s.lock().await;
@@ -628,9 +654,10 @@ impl protocols::agent_ttrpc::AgentService for AgentService {

    async fn pause_container(
        &self,
-        _ctx: &TtrpcContext,
+        ctx: &TtrpcContext,
        req: protocols::agent::PauseContainerRequest,
    ) -> ttrpc::Result<protocols::empty::Empty> {
+        trace_rpc_call!(ctx, "pause_container", req);
        let cid = req.get_container_id();
        let s = Arc::clone(&self.sandbox);
        let mut sandbox = s.lock().await;
@@ -650,9 +677,10 @@ impl protocols::agent_ttrpc::AgentService for AgentService {

    async fn resume_container(
        &self,
-        _ctx: &TtrpcContext,
+        ctx: &TtrpcContext,
        req: protocols::agent::ResumeContainerRequest,
    ) -> ttrpc::Result<protocols::empty::Empty> {
+        trace_rpc_call!(ctx, "resume_container", req);
        let cid = req.get_container_id();
        let s = Arc::clone(&self.sandbox);
        let mut sandbox = s.lock().await;
@@ -702,9 +730,11 @@ impl protocols::agent_ttrpc::AgentService for AgentService {

    async fn close_stdin(
        &self,
-        _ctx: &TtrpcContext,
+        ctx: &TtrpcContext,
        req: protocols::agent::CloseStdinRequest,
    ) -> ttrpc::Result<Empty> {
+        trace_rpc_call!(ctx, "close_stdin", req);
+
        let cid = req.container_id.clone();
        let eid = req.exec_id;
        let s = Arc::clone(&self.sandbox);
@@ -736,9 +766,11 @@ impl protocols::agent_ttrpc::AgentService for AgentService {

    async fn tty_win_resize(
        &self,
-        _ctx: &TtrpcContext,
+        ctx: &TtrpcContext,
        req: protocols::agent::TtyWinResizeRequest,
    ) -> ttrpc::Result<Empty> {
+        trace_rpc_call!(ctx, "tty_win_resize", req);
+
        let cid = req.container_id.clone();
        let eid = req.exec_id.clone();
        let s = Arc::clone(&self.sandbox);
@@ -774,9 +806,11 @@ impl protocols::agent_ttrpc::AgentService for AgentService {

    async fn update_interface(
        &self,
-        _ctx: &TtrpcContext,
+        ctx: &TtrpcContext,
        req: protocols::agent::UpdateInterfaceRequest,
    ) -> ttrpc::Result<Interface> {
+        trace_rpc_call!(ctx, "update_interface", req);
+
        let interface = req.interface.into_option().ok_or_else(|| {
            ttrpc_error(
                ttrpc::Code::INVALID_ARGUMENT,
@@ -799,9 +833,11 @@ impl protocols::agent_ttrpc::AgentService for AgentService {

    async fn update_routes(
        &self,
-        _ctx: &TtrpcContext,
+        ctx: &TtrpcContext,
        req: protocols::agent::UpdateRoutesRequest,
    ) -> ttrpc::Result<Routes> {
+        trace_rpc_call!(ctx, "update_routes", req);
+
        let new_routes = req
            .routes
            .into_option()
@@ -837,9 +873,11 @@ impl protocols::agent_ttrpc::AgentService for AgentService {

    async fn list_interfaces(
        &self,
-        _ctx: &TtrpcContext,
-        _req: protocols::agent::ListInterfacesRequest,
+        ctx: &TtrpcContext,
+        req: protocols::agent::ListInterfacesRequest,
    ) -> ttrpc::Result<Interfaces> {
+        trace_rpc_call!(ctx, "list_interfaces", req);
+
        let list = self
            .sandbox
            .lock()
@@ -862,9 +900,11 @@ impl protocols::agent_ttrpc::AgentService for AgentService {

    async fn list_routes(
        &self,
-        _ctx: &TtrpcContext,
-        _req: protocols::agent::ListRoutesRequest,
+        ctx: &TtrpcContext,
+        req: protocols::agent::ListRoutesRequest,
    ) -> ttrpc::Result<Routes> {
+        trace_rpc_call!(ctx, "list_routes", req);
+
        let list = self
            .sandbox
            .lock()
@@ -899,9 +939,11 @@ impl protocols::agent_ttrpc::AgentService for AgentService {

    async fn create_sandbox(
        &self,
-        _ctx: &TtrpcContext,
+        ctx: &TtrpcContext,
        req: protocols::agent::CreateSandboxRequest,
    ) -> ttrpc::Result<Empty> {
+        trace_rpc_call!(ctx, "create_sandbox", req);
+
        {
            let sandbox = self.sandbox.clone();
            let mut s = sandbox.lock().await;
@@ -926,7 +968,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
            }

            for m in req.kernel_modules.iter() {
-                let _ = load_kernel_module(m)
+                load_kernel_module(m)
                    .map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
            }

@@ -962,9 +1004,11 @@ impl protocols::agent_ttrpc::AgentService for AgentService {

    async fn destroy_sandbox(
        &self,
-        _ctx: &TtrpcContext,
-        _req: protocols::agent::DestroySandboxRequest,
+        ctx: &TtrpcContext,
+        req: protocols::agent::DestroySandboxRequest,
    ) -> ttrpc::Result<Empty> {
+        trace_rpc_call!(ctx, "destroy_sandbox", req);
+
        let s = Arc::clone(&self.sandbox);
        let mut sandbox = s.lock().await;
        // destroy all containers, clean up, notify agent to exit
@@ -981,9 +1025,11 @@ impl protocols::agent_ttrpc::AgentService for AgentService {

    async fn add_arp_neighbors(
        &self,
-        _ctx: &TtrpcContext,
+        ctx: &TtrpcContext,
        req: protocols::agent::AddARPNeighborsRequest,
    ) -> ttrpc::Result<Empty> {
+        trace_rpc_call!(ctx, "add_arp_neighbors", req);
+
        let neighs = req
            .neighbors
            .into_option()
@@ -1013,11 +1059,12 @@ impl protocols::agent_ttrpc::AgentService for AgentService {

    async fn online_cpu_mem(
        &self,
-        _ctx: &TtrpcContext,
+        ctx: &TtrpcContext,
        req: protocols::agent::OnlineCPUMemRequest,
    ) -> ttrpc::Result<Empty> {
        let s = Arc::clone(&self.sandbox);
        let sandbox = s.lock().await;
+        trace_rpc_call!(ctx, "online_cpu_mem", req);

        sandbox
            .online_cpu_memory(&req)
@@ -1028,9 +1075,11 @@ impl protocols::agent_ttrpc::AgentService for AgentService {

    async fn reseed_random_dev(
        &self,
-        _ctx: &TtrpcContext,
+        ctx: &TtrpcContext,
        req: protocols::agent::ReseedRandomDevRequest,
    ) -> ttrpc::Result<Empty> {
+        trace_rpc_call!(ctx, "reseed_random_dev", req);
+
        random::reseed_rng(req.data.as_slice())
            .map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;

@@ -1039,9 +1088,11 @@ impl protocols::agent_ttrpc::AgentService for AgentService {

    async fn get_guest_details(
        &self,
-        _ctx: &TtrpcContext,
+        ctx: &TtrpcContext,
        req: protocols::agent::GuestDetailsRequest,
    ) -> ttrpc::Result<GuestDetailsResponse> {
+        trace_rpc_call!(ctx, "get_guest_details", req);
+
        info!(sl!(), "get guest details!");
        let mut resp = GuestDetailsResponse::new();
        // to get memory block size
@@ -1065,9 +1116,11 @@ impl protocols::agent_ttrpc::AgentService for AgentService {

    async fn mem_hotplug_by_probe(
        &self,
-        _ctx: &TtrpcContext,
+        ctx: &TtrpcContext,
        req: protocols::agent::MemHotplugByProbeRequest,
    ) -> ttrpc::Result<Empty> {
+        trace_rpc_call!(ctx, "mem_hotplug_by_probe", req);
+
        do_mem_hotplug_by_probe(&req.memHotplugProbeAddr)
            .map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;

@@ -1076,9 +1129,11 @@ impl protocols::agent_ttrpc::AgentService for AgentService {

    async fn set_guest_date_time(
        &self,
-        _ctx: &TtrpcContext,
+        ctx: &TtrpcContext,
        req: protocols::agent::SetGuestDateTimeRequest,
    ) -> ttrpc::Result<Empty> {
+        trace_rpc_call!(ctx, "set_guest_date_time", req);
+
        do_set_guest_date_time(req.Sec, req.Usec)
            .map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;

@@ -1087,9 +1142,11 @@ impl protocols::agent_ttrpc::AgentService for AgentService {

    async fn copy_file(
        &self,
-        _ctx: &TtrpcContext,
+        ctx: &TtrpcContext,
        req: protocols::agent::CopyFileRequest,
    ) -> ttrpc::Result<Empty> {
+        trace_rpc_call!(ctx, "copy_file", req);
+
        do_copy_file(&req).map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;

        Ok(Empty::new())
@@ -1097,9 +1154,11 @@ impl protocols::agent_ttrpc::AgentService for AgentService {

    async fn get_metrics(
        &self,
-        _ctx: &TtrpcContext,
+        ctx: &TtrpcContext,
        req: protocols::agent::GetMetricsRequest,
    ) -> ttrpc::Result<Metrics> {
+        trace_rpc_call!(ctx, "get_metrics", req);
+
        match get_metrics(&req) {
            Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
            Ok(s) => {
@@ -1529,22 +1588,22 @@ fn setup_bundle(cid: &str, spec: &mut Spec) -> Result<PathBuf> {
 fn cleanup_process(p: &mut Process) -> Result<()> {
    if p.parent_stdin.is_some() {
        p.close_stream(StreamType::ParentStdin);
-        let _ = unistd::close(p.parent_stdin.unwrap())?;
+        unistd::close(p.parent_stdin.unwrap())?;
    }

    if p.parent_stdout.is_some() {
        p.close_stream(StreamType::ParentStdout);
-        let _ = unistd::close(p.parent_stdout.unwrap())?;
+        unistd::close(p.parent_stdout.unwrap())?;
    }

    if p.parent_stderr.is_some() {
        p.close_stream(StreamType::ParentStderr);
-        let _ = unistd::close(p.parent_stderr.unwrap())?;
+        unistd::close(p.parent_stderr.unwrap())?;
    }

    if p.term_master.is_some() {
        p.close_stream(StreamType::TermMaster);
-        let _ = unistd::close(p.term_master.unwrap())?;
+        unistd::close(p.term_master.unwrap())?;
    }

    p.notify_term_close();
--- a/src/agent/src/sandbox.rs
+++ b/src/agent/src/sandbox.rs
@@ -9,6 +9,7 @@ use crate::namespace::Namespace;
 use crate::netlink::Handle;
 use crate::network::Network;
 use crate::uevent::{Uevent, UeventMatcher};
+use crate::watcher::BindWatcher;
 use anyhow::{anyhow, Context, Result};
 use libc::pid_t;
 use oci::{Hook, Hooks};
@@ -28,6 +29,7 @@ use std::{thread, time};
 use tokio::sync::mpsc::{channel, Receiver, Sender};
 use tokio::sync::oneshot;
 use tokio::sync::Mutex;
+use tracing::instrument;

 type UeventWatcher = (Box<dyn UeventMatcher>, oneshot::Sender<Uevent>);

@@ -53,9 +55,11 @@ pub struct Sandbox {
    pub hooks: Option<Hooks>,
    pub event_rx: Arc<Mutex<Receiver<String>>>,
    pub event_tx: Option<Sender<String>>,
+    pub bind_watcher: BindWatcher,
 }

 impl Sandbox {
+    #[instrument]
    pub fn new(logger: &Logger) -> Result<Self> {
        let fs_type = get_mount_fs_type("/")?;
        let logger = logger.new(o!("subsystem" => "sandbox"));
@@ -83,6 +87,7 @@ impl Sandbox {
            hooks: None,
            event_rx,
            event_tx: Some(tx),
+            bind_watcher: BindWatcher::new(),
        })
    }

@@ -94,6 +99,7 @@ impl Sandbox {
    //
    // It's assumed that caller is calling this method after
    // acquiring a lock on sandbox.
+    #[instrument]
    pub fn set_sandbox_storage(&mut self, path: &str) -> bool {
        match self.storages.get_mut(path) {
            None => {
@@ -116,6 +122,7 @@ impl Sandbox {
    //
    // It's assumed that caller is calling this method after
    // acquiring a lock on sandbox.
+    #[instrument]
    pub fn unset_sandbox_storage(&mut self, path: &str) -> Result<bool> {
        match self.storages.get_mut(path) {
            None => Err(anyhow!("Sandbox storage with path {} not found", path)),
@@ -135,6 +142,7 @@ impl Sandbox {
    //
    // It's assumed that caller is calling this method after
    // acquiring a lock on sandbox.
+    #[instrument]
    pub fn remove_sandbox_storage(&self, path: &str) -> Result<()> {
        let mounts = vec![path.to_string()];
        remove_mounts(&mounts)?;
@@ -148,6 +156,7 @@ impl Sandbox {
    //
    // It's assumed that caller is calling this method after
    // acquiring a lock on sandbox.
+    #[instrument]
    pub fn unset_and_remove_sandbox_storage(&mut self, path: &str) -> Result<()> {
        if self.unset_sandbox_storage(path)? {
            return self.remove_sandbox_storage(path);
@@ -156,6 +165,7 @@ impl Sandbox {
        Ok(())
    }

+    #[instrument]
    pub async fn setup_shared_namespaces(&mut self) -> Result<bool> {
        // Set up shared IPC namespace
        self.shared_ipcns = Namespace::new(&self.logger)
@@ -178,6 +188,7 @@ impl Sandbox {
        self.containers.insert(c.id.clone(), c);
    }

+    #[instrument]
    pub fn update_shared_pidns(&mut self, c: &LinuxContainer) -> Result<()> {
        // Populate the shared pid path only if this is an infra container and
        // sandbox_pidns has not been passed in the create_sandbox request.
@@ -215,6 +226,7 @@ impl Sandbox {
        None
    }

+    #[instrument]
    pub async fn destroy(&mut self) -> Result<()> {
        for ctr in self.containers.values_mut() {
            ctr.destroy().await?;
@@ -222,6 +234,7 @@ impl Sandbox {
        Ok(())
    }

+    #[instrument]
    pub fn online_cpu_memory(&self, req: &OnlineCPUMemRequest) -> Result<()> {
        if req.nb_cpus > 0 {
            // online cpus
@@ -265,6 +278,7 @@ impl Sandbox {
        Ok(())
    }

+    #[instrument]
    pub fn add_hooks(&mut self, dir: &str) -> Result<()> {
        let mut hooks = Hooks::default();
        if let Ok(hook) = self.find_hooks(dir, "prestart") {
@@ -280,6 +294,7 @@ impl Sandbox {
        Ok(())
    }

+    #[instrument]
    fn find_hooks(&self, hook_path: &str, hook_type: &str) -> Result<Vec<Hook>> {
        let mut hooks = Vec::new();
        for entry in fs::read_dir(Path::new(hook_path).join(hook_type))? {
@@ -316,6 +331,7 @@ impl Sandbox {
        Ok(hooks)
    }

+    #[instrument]
    pub async fn run_oom_event_monitor(&self, mut rx: Receiver<String>, container_id: String) {
        let logger = self.logger.clone();

@@ -348,6 +364,7 @@ impl Sandbox {
    }
 }

+#[instrument]
 fn online_resources(logger: &Logger, path: &str, pattern: &str, num: i32) -> Result<i32> {
    let mut count = 0;
    let re = Regex::new(pattern)?;
@@ -393,6 +410,7 @@ fn online_resources(logger: &Logger, path: &str, pattern: &str, num: i32) -> Res
 const ONLINE_CPUMEM_WATI_MILLIS: u64 = 50;
 const ONLINE_CPUMEM_MAX_RETRIES: u32 = 100;

+#[instrument]
 fn online_cpus(logger: &Logger, num: i32) -> Result<i32> {
    let mut onlined_count: i32 = 0;

@@ -422,6 +440,7 @@ fn online_cpus(logger: &Logger, num: i32) -> Result<i32> {
    ))
 }

+#[instrument]
 fn online_memory(logger: &Logger) -> Result<()> {
    online_resources(logger, SYSFS_MEMORY_ONLINE_PATH, r"memory[0-9]+", -1)?;
    Ok(())
@@ -537,7 +556,6 @@ mod tests {
    }

    #[tokio::test]
-    #[allow(unused_assignments)]
    async fn unset_and_remove_sandbox_storage() {
        skip_if_not_root!();

@@ -571,7 +589,7 @@ mod tests {
        assert_eq!(s.set_sandbox_storage(&destdir_path), true);
        assert!(s.unset_and_remove_sandbox_storage(&destdir_path).is_ok());

-        let mut other_dir_str = String::new();
+        let other_dir_str;
        {
            // Create another folder in a separate scope to ensure that is
            // deleted
--- a/src/agent/src/signal.rs
+++ b/src/agent/src/signal.rs
@@ -6,10 +6,10 @@

 use crate::sandbox::Sandbox;
 use anyhow::{anyhow, Result};
+use capctl::prctl::set_subreaper;
 use nix::sys::wait::WaitPidFlag;
 use nix::sys::wait::{self, WaitStatus};
 use nix::unistd;
-use prctl::set_child_subreaper;
 use slog::{error, info, o, Logger};
 use std::sync::Arc;
 use tokio::select;
@@ -22,6 +22,9 @@ async fn handle_sigchild(logger: Logger, sandbox: Arc<Mutex<Sandbox>>) -> Result
    info!(logger, "handling signal"; "signal" => "SIGCHLD");

    loop {
+        // Avoid reaping a child that should not be reaped here, e.g. execute_hook's.
+        // The guard is dropped at the end of this statement, so the lock is released immediately.
+        rustjail::container::WAIT_PID_LOCKER.lock().await;
        let result = wait::waitpid(
            Some(Pid::from_raw(-1)),
            Some(WaitPidFlag::WNOHANG | WaitPidFlag::__WALL),
@@ -85,7 +88,7 @@ pub async fn setup_signal_handler(
 ) -> Result<()> {
    let logger = logger.new(o!("subsystem" => "signals"));

-    set_child_subreaper(true)
+    set_subreaper(true)
        .map_err(|err| anyhow!(err).context("failed to setup agent as a child subreaper"))?;

    let mut sigchild_stream = signal(SignalKind::child())?;
--- a/src/agent/src/test_utils.rs
+++ b/src/agent/src/test_utils.rs
@@ -7,7 +7,6 @@
 #[cfg(test)]
 mod test_utils {
    #[macro_export]
-    #[allow(unused_macros)]
    macro_rules! skip_if_root {
        () => {
            if nix::unistd::Uid::effective().is_root() {
@@ -18,7 +17,6 @@ mod test_utils {
    }

    #[macro_export]
-    #[allow(unused_macros)]
    macro_rules! skip_if_not_root {
        () => {
            if !nix::unistd::Uid::effective().is_root() {
@@ -29,7 +27,6 @@ mod test_utils {
    }

    #[macro_export]
-    #[allow(unused_macros)]
    macro_rules! skip_loop_if_root {
        ($msg:expr) => {
            if nix::unistd::Uid::effective().is_root() {
@@ -44,7 +41,6 @@ mod test_utils {
    }

    #[macro_export]
-    #[allow(unused_macros)]
    macro_rules! skip_loop_if_not_root {
        ($msg:expr) => {
            if !nix::unistd::Uid::effective().is_root() {
--- a/src/agent/src/tracer.rs
+++ b/src/agent/src/tracer.rs
@@ -0,0 +1,122 @@
+// Copyright (c) 2020-2021 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use crate::config::AgentConfig;
+use anyhow::Result;
+use opentelemetry::sdk::propagation::TraceContextPropagator;
+use opentelemetry::{global, sdk::trace::Config, trace::TracerProvider};
+use slog::{info, o, Logger};
+use std::collections::HashMap;
+use std::error::Error;
+use std::fmt;
+use std::str::FromStr;
+use tracing_opentelemetry::OpenTelemetryLayer;
+use tracing_subscriber::layer::SubscriberExt;
+use tracing_subscriber::Registry;
+use ttrpc::r#async::TtrpcContext;
+
+/// Tracing modes that can be selected for the agent.
+#[derive(Debug, PartialEq)]
+pub enum TraceType {
+    /// Parsed from the string "disabled" (see the `FromStr` implementation).
+    Disabled,
+    /// Parsed from the string "isolated" (see the `FromStr` implementation).
+    Isolated,
+}
+
+#[derive(Debug)]
+pub struct TraceTypeError {
+    details: String,
+}
+
+impl TraceTypeError {
+    fn new(msg: &str) -> TraceTypeError {
+        TraceTypeError {
+            details: msg.into(),
+        }
+    }
+}
+
+impl Error for TraceTypeError {}
+
+impl fmt::Display for TraceTypeError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}", self.details)
+    }
+}
+
+impl FromStr for TraceType {
+    type Err = TraceTypeError;
+
+    /// Parses a trace type name. Only the exact strings "isolated" and "disabled" are
+    /// accepted; any other input yields a `TraceTypeError`.
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        if s == "isolated" {
+            return Ok(TraceType::Isolated);
+        }
+        if s == "disabled" {
+            return Ok(TraceType::Disabled);
+        }
+
+        Err(TraceTypeError::new("invalid trace type"))
+    }
+}
+
+/// Installs the process-wide tracing pipeline.
+///
+/// A `vsock_exporter` exporter is wired into a new OpenTelemetry tracer provider, which
+/// is registered globally. A tracer named `name` obtained from that provider backs the
+/// `tracing` subscriber that is set as the global default, and
+/// `TraceContextPropagator` becomes the global text-map propagator.
+///
+/// `_agent_cfg` is currently unused. An error is returned if the global `tracing`
+/// subscriber cannot be set.
+pub fn setup_tracing(name: &'static str, logger: &Logger, _agent_cfg: &AgentConfig) -> Result<()> {
+    let logger = logger.new(o!("subsystem" => "vsock-tracer"));
+
+    let exporter = vsock_exporter::Exporter::builder()
+        .with_logger(&logger)
+        .init();
+
+    let provider = opentelemetry::sdk::trace::TracerProvider::builder()
+        .with_simple_exporter(exporter)
+        .with_config(Config::default())
+        .build();
+
+    // We don't need a versioned tracer, hence no version is passed.
+    let tracer = provider.get_tracer(name, None);
+
+    // Keep the returned value bound until the end of the function.
+    let _global_provider = global::set_tracer_provider(provider);
+
+    let subscriber = Registry::default().with(OpenTelemetryLayer::new(tracer));
+    tracing::subscriber::set_global_default(subscriber)?;
+
+    global::set_text_map_propagator(TraceContextPropagator::new());
+
+    info!(logger, "tracing setup");
+
+    Ok(())
+}
+
+/// Shuts down the global tracer provider by calling `global::shutdown_tracer_provider`.
+pub fn end_tracing() {
+    global::shutdown_tracer_provider();
+}
+
+/// Flattens the metadata of a ttrpc request context into a string map: every key is
+/// kept, and the values recorded under it are joined with ",".
+pub fn extract_carrier_from_ttrpc(ttrpc_context: &TtrpcContext) -> HashMap<String, String> {
+    ttrpc_context
+        .metadata
+        .iter()
+        .map(|(key, values)| (key.clone(), values.join(",")))
+        .collect()
+}
+
+/// Opens a tracing span for a ttrpc handler and links it to the caller's trace context.
+///
+/// Arguments: `$ctx` is the handler's ttrpc context, `$name` a literal used as the span
+/// name, and `$req` the request, recorded in the span through its `Debug` output.
+///
+/// The macro expands to plain statements, so the `_enter` guard it creates stays alive
+/// until the end of the block in which the macro is invoked.
+///
+/// The expansion refers to `global`, `extract_carrier_from_ttrpc`, `span!` and
+/// `set_parent` unqualified, so the invoking module must have them in scope.
+///
+/// NOTE(review): the handlers that invoke this are `async fn`s; the `tracing` docs warn
+/// against holding the guard returned by `Span::enter` across an `.await` — confirm
+/// whether `Instrument` should be used instead.
+#[macro_export]
+macro_rules! trace_rpc_call {
+    ($ctx: ident, $name:literal, $req: ident) => {
+        // extract context from request context
+        let parent_context = global::get_text_map_propagator(|propagator| {
+            propagator.extract(&extract_carrier_from_ttrpc($ctx))
+        });
+
+        // generate tracing span
+        let rpc_span = span!(tracing::Level::INFO, $name, "mod"="rpc.rs", req=?$req);
+
+        // assign parent span from external context
+        rpc_span.set_parent(parent_context);
+        let _enter = rpc_span.enter();
+    };
+}
--- a/src/agent/src/uevent.rs
+++ b/src/agent/src/uevent.rs
@@ -18,6 +18,7 @@ use std::sync::Arc;
 use tokio::select;
 use tokio::sync::watch::Receiver;
 use tokio::sync::Mutex;
+use tracing::instrument;

 // Convenience macro to obtain the scope logger
 macro_rules! sl {
@@ -64,6 +65,7 @@ impl Uevent {
        event
    }

+    #[instrument]
    async fn process_add(&self, logger: &Logger, sandbox: &Arc<Mutex<Sandbox>>) {
        // Special case for memory hot-adds first
        let online_path = format!("{}/{}/online", SYSFS_DIR, &self.devpath);
@@ -95,6 +97,7 @@ impl Uevent {
        }
    }

+    #[instrument]
    async fn process(&self, logger: &Logger, sandbox: &Arc<Mutex<Sandbox>>) {
        if self.action == U_EVENT_ACTION_ADD {
            return self.process_add(logger, sandbox).await;
@@ -103,6 +106,7 @@ impl Uevent {
    }
 }

+#[instrument]
 pub async fn wait_for_uevent(
    sandbox: &Arc<Mutex<Sandbox>>,
    matcher: impl UeventMatcher,
@@ -110,7 +114,7 @@ pub async fn wait_for_uevent(
    let mut sb = sandbox.lock().await;
    for uev in sb.uevent_map.values() {
        if matcher.is_match(uev) {
-            info!(sl!(), "Device {:?} found in pci device map", uev);
+            info!(sl!(), "Device {:?} found in device map", uev);
            return Ok(uev.clone());
        }
    }
@@ -145,6 +149,7 @@ pub async fn wait_for_uevent(
    Ok(uev)
 }

+#[instrument]
 pub async fn watch_uevents(
    sandbox: Arc<Mutex<Sandbox>>,
    mut shutdown: Receiver<bool>,
--- a/src/agent/src/util.rs
+++ b/src/agent/src/util.rs
@@ -11,6 +11,7 @@ use std::os::unix::io::{FromRawFd, RawFd};
 use tokio::io::{AsyncReadExt, AsyncWriteExt};
 use tokio::sync::watch::Receiver;
 use tokio_vsock::{Incoming, VsockListener, VsockStream};
+use tracing::instrument;

 // Size of I/O read buffer
 const BUF_SIZE: usize = 8192;
@@ -56,10 +57,12 @@ where
    Ok(total_bytes)
 }

+#[instrument]
 pub fn get_vsock_incoming(fd: RawFd) -> Incoming {
    unsafe { VsockListener::from_raw_fd(fd).incoming() }
 }

+#[instrument]
 pub async fn get_vsock_stream(fd: RawFd) -> Result<VsockStream> {
    let stream = get_vsock_incoming(fd).next().await.unwrap()?;
    Ok(stream)
--- a/src/agent/src/watcher.rs
+++ b/src/agent/src/watcher.rs
@@ -0,0 +1,771 @@
+// Copyright (c) 2021 Apple Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use std::collections::HashMap;
+use std::path::{Path, PathBuf};
+use std::sync::Arc;
+use std::time::SystemTime;
+
+use tokio::fs;
+use tokio::sync::Mutex;
+use tokio::task;
+use tokio::time::{self, Duration};
+
+use anyhow::{ensure, Context, Result};
+use async_recursion::async_recursion;
+use nix::mount::{umount, MsFlags};
+use slog::{debug, error, Logger};
+
+use crate::mount::BareMount;
+use crate::protocols::agent as protos;
+
+/// The maximum number of file system entries the agent will watch for each mount.
+const MAX_ENTRIES_PER_STORAGE: usize = 8;
+
+/// The maximum size of a watchable mount in bytes.
+const MAX_SIZE_PER_WATCHABLE_MOUNT: u64 = 1024 * 1024;
+
+/// How often to check for modified files.
+const WATCH_INTERVAL_SECS: u64 = 2;
+
+/// Destination path for tmpfs
+const WATCH_MOUNT_POINT_PATH: &str = "/run/kata-containers/shared/containers/watchable/";
+
+/// Represents a single watched storage entry which may have multiple files to watch.
+#[derive(Default, Debug, Clone)]
+struct Storage {
+    /// A mount point without inotify capabilities.
+    source_mount_point: PathBuf,
+
+    /// The target mount point, where the watched files will be copied/mirrored
+    /// when being changed, added or removed. This will be a subdirectory of a tmpfs.
+    target_mount_point: PathBuf,
+
+    /// Flag to indicate that the Storage should be watched. Storage will be watched until
+    /// the source becomes too large, either in number of files (>8) or total size (>1MB).
+    watch: bool,
+
+    /// Files tracked under the source mount point, each mapped to the modification time
+    /// recorded when it was last scanned.
+    watched_files: HashMap<PathBuf, SystemTime>,
+}
+
+impl Drop for Storage {
+    /// Removes the target mount point directory tree when the entry is dropped.
+    ///
+    /// Best effort: the result of the removal is discarded.
+    ///
+    /// NOTE(review): `Storage` derives `Clone`, so dropping any clone runs this removal
+    /// as well — confirm that is intended.
+    fn drop(&mut self) {
+        let _ = std::fs::remove_dir_all(&self.target_mount_point);
+    }
+}
+
+impl Storage {
+    /// Creates a watch entry from a protocol `Storage`: its `source` becomes the source
+    /// mount point and its `mount_point` the target. Watching starts enabled and no files
+    /// are tracked yet.
+    async fn new(storage: protos::Storage) -> Result<Storage> {
+        Ok(Storage {
+            source_mount_point: PathBuf::from(&storage.source),
+            target_mount_point: PathBuf::from(&storage.mount_point),
+            watch: true,
+            watched_files: HashMap::new(),
+        })
+    }
+
+    /// Copies a single source file to its place in the target.
+    ///
+    /// When the source mount point is a regular file, the target mount point itself is used
+    /// as the destination path. Otherwise the destination is computed with
+    /// `make_target_path` and its parent directory is created before copying.
+    async fn update_target(&self, logger: &Logger, source_path: impl AsRef<Path>) -> Result<()> {
+        let source_file_path = source_path.as_ref();
+
+        let dest_file_path = if !self.source_mount_point.is_file() {
+            let mirrored = self.make_target_path(&source_file_path)?;
+
+            if let Some(dir) = mirrored.parent() {
+                debug!(logger, "Creating destination directory: {}", dir.display());
+                fs::create_dir_all(dir)
+                    .await
+                    .with_context(|| format!("Unable to mkdir all for {}", dir.display()))?;
+            }
+
+            mirrored
+        } else {
+            // Simple file to file copy
+            // Assume target mount is a file path
+            self.target_mount_point.clone()
+        };
+
+        debug!(
+            logger,
+            "Copy from {} to {}",
+            source_file_path.display(),
+            dest_file_path.display()
+        );
+        fs::copy(&source_file_path, &dest_file_path)
+            .await
+            .with_context(|| {
+                format!(
+                    "Copy from {} to {} failed",
+                    source_file_path.display(),
+                    dest_file_path.display()
+                )
+            })?;
+
+        Ok(())
+    }
+
+    /// Runs one scan pass over this storage and mirrors the result into the target.
+    ///
+    /// Tracked files that no longer exist are dropped from `watched_files` and removed from
+    /// the target; new or modified files found under the source mount point are copied over.
+    /// Returns how many files were copied.
+    async fn scan(&mut self, logger: &Logger) -> Result<usize> {
+        debug!(logger, "Scanning for changes");
+
+        // Tracked files that have disappeared from the source
+        let vanished: Vec<PathBuf> = self
+            .watched_files
+            .keys()
+            .filter(|tracked| !tracked.exists())
+            .cloned()
+            .collect();
+
+        // Remove deleted files from the tracking list
+        for gone in &vanished {
+            self.watched_files.remove(gone);
+        }
+
+        // Delete from target; a failed removal is deliberately ignored
+        for gone in &vanished {
+            let target = self.make_target_path(gone)?;
+            debug!(logger, "Removing file from mount: {}", target.display());
+            let _ = fs::remove_file(target).await;
+        }
+
+        // Scan new & changed files
+        let source_root = self.source_mount_point.clone();
+        let mut updated_files: Vec<PathBuf> = Vec::new();
+        self.scan_path(logger, source_root.as_path(), &mut updated_files)
+            .await
+            .with_context(|| "Scan path failed")?;
+
+        // Update identified files:
+        for changed in &updated_files {
+            self.update_target(logger, changed).await?;
+        }
+
+        Ok(updated_files.len())
+    }
+
+    /// Recursively scans `path`, recording every regular file found in `watched_files`.
+    ///
+    /// Files that are new, or whose modification time is newer than the recorded one, are
+    /// appended to `update_list`. Returns the accumulated size in bytes of the files found
+    /// under `path`.
+    ///
+    /// Fails if metadata or a directory cannot be read, if more than
+    /// `MAX_ENTRIES_PER_STORAGE` entries are already tracked when a file is visited, or if
+    /// the accumulated size exceeds `MAX_SIZE_PER_WATCHABLE_MOUNT`.
+    #[async_recursion]
+    async fn scan_path(
+        &mut self,
+        logger: &Logger,
+        path: &Path,
+        update_list: &mut Vec<PathBuf>,
+    ) -> Result<u64> {
+        let mut size: u64 = 0;
+        debug!(logger, "Scanning path: {}", path.display());
+
+        if path.is_file() {
+            let metadata = path
+                .metadata()
+                .with_context(|| format!("Failed to query metadata for: {}", path.display()))?;
+
+            let modified = metadata
+                .modified()
+                .with_context(|| format!("Failed to get modified date for: {}", path.display()))?;
+
+            size += metadata.len();
+
+            // The limit is checked against the entries tracked so far, i.e. before `path`
+            // itself is inserted below.
+            ensure!(
+                self.watched_files.len() <= MAX_ENTRIES_PER_STORAGE,
+                "Too many file system entries to watch (must be <= {})",
+                MAX_ENTRIES_PER_STORAGE
+            );
+
+            // Insert will return old entry if any
+            if let Some(old_st) = self.watched_files.insert(path.to_path_buf(), modified) {
+                if modified > old_st {
+                    update_list.push(PathBuf::from(&path))
+                }
+            } else {
+                // Storage just added, copy to target
+                debug!(logger, "New entry: {}", path.display());
+                update_list.push(PathBuf::from(&path))
+            }
+        } else {
+            // Scan dir recursively
+            let mut entries = fs::read_dir(path)
+                .await
+                .with_context(|| format!("Failed to read dir: {}", path.display()))?;
+
+            while let Some(entry) = entries.next_entry().await? {
+                let path = entry.path();
+                let res_size = self
+                    .scan_path(logger, path.as_path(), update_list)
+                    .await
+                    .with_context(|| format!("Unable to scan inner path: {}", path.display()))?;
+                size += res_size;
+            }
+        }
+
+        // This runs at every level of the recursion, on the size accumulated for `path`.
+        ensure!(
+            size <= MAX_SIZE_PER_WATCHABLE_MOUNT,
+            "Watched mount is too large (must be <= {} bytes)",
+            MAX_SIZE_PER_WATCHABLE_MOUNT,
+        );
+
+        Ok(size)
+    }
+
+    /// Maps a path under the source mount point to the matching path under the target
+    /// mount point. Fails if `source_file_path` does not start with the source mount point.
+    fn make_target_path(&self, source_file_path: impl AsRef<Path>) -> Result<PathBuf> {
+        let source = source_file_path.as_ref();
+
+        let relative_path = source
+            .strip_prefix(&self.source_mount_point)
+            .with_context(|| {
+                format!(
+                    "Failed to strip prefix: {} - {}",
+                    source.display(),
+                    self.source_mount_point.display()
+                )
+            })?;
+
+        Ok(self.target_mount_point.join(relative_path))
+    }
+}
+
+/// The list of watched `Storage` entries that `BindWatcher` keeps per container ID.
+#[derive(Default, Debug)]
+struct SandboxStorages(Vec<Storage>);
+
+impl SandboxStorages {
+    /// Registers every storage in `list` as a watched entry, then runs `check` once so
+    /// that the initial copy to the targets happens immediately.
+    async fn add(
+        &mut self,
+        list: impl IntoIterator<Item = protos::Storage>,
+        logger: &Logger,
+    ) -> Result<()> {
+        for item in list {
+            let watched = Storage::new(item)
+                .await
+                .with_context(|| "Failed to add storage")?;
+            self.0.push(watched);
+        }
+
+        // Perform initial copy
+        self.check(logger)
+            .await
+            .with_context(|| "Failed to perform initial check")
+    }
+
+    /// Scans every entry that is still flagged as watched.
+    ///
+    /// When scanning an entry fails, the error is logged and the entry falls back to a
+    /// bind mount: its `watch` flag is cleared, the contents of the target directory are
+    /// removed, and the source is bind-mounted onto the target. Errors raised by that
+    /// fallback (directory cleanup or the mount itself) are returned to the caller and
+    /// end the loop early.
+    async fn check(&mut self, logger: &Logger) -> Result<()> {
+        for entry in self.0.iter_mut().filter(|e| e.watch) {
+            if let Err(e) = entry.scan(logger).await {
+                // If an error was observed, we will stop treating this Storage as being watchable, and
+                // instead clean up the target-mount files on the tmpfs and bind mount the source_mount_point
+                // to target_mount_point.
+                error!(logger, "error observed when watching: {:?}", e);
+                entry.watch = false;
+
+                // Remove destination contents, but not the directory itself, since this is
+                // assumed to be bind-mounted into a container. If source/mount is a file, no need to cleanup
+                if entry.target_mount_point.as_path().is_dir() {
+                    for dir_entry in std::fs::read_dir(entry.target_mount_point.as_path())? {
+                        let dir_entry = dir_entry?;
+                        let path = dir_entry.path();
+                        if dir_entry.file_type()?.is_dir() {
+                            tokio::fs::remove_dir_all(path).await?;
+                        } else {
+                            tokio::fs::remove_file(path).await?;
+                        }
+                    }
+                }
+
+                //  - Create bind mount from source to destination
+                // `to_str().unwrap()` panics if either path is not valid UTF-8.
+                BareMount::new(
+                    entry.source_mount_point.to_str().unwrap(),
+                    entry.target_mount_point.to_str().unwrap(),
+                    "bind",
+                    MsFlags::MS_BIND,
+                    "bind",
+                    logger,
+                )
+                .mount()?;
+            }
+        }
+        Ok(())
+    }
+}
+
+/// Handles watchable mounts. The watcher will manage one or more mounts for one or more containers. For each
+/// mount that is added, the watcher will maintain a list of files to monitor, and periodically checks for new,
+/// removed or changed (modified date) files. When a change is identified, the watcher will either copy the new
+/// or updated file to a target mount point, or remove the removed file from the target mount point.  All WatchableStorage
+/// target mount points are expected to reside within a single tmpfs, whose root is created by the BindWatcher.
+///
+/// This is a temporary workaround to handle config map updates until we get inotify on 9p/virtio-fs.
+/// More context on this:
+/// - https://github.com/kata-containers/runtime/issues/1505
+/// - https://github.com/kata-containers/kata-containers/issues/1879
+#[derive(Debug, Default)]
+pub struct BindWatcher {
+    /// Container ID -> Vec of watched entries
+    sandbox_storages: Arc<Mutex<HashMap<String, SandboxStorages>>>,
+    watch_thread: Option<task::JoinHandle<()>>,
+}
+
+impl Drop for BindWatcher {
+    fn drop(&mut self) {
+        self.cleanup();
+    }
+}
+
+impl BindWatcher {
+    /// Creates a watcher with no containers registered and no background task running.
+    pub fn new() -> BindWatcher {
+        Default::default()
+    }
+
+    /// Registers `mounts` for container `id`, starting the watcher on first use.
+    pub async fn add_container(
+        &mut self,
+        id: String,
+        mounts: impl IntoIterator<Item = protos::Storage>,
+        logger: &Logger,
+    ) -> Result<()> {
+        if self.watch_thread.is_none() {
+            // Virtio-fs shared path is RO by default, so we back the target-mounts by tmpfs.
+            self.mount(logger).await?;
+
+            // Spawn background task to monitor changes
+            let storages = Arc::clone(&self.sandbox_storages);
+            let handle = Self::spawn_watcher(logger.clone(), storages, WATCH_INTERVAL_SECS);
+            self.watch_thread = Some(handle);
+        }
+
+        let mut containers = self.sandbox_storages.lock().await;
+        containers
+            .entry(id)
+            .or_insert_with(SandboxStorages::default)
+            .add(mounts, logger)
+            .await
+            .with_context(|| "Failed to add container")?;
+        Ok(())
+    }
+
+    /// Drops all storages registered for container `id`, so they are no longer scanned.
+    pub async fn remove_container(&self, id: &str) {
+        self.sandbox_storages.lock().await.remove(id);
+    }
+
+    /// Spawns the task that runs `check` on every container each `interval_secs` seconds.
+    fn spawn_watcher(
+        logger: Logger,
+        sandbox_storages: Arc<Mutex<HashMap<String, SandboxStorages>>>,
+        interval_secs: u64,
+    ) -> tokio::task::JoinHandle<()> {
+        tokio::spawn(async move {
+            let mut ticker = time::interval(Duration::from_secs(interval_secs));
+            loop {
+                ticker.tick().await;
+
+                debug!(&logger, "Looking for changed files");
+                for storages in sandbox_storages.lock().await.values_mut() {
+                    if let Err(err) = storages.check(&logger).await {
+                        // We don't fail background loop, but rather log error instead.
+                        error!(logger, "Check failed: {}", err);
+                    }
+                }
+            }
+        })
+    }
+
+    /// Creates `WATCH_MOUNT_POINT_PATH` if needed and mounts a tmpfs on it.
+    async fn mount(&self, logger: &Logger) -> Result<()> {
+        fs::create_dir_all(WATCH_MOUNT_POINT_PATH).await?;
+
+        BareMount::new(
+            "tmpfs",
+            WATCH_MOUNT_POINT_PATH,
+            "tmpfs",
+            MsFlags::empty(),
+            "",
+            logger,
+        )
+        .mount()?;
+
+        Ok(())
+    }
+
+    /// Aborts the background task, if any, and unmounts the tmpfs (best effort).
+    fn cleanup(&mut self) {
+        if let Some(handle) = self.watch_thread.take() {
+            handle.abort();
+        }
+
+        let _ = umount(WATCH_MOUNT_POINT_PATH);
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::mount::is_mounted;
+    use crate::skip_if_not_root;
+    use std::fs;
+    use std::thread;
+
+    #[tokio::test]
+    async fn watch_entries() {
+        skip_if_not_root!();
+
+        // If there's an error with an entry, let's make sure it is removed, and that the
+        // mount-destination behaves like a standard bind-mount.
+
+        // Create an entries vector with three storage objects: storage, storage1, storage2.
+        // We'll first verify each are evaluated correctly, then increase the first entry's contents
+        // so it fails mount size check (>1MB) (test handling for failure on mount that is a directory).
+        // We'll then similarly cause failure with storage2 (test handling for failure on mount that is
+        // a single file). We'll then verify that storage1 continues to be watchable.
+        let source_dir = tempfile::tempdir().unwrap();
+        let dest_dir = tempfile::tempdir().unwrap();
+
+        let storage = protos::Storage {
+            source: source_dir.path().display().to_string(),
+            mount_point: dest_dir.path().display().to_string(),
+            ..Default::default()
+        };
+        std::fs::File::create(source_dir.path().join("small.txt"))
+            .unwrap()
+            .set_len(10)
+            .unwrap();
+
+        let source_dir1 = tempfile::tempdir().unwrap();
+        let dest_dir1 = tempfile::tempdir().unwrap();
+        let storage1 = protos::Storage {
+            source: source_dir1.path().display().to_string(),
+            mount_point: dest_dir1.path().display().to_string(),
+            ..Default::default()
+        };
+        std::fs::File::create(source_dir1.path().join("large.txt"))
+            .unwrap()
+            .set_len(MAX_SIZE_PER_WATCHABLE_MOUNT)
+            .unwrap();
+
+        // And finally, create a single file mount:
+        let source_dir2 = tempfile::tempdir().unwrap();
+        let dest_dir2 = tempfile::tempdir().unwrap();
+
+        let source_path = source_dir2.path().join("mounted-file");
+        let dest_path = dest_dir2.path().join("mounted-file");
+        let mounted_file = std::fs::File::create(&source_path).unwrap();
+        mounted_file.set_len(MAX_SIZE_PER_WATCHABLE_MOUNT).unwrap();
+
+        let storage2 = protos::Storage {
+            source: source_path.display().to_string(),
+            mount_point: dest_path.display().to_string(),
+            ..Default::default()
+        };
+
+        let logger = slog::Logger::root(slog::Discard, o!());
+
+        let mut entries = SandboxStorages {
+            ..Default::default()
+        };
+
+        entries
+            .add(std::iter::once(storage), &logger)
+            .await
+            .unwrap();
+
+        entries
+            .add(std::iter::once(storage1), &logger)
+            .await
+            .unwrap();
+
+        entries
+            .add(std::iter::once(storage2), &logger)
+            .await
+            .unwrap();
+
+        // Check that there are three entries, and that the
+        // destination (mount point) matches what we expect for
+        // the first:
+        assert!(entries.check(&logger).await.is_ok());
+        assert_eq!(entries.0.len(), 3);
+        assert_eq!(std::fs::read_dir(dest_dir.path()).unwrap().count(), 1);
+
+        // Add a second file which will trip file size check:
+        std::fs::File::create(source_dir.path().join("big.txt"))
+            .unwrap()
+            .set_len(MAX_SIZE_PER_WATCHABLE_MOUNT)
+            .unwrap();
+
+        assert!(entries.check(&logger).await.is_ok());
+
+        // Verify Storage 0 is no longer going to be watched:
+        assert!(!entries.0[0].watch);
+
+        // Verify that the directory has two entries:
+        assert_eq!(std::fs::read_dir(dest_dir.path()).unwrap().count(), 2);
+
+        // Verify that the directory is a bind mount. Add an entry without calling check,
+        // and verify that the destination directory includes these files in the case of
+        // mount that is no longer being watched (storage), but not within the still-being
+        // watched (storage1):
+        fs::write(source_dir.path().join("1.txt"), "updated").unwrap();
+        fs::write(source_dir1.path().join("2.txt"), "updated").unwrap();
+
+        assert_eq!(std::fs::read_dir(source_dir.path()).unwrap().count(), 3);
+        assert_eq!(std::fs::read_dir(dest_dir.path()).unwrap().count(), 3);
+        assert_eq!(std::fs::read_dir(source_dir1.path()).unwrap().count(), 2);
+        assert_eq!(std::fs::read_dir(dest_dir1.path()).unwrap().count(), 1);
+
+        // Verify that storage1 is still working. After running check, we expect that the number
+        // of entries to increment
+        assert!(entries.check(&logger).await.is_ok());
+        assert_eq!(std::fs::read_dir(dest_dir1.path()).unwrap().count(), 2);
+
+        // Break storage2 by increasing the file size
+        mounted_file
+            .set_len(MAX_SIZE_PER_WATCHABLE_MOUNT + 10)
+            .unwrap();
+        assert!(entries.check(&logger).await.is_ok());
+        // Verify Storage 2 is no longer going to be watched:
+        assert!(!entries.0[2].watch);
+
+        // Verify bind mount is working -- write to the source and read it back via the destination:
+        fs::write(&source_path, "updated").unwrap();
+        assert_eq!(fs::read_to_string(&dest_path).unwrap(), "updated");
+    }
+
+    #[tokio::test]
+    async fn watch_directory_too_large() {
+        let source_dir = tempfile::tempdir().unwrap();
+        let dest_dir = tempfile::tempdir().unwrap();
+        let mut entry = Storage::new(protos::Storage {
+            source: source_dir.path().display().to_string(),
+            mount_point: dest_dir.path().display().to_string(),
+            ..Default::default()
+        })
+        .await
+        .unwrap();
+
+        let logger = slog::Logger::root(slog::Discard, o!());
+
+        // Create a file that is too large:
+        std::fs::File::create(source_dir.path().join("big.txt"))
+            .unwrap()
+            .set_len(MAX_SIZE_PER_WATCHABLE_MOUNT + 1)
+            .unwrap();
+        thread::sleep(Duration::from_secs(1));
+        assert!(entry.scan(&logger).await.is_err());
+        fs::remove_file(source_dir.path().join("big.txt")).unwrap();
+
+        std::fs::File::create(source_dir.path().join("big.txt"))
+            .unwrap()
+            .set_len(MAX_SIZE_PER_WATCHABLE_MOUNT - 1)
+            .unwrap();
+        thread::sleep(Duration::from_secs(1));
+        assert!(entry.scan(&logger).await.is_ok());
+
+        std::fs::File::create(source_dir.path().join("too-big.txt"))
+            .unwrap()
+            .set_len(2)
+            .unwrap();
+        thread::sleep(Duration::from_secs(1));
+        assert!(entry.scan(&logger).await.is_err());
+
+        fs::remove_file(source_dir.path().join("big.txt")).unwrap();
+        fs::remove_file(source_dir.path().join("too-big.txt")).unwrap();
+
+        // Up to eight files should be okay:
+        fs::write(source_dir.path().join("1.txt"), "updated").unwrap();
+        fs::write(source_dir.path().join("2.txt"), "updated").unwrap();
+        fs::write(source_dir.path().join("3.txt"), "updated").unwrap();
+        fs::write(source_dir.path().join("4.txt"), "updated").unwrap();
+        fs::write(source_dir.path().join("5.txt"), "updated").unwrap();
+        fs::write(source_dir.path().join("6.txt"), "updated").unwrap();
+        fs::write(source_dir.path().join("7.txt"), "updated").unwrap();
+        fs::write(source_dir.path().join("8.txt"), "updated").unwrap();
+        assert_eq!(entry.scan(&logger).await.unwrap(), 8);
+
+        // Nine files is too many:
+        fs::write(source_dir.path().join("9.txt"), "updated").unwrap();
+        thread::sleep(Duration::from_secs(1));
+        assert!(entry.scan(&logger).await.is_err());
+    }
+
+    #[tokio::test]
+    async fn watch_directory() {
+        // Prepare source directory:
+        // ./tmp/1.txt
+        // ./tmp/A/B/2.txt
+        let source_dir = tempfile::tempdir().unwrap();
+        fs::write(source_dir.path().join("1.txt"), "one").unwrap();
+        fs::create_dir_all(source_dir.path().join("A/B")).unwrap();
+        fs::write(source_dir.path().join("A/B/1.txt"), "two").unwrap();
+
+        let dest_dir = tempfile::tempdir().unwrap();
+
+        let mut entry = Storage::new(protos::Storage {
+            source: source_dir.path().display().to_string(),
+            mount_point: dest_dir.path().display().to_string(),
+            ..Default::default()
+        })
+        .await
+        .unwrap();
+
+        let logger = slog::Logger::root(slog::Discard, o!());
+
+        assert_eq!(entry.scan(&logger).await.unwrap(), 2);
+
+        // Should copy no files since nothing is changed since last check
+        assert_eq!(entry.scan(&logger).await.unwrap(), 0);
+
+        // Should copy 1 file
+        thread::sleep(Duration::from_secs(1));
+        fs::write(source_dir.path().join("A/B/1.txt"), "updated").unwrap();
+        assert_eq!(entry.scan(&logger).await.unwrap(), 1);
+        assert_eq!(
+            fs::read_to_string(dest_dir.path().join("A/B/1.txt")).unwrap(),
+            "updated"
+        );
+
+        // Should copy no new files after copy happened
+        assert_eq!(entry.scan(&logger).await.unwrap(), 0);
+
+        // Update another file
+        fs::write(source_dir.path().join("1.txt"), "updated").unwrap();
+        assert_eq!(entry.scan(&logger).await.unwrap(), 1);
+    }
+
+    #[tokio::test]
+    async fn watch_file() {
+        let source_dir = tempfile::tempdir().unwrap();
+        let source_file = source_dir.path().join("1.txt");
+
+        fs::write(&source_file, "one").unwrap();
+
+        let dest_dir = tempfile::tempdir().unwrap();
+        let dest_file = dest_dir.path().join("1.txt");
+
+        let mut entry = Storage::new(protos::Storage {
+            source: source_file.display().to_string(),
+            mount_point: dest_file.display().to_string(),
+            ..Default::default()
+        })
+        .await
+        .unwrap();
+
+        let logger = slog::Logger::root(slog::Discard, o!());
+
+        assert_eq!(entry.scan(&logger).await.unwrap(), 1);
+
+        thread::sleep(Duration::from_secs(1));
+        fs::write(&source_file, "two").unwrap();
+        assert_eq!(entry.scan(&logger).await.unwrap(), 1);
+        assert_eq!(fs::read_to_string(&dest_file).unwrap(), "two");
+        assert_eq!(entry.scan(&logger).await.unwrap(), 0);
+    }
+
+    #[tokio::test]
+    async fn delete_file() {
+        let source_dir = tempfile::tempdir().unwrap();
+        let source_file = source_dir.path().join("1.txt");
+        fs::write(&source_file, "one").unwrap();
+
+        let dest_dir = tempfile::tempdir().unwrap();
+        let target_file = dest_dir.path().join("1.txt");
+
+        let mut entry = Storage::new(protos::Storage {
+            source: source_dir.path().display().to_string(),
+            mount_point: dest_dir.path().display().to_string(),
+            ..Default::default()
+        })
+        .await
+        .unwrap();
+
+        let logger = slog::Logger::root(slog::Discard, o!());
+
+        assert_eq!(entry.scan(&logger).await.unwrap(), 1);
+        assert_eq!(entry.watched_files.len(), 1);
+
+        assert!(target_file.exists());
+        assert!(entry.watched_files.contains_key(&source_file));
+
+        // Remove source file
+        fs::remove_file(&source_file).unwrap();
+
+        assert_eq!(entry.scan(&logger).await.unwrap(), 0);
+
+        assert_eq!(entry.watched_files.len(), 0);
+        assert!(!target_file.exists());
+    }
+
+    #[tokio::test]
+    async fn make_target_path() {
+        let source_dir = tempfile::tempdir().unwrap();
+        let target_dir = tempfile::tempdir().unwrap();
+
+        let source_dir = source_dir.path();
+        let target_dir = target_dir.path();
+
+        let entry = Storage::new(protos::Storage {
+            source: source_dir.display().to_string(),
+            mount_point: target_dir.display().to_string(),
+            ..Default::default()
+        })
+        .await
+        .unwrap();
+
+        assert_eq!(
+            entry.make_target_path(source_dir.join("1.txt")).unwrap(),
+            target_dir.join("1.txt")
+        );
+
+        assert_eq!(
+            entry
+                .make_target_path(source_dir.join("a/b/2.txt"))
+                .unwrap(),
+            target_dir.join("a/b/2.txt")
+        );
+    }
+
+    #[tokio::test]
+    async fn create_tmpfs() {
+        skip_if_not_root!();
+
+        let logger = slog::Logger::root(slog::Discard, o!());
+        let mut watcher = BindWatcher::default();
+
+        watcher.mount(&logger).await.unwrap();
+        assert!(is_mounted(WATCH_MOUNT_POINT_PATH).unwrap());
+
+        watcher.cleanup();
+        assert!(!is_mounted(WATCH_MOUNT_POINT_PATH).unwrap());
+    }
+
+    #[tokio::test]
+    async fn spawn_thread() {
+        skip_if_not_root!();
+
+        let source_dir = tempfile::tempdir().unwrap();
+        fs::write(source_dir.path().join("1.txt"), "one").unwrap();
+
+        let dest_dir = tempfile::tempdir().unwrap();
+
+        let storage = protos::Storage {
+            source: source_dir.path().display().to_string(),
+            mount_point: dest_dir.path().display().to_string(),
+            ..Default::default()
+        };
+
+        let logger = slog::Logger::root(slog::Discard, o!());
+        let mut watcher = BindWatcher::default();
+
+        watcher
+            .add_container("test".into(), std::iter::once(storage), &logger)
+            .await
+            .unwrap();
+
+        thread::sleep(Duration::from_secs(WATCH_INTERVAL_SECS));
+
+        let out = fs::read_to_string(dest_dir.path().join("1.txt")).unwrap();
+        assert_eq!(out, "one");
+    }
+}
--- a/src/agent/vsock-exporter/Cargo.toml
+++ b/src/agent/vsock-exporter/Cargo.toml
@@ -0,0 +1,19 @@
+[package]
+name = "vsock-exporter"
+version = "0.1.0"
+authors = ["James O. D. Hunt <james.o.hunt@intel.com>"]
+edition = "2018"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+nix = "0.21.0"
+libc = "0.2.94"
+thiserror = "1.0.24"
+opentelemetry = { version = "0.14.0", features=["serialize"] }
+serde = { version = "1.0.126", features = ["derive"] }
+vsock = "0.2.3"
+bincode = "1.3.3"
+byteorder = "1.4.3"
+slog = { version = "2.5.2", features = ["dynamic-keys", "max_level_trace", "release_max_level_info"] }
+async-trait = "0.1.50"
--- a/src/agent/vsock-exporter/src/lib.rs
+++ b/src/agent/vsock-exporter/src/lib.rs
@@ -0,0 +1,196 @@
+// Copyright (c) 2020-2021 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// The VSOCK Exporter sends trace spans "out" to the forwarder running on the
+// host (which then forwards them on to a trace collector). The data is sent
+// via a VSOCK socket that the forwarder process is listening on. To allow the
+// forwarder to know how much data to read for each trace span the simplest
+// protocol is employed which uses a header packet and the payload (trace
+// span) data. The header packet is a simple count of the number of bytes in the
+// payload, which allows the forwarder to know how many bytes it must read to
+// consume the trace span. The payload is a serialised version of the trace span.
+
+use async_trait::async_trait;
+use byteorder::{ByteOrder, NetworkEndian};
+use opentelemetry::sdk::export::trace::{ExportResult, SpanData, SpanExporter};
+use opentelemetry::sdk::export::ExportError;
+use slog::{error, o, Logger};
+use std::io::{ErrorKind, Write};
+use std::net::Shutdown;
+use std::sync::Mutex;
+use vsock::{SockAddr, VsockStream};
+
+const ANY_CID: &str = "any";
+
+// Must match the value of the variable of the same name in the trace forwarder.
+const HEADER_SIZE_BYTES: u64 = std::mem::size_of::<u64>() as u64;
+
+// By default, the VSOCK exporter should talk "out" to the host where the
+// forwarder is running.
+const DEFAULT_CID: u32 = libc::VMADDR_CID_HOST;
+
+// The VSOCK port the forwarder listens on by default
+const DEFAULT_PORT: u32 = 10240;
+
+#[derive(Debug)]
+pub struct Exporter {
+    port: u32,
+    cid: u32,
+    conn: Mutex<VsockStream>,
+    logger: Logger,
+}
+
+impl Exporter {
+    /// Create a new exporter builder.
+    pub fn builder() -> Builder {
+        Builder::default()
+    }
+}
+
+#[derive(thiserror::Error, Debug)]
+pub enum Error {
+    #[error("connection error: {0}")]
+    ConnectionError(String),
+    #[error("serialisation error: {0}")]
+    SerialisationError(#[from] bincode::Error),
+    #[error("I/O error: {0}")]
+    IOError(#[from] std::io::Error),
+}
+
+impl ExportError for Error {
+    fn exporter_name(&self) -> &'static str {
+        "vsock-exporter"
+    }
+}
+
+fn make_io_error(desc: String) -> std::io::Error {
+    std::io::Error::new(ErrorKind::Other, desc)
+}
+
+// Send a trace span to the forwarder running on the host.
+//
+// The span is serialised with bincode and sent as a fixed-size header (the
+// payload length as a network-endian u64) followed by the payload itself.
+fn write_span(writer: &mut dyn Write, span: &SpanData) -> Result<(), std::io::Error> {
+    let payload: Vec<u8> =
+        bincode::serialize(&span).map_err(|e| make_io_error(e.to_string()))?;
+
+    // Encode the header
+    let mut header = [0u8; HEADER_SIZE_BYTES as usize];
+    NetworkEndian::write_u64(&mut header, payload.len() as u64);
+
+    // Send the header
+    writer
+        .write_all(&header)
+        .map_err(|e| make_io_error(format!("failed to write trace header: {:?}", e)))?;
+
+    // Send the payload
+    writer
+        .write_all(&payload)
+        .map_err(|e| make_io_error(format!("failed to write trace payload: {:?}", e)))
+}
+
+// Write every span of the batch in order, stopping at the first I/O failure.
+fn handle_batch(writer: &mut dyn Write, batch: Vec<SpanData>) -> ExportResult {
+    for span in batch.iter() {
+        write_span(writer, span).map_err(Error::IOError)?;
+    }
+    Ok(())
+}
+
+#[async_trait]
+impl SpanExporter for Exporter {
+    /// Writes every span in `batch` to the VSOCK connection, in order.
+    async fn export(&mut self, batch: Vec<SpanData>) -> ExportResult {
+        match self.conn.lock() {
+            Ok(mut c) => handle_batch(&mut *c, batch),
+            Err(e) => {
+                error!(self.logger, "failed to obtain connection";
+                        "error" => format!("{}", e));
+                Err(Error::ConnectionError(e.to_string()).into())
+            }
+        }
+    }
+
+    /// Shuts down the write half of the connection; failures are logged, not raised.
+    fn shutdown(&mut self) {
+        let conn = match self.conn.lock() {
+            Ok(conn) => conn,
+            Err(e) => {
+                error!(self.logger, "failed to obtain connection";
+                        "error" => format!("{}", e));
+                return;
+            }
+        };
+        if let Err(e) = conn.shutdown(Shutdown::Write) {
+            error!(self.logger, "failed to shutdown VSOCK connection";
+                    "error" => format!("{}", e));
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct Builder {
+    port: u32,
+    cid: u32,
+    logger: Logger,
+}
+
+impl Default for Builder {
+    /// Defaults to the host CID and the forwarder's default port, with a
+    /// logger that discards all output.
+    fn default() -> Self {
+        Builder {
+            cid: DEFAULT_CID,
+            port: DEFAULT_PORT,
+            logger: Logger::root(slog::Discard, o!()),
+        }
+    }
+}
+
+impl Builder {
+    /// Sets the VSOCK context ID to connect to.
+    pub fn with_cid(self, cid: u32) -> Self {
+        Builder { cid, ..self }
+    }
+
+    /// Sets the VSOCK port to connect to.
+    pub fn with_port(self, port: u32) -> Self {
+        Builder { port, ..self }
+    }
+
+    /// Uses a child of `logger` for the exporter's own messages.
+    pub fn with_logger(self, logger: &Logger) -> Self {
+        Builder {
+            logger: logger.new(o!()),
+            ..self
+        }
+    }
+
+    /// Connects to the configured VSOCK address; panics if the connection fails.
+    pub fn init(self) -> Exporter {
+        let Builder { port, cid, logger } = self;
+        let cid_str = if cid == libc::VMADDR_CID_ANY {
+            ANY_CID.to_string()
+        } else {
+            cid.to_string()
+        };
+
+        let msg = format!(
+            "failed to connect to VSOCK server (port: {}, cid: {}) - {}",
+            port, cid_str, "ensure trace forwarder is running on host"
+        );
+
+        let sock_addr = SockAddr::new_vsock(cid, port);
+        let conn = VsockStream::connect(&sock_addr).expect(&msg);
+
+        Exporter {
+            port,
+            cid,
+            conn: Mutex::new(conn),
+            logger: logger.new(o!("cid" => cid_str, "port" => port)),
+        }
+    }
+}
--- a/src/runtime/Makefile
+++ b/src/runtime/Makefile
@@ -582,7 +582,8 @@ $(MONITOR_OUTPUT): $(SOURCES) $(GENERATED_FILES) $(MAKEFILE_LIST) .git-commit
 	install \
 	show-header \
 	show-summary \
-	show-variables
+	show-variables \
+	vendor

 $(TARGET).coverage: $(SOURCES) $(GENERATED_FILES) $(MAKEFILE_LIST)
 	$(QUIET_TEST)go test -o $@ -covermode count
@@ -647,6 +648,14 @@ install-scripts: $(SCRIPTS)
 install-completions:
 	$(QUIET_INST)install --mode 0644 -D  $(BASH_COMPLETIONS) $(DESTDIR)/$(BASH_COMPLETIONSDIR)/$(notdir $(BASH_COMPLETIONS));

+handle_vendor:
+	go mod tidy
+	go mod vendor
+	go mod verify
+
+vendor: handle_vendor
+	./hack/tree_status.sh
+
 clean:
 	$(QUIET_CLEAN)rm -f \
 		$(CONFIGS) \
--- a/src/runtime/README.md
+++ b/src/runtime/README.md
@@ -1,7 +1,4 @@
-[![Build Status](https://travis-ci.org/kata-containers/kata-containers.svg?branch=master)](https://travis-ci.org/kata-containers/kata-containers)
-[![Build Status](http://jenkins.katacontainers.io/job/kata-containers-runtime-ubuntu-18-04-master/badge/icon)](http://jenkins.katacontainers.io/job/kata-containers-runtime-ubuntu-18-04-master/)
 [![Go Report Card](https://goreportcard.com/badge/github.com/kata-containers/kata-containers)](https://goreportcard.com/report/github.com/kata-containers/kata-containers)
-[![GoDoc](https://godoc.org/github.com/kata-containers/runtime?status.svg)](https://godoc.org/github.com/kata-containers/runtime)

 # Runtime

@@ -84,7 +81,7 @@ $ kata-runtime check

 [![Get it from the Snap Store](https://snapcraft.io/static/images/badges/en/snap-store-black.svg)](https://snapcraft.io/kata-containers)

-See the [installation guides](https://github.com/kata-containers/documentation/tree/master/install/README.md)
+See the [installation guides](https://github.com/kata-containers/kata-containers/blob/main/docs/install/README.md)
 available for various operating systems.

 ## Quick start for developers
--- a/src/runtime/arch/amd64-options.mk
+++ b/src/runtime/arch/amd64-options.mk
@@ -5,7 +5,7 @@

 # Intel x86-64 settings

-MACHINETYPE := pc
+MACHINETYPE := q35
 KERNELPARAMS :=
 MACHINEACCELERATORS :=
 CPUFEATURES := pmu=off
--- a/src/runtime/arch/s390x-options.mk
+++ b/src/runtime/arch/s390x-options.mk
@@ -11,3 +11,10 @@ MACHINEACCELERATORS :=
 CPUFEATURES :=

 QEMUCMD := qemu-system-s390x
+
+# See https://github.com/kata-containers/osbuilder/issues/217
+NEEDS_CC_SETTING = $(shell grep -E "\<(fedora|suse)\>" /etc/os-release 2> /dev/null)
+ifneq (,$(NEEDS_CC_SETTING))
+	CC := gcc
+	export CC
+endif
--- a/src/runtime/cli/config/configuration-acrn.toml.in
+++ b/src/runtime/cli/config/configuration-acrn.toml.in
@@ -150,6 +150,10 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_ACRN@"

 #debug_console_enabled = true

+# Agent connection dialing timeout value in seconds
+# (default: 30)
+#dial_timeout = 30
+
 [netmon]
 # If enabled, the network monitoring process gets started when the
 # sandbox is created. This allows for the detection of some additional
--- a/src/runtime/cli/config/configuration-clh.toml.in
+++ b/src/runtime/cli/config/configuration-clh.toml.in
@@ -165,6 +165,10 @@ block_device_driver = "virtio-blk"

 #debug_console_enabled = true

+# Agent connection dialing timeout value in seconds
+# (default: 30)
+#dial_timeout = 30
+
 [netmon]
 # If enabled, the network monitoring process gets started when the
 # sandbox is created. This allows for the detection of some additional
--- a/src/runtime/cli/config/configuration-fc.toml.in
+++ b/src/runtime/cli/config/configuration-fc.toml.in
@@ -178,7 +178,7 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_FC@"
 # VFIO devices are hotplugged on a bridge by default.
 # Enable hotplugging on root bus. This may be required for devices with
 # a large PCI bar, as this is a current limitation with hotplugging on
-# a bridge. This value is valid for "pc" machine type.
+# a bridge.
 # Default false
 #hotplug_vfio_on_root_bus = true

@@ -287,6 +287,10 @@ kernel_modules=[]

 #debug_console_enabled = true

+# Agent connection dialing timeout value in seconds
+# (default: 30)
+#dial_timeout = 30
+
 [netmon]
 # If enabled, the network monitoring process gets started when the
 # sandbox is created. This allows for the detection of some additional
--- a/src/runtime/cli/config/configuration-qemu.toml.in
+++ b/src/runtime/cli/config/configuration-qemu.toml.in
@@ -16,6 +16,14 @@ kernel = "@KERNELPATH@"
 image = "@IMAGEPATH@"
 machine_type = "@MACHINETYPE@"

+# Enable confidential guest support.
+# Toggling that setting may trigger different hardware features, ranging
+# from memory encryption to both memory and CPU-state encryption and integrity.
+# The Kata Containers runtime dynamically detects the available feature set and
+# aims at enabling the largest possible one.
+# Default false
+# confidential_guest = true
+
 # List of valid annotation names for the hypervisor
 # Each member of the list is a regular expression, which is the base name
 # of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path"
@@ -269,7 +277,7 @@ pflashes = []
 # VFIO devices are hotplugged on a bridge by default.
 # Enable hotplugging on root bus. This may be required for devices with
 # a large PCI bar, as this is a current limitation with hotplugging on
-# a bridge. This value is valid for "pc" machine type.
+# a bridge.
 # Default false
 #hotplug_vfio_on_root_bus = true

@@ -437,6 +445,10 @@ kernel_modules=[]

 #debug_console_enabled = true

+# Agent connection dialing timeout value in seconds
+# (default: 30)
+#dial_timeout = 30
+
 [netmon]
 # If enabled, the network monitoring process gets started when the
 # sandbox is created. This allows for the detection of some additional
@@ -532,3 +544,30 @@ experimental=@DEFAULTEXPFEATURES@
 # If enabled, user can run pprof tools with shim v2 process through kata-monitor.
 # (default: false)
 # enable_pprof = true
+
+# WARNING: All the options in the following section have not been implemented yet.
+# This section was added as a placeholder. DO NOT USE IT!
+[image]
+# Container image service.
+#
+# Offload the CRI image management service to the Kata agent.
+# (default: false)
+#service_offload = true
+
+# Container image decryption keys provisioning.
+# Applies only if service_offload is true.
+# Keys can be provisioned locally (e.g. through a special command or
+# a local file) or remotely (usually after the guest is remotely attested).
+# The provision setting is a complete URL that lets the Kata agent decide
+# which method to use in order to fetch the keys.
+#
+# Keys can be stored in a local file, in a measured and attested initrd:
+#provision=data:///local/key/file
+#
+# Keys could be fetched through a special command or binary from the
+# initrd (guest) image, e.g. a firmware call:
+#provision=file:///path/to/bin/fetcher/in/guest
+#
+# Keys can be remotely provisioned. The Kata agent fetches them from e.g.
+# an HTTPS URL:
+#provision=https://my-key-broker.foo/tenant/<tenant-id>
--- a/src/runtime/cli/kata-check.go
+++ b/src/runtime/cli/kata-check.go
@@ -25,6 +25,7 @@ import (
 	"strings"
 	"syscall"

+	"github.com/containerd/cgroups"
 	"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
 	vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/oci"
@@ -389,13 +390,6 @@ EXAMPLES:
 		if verbose {
 			kataLog.Logger.SetLevel(logrus.InfoLevel)
 		}
-		ctx, err := cliContextToContext(context)
-		if err != nil {
-			return err
-		}
-
-		span, _ := katautils.Trace(ctx, "check")
-		defer span.End()

 		if !context.Bool("no-network-checks") && os.Getenv(noNetworkEnvVar) == "" {
 			cmd := RelCmdCheck
@@ -407,8 +401,7 @@ EXAMPLES:
 			if os.Geteuid() == 0 {
 				kataLog.Warn("Not running network checks as super user")
 			} else {
-
-				err = HandleReleaseVersions(cmd, version, context.Bool("include-all-releases"))
+				err := HandleReleaseVersions(cmd, version, context.Bool("include-all-releases"))
 				if err != nil {
 					return err
 				}
@@ -424,7 +417,12 @@ EXAMPLES:
 			return errors.New("check: cannot determine runtime config")
 		}

-		err = setCPUtype(runtimeConfig.HypervisorType)
+		// Check whether cgroups can work, using the same logic as when creating containers.
+		if _, err := vc.V1Constraints(); err != nil && err == cgroups.ErrMountPointNotExist && !runtimeConfig.SandboxCgroupOnly {
+			return fmt.Errorf("Cgroup v2 requires the following configuration: `sandbox_cgroup_only=true`.")
+		}
+
+		err := setCPUtype(runtimeConfig.HypervisorType)
 		if err != nil {
 			return err
 		}
@@ -437,7 +435,6 @@ EXAMPLES:
 		}

 		err = hostIsVMContainerCapable(details)
-
 		if err != nil {
 			return err
 		}
--- a/src/runtime/cli/kata-env.go
+++ b/src/runtime/cli/kata-env.go
@@ -13,7 +13,6 @@ import (
 	"strings"

 	"github.com/BurntSushi/toml"
-	"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
 	"github.com/kata-containers/kata-containers/src/runtime/pkg/utils"
 	vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
 	exp "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/experimental"
@@ -266,9 +265,9 @@ func getMemoryInfo() MemoryInfo {
 	}

 	return MemoryInfo{
-		Total:     mi.MemTotal,
-		Free:      mi.MemFree,
-		Available: mi.MemAvailable,
+		Total:     *mi.MemTotal,
+		Free:      *mi.MemFree,
+		Available: *mi.MemAvailable,
 	}
 }

@@ -448,14 +447,6 @@ var kataEnvCLICommand = cli.Command{
 		},
 	},
 	Action: func(context *cli.Context) error {
-		ctx, err := cliContextToContext(context)
-		if err != nil {
-			return err
-		}
-
-		span, _ := katautils.Trace(ctx, "kata-env")
-		defer span.End()
-
 		return handleSettings(defaultOutputFile, context)
 	},
 }
--- a/src/runtime/cli/kata-exec.go
+++ b/src/runtime/cli/kata-exec.go
@@ -14,7 +14,6 @@ import (
 	"net/http"
 	"net/url"
 	"os"
-	"path/filepath"
 	"strings"

 	"sync"
@@ -26,7 +25,6 @@ import (
 	clientUtils "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/client"
 	"github.com/pkg/errors"
 	"github.com/urfave/cli"
-	"go.opentelemetry.io/otel/label"
 )

 const (
@@ -38,10 +36,8 @@ const (

 	subCommandName = "exec"
 	// command-line parameters name
-	paramRuntimeNamespace                    = "runtime-namespace"
 	paramDebugConsolePort                    = "kata-debug-port"
 	defaultKernelParamDebugConsoleVPortValue = 1026
-	defaultRuntimeNamespace                  = "k8s.io"
 )

 var (
@@ -57,34 +53,16 @@ var kataExecCLICommand = cli.Command{
 	Name:  subCommandName,
 	Usage: "Enter into guest by debug console",
 	Flags: []cli.Flag{
-		cli.StringFlag{
-			Name:  paramRuntimeNamespace,
-			Usage: "Namespace that containerd or CRI-O are using for containers. (Default: k8s.io, only works for containerd)",
-		},
 		cli.Uint64Flag{
 			Name:  paramDebugConsolePort,
 			Usage: "Port that debug console is listening on. (Default: 1026)",
 		},
 	},
 	Action: func(context *cli.Context) error {
-		ctx, err := cliContextToContext(context)
-		if err != nil {
-			return err
-		}
-		span, _ := katautils.Trace(ctx, subCommandName)
-		defer span.End()
-
-		namespace := context.String(paramRuntimeNamespace)
-		if namespace == "" {
-			namespace = defaultRuntimeNamespace
-		}
-		span.SetAttributes(label.Key("namespace").String(namespace))
-
 		port := context.Uint64(paramDebugConsolePort)
 		if port == 0 {
 			port = defaultKernelParamDebugConsoleVPortValue
 		}
-		span.SetAttributes(label.Key("port").Uint64(port))

 		sandboxID := context.Args().Get(0)

@@ -92,9 +70,8 @@ var kataExecCLICommand = cli.Command{
 			return err
 		}

-		span.SetAttributes(label.Key("sandbox").String(sandboxID))
+		conn, err := getConn(sandboxID, port)

-		conn, err := getConn(namespace, sandboxID, port)
 		if err != nil {
 			return err
 		}
@@ -177,9 +154,8 @@ func (s *iostream) Read(data []byte) (n int, err error) {
 	return s.conn.Read(data)
 }

-func getConn(namespace, sandboxID string, port uint64) (net.Conn, error) {
-	socketAddr := filepath.Join(string(filepath.Separator), "containerd-shim", namespace, sandboxID, "shim-monitor.sock")
-	client, err := kataMonitor.BuildUnixSocketClient(socketAddr, defaultTimeout)
+func getConn(sandboxID string, port uint64) (net.Conn, error) {
+	client, err := kataMonitor.BuildShimClient(sandboxID, defaultTimeout)
 	if err != nil {
 		return nil, err
 	}
@@ -190,7 +166,7 @@ func getConn(namespace, sandboxID string, port uint64) (net.Conn, error) {
 	}

 	if resp.StatusCode != http.StatusOK {
-		return nil, fmt.Errorf("Failed to get %s: %d", socketAddr, resp.StatusCode)
+		return nil, fmt.Errorf("Failure from %s shim-monitor: %d", sandboxID, resp.StatusCode)
 	}

 	defer resp.Body.Close()
--- a/src/runtime/cli/kata-metrics.go
+++ b/src/runtime/cli/kata-metrics.go
@@ -0,0 +1,38 @@
+// Copyright (c) 2021 Apple Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+package main
+
+import (
+	"fmt"
+
+	kataMonitor "github.com/kata-containers/kata-containers/src/runtime/pkg/kata-monitor"
+	"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
+	"github.com/urfave/cli"
+)
+
+var kataMetricsCLICommand = cli.Command{
+	Name:      "metrics",
+	Usage:     "gather metrics associated with infrastructure used to run a sandbox",
+	UsageText: "metrics <sandbox id>",
+	Action: func(context *cli.Context) error {
+
+		sandboxID := context.Args().Get(0)
+
+		if err := katautils.VerifyContainerID(sandboxID); err != nil {
+			return err
+		}
+
+		// Fetch the metrics for the given sandbox through the kata-monitor package.
+		metrics, err := kataMonitor.GetSandboxMetrics(sandboxID)
+		if err != nil {
+			return err
+		}
+
+		fmt.Printf("%s\n", metrics)
+
+		return nil
+	},
+}
--- a/src/runtime/cli/main.go
+++ b/src/runtime/cli/main.go
@@ -22,14 +22,12 @@ import (
 	vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
 	exp "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/experimental"
 	vf "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/factory"
+	tl "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/factory/template"
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/oci"
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless"
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/sirupsen/logrus"
 	"github.com/urfave/cli"
-	"go.opentelemetry.io/otel"
-	"go.opentelemetry.io/otel/label"
-	otelTrace "go.opentelemetry.io/otel/trace"
 )

 // specConfig is the name of the file holding the containers configuration
@@ -125,6 +123,7 @@ var runtimeCommands = []cli.Command{
 	kataCheckCLICommand,
 	kataEnvCLICommand,
 	kataExecCLICommand,
+	kataMetricsCLICommand,
 	factoryCLICommand,
 }

@@ -132,10 +131,6 @@ var runtimeCommands = []cli.Command{
 // parsing occurs.
 var runtimeBeforeSubcommands = beforeSubcommands

-// runtimeAfterSubcommands is the function to run after the command-line
-// has been parsed.
-var runtimeAfterSubcommands = afterSubcommands
-
 // runtimeCommandNotFound is the function to handle an invalid sub-command.
 var runtimeCommandNotFound = commandNotFound

@@ -168,10 +163,6 @@ func init() {

 // setupSignalHandler sets up signal handling, starting a go routine to deal
 // with signals as they arrive.
-//
-// Note that the specified context is NOT used to create a trace span (since the
-// first (root) span must be created in beforeSubcommands()): it is simply
-// used to pass to the crash handling functions to finalise tracing.
 func setupSignalHandler(ctx context.Context) {
 	signals.SetLogger(kataLog)

@@ -181,10 +172,6 @@ func setupSignalHandler(ctx context.Context) {
 		signal.Notify(sigCh, sig)
 	}

-	dieCb := func() {
-		katautils.StopTracing(ctx)
-	}
-
 	go func() {
 		for {
 			sig := <-sigCh
@@ -198,7 +185,6 @@ func setupSignalHandler(ctx context.Context) {

 			if signals.FatalSignal(nativeSignal) {
 				kataLog.WithField("signal", sig).Error("received fatal signal")
-				signals.Die(dieCb)
 			} else if debug && signals.NonFatalSignal(nativeSignal) {
 				kataLog.WithField("signal", sig).Debug("handling signal")
 				signals.Backtrace()
@@ -210,24 +196,15 @@ func setupSignalHandler(ctx context.Context) {
 // setExternalLoggers registers the specified logger with the external
 // packages which accept a logger to handle their own logging.
 func setExternalLoggers(ctx context.Context, logger *logrus.Entry) {
-	var span otelTrace.Span
-
-	// Only create a new span if a root span already exists. This is
-	// required to ensure that this function will not disrupt the root
-	// span logic by creating a span before the proper root span has been
-	// created.
-
-	if otelTrace.SpanFromContext(ctx) != nil {
-		span, ctx = katautils.Trace(ctx, "setExternalLoggers")
-		defer span.End()
-	}
-
 	// Set virtcontainers logger.
 	vci.SetLogger(ctx, logger)

 	// Set vm factory logger.
 	vf.SetLogger(ctx, logger)

+	// Set vm factory template logger.
+	tl.SetLogger(ctx, logger)
+
 	// Set the OCI package logger.
 	oci.SetLogger(ctx, logger)

@@ -244,7 +221,6 @@ func beforeSubcommands(c *cli.Context) error {
 	var configFile string
 	var runtimeConfig oci.RuntimeConfig
 	var err error
-	var traceFlushFunc func()

 	katautils.SetConfigOptions(name, defaultRuntimeConfiguration, defaultSysConfRuntimeConfiguration)

@@ -270,7 +246,6 @@ func beforeSubcommands(c *cli.Context) error {
 	// Issue: https://github.com/kata-containers/runtime/issues/2428

 	ignoreConfigLogs := false
-	var traceRootSpan string

 	subCmdIsCheckCmd := (c.NArg() >= 1 && ((c.Args()[0] == "kata-check") || (c.Args()[0] == "check")))
 	if subCmdIsCheckCmd {
@@ -302,16 +277,13 @@ func beforeSubcommands(c *cli.Context) error {
 		cmdName := c.Args().First()
 		if c.App.Command(cmdName) != nil {
 			kataLog = kataLog.WithField("command", cmdName)
-
-			// Name for the root span (used for tracing) now the
-			// sub-command name is known.
-			traceRootSpan = name + " " + cmdName
 		}

-		// Since a context is required, pass a new (throw-away) one - we
-		// cannot use the main context as tracing hasn't been enabled yet
-		// (meaning any spans created at this point will be silently ignored).
-		setExternalLoggers(context.Background(), kataLog)
+		ctx, err := cliContextToContext(c)
+		if err != nil {
+			return err
+		}
+		setExternalLoggers(ctx, kataLog)

 		if c.NArg() == 1 && (c.Args()[0] == "kata-env" || c.Args()[0] == "env") {
 			// simply report the logging setup
@@ -325,20 +297,6 @@ func beforeSubcommands(c *cli.Context) error {
 	}
 	if !subCmdIsCheckCmd {
 		debug = runtimeConfig.Debug
-
-		if traceRootSpan != "" {
-			// Create the tracer.
-			//
-			// Note: no spans are created until the command-line has been parsed.
-			// This delays collection of trace data slightly but benefits the user by
-			// ensuring the first span is the name of the sub-command being
-			// invoked from the command-line.
-			traceFlushFunc, err = setupTracing(c, traceRootSpan, &runtimeConfig)
-			if err != nil {
-				return err
-			}
-			defer traceFlushFunc()
-		}
 	}

 	args := strings.Join(c.Args(), " ")
@@ -377,36 +335,6 @@ func handleShowConfig(context *cli.Context) {
 	}
 }

-func setupTracing(context *cli.Context, rootSpanName string, config *oci.RuntimeConfig) (func(), error) {
-	flush, err := katautils.CreateTracer(name, config)
-	if err != nil {
-		return nil, err
-	}
-
-	ctx, err := cliContextToContext(context)
-	if err != nil {
-		return nil, err
-	}
-
-	// Create the root span now that the sub-command name is
-	// known.
-	//
-	// Note that this "Before" function is called (and returns)
-	// before the subcommand handler is called. As such, we cannot
-	// "Finish()" the span here - that is handled in the .After
-	// function.
-	tracer := otel.Tracer("kata")
-	newCtx, span := tracer.Start(ctx, rootSpanName)
-
-	span.SetAttributes(label.Key("subsystem").String("runtime"))
-
-	// Add tracer to metadata and update the context
-	context.App.Metadata["tracer"] = tracer
-	context.App.Metadata["context"] = newCtx
-
-	return flush, nil
-}
-
 // add supported experimental features in context
 func addExpFeatures(clictx *cli.Context, runtimeConfig oci.RuntimeConfig) error {
 	ctx, err := cliContextToContext(clictx)
@@ -420,22 +348,11 @@ func addExpFeatures(clictx *cli.Context, runtimeConfig oci.RuntimeConfig) error
 	}

 	ctx = exp.ContextWithExp(ctx, exps)
-	// Add tracer to metadata and update the context
+	// Add experimental features to metadata and update the context
 	clictx.App.Metadata["context"] = ctx
 	return nil
 }

-func afterSubcommands(c *cli.Context) error {
-	ctx, err := cliContextToContext(c)
-	if err != nil {
-		return err
-	}
-
-	katautils.StopTracing(ctx)
-
-	return nil
-}
-
 // function called when an invalid command is specified which causes the
 // runtime to error.
 func commandNotFound(c *cli.Context, command string) {
@@ -502,7 +419,6 @@ func createRuntimeApp(ctx context.Context, args []string) error {
 	app.Flags = runtimeFlags
 	app.Commands = runtimeCommands
 	app.Before = runtimeBeforeSubcommands
-	app.After = runtimeAfterSubcommands
 	app.EnableBashCompletion = true

 	// allow sub-commands to access context
@@ -578,12 +494,5 @@ func cliContextToContext(c *cli.Context) (context.Context, error) {
 func main() {
 	// create a new empty context
 	ctx := context.Background()
-
-	dieCb := func() {
-		katautils.StopTracing(ctx)
-	}
-
-	defer signals.HandlePanic(dieCb)
-
 	createRuntime(ctx)
 }
--- a/src/runtime/cli/main_test.go
+++ b/src/runtime/cli/main_test.go
@@ -20,7 +20,6 @@ import (
 	"strings"
 	"testing"

-	"github.com/dlespiau/covertool/pkg/cover"
 	ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils"
 	"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
 	"github.com/kata-containers/kata-containers/src/runtime/pkg/utils"
@@ -152,16 +151,6 @@ func runUnitTests(m *testing.M) {
 // TestMain is the common main function used by ALL the test functions
 // for this package.
 func TestMain(m *testing.M) {
-	// Parse the command line using the stdlib flag package so the flags defined
-	// in the testing package get populated.
-	cover.ParseAndStripTestFlags()
-
-	// Make sure we have the opportunity to flush the coverage report to disk when
-	// terminating the process.
-	defer func() {
-		cover.FlushProfiles()
-	}()
-
 	// If the test binary name is kata-runtime.coverage, we've are being asked to
 	// run the coverage-instrumented kata-runtime.
 	if path.Base(os.Args[0]) == name+".coverage" ||
--- a/src/runtime/cli/version.go
+++ b/src/runtime/cli/version.go
@@ -6,7 +6,6 @@
 package main

 import (
-	"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
 	"github.com/urfave/cli"
 )

@@ -14,14 +13,6 @@ var versionCLICommand = cli.Command{
 	Name:  "version",
 	Usage: "display version details",
 	Action: func(context *cli.Context) error {
-		ctx, err := cliContextToContext(context)
-		if err != nil {
-			return err
-		}
-
-		span, _ := katautils.Trace(ctx, "version")
-		defer span.End()
-
 		cli.VersionPrinter(context)
 		return nil
 	},
--- a/src/runtime/containerd-shim-v2/create.go
+++ b/src/runtime/containerd-shim-v2/create.go
@@ -19,14 +19,15 @@ import (
 	"github.com/containerd/typeurl"
 	"github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/pkg/errors"
-	otelTrace "go.opentelemetry.io/otel/trace"

 	// only register the proto type
+	crioption "github.com/containerd/containerd/pkg/runtimeoptions/v1"
 	_ "github.com/containerd/containerd/runtime/linux/runctypes"
 	_ "github.com/containerd/containerd/runtime/v2/runc/options"
-	crioption "github.com/containerd/cri-containerd/pkg/api/runtimeoptions/v1"
+	oldcrioption "github.com/containerd/cri-containerd/pkg/api/runtimeoptions/v1"

 	"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
+	"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
 	vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/compatoci"
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/oci"
@@ -69,19 +70,24 @@ func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest) (*con
 		// create tracer
 		// This is the earliest location we can create the tracer because we must wait
 		// until the runtime config is loaded
-		_, err = katautils.CreateTracer("kata", s.config)
+		jaegerConfig := &katatrace.JaegerConfig{
+			JaegerEndpoint: s.config.JaegerEndpoint,
+			JaegerUser:     s.config.JaegerUser,
+			JaegerPassword: s.config.JaegerPassword,
+		}
+		_, err = katatrace.CreateTracer("kata", jaegerConfig)
 		if err != nil {
 			return nil, err
 		}

 		// create root span
-		var rootSpan otelTrace.Span
-		rootSpan, s.rootCtx = trace(s.ctx, "root span")
+		rootSpan, newCtx := katatrace.Trace(s.ctx, shimLog, "root span", shimTracingTags)
+		s.rootCtx = newCtx
 		defer rootSpan.End()

 		// create span
-		var span otelTrace.Span
-		span, s.ctx = trace(s.rootCtx, "create")
+		span, newCtx := katatrace.Trace(s.rootCtx, shimLog, "create", shimTracingTags)
+		s.ctx = newCtx
 		defer span.End()

 		if rootFs.Mounted, err = checkAndMount(s, r); err != nil {
@@ -116,8 +122,7 @@ func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest) (*con
 		go s.startManagementServer(ctx, ociSpec)

 	case vc.PodContainer:
-		var span otelTrace.Span
-		span, ctx = trace(s.ctx, "create")
+		span, ctx := katatrace.Trace(s.ctx, shimLog, "create", shimTracingTags)
 		defer span.End()

 		if s.sandbox == nil {
@@ -184,6 +189,16 @@ func loadRuntimeConfig(s *service, r *taskAPI.CreateTaskRequest, anno map[string
 		// and we'll ignore it.
 		if ok {
 			configPath = option.ConfigPath
+		} else {
+			// Some versions of containerd, such as 1.4.3 and 1.4.4,
+			// still rely on the runtime options coming from
+			// github.com/containerd/cri-containerd/pkg/api/runtimeoptions/v1.
+			// Rather than breaking compatibility with those versions,
+			// fall back to the old options type on our side.
+			oldOption, ok := v.(*oldcrioption.Options)
+			if ok {
+				configPath = oldOption.ConfigPath
+			}
 		}
 	}

--- a/src/runtime/containerd-shim-v2/metrics.go
+++ b/src/runtime/containerd-shim-v2/metrics.go
@@ -8,7 +8,7 @@ package containerdshim
 import (
 	"context"

-	"github.com/containerd/cgroups"
+	cgroupsv1 "github.com/containerd/cgroups/stats/v1"
 	"github.com/containerd/typeurl"

 	google_protobuf "github.com/gogo/protobuf/types"
@@ -31,11 +31,11 @@ func marshalMetrics(ctx context.Context, s *service, containerID string) (*googl
 	return data, nil
 }

-func statsToMetrics(stats *vc.ContainerStats) *cgroups.Metrics {
-	metrics := &cgroups.Metrics{}
+func statsToMetrics(stats *vc.ContainerStats) *cgroupsv1.Metrics {
+	metrics := &cgroupsv1.Metrics{}

 	if stats.CgroupStats != nil {
-		metrics = &cgroups.Metrics{
+		metrics = &cgroupsv1.Metrics{
 			Hugetlb: setHugetlbStats(stats.CgroupStats.HugetlbStats),
 			Pids:    setPidsStats(stats.CgroupStats.PidsStats),
 			CPU:     setCPUStats(stats.CgroupStats.CPUStats),
@@ -49,12 +49,12 @@ func statsToMetrics(stats *vc.ContainerStats) *cgroups.Metrics {
 	return metrics
 }

-func setHugetlbStats(vcHugetlb map[string]vc.HugetlbStats) []*cgroups.HugetlbStat {
-	var hugetlbStats []*cgroups.HugetlbStat
+func setHugetlbStats(vcHugetlb map[string]vc.HugetlbStats) []*cgroupsv1.HugetlbStat {
+	var hugetlbStats []*cgroupsv1.HugetlbStat
 	for _, v := range vcHugetlb {
 		hugetlbStats = append(
 			hugetlbStats,
-			&cgroups.HugetlbStat{
+			&cgroupsv1.HugetlbStat{
 				Usage:   v.Usage,
 				Max:     v.MaxUsage,
 				Failcnt: v.Failcnt,
@@ -64,8 +64,8 @@ func setHugetlbStats(vcHugetlb map[string]vc.HugetlbStats) []*cgroups.HugetlbSta
 	return hugetlbStats
 }

-func setPidsStats(vcPids vc.PidsStats) *cgroups.PidsStat {
-	pidsStats := &cgroups.PidsStat{
+func setPidsStats(vcPids vc.PidsStats) *cgroupsv1.PidsStat {
+	pidsStats := &cgroupsv1.PidsStat{
 		Current: vcPids.Current,
 		Limit:   vcPids.Limit,
 	}
@@ -73,19 +73,19 @@ func setPidsStats(vcPids vc.PidsStats) *cgroups.PidsStat {
 	return pidsStats
 }

-func setCPUStats(vcCPU vc.CPUStats) *cgroups.CPUStat {
+func setCPUStats(vcCPU vc.CPUStats) *cgroupsv1.CPUStat {

 	var perCPU []uint64
 	perCPU = append(perCPU, vcCPU.CPUUsage.PercpuUsage...)

-	cpuStats := &cgroups.CPUStat{
-		Usage: &cgroups.CPUUsage{
+	cpuStats := &cgroupsv1.CPUStat{
+		Usage: &cgroupsv1.CPUUsage{
 			Total:  vcCPU.CPUUsage.TotalUsage,
 			Kernel: vcCPU.CPUUsage.UsageInKernelmode,
 			User:   vcCPU.CPUUsage.UsageInUsermode,
 			PerCPU: perCPU,
 		},
-		Throttling: &cgroups.Throttle{
+		Throttling: &cgroupsv1.Throttle{
 			Periods:          vcCPU.ThrottlingData.Periods,
 			ThrottledPeriods: vcCPU.ThrottlingData.ThrottledPeriods,
 			ThrottledTime:    vcCPU.ThrottlingData.ThrottledTime,
@@ -95,27 +95,27 @@ func setCPUStats(vcCPU vc.CPUStats) *cgroups.CPUStat {
 	return cpuStats
 }

-func setMemoryStats(vcMemory vc.MemoryStats) *cgroups.MemoryStat {
-	memoryStats := &cgroups.MemoryStat{
-		Usage: &cgroups.MemoryEntry{
+func setMemoryStats(vcMemory vc.MemoryStats) *cgroupsv1.MemoryStat {
+	memoryStats := &cgroupsv1.MemoryStat{
+		Usage: &cgroupsv1.MemoryEntry{
 			Limit:   vcMemory.Usage.Limit,
 			Usage:   vcMemory.Usage.Usage,
 			Max:     vcMemory.Usage.MaxUsage,
 			Failcnt: vcMemory.Usage.Failcnt,
 		},
-		Swap: &cgroups.MemoryEntry{
+		Swap: &cgroupsv1.MemoryEntry{
 			Limit:   vcMemory.SwapUsage.Limit,
 			Usage:   vcMemory.SwapUsage.Usage,
 			Max:     vcMemory.SwapUsage.MaxUsage,
 			Failcnt: vcMemory.SwapUsage.Failcnt,
 		},
-		Kernel: &cgroups.MemoryEntry{
+		Kernel: &cgroupsv1.MemoryEntry{
 			Limit:   vcMemory.KernelUsage.Limit,
 			Usage:   vcMemory.KernelUsage.Usage,
 			Max:     vcMemory.KernelUsage.MaxUsage,
 			Failcnt: vcMemory.KernelUsage.Failcnt,
 		},
-		KernelTCP: &cgroups.MemoryEntry{
+		KernelTCP: &cgroupsv1.MemoryEntry{
 			Limit:   vcMemory.KernelTCPUsage.Limit,
 			Usage:   vcMemory.KernelTCPUsage.Usage,
 			Max:     vcMemory.KernelTCPUsage.MaxUsage,
@@ -145,8 +145,8 @@ func setMemoryStats(vcMemory vc.MemoryStats) *cgroups.MemoryStat {
 	return memoryStats
 }

-func setBlkioStats(vcBlkio vc.BlkioStats) *cgroups.BlkIOStat {
-	blkioStats := &cgroups.BlkIOStat{
+func setBlkioStats(vcBlkio vc.BlkioStats) *cgroupsv1.BlkIOStat {
+	blkioStats := &cgroupsv1.BlkIOStat{
 		IoServiceBytesRecursive: copyBlkio(vcBlkio.IoServiceBytesRecursive),
 		IoServicedRecursive:     copyBlkio(vcBlkio.IoServicedRecursive),
 		IoQueuedRecursive:       copyBlkio(vcBlkio.IoQueuedRecursive),
@@ -160,10 +160,10 @@ func setBlkioStats(vcBlkio vc.BlkioStats) *cgroups.BlkIOStat {
 	return blkioStats
 }

-func copyBlkio(s []vc.BlkioStatEntry) []*cgroups.BlkIOEntry {
-	ret := make([]*cgroups.BlkIOEntry, len(s))
+func copyBlkio(s []vc.BlkioStatEntry) []*cgroupsv1.BlkIOEntry {
+	ret := make([]*cgroupsv1.BlkIOEntry, len(s))
 	for i, v := range s {
-		ret[i] = &cgroups.BlkIOEntry{
+		ret[i] = &cgroupsv1.BlkIOEntry{
 			Op:    v.Op,
 			Major: v.Major,
 			Minor: v.Minor,
@@ -174,10 +174,10 @@ func copyBlkio(s []vc.BlkioStatEntry) []*cgroups.BlkIOEntry {
 	return ret
 }

-func setNetworkStats(vcNetwork []*vc.NetworkStats) []*cgroups.NetworkStat {
-	networkStats := make([]*cgroups.NetworkStat, len(vcNetwork))
+func setNetworkStats(vcNetwork []*vc.NetworkStats) []*cgroupsv1.NetworkStat {
+	networkStats := make([]*cgroupsv1.NetworkStat, len(vcNetwork))
 	for i, v := range vcNetwork {
-		networkStats[i] = &cgroups.NetworkStat{
+		networkStats[i] = &cgroupsv1.NetworkStat{
 			Name:      v.Name,
 			RxBytes:   v.RxBytes,
 			RxPackets: v.RxPackets,
--- a/src/runtime/containerd-shim-v2/metrics_test.go
+++ b/src/runtime/containerd-shim-v2/metrics_test.go
@@ -10,7 +10,7 @@ import (
 	"context"
 	"testing"

-	"github.com/containerd/cgroups"
+	"github.com/containerd/cgroups/stats/v1"
 	vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/vcmock"
 	"github.com/stretchr/testify/assert"
@@ -29,7 +29,7 @@ func TestStatNetworkMetric(t *testing.T) {
 		},
 	}

-	expectedNetwork := []*cgroups.NetworkStat{
+	expectedNetwork := []*v1.NetworkStat{
 		{
 			Name:    "test-network",
 			RxBytes: 10,
--- a/src/runtime/containerd-shim-v2/service.go
+++ b/src/runtime/containerd-shim-v2/service.go
@@ -27,18 +27,22 @@ import (
 	"github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
-	"go.opentelemetry.io/otel"
-	"go.opentelemetry.io/otel/label"
-	otelTrace "go.opentelemetry.io/otel/trace"
 	"golang.org/x/sys/unix"

 	"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
+	"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
 	vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/compatoci"
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/oci"
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
 )

+// shimTracingTags defines tags for the trace span
+var shimTracingTags = map[string]string{
+	"source":  "runtime",
+	"package": "containerdshim",
+}
+
 const (
 	// Define the service's channel size, which is used for
 	// reaping the exited processes exit state and forwarding
@@ -68,7 +72,7 @@ var shimLog = logrus.WithFields(logrus.Fields{
 })

 // New returns a new shim service that can be used via GRPC
-func New(ctx context.Context, id string, publisher events.Publisher) (cdshim.Shim, error) {
+func New(ctx context.Context, id string, publisher cdshim.Publisher, shutdown func()) (cdshim.Shim, error) {
 	shimLog = shimLog.WithFields(logrus.Fields{
 		"sandbox": id,
 		"pid":     os.Getpid(),
@@ -84,8 +88,6 @@ func New(ctx context.Context, id string, publisher events.Publisher) (cdshim.Shi
 	vci.SetLogger(ctx, shimLog)
 	katautils.SetLogger(ctx, shimLog, shimLog.Logger.Level)

-	ctx, cancel := context.WithCancel(ctx)
-
 	s := &service{
 		id:         id,
 		pid:        uint32(os.Getpid()),
@@ -93,7 +95,7 @@ func New(ctx context.Context, id string, publisher events.Publisher) (cdshim.Shi
 		containers: make(map[string]*container),
 		events:     make(chan interface{}, chSize),
 		ec:         make(chan exit, bufferSize),
-		cancel:     cancel,
+		cancel:     shutdown,
 	}

 	go s.processExits()
@@ -138,7 +140,7 @@ type service struct {
 	id string
 }

-func newCommand(ctx context.Context, containerdBinary, id, containerdAddress string) (*sysexec.Cmd, error) {
+func newCommand(ctx context.Context, id, containerdBinary, containerdAddress string) (*sysexec.Cmd, error) {
 	ns, err := namespaces.NamespaceRequired(ctx)
 	if err != nil {
 		return nil, err
@@ -176,13 +178,13 @@ func newCommand(ctx context.Context, containerdBinary, id, containerdAddress str

 // StartShim willl start a kata shimv2 daemon which will implemented the
 // ShimV2 APIs such as create/start/update etc containers.
-func (s *service) StartShim(ctx context.Context, id, containerdBinary, containerdAddress string) (string, error) {
+func (s *service) StartShim(ctx context.Context, opts cdshim.StartOpts) (_ string, retErr error) {
 	bundlePath, err := os.Getwd()
 	if err != nil {
 		return "", err
 	}

-	address, err := getAddress(ctx, bundlePath, id)
+	address, err := getAddress(ctx, bundlePath, opts.Address, opts.ID)
 	if err != nil {
 		return "", err
 	}
@@ -193,26 +195,41 @@ func (s *service) StartShim(ctx context.Context, id, containerdBinary, container
 		return address, nil
 	}

-	cmd, err := newCommand(ctx, containerdBinary, id, containerdAddress)
+	cmd, err := newCommand(ctx, opts.ID, opts.ContainerdBinary, opts.Address)
 	if err != nil {
 		return "", err
 	}

-	address, err = cdshim.SocketAddress(ctx, id)
+	address, err = cdshim.SocketAddress(ctx, opts.Address, opts.ID)
 	if err != nil {
 		return "", err
 	}

 	socket, err := cdshim.NewSocket(address)
+
 	if err != nil {
-		return "", err
+		if !cdshim.SocketEaddrinuse(err) {
+			return "", err
+		}
+		if err := cdshim.RemoveSocket(address); err != nil {
+			return "", errors.Wrap(err, "remove already used socket")
+		}
+		if socket, err = cdshim.NewSocket(address); err != nil {
+			return "", err
+		}
 	}
-	defer socket.Close()
+
+	defer func() {
+		if retErr != nil {
+			socket.Close()
+			_ = cdshim.RemoveSocket(address)
+		}
+	}()
+
 	f, err := socket.File()
 	if err != nil {
 		return "", err
 	}
-	defer f.Close()

 	cmd.ExtraFiles = append(cmd.ExtraFiles, f)

@@ -220,7 +237,7 @@ func (s *service) StartShim(ctx context.Context, id, containerdBinary, container
 		return "", err
 	}
 	defer func() {
-		if err != nil {
+		if retErr != nil {
 			cmd.Process.Kill()
 		}
 	}()
@@ -288,20 +305,8 @@ func getTopic(e interface{}) string {
 	return cdruntime.TaskUnknownTopic
 }

-func trace(ctx context.Context, name string) (otelTrace.Span, context.Context) {
-	if ctx == nil {
-		logrus.WithField("type", "bug").Error("trace called before context set")
-		ctx = context.Background()
-	}
-	tracer := otel.Tracer("kata")
-	ctx, span := tracer.Start(ctx, name)
-	span.SetAttributes([]label.KeyValue{label.Key("source").String("runtime"), label.Key("package").String("containerdshim")}...)
-
-	return span, ctx
-}
-
 func (s *service) Cleanup(ctx context.Context) (_ *taskAPI.DeleteResponse, err error) {
-	span, spanCtx := trace(s.rootCtx, "Cleanup")
+	span, spanCtx := katatrace.Trace(s.rootCtx, shimLog, "Cleanup", shimTracingTags)
 	defer span.End()

 	//Since the binary cleanup will return the DeleteResponse from stdout to
@@ -416,7 +421,7 @@ func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ *

 // Start a process
 func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (_ *taskAPI.StartResponse, err error) {
-	span, spanCtx := trace(s.rootCtx, "Start")
+	span, spanCtx := katatrace.Trace(s.rootCtx, shimLog, "Start", shimTracingTags)
 	defer span.End()

 	start := time.Now()
@@ -467,7 +472,7 @@ func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (_ *taskAP

 // Delete the initial process and container
 func (s *service) Delete(ctx context.Context, r *taskAPI.DeleteRequest) (_ *taskAPI.DeleteResponse, err error) {
-	span, spanCtx := trace(s.rootCtx, "Delete")
+	span, spanCtx := katatrace.Trace(s.rootCtx, shimLog, "Delete", shimTracingTags)
 	defer span.End()

 	start := time.Now()
@@ -519,7 +524,7 @@ func (s *service) Delete(ctx context.Context, r *taskAPI.DeleteRequest) (_ *task

 // Exec an additional process inside the container
 func (s *service) Exec(ctx context.Context, r *taskAPI.ExecProcessRequest) (_ *ptypes.Empty, err error) {
-	span, _ := trace(s.rootCtx, "Exec")
+	span, _ := katatrace.Trace(s.rootCtx, shimLog, "Exec", shimTracingTags)
 	defer span.End()

 	start := time.Now()
@@ -557,7 +562,7 @@ func (s *service) Exec(ctx context.Context, r *taskAPI.ExecProcessRequest) (_ *p

 // ResizePty of a process
 func (s *service) ResizePty(ctx context.Context, r *taskAPI.ResizePtyRequest) (_ *ptypes.Empty, err error) {
-	span, spanCtx := trace(s.rootCtx, "ResizePty")
+	span, spanCtx := katatrace.Trace(s.rootCtx, shimLog, "ResizePty", shimTracingTags)
 	defer span.End()

 	start := time.Now()
@@ -596,7 +601,7 @@ func (s *service) ResizePty(ctx context.Context, r *taskAPI.ResizePtyRequest) (_

 // State returns runtime state information for a process
 func (s *service) State(ctx context.Context, r *taskAPI.StateRequest) (_ *taskAPI.StateResponse, err error) {
-	span, _ := trace(s.rootCtx, "State")
+	span, _ := katatrace.Trace(s.rootCtx, shimLog, "State", shimTracingTags)
 	defer span.End()

 	start := time.Now()
@@ -624,6 +629,7 @@ func (s *service) State(ctx context.Context, r *taskAPI.StateRequest) (_ *taskAP
 			Stderr:     c.stderr,
 			Terminal:   c.terminal,
 			ExitStatus: c.exit,
+			ExitedAt:   c.exitTime,
 		}, nil
 	}

@@ -643,12 +649,13 @@ func (s *service) State(ctx context.Context, r *taskAPI.StateRequest) (_ *taskAP
 		Stderr:     execs.tty.stderr,
 		Terminal:   execs.tty.terminal,
 		ExitStatus: uint32(execs.exitCode),
+		ExitedAt:   execs.exitTime,
 	}, nil
 }

 // Pause the container
 func (s *service) Pause(ctx context.Context, r *taskAPI.PauseRequest) (_ *ptypes.Empty, err error) {
-	span, spanCtx := trace(s.rootCtx, "Pause")
+	span, spanCtx := katatrace.Trace(s.rootCtx, shimLog, "Pause", shimTracingTags)
 	defer span.End()

 	start := time.Now()
@@ -687,7 +694,7 @@ func (s *service) Pause(ctx context.Context, r *taskAPI.PauseRequest) (_ *ptypes

 // Resume the container
 func (s *service) Resume(ctx context.Context, r *taskAPI.ResumeRequest) (_ *ptypes.Empty, err error) {
-	span, spanCtx := trace(s.rootCtx, "Resume")
+	span, spanCtx := katatrace.Trace(s.rootCtx, shimLog, "Resume", shimTracingTags)
 	defer span.End()

 	start := time.Now()
@@ -724,7 +731,7 @@ func (s *service) Resume(ctx context.Context, r *taskAPI.ResumeRequest) (_ *ptyp

 // Kill a process with the provided signal
 func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (_ *ptypes.Empty, err error) {
-	span, spanCtx := trace(s.rootCtx, "Kill")
+	span, spanCtx := katatrace.Trace(s.rootCtx, shimLog, "Kill", shimTracingTags)
 	defer span.End()

 	start := time.Now()
@@ -785,7 +792,7 @@ func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (_ *ptypes.E
 // Since for kata, it cannot get the process's pid from VM,
 // thus only return the Shim's pid directly.
 func (s *service) Pids(ctx context.Context, r *taskAPI.PidsRequest) (_ *taskAPI.PidsResponse, err error) {
-	span, _ := trace(s.rootCtx, "Pids")
+	span, _ := katatrace.Trace(s.rootCtx, shimLog, "Pids", shimTracingTags)
 	defer span.End()

 	var processes []*task.ProcessInfo
@@ -808,7 +815,7 @@ func (s *service) Pids(ctx context.Context, r *taskAPI.PidsRequest) (_ *taskAPI.

 // CloseIO of a process
 func (s *service) CloseIO(ctx context.Context, r *taskAPI.CloseIORequest) (_ *ptypes.Empty, err error) {
-	span, _ := trace(s.rootCtx, "CloseIO")
+	span, _ := katatrace.Trace(s.rootCtx, shimLog, "CloseIO", shimTracingTags)
 	defer span.End()

 	start := time.Now()
@@ -849,7 +856,7 @@ func (s *service) CloseIO(ctx context.Context, r *taskAPI.CloseIORequest) (_ *pt

 // Checkpoint the container
 func (s *service) Checkpoint(ctx context.Context, r *taskAPI.CheckpointTaskRequest) (_ *ptypes.Empty, err error) {
-	span, _ := trace(s.rootCtx, "Checkpoint")
+	span, _ := katatrace.Trace(s.rootCtx, shimLog, "Checkpoint", shimTracingTags)
 	defer span.End()

 	start := time.Now()
@@ -863,7 +870,7 @@ func (s *service) Checkpoint(ctx context.Context, r *taskAPI.CheckpointTaskReque

 // Connect returns shim information such as the shim's pid
 func (s *service) Connect(ctx context.Context, r *taskAPI.ConnectRequest) (_ *taskAPI.ConnectResponse, err error) {
-	span, _ := trace(s.rootCtx, "Connect")
+	span, _ := katatrace.Trace(s.rootCtx, shimLog, "Connect", shimTracingTags)
 	defer span.End()

 	start := time.Now()
@@ -883,7 +890,7 @@ func (s *service) Connect(ctx context.Context, r *taskAPI.ConnectRequest) (_ *ta
 }

 func (s *service) Shutdown(ctx context.Context, r *taskAPI.ShutdownRequest) (_ *ptypes.Empty, err error) {
-	span, _ := trace(s.rootCtx, "Shutdown")
+	span, _ := katatrace.Trace(s.rootCtx, shimLog, "Shutdown", shimTracingTags)

 	start := time.Now()
 	defer func() {
@@ -899,10 +906,15 @@ func (s *service) Shutdown(ctx context.Context, r *taskAPI.ShutdownRequest) (_ *
 	s.mu.Unlock()

 	span.End()
-	katautils.StopTracing(s.ctx)
+	katatrace.StopTracing(s.ctx)

 	s.cancel()

+	// stopSandbox only sends a shutdown QMP command to qemu and does not wait
+	// for the qemu process to exit, so make sure it has exited before shimv2
+	// terminates: kill the hypervisor here as a last cleanup step.
+	syscall.Kill(int(s.hpid), syscall.SIGKILL)
+
 	os.Exit(0)

 	// This will never be called, but this is only there to make sure the
@@ -911,7 +923,7 @@ func (s *service) Shutdown(ctx context.Context, r *taskAPI.ShutdownRequest) (_ *
 }

 func (s *service) Stats(ctx context.Context, r *taskAPI.StatsRequest) (_ *taskAPI.StatsResponse, err error) {
-	span, spanCtx := trace(s.rootCtx, "Stats")
+	span, spanCtx := katatrace.Trace(s.rootCtx, shimLog, "Stats", shimTracingTags)
 	defer span.End()

 	start := time.Now()
@@ -940,7 +952,7 @@ func (s *service) Stats(ctx context.Context, r *taskAPI.StatsRequest) (_ *taskAP

 // Update a running container
 func (s *service) Update(ctx context.Context, r *taskAPI.UpdateTaskRequest) (_ *ptypes.Empty, err error) {
-	span, spanCtx := trace(s.rootCtx, "Update")
+	span, spanCtx := katatrace.Trace(s.rootCtx, shimLog, "Update", shimTracingTags)
 	defer span.End()

 	start := time.Now()
@@ -972,7 +984,7 @@ func (s *service) Update(ctx context.Context, r *taskAPI.UpdateTaskRequest) (_ *

 // Wait for a process to exit
 func (s *service) Wait(ctx context.Context, r *taskAPI.WaitRequest) (_ *taskAPI.WaitResponse, err error) {
-	span, _ := trace(s.rootCtx, "Wait")
+	span, _ := katatrace.Trace(s.rootCtx, shimLog, "Wait", shimTracingTags)
 	defer span.End()

 	var ret uint32
--- a/src/runtime/containerd-shim-v2/shim_management.go
+++ b/src/runtime/containerd-shim-v2/shim_management.go
@@ -16,7 +16,6 @@ import (
 	"strconv"
 	"strings"

-	"github.com/containerd/containerd/namespaces"
 	cdshim "github.com/containerd/containerd/runtime/v2/shim"
 	vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
 	vcAnnotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations"
@@ -129,11 +128,7 @@ func decodeAgentMetrics(body string) []*dto.MetricFamily {

 func (s *service) startManagementServer(ctx context.Context, ociSpec *specs.Spec) {
 	// metrics socket will under sandbox's bundle path
-	metricsAddress, err := socketAddress(ctx, s.id)
-	if err != nil {
-		shimMgtLog.WithError(err).Error("failed to create socket address")
-		return
-	}
+	metricsAddress := SocketAddress(s.id)

 	listener, err := cdshim.NewSocket(metricsAddress)
 	if err != nil {
@@ -166,7 +161,7 @@ func (s *service) startManagementServer(ctx context.Context, ociSpec *specs.Spec
 	svr.Serve(listener)
 }

-// mountServeDebug provides a debug endpoint
+// mountPprofHandle provides a debug endpoint
 func (s *service) mountPprofHandle(m *http.ServeMux, ociSpec *specs.Spec) {

 	// return if not enabled
@@ -188,10 +183,8 @@ func (s *service) mountPprofHandle(m *http.ServeMux, ociSpec *specs.Spec) {
 	m.Handle("/debug/pprof/trace", http.HandlerFunc(pprof.Trace))
 }

-func socketAddress(ctx context.Context, id string) (string, error) {
-	ns, err := namespaces.NamespaceRequired(ctx)
-	if err != nil {
-		return "", err
-	}
-	return filepath.Join(string(filepath.Separator), "containerd-shim", ns, id, "shim-monitor.sock"), nil
+// SocketAddress returns the address of the abstract domain socket for communicating with the
+// shim management endpoint
+func SocketAddress(id string) string {
+	return filepath.Join(string(filepath.Separator), "run", "vc", id, "shim-monitor")
 }
--- a/src/runtime/containerd-shim-v2/start.go
+++ b/src/runtime/containerd-shim-v2/start.go
@@ -13,8 +13,14 @@ import (
 	"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
 )

-func startContainer(ctx context.Context, s *service, c *container) error {
-	//start a container
+func startContainer(ctx context.Context, s *service, c *container) (retErr error) {
+	defer func() {
+		if retErr != nil {
+			// notify the wait goroutine to continue
+			c.exitCh <- exitCode255
+		}
+	}()
+	// start a container
 	if c.cType == "" {
 		err := fmt.Errorf("Bug, the container %s type is empty", c.id)
 		return err
@@ -37,8 +43,8 @@ func startContainer(ctx context.Context, s *service, c *container) error {
 		}
 		go watchSandbox(ctx, s)

-		// We don't rely on the context passed to startContainer as it can be cancelled after
-		// this rpc call.
+		// We use s.ctx (from which `ctx` is derived) to check for cancellation of
+		// the shim context, and the context passed to startContainer for tracing.
 		go watchOOMEvents(ctx, s)
 	} else {
 		_, err := s.sandbox.StartContainer(ctx, c.id)
@@ -74,10 +80,10 @@ func startContainer(ctx context.Context, s *service, c *container) error {
 		c.ttyio = tty
 		go ioCopy(c.exitIOch, c.stdinCloser, tty, stdin, stdout, stderr)
 	} else {
-		//close the io exit channel, since there is no io for this container,
-		//otherwise the following wait goroutine will hang on this channel.
+		// close the io exit channel, since there is no io for this container,
+		// otherwise the following wait goroutine will hang on this channel.
 		close(c.exitIOch)
-		//close the stdin closer channel to notify that it's safe to close process's
+		// close the stdin closer channel to notify that it's safe to close process's
 		// io.
 		close(c.stdinCloser)
 	}
@@ -87,8 +93,8 @@ func startContainer(ctx context.Context, s *service, c *container) error {
 	return nil
 }

-func startExec(ctx context.Context, s *service, containerID, execID string) (*exec, error) {
-	//start an exec
+func startExec(ctx context.Context, s *service, containerID, execID string) (e *exec, retErr error) {
+	// start an exec
 	c, err := s.getContainer(containerID)
 	if err != nil {
 		return nil, err
@@ -99,6 +105,13 @@ func startExec(ctx context.Context, s *service, containerID, execID string) (*ex
 		return nil, err
 	}

+	defer func() {
+		if retErr != nil {
+			// notify the wait goroutine to continue
+			execs.exitCh <- exitCode255
+		}
+	}()
+
 	_, proc, err := s.sandbox.EnterContainer(ctx, containerID, *execs.cmds)
 	if err != nil {
 		err := fmt.Errorf("cannot enter container %s, with err %s", containerID, err)
--- a/src/runtime/containerd-shim-v2/stream_test.go
+++ b/src/runtime/containerd-shim-v2/stream_test.go
@@ -89,3 +89,191 @@ func TestNewTtyIOFifoReopen(t *testing.T) {
 	checkFifoRead(outr)
 	checkFifoRead(errr)
 }
+
+func TestIoCopy(t *testing.T) {
+	t.Skip("TestIoCopy is failing randonly, see https://github.com/kata-containers/kata-containers/issues/2042")
+
+	assert := assert.New(t)
+	ctx := context.TODO()
+
+	testBytes1 := []byte("Test1")
+	testBytes2 := []byte("Test2")
+	testBytes3 := []byte("Test3")
+
+	fifoPath, err := ioutil.TempDir(testDir, "fifo-path-")
+	assert.NoError(err)
+	dstStdoutPath := filepath.Join(fifoPath, "dststdout")
+	dstStderrPath := filepath.Join(fifoPath, "dststderr")
+
+	// test function: create pipes, and use ioCopy() to copy data from one set to the other
+	// this function will be called multiple times, testing different combinations of closing order
+	// in order to verify that closing a pipe doesn't break the copy for the others
+	ioCopyTest := func(first, second, third string) {
+		var srcStdinPath string
+		if third != "" {
+			srcStdinPath = filepath.Join(fifoPath, "srcstdin")
+		}
+
+		logErrorMsg := func(msg string) string {
+			return "Error found while using order [" + first + " " + second + " " + third + "] - " + msg
+		}
+
+		exitioch := make(chan struct{})
+		stdinCloser := make(chan struct{})
+
+		createFifo := func(f string) (io.ReadCloser, io.WriteCloser) {
+			reader, err := fifo.OpenFifo(ctx, f, syscall.O_RDONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700)
+			if err != nil {
+				t.Fatal(err)
+			}
+			writer, err := fifo.OpenFifo(ctx, f, syscall.O_WRONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700)
+			if err != nil {
+				reader.Close()
+				t.Fatal(err)
+			}
+			return reader, writer
+		}
+
+		// create two sets of stdin, stdout and stderr pipes, to copy data from one to the other
+		srcOutR, srcOutW := createFifo(filepath.Join(fifoPath, "srcstdout"))
+		defer srcOutR.Close()
+		defer srcOutW.Close()
+
+		srcErrR, srcErrW := createFifo(filepath.Join(fifoPath, "srcstderr"))
+		defer srcErrR.Close()
+		defer srcErrW.Close()
+
+		dstInR, dstInW := createFifo(filepath.Join(fifoPath, "dststdin"))
+		defer dstInR.Close()
+		defer dstInW.Close()
+
+		dstOutR, err := fifo.OpenFifo(ctx, dstStdoutPath, syscall.O_RDONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer dstOutR.Close()
+		dstErrR, err := fifo.OpenFifo(ctx, dstStderrPath, syscall.O_RDONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer dstErrR.Close()
+
+		var srcInW io.WriteCloser
+		if srcStdinPath != "" {
+			srcInW, err = fifo.OpenFifo(ctx, srcStdinPath, syscall.O_WRONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700)
+			if err != nil {
+				t.Fatal(err)
+			}
+			defer srcInW.Close()
+		}
+
+		tty, err := newTtyIO(ctx, srcStdinPath, dstStdoutPath, dstStderrPath, false)
+		assert.NoError(err)
+		defer tty.close()
+
+		// start the ioCopy threads : copy from src to dst
+		go ioCopy(exitioch, stdinCloser, tty, dstInW, srcOutR, srcErrR)
+
+		var firstW, secondW, thirdW io.WriteCloser
+		var firstR, secondR, thirdR io.Reader
+		getPipes := func(order string) (io.Reader, io.WriteCloser) {
+			switch order {
+			case "out":
+				return dstOutR, srcOutW
+			case "err":
+				return dstErrR, srcErrW
+			case "in":
+				return dstInR, srcInW
+			case "":
+				return nil, nil
+			}
+			t.Fatal("internal error")
+			return nil, nil
+		}
+
+		firstR, firstW = getPipes(first)
+		secondR, secondW = getPipes(second)
+		thirdR, thirdW = getPipes(third)
+
+		checkFifoWrite := func(w io.Writer, b []byte, name string) {
+			_, err := w.Write(b)
+			if name == "in" && (name == third || name == second && first == "out") {
+				// this is expected: when stdout is closed, ioCopy() will close stdin
+				// so if "in" is after "out", we will get here
+			} else {
+				assert.NoError(err, logErrorMsg("Write error on std"+name))
+			}
+		}
+		checkFifoRead := func(r io.Reader, b []byte, name string) {
+			var err error
+			buf := make([]byte, 5)
+			done := make(chan struct{})
+			timer := time.NewTimer(2 * time.Second)
+			go func() {
+				_, err = r.Read(buf)
+				close(done)
+			}()
+			select {
+			case <-done:
+				assert.NoError(err, logErrorMsg("Error reading from std"+name))
+				assert.Equal(b, buf, logErrorMsg("Value mismatch on std"+name))
+			case <-timer.C:
+				//t.Fatal(logErrorMsg("read fifo timeout on std" + name))
+				if name == "in" && (name == third || name == second && first == "out") {
+					// this is expected: when stdout is closed, ioCopy() will close stdin
+					// so if "in" is after "out", we will get here
+				} else {
+					assert.Fail(logErrorMsg("read fifo timeout on std" + name))
+				}
+				return
+			}
+		}
+
+		// write to each pipe, and close them immediately
+		// the ioCopy function should copy the data, then stop the corresponding thread
+		checkFifoWrite(firstW, testBytes1, first)
+		firstW.Close()
+
+		// need to make sure the Close() above is done before we continue
+		time.Sleep(time.Second)
+
+		checkFifoWrite(secondW, testBytes2, second)
+		secondW.Close()
+
+		if thirdW != nil {
+			// need to make sure the Close() above is done before we continue
+			time.Sleep(time.Second)
+
+			checkFifoWrite(thirdW, testBytes3, third)
+			thirdW.Close()
+		}
+
+		// wait for the end of the ioCopy
+		timer := time.NewTimer(2 * time.Second)
+		select {
+		case <-exitioch:
+			// now check that all data has been copied properly
+			checkFifoRead(firstR, testBytes1, first)
+			checkFifoRead(secondR, testBytes2, second)
+			if thirdR != nil {
+				checkFifoRead(thirdR, testBytes3, third)
+			}
+		case <-timer.C:
+			t.Fatal(logErrorMsg("timeout waiting for ioCopy()"))
+		}
+	}
+
+	// try the different combinations
+
+	// tests without stdin
+	ioCopyTest("out", "err", "")
+	ioCopyTest("err", "out", "")
+
+	// tests with stdin
+	ioCopyTest("out", "err", "in")
+	ioCopyTest("out", "in", "err")
+	ioCopyTest("err", "out", "in")
+	ioCopyTest("err", "in", "out")
+	ioCopyTest("in", "out", "err")
+	ioCopyTest("in", "err", "out")
+}
--- a/src/runtime/containerd-shim-v2/utils.go
+++ b/src/runtime/containerd-shim-v2/utils.go
@@ -78,7 +78,7 @@ func validBundle(containerID, bundlePath string) (string, error) {
 	return resolved, nil
 }

-func getAddress(ctx context.Context, bundlePath, id string) (string, error) {
+func getAddress(ctx context.Context, bundlePath, address, id string) (string, error) {
 	var err error

 	// Checks the MUST and MUST NOT from OCI runtime specification
@@ -101,7 +101,7 @@ func getAddress(ctx context.Context, bundlePath, id string) (string, error) {
 		if err != nil {
 			return "", err
 		}
-		address, err := cdshim.SocketAddress(ctx, sandboxID)
+		address, err := cdshim.SocketAddress(ctx, address, sandboxID)
 		if err != nil {
 			return "", err
 		}
--- a/src/runtime/containerd-shim-v2/utils_test.go
+++ b/src/runtime/containerd-shim-v2/utils_test.go
@@ -73,7 +73,7 @@ func init() {
 	var err error

 	fmt.Printf("INFO: creating test directory\n")
-	testDir, err = ioutil.TempDir("", fmt.Sprintf("shimV2-"))
+	testDir, err = ioutil.TempDir("", "shimV2-")
 	if err != nil {
 		panic(fmt.Sprintf("ERROR: failed to create test directory: %v", err))
 	}
--- a/src/runtime/containerd-shim-v2/wait.go
+++ b/src/runtime/containerd-shim-v2/wait.go
@@ -142,7 +142,7 @@ func watchOOMEvents(ctx context.Context, s *service) {

 	for {
 		select {
-		case <-ctx.Done():
+		case <-s.ctx.Done():
 			return
 		default:
 			containerID, err := s.sandbox.GetOOMEvent(ctx)
--- a/src/runtime/go.mod
+++ b/src/runtime/go.mod
@@ -4,67 +4,54 @@ go 1.14

 require (
 	github.com/BurntSushi/toml v0.3.1
-	github.com/blang/semver v0.0.0-20190414102917-ba2c2ddd8906
-	github.com/cilium/ebpf v0.0.0-20200421083123-d05ecd062fb1 // indirect
-	github.com/containerd/cgroups v0.0.0-20190717030353-c4b9ac5c7601
-	github.com/containerd/console v0.0.0-20191206165004-02ecf6a7291e
-	github.com/containerd/containerd v1.2.1-0.20181210191522-f05672357f56
-	github.com/containerd/continuity v0.0.0-20200413184840-d3ef23f19fbb // indirect
-	github.com/containerd/cri v1.11.1 // indirect
+	github.com/blang/semver v3.5.1+incompatible
+	github.com/blang/semver/v4 v4.0.0
+	github.com/containerd/cgroups v1.0.1
+	github.com/containerd/console v1.0.2
+	github.com/containerd/containerd v1.5.2
 	github.com/containerd/cri-containerd v1.11.1-0.20190125013620-4dd6735020f5
-	github.com/containerd/fifo v0.0.0-20190226154929-a9fb20d87448
-	github.com/containerd/go-runc v0.0.0-20200220073739-7016d3ce2328 // indirect
-	github.com/containerd/ttrpc v1.0.0
-	github.com/containerd/typeurl v1.0.1-0.20190228175220-2a93cfde8c20
-	github.com/containernetworking/plugins v0.8.2
+	github.com/containerd/fifo v1.0.0
+	github.com/containerd/ttrpc v1.0.2
+	github.com/containerd/typeurl v1.0.2
+	github.com/containernetworking/plugins v0.9.1
 	github.com/cri-o/cri-o v1.0.0-rc2.0.20170928185954-3394b3b2d6af
-	github.com/dlespiau/covertool v0.0.0-20180314162135-b0c4c6d0583a
-	github.com/docker/distribution v2.7.1+incompatible // indirect
-	github.com/docker/docker v1.13.1 // indirect
-	github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c // indirect
 	github.com/go-ini/ini v1.28.2
 	github.com/go-openapi/errors v0.18.0
 	github.com/go-openapi/runtime v0.18.0
 	github.com/go-openapi/strfmt v0.18.0
-	github.com/go-openapi/swag v0.18.0
+	github.com/go-openapi/swag v0.19.5
 	github.com/go-openapi/validate v0.18.0
-	github.com/gogo/googleapis v1.4.0 // indirect
-	github.com/gogo/protobuf v1.3.1
+	github.com/gogo/protobuf v1.3.2
 	github.com/hashicorp/go-multierror v1.0.0
-	github.com/kata-containers/govmm v0.0.0-20210428163604-f0e9a35308ee
+	github.com/intel-go/cpuid v0.0.0-20210602155658-5747e5cec0d9
+	github.com/kata-containers/govmm v0.0.0-20210622075516-263136e69ac8
 	github.com/mdlayher/vsock v0.0.0-20191108225356-d9c65923cb8f
-	github.com/opencontainers/image-spec v1.0.1 // indirect
-	github.com/opencontainers/runc v1.0.0-rc9.0.20200102164712-2b52db75279c
-	github.com/opencontainers/runtime-spec v1.0.2-0.20190408193819-a1b50f621a48
-	github.com/opencontainers/selinux v1.4.0
+	github.com/opencontainers/runc v1.0.0-rc93
+	github.com/opencontainers/runtime-spec v1.0.3-0.20200929063507-e6143ca7d51d
+	github.com/opencontainers/selinux v1.8.0
 	github.com/pkg/errors v0.9.1
 	github.com/prometheus/client_golang v1.7.1
 	github.com/prometheus/client_model v0.2.0
 	github.com/prometheus/common v0.10.0
-	github.com/prometheus/procfs v0.1.3
+	github.com/prometheus/procfs v0.6.0
 	github.com/safchain/ethtool v0.0.0-20190326074333-42ed695e3de8
-	github.com/seccomp/libseccomp-golang v0.9.1 // indirect
-	github.com/sirupsen/logrus v1.4.2
+	github.com/sirupsen/logrus v1.7.0
 	github.com/smartystreets/goconvey v1.6.4 // indirect
 	github.com/stretchr/testify v1.6.1
-	github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2 // indirect
-	github.com/urfave/cli v1.20.1-0.20170926034118-ac249472b7de
-	github.com/vishvananda/netlink v1.0.1-0.20190604022042-c8c507c80ea2
-	github.com/vishvananda/netns v0.0.0-20180720170159-13995c7128cc
+	github.com/urfave/cli v1.22.2
+	github.com/vishvananda/netlink v1.1.1-0.20201029203352-d40f9887b852
+	github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae
 	go.opentelemetry.io/otel v0.15.0
 	go.opentelemetry.io/otel/exporters/trace/jaeger v0.15.0
 	go.opentelemetry.io/otel/sdk v0.15.0
-	golang.org/x/net v0.0.0-20200822124328-c89045814202
+	golang.org/x/net v0.0.0-20210226172049-e18ecbb05110
 	golang.org/x/oauth2 v0.0.0-20200902213428-5d25da1a8d43
-	golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f
-	google.golang.org/grpc v1.31.1
-	gotest.tools v2.2.0+incompatible // indirect
-	k8s.io/apimachinery v0.18.2
+	golang.org/x/sys v0.0.0-20210324051608-47abb6519492
+	google.golang.org/grpc v1.33.2
+	k8s.io/apimachinery v0.20.6
 )

 replace (
 	github.com/uber-go/atomic => go.uber.org/atomic v1.5.1
 	google.golang.org/genproto => google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8
-	google.golang.org/grpc => google.golang.org/grpc v1.19.0
-	gotest.tools/v3 => gotest.tools v2.2.0+incompatible
 )
--- a/src/runtime/go.sum
+++ b/src/runtime/go.sum
--- a/src/runtime/hack/tree_status.sh
+++ b/src/runtime/hack/tree_status.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+#
+# Copyright 2021 Red Hat Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+set -e
+
+STATUS=$(git status --porcelain)
+if [[ -z $STATUS ]]; then
+    echo "tree is clean"
+else
+    echo "tree is dirty, please commit all changes"
+    echo ""
+    echo "$STATUS"
+    git diff
+    exit 1
+fi
--- a/src/runtime/pkg/kata-monitor/metrics.go
+++ b/src/runtime/pkg/kata-monitor/metrics.go
@@ -176,7 +176,7 @@ func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error {
 	for sandboxID, namespace := range sandboxes {
 		wg.Add(1)
 		go func(sandboxID, namespace string, results chan<- []*dto.MetricFamily) {
-			sandboxMetrics, err := km.getSandboxMetrics(sandboxID, namespace)
+			sandboxMetrics, err := getParsedMetrics(sandboxID)
 			if err != nil {
 				monitorLog.WithError(err).WithField("sandbox_id", sandboxID).Errorf("failed to get metrics for sandbox")
 			}
@@ -229,13 +229,12 @@ func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error {
 			return err
 		}
 	}
-
 	return nil
+
 }

-// getSandboxMetrics will get sandbox's metrics from shim
-func (km *KataMonitor) getSandboxMetrics(sandboxID, namespace string) ([]*dto.MetricFamily, error) {
-	body, err := km.doGet(sandboxID, namespace, defaultTimeout, "metrics")
+func getParsedMetrics(sandboxID string) ([]*dto.MetricFamily, error) {
+	body, err := doGet(sandboxID, defaultTimeout, "metrics")
 	if err != nil {
 		return nil, err
 	}
@@ -243,6 +242,16 @@ func (km *KataMonitor) getSandboxMetrics(sandboxID, namespace string) ([]*dto.Me
 	return parsePrometheusMetrics(sandboxID, body)
 }

+// GetSandboxMetrics fetches the sandbox's metrics from the shim and returns them as a string
+func GetSandboxMetrics(sandboxID string) (string, error) {
+	body, err := doGet(sandboxID, defaultTimeout, "metrics")
+	if err != nil {
+		return "", err
+	}
+
+	return string(body), nil
+}
+
 // parsePrometheusMetrics will decode metrics from Prometheus text format
 // and return array of *dto.MetricFamily with an ASC order
 func parsePrometheusMetrics(sandboxID string, body []byte) ([]*dto.MetricFamily, error) {
--- a/src/runtime/pkg/kata-monitor/monitor.go
+++ b/src/runtime/pkg/kata-monitor/monitor.go
@@ -87,13 +87,8 @@ func (km *KataMonitor) GetAgentURL(w http.ResponseWriter, r *http.Request) {
 		commonServeError(w, http.StatusBadRequest, err)
 		return
 	}
-	namespace, err := km.getSandboxNamespace(sandboxID)
-	if err != nil {
-		commonServeError(w, http.StatusBadRequest, err)
-		return
-	}

-	data, err := km.doGet(sandboxID, namespace, defaultTimeout, "agent-url")
+	data, err := doGet(sandboxID, defaultTimeout, "agent-url")
 	if err != nil {
 		commonServeError(w, http.StatusBadRequest, err)
 		return
--- a/src/runtime/pkg/kata-monitor/shim_client.go
+++ b/src/runtime/pkg/kata-monitor/shim_client.go
@@ -11,6 +11,8 @@ import (
 	"net"
 	"net/http"
 	"time"
+
+	shim "github.com/kata-containers/kata-containers/src/runtime/containerd-shim-v2"
 )

 const (
@@ -33,16 +35,13 @@ func getSandboxIDFromReq(r *http.Request) (string, error) {
 	return "", fmt.Errorf("sandbox not found in %+v", r.URL.Query())
 }

-func (km *KataMonitor) buildShimClient(sandboxID, namespace string, timeout time.Duration) (*http.Client, error) {
-	socketAddr, err := km.getMonitorAddress(sandboxID, namespace)
-	if err != nil {
-		return nil, err
-	}
-	return BuildUnixSocketClient(socketAddr, timeout)
+// BuildShimClient builds and returns an http client for communicating with the provided sandbox
+func BuildShimClient(sandboxID string, timeout time.Duration) (*http.Client, error) {
+	return buildUnixSocketClient(shim.SocketAddress(sandboxID), timeout)
 }

-// BuildUnixSocketClient build http client for Unix socket
-func BuildUnixSocketClient(socketAddr string, timeout time.Duration) (*http.Client, error) {
+// buildUnixSocketClient builds an http client for a Unix socket
+func buildUnixSocketClient(socketAddr string, timeout time.Duration) (*http.Client, error) {
 	transport := &http.Transport{
 		DisableKeepAlives: true,
 		Dial: func(proto, addr string) (conn net.Conn, err error) {
@@ -61,8 +60,8 @@ func BuildUnixSocketClient(socketAddr string, timeout time.Duration) (*http.Clie
 	return client, nil
 }

-func (km *KataMonitor) doGet(sandboxID, namespace string, timeoutInSeconds time.Duration, urlPath string) ([]byte, error) {
-	client, err := km.buildShimClient(sandboxID, namespace, timeoutInSeconds)
+func doGet(sandboxID string, timeoutInSeconds time.Duration, urlPath string) ([]byte, error) {
+	client, err := BuildShimClient(sandboxID, timeoutInSeconds)
 	if err != nil {
 		return nil, err
 	}
--- a/src/runtime/pkg/katatestutils/constraints_test.go
+++ b/src/runtime/pkg/katatestutils/constraints_test.go
@@ -16,7 +16,7 @@ import (
 	"strings"
 	"testing"

-	"github.com/blang/semver"
+	semver "github.com/blang/semver/v4"
 	"github.com/stretchr/testify/assert"
 )

--- a/src/runtime/pkg/katautils/config-settings.go.in
+++ b/src/runtime/pkg/katautils/config-settings.go.in
@@ -20,13 +20,13 @@ var defaultCPUFeatures = ""
 var systemdUnitName = "kata-containers.target"

 const defaultKernelParams = ""
-const defaultMachineType = "pc"
+const defaultMachineType = "q35"

 const defaultVCPUCount uint32 = 1
 const defaultMaxVCPUCount uint32 = 0
 const defaultMemSize uint32 = 2048 // MiB
 const defaultMemSlots uint32 = 10
-const defaultMemOffset uint32 = 0 // MiB
+const defaultMemOffset uint64 = 0 // MiB
 const defaultVirtioMem bool = false
 const defaultBridgesCount uint32 = 1
 const defaultInterNetworkingModel = "tcfilter"
@@ -54,6 +54,7 @@ const defaultDisableImageNvdimm = false
 const defaultVhostUserStorePath string = "/var/run/kata-containers/vhost-user/"
 const defaultRxRateLimiterMaxRate = uint64(0)
 const defaultTxRateLimiterMaxRate = uint64(0)
+const defaultConfidentialGuest = false

 var defaultSGXEPCSize = int64(0)

--- a/src/runtime/pkg/katautils/config.go
+++ b/src/runtime/pkg/katautils/config.go
@@ -1,4 +1,4 @@
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2021 Intel Corporation
 // Copyright (c) 2018 HyperHQ Inc.
 //
 // SPDX-License-Identifier: Apache-2.0
@@ -16,6 +16,7 @@ import (

 	"github.com/BurntSushi/toml"
 	govmmQemu "github.com/kata-containers/govmm/qemu"
+	"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
 	vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config"
 	exp "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/experimental"
@@ -28,11 +29,6 @@ const (
 	defaultHypervisor = vc.QemuHypervisor
 )

-var (
-	// if true, enable opentracing support.
-	tracing = false
-)
-
 // The TOML configuration file contains a number of sections (or
 // tables). The names of these tables are in dotted ("nested table")
 // form:
@@ -61,6 +57,12 @@ type tomlConfig struct {
 	Runtime    runtime
 	Factory    factory
 	Netmon     netmon
+	Image      image
+}
+
+type image struct {
+	ServiceOffload bool   `toml:"service_offload"`
+	Provision      string `toml:"provision"`
 }

 type factory struct {
@@ -108,7 +110,7 @@ type hypervisor struct {
 	DefaultMaxVCPUs         uint32   `toml:"default_maxvcpus"`
 	MemorySize              uint32   `toml:"default_memory"`
 	MemSlots                uint32   `toml:"memory_slots"`
-	MemOffset               uint32   `toml:"memory_offset"`
+	MemOffset               uint64   `toml:"memory_offset"`
 	DefaultBridges          uint32   `toml:"default_bridges"`
 	Msize9p                 uint32   `toml:"msize_9p"`
 	PCIeRootPort            uint32   `toml:"pcie_root_port"`
@@ -130,6 +132,7 @@ type hypervisor struct {
 	HotplugVFIOOnRootBus    bool     `toml:"hotplug_vfio_on_root_bus"`
 	DisableVhostNet         bool     `toml:"disable_vhost_net"`
 	GuestMemoryDumpPaging   bool     `toml:"guest_memory_dump_paging"`
+	ConfidentialGuest       bool     `toml:"confidential_guest"`
 }

 type runtime struct {
@@ -154,6 +157,7 @@ type agent struct {
 	Debug               bool     `toml:"enable_debug"`
 	Tracing             bool     `toml:"enable_tracing"`
 	DebugConsoleEnabled bool     `toml:"debug_console_enabled"`
+	DialTimeout         uint32   `toml:"dial_timeout"`
 }

 type netmon struct {
@@ -351,7 +355,7 @@ func (h hypervisor) defaultMemSlots() uint32 {
 	return slots
 }

-func (h hypervisor) defaultMemOffset() uint32 {
+func (h hypervisor) defaultMemOffset() uint64 {
 	offset := h.MemOffset
 	if offset == 0 {
 		offset = defaultMemOffset
@@ -471,6 +475,10 @@ func (a agent) debugConsoleEnabled() bool {
 	return a.DebugConsoleEnabled
 }

+func (a agent) dialTimout() uint32 {
+	return a.DialTimeout
+}
+
 func (a agent) debug() bool {
 	return a.Debug
 }
@@ -702,6 +710,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
 		EnableAnnotations:       h.EnableAnnotations,
 		GuestMemoryDumpPath:     h.GuestMemoryDumpPath,
 		GuestMemoryDumpPaging:   h.GuestMemoryDumpPaging,
+		ConfidentialGuest:       h.ConfidentialGuest,
 	}, nil
 }

@@ -920,6 +929,7 @@ func updateRuntimeConfigAgent(configPath string, tomlConf tomlConfig, config *oc
 			TraceType:          agent.traceType(),
 			KernelModules:      agent.kernelModules(),
 			EnableDebugConsole: agent.debugConsoleEnabled(),
+			DialTimeout:        agent.dialTimout(),
 		}
 	}

@@ -1055,6 +1065,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
 		RxRateLimiterMaxRate:    defaultRxRateLimiterMaxRate,
 		TxRateLimiterMaxRate:    defaultTxRateLimiterMaxRate,
 		SGXEPCSize:              defaultSGXEPCSize,
+		ConfidentialGuest:       defaultConfidentialGuest,
 	}
 }

@@ -1101,7 +1112,7 @@ func LoadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat
 	}

 	config.Trace = tomlConf.Runtime.Tracing
-	tracing = config.Trace
+	katatrace.SetTracing(config.Trace)

 	if tomlConf.Runtime.InterNetworkModel != "" {
 		err = config.InterNetworkModel.SetModel(tomlConf.Runtime.InterNetworkModel)
--- a/src/runtime/pkg/katautils/create.go
+++ b/src/runtime/pkg/katautils/create.go
@@ -13,13 +13,20 @@ import (
 	"strconv"
 	"strings"

+	"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
 	vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
 	vf "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/factory"
 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/oci"
 	specs "github.com/opencontainers/runtime-spec/specs-go"
-	"go.opentelemetry.io/otel/label"
 )

+// createTracingTags defines tags for the trace span
+var createTracingTags = map[string]string{
+	"source":    "runtime",
+	"package":   "katautils",
+	"subsystem": "sandbox",
+}
+
 // GetKernelParamsFunc use a variable to allow tests to modify its value
 var GetKernelParamsFunc = getKernelParams

@@ -104,7 +111,8 @@ func SetEphemeralStorageType(ociSpec specs.Spec) specs.Spec {
 // CreateSandbox create a sandbox container
 func CreateSandbox(ctx context.Context, vci vc.VC, ociSpec specs.Spec, runtimeConfig oci.RuntimeConfig, rootFs vc.RootFs,
 	containerID, bundlePath, console string, disableOutput, systemdCgroup bool) (_ vc.VCSandbox, _ vc.Process, err error) {
-	span, ctx := Trace(ctx, "createSandbox")
+	span, ctx := katatrace.Trace(ctx, nil, "CreateSandbox", createTracingTags)
+	katatrace.AddTag(span, "container_id", containerID)
 	defer span.End()

 	sandboxConfig, err := oci.SandboxConfig(ociSpec, runtimeConfig, bundlePath, containerID, console, disableOutput, systemdCgroup)
@@ -159,7 +167,7 @@ func CreateSandbox(ctx context.Context, vci vc.VC, ociSpec specs.Spec, runtimeCo

 	sid := sandbox.ID()
 	kataUtilsLogger = kataUtilsLogger.WithField("sandbox", sid)
-	span.SetAttributes(label.Key("sandbox").String(sid))
+	katatrace.AddTag(span, "sandbox_id", sid)

 	containers := sandbox.GetAllContainers()
 	if len(containers) != 1 {
@@ -202,7 +210,8 @@ func checkForFIPS(sandboxConfig *vc.SandboxConfig) error {
 func CreateContainer(ctx context.Context, sandbox vc.VCSandbox, ociSpec specs.Spec, rootFs vc.RootFs, containerID, bundlePath, console string, disableOutput bool) (vc.Process, error) {
 	var c vc.VCContainer

-	span, ctx := Trace(ctx, "createContainer")
+	span, ctx := katatrace.Trace(ctx, nil, "CreateContainer", createTracingTags)
+	katatrace.AddTag(span, "container_id", containerID)
 	defer span.End()

 	ociSpec = SetEphemeralStorageType(ociSpec)
@@ -228,7 +237,7 @@ func CreateContainer(ctx context.Context, sandbox vc.VCSandbox, ociSpec specs.Sp
 		return vc.Process{}, err
 	}

-	span.SetAttributes(label.Key("sandbox").String(sandboxID))
+	katatrace.AddTag(span, "sandbox_id", sandboxID)

 	c, err = sandbox.CreateContainer(ctx, contConfig)
 	if err != nil {
--- a/Show More
+++ b/Show More