Compare commits

...

279 Commits

Author SHA1 Message Date
snir911
a57515bdae Merge pull request #4384 from snir911/2.5.0-alpha2-branch-bump
# Kata Containers 2.5.0-alpha2
2022-06-08 19:32:57 +03:00
Eric Ernst
4ebf9d38b9 Merge pull request #4310 from egernst/core-sched
shim: add support for core scheduling
2022-06-08 17:42:45 +02:00
Snir Sheriber
eb24e97150 release: Kata Containers 2.5.0-alpha2
- docs: Update storage documentation link
- rustjail: get home dir using nix crate
- runk: Support `list` sub-command
- docs: Update vGPU use-case
- runtime: ignore ESRCH error from stop container
- docs: Update configuration reference for snap documentation
- workflows: add workflow_dispatch triggering to test-kata-deploy
- snap: Use helper script and cleanup
- feature: add ability to interact with IPTables within the guest
- agent: return mount file content if parse mountinfo failed
- docs: Update Intel QAT documentation links
- osbuilder: add iptables package
- runk: Return error when tty is used without console socket
- runk: Add Podman guide in README
- agent: Pass standard I/O to container launched by runk
- agent, runk: Enable test for the agent built with standard-oci-runtime feature
- runk: Handle rootfs path in config.json properly
- Update containerd docs
- clh: Update to v24.0
- snap: Build and package rust version of virtiofsd
- runk: merge oci-kata-agent into runk
- virtiofsd: static build virtiofsd from rust code for non-x86
- Fix issues with direct-volume stats feature
- runtime: fix incorrect Action function for direct-volume stats
- runtime: Adding the correct detection of mediated PCIe devices
- runtime: remove duplicate 'types' import
- runtime: sync docstrings with function names
- qemu: allow using legacy serial device for the console
- docs: Remove clear containers reference in README
- runtime: do not check for EOF error in console watcher
- kernel: Remove nemu.conf from packaging
- tools: delete unused param from get_from_kata_deps callers
- agent: Fix is_signal_handled failing parsing str to u64
- Improve Go unit test script
- packaging: Add kernel config option for SGX in Gramine
- ci: Don't run Docs URL Alive Check workflow on forks
- tools: Add QEMU patches for SGX numa support
- docs: Update runc containerd runtime
- Build and distribute the rust version of virtiofsd
- doc: Update log parser link
- Move the kata-log-parser from the tests repo
- versions: Upgrade to Cloud Hypervisor v23.1
- agent: Add a macro to skip a loop easier
- runk: use custom Kill command to support --all option
- agent: add test coverage for functions find_process and online_resources

fe3c1d9cd docs: Update storage documentation link
9d27c1fce agent: ignore ESRCH error when destroying containers
9726f56fd runtime: force stop container after the container process exits
168f325c4 docs: Update configuration reference for snap documentation
38a318820 runk: Support `list` sub-command
b9fc24ff3 docs: update release process github token instructions
c1476a174 docs: update release process with latest workflow triggering
002f2cd10 snap: Use helper script and cleanup
2e04833fb docs: Update Intel QAT documentation links
8b57bf97a workflows: add workflow_dispatch triggering to test-kata-deploy
6d0ff901a docs: Update vGPU use-case
9b108d993 docs: Improve snap formatting
894f661cc docs: Add warning to snap build
d759f6c3e snap: Fix CH architecture check
590381574 agent: Pass standard I/O to container launched by runk
af2ef3f7a agent-ctl: introduce handle for iptables get/set
65f0cef16 kata-runtime: add iptables CLI to test http endpoint
3201ad083 shim-client: ensure we check resp status for Put/Post
0706fb28a kata-runtime: shmgmt: make url usage consistent
2a09378dd shim-client: add support for DoPut
640173cfc shim-mgmt: Add endpoint handler for interacting with iptables
0136be22c virtcontainers: plumb iptable set/get from sandbox to agent
bd50d463b agent: iptables: get/set handling for iptables
7c4049aab osbuilder: add iptables package
03176a9e0 proto: update generated code based on proto update
38ebbc705 proto: update to add set/get iptables
78d45b434 agent: return mount file content if parse mountinfo failed
c7b3941c9 runk: Enable test for the agent built with standard-oci-runtime feature
6dbce7c3d agent: Remove unused import in console test
6ecea84bc rustjail: get home dir using nix crate
648b8d0ae runk: Return error when tty is used without console socket
5205efd9b runk: Add Podman guide in README
d862ca059 runk: Handle rootfs path in config.json properly
56591804b docs: Improve snap build instructions
cb2b30970 snap: Build using destructive mode
60823abb9 docs: Move snap README
fff832874 clh: Update to v24.0
49361749e snap: Build and package rust version of virtiofsd
27d903b76 snap: Put the yq binary in the staging bin directory
d7b4ce049 snap: Remove unused variable
43de5440e snap: Fix unbound variable error
c9b291509 snap: Fix whitespace
122a85e22 agent: remove bin oci-kata-agent
35619b45a runk: merge oci-kata-agent into runk
10c13d719 qemu: remove virtiofsd option in qemu config
d20bc5a4d virtiofsd: build rust based virtiofsd from source for non-x86_64
c95ba63c0 docs: Remove information related to Kata 1.x
34b80382b docs: Get rid of note related to networking.
dfad5728a docs: Mention --cni flag while invoking ctr
8e7c5975c agent: fix direct-assigned volume stats
4428ceae1 runtime: direct-volume stats use correct name
ffdc065b4 runtime: direct-volume stats update to use GET parameter
f29595318 runtime: fix incorrect Action function for direct-volume stats
7a5ccd126 runtime: sync docstrings with function names
ce2e521a0 runtime: remove duplicate 'types' import
834f93ce8 docs: fix annotations example
f4994e486 runtime: allow annotation configuration to use_legacy_serial
24a2b0f6a docs: Remove clear containers reference in README
abad33eba kernel: Remove nemu.conf from packaging
e87eb13c4 tools: delete unused param from get_from_kata_deps callers
8052fe62f runtime: do not check for EOF error in console watcher
c67b9d297 qemu: allow using legacy serial device for the console
44814dce1 qemu: treat console kernel params within appendConsole
4f586d2a9 packaging: Add kernel config option for SGX in Gramine
4b437d91f agent: Fix is_signal_handled failing parsing str to u64
88fb9b72e docs: Update runc containerd runtime
d1f2852d8 tools: Stop building virtiofsd with qemu (for x86_64)
c39852e83 runtime: Use ${LIBEXEC}/virtiofsd as the default virtiofsd path
b4b9068cb tools: Add QEMU patches for SGX numa support
a475956ab workflows: Add support for building virtiofsd
71f59f3a7 local-build: Add support for building virtiofsd
c7ac55b6d dockerbuild: Install unzip
8e2042d05 tools: add script to pull virtiofsd
dbedea508 versions: Add virtiofsd entry
e73b70baf runtime: Don't run unit tests verbose by default
f24a6e761 runtime: Consolidate flags setting in unit tests script
cf465feb0 runtime: Don't change test behaviour based on $CI or $KATA_DEV_MODE
34c4ac599 runtime: Remove redundant subcommands from go-test.sh
0aff5aaa3 runtime: Simplify package listing in go-test.sh
557c4cfd0 runtime: Don't chmod coverage files in Go tests
04c8b52e0 runtime: Remove HTML coverage option from go-test.sh
7f7691442 runtime: Add coverage.txt.tmp to gitignore
13c257700 runtime: Move go testing script locally
421064680 doc: Update log parser link
271933fec log-parser: fix some of the documentation
c7dacb121 log-parser: move the kata-log-parser from the tests repo
82ea01828 versions: Upgrade to Cloud Hypervisor v23.1
2a1d39414 runtime: Adding the correct detection of mediated PCIe devices
7bc4ab68c ci: Don't run Docs URL Alive Check workflow on forks
475e3bf38 agent: add test coverage for functions find_process and online_resources
383be2203 agent: Add a macro to skip a loop easier
97d7b1845 runk: use custom Kill command to support --all option

Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2022-06-08 11:56:30 +03:00
GabyCT
5bd81ba232 Merge pull request #4399 from GabyCT/topic/updatestoragedoc
docs: Update storage documentation link
2022-06-07 09:13:45 -05:00
Gabriela Cervantes
fe3c1d9cdd docs: Update storage documentation link
This PR updates the storage documentation link for the devicemapper
snapshotter.

Fixes #4398

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2022-06-06 14:48:34 +00:00
Bin Liu
a238d8c6bd Merge pull request #4300 from justxuewei/fix/rustjail/home-env
rustjail: get home dir using nix crate
2022-06-06 11:03:46 +08:00
Bin Liu
f981190621 Merge pull request #4383 from cyyzero/runk-list
runk: Support `list` sub-command
2022-06-06 10:25:33 +08:00
Bin Liu
f7b22eb777 Merge pull request #4344 from zvonkok/vgpu-documentation
docs: Update vGPU use-case
2022-06-06 10:25:05 +08:00
Eric Ernst
430da47215 Merge pull request #4360 from fengwang666/shim-leak
runtime: ignore ESRCH error from stop container
2022-06-02 12:42:19 -07:00
GabyCT
9c9e5984ba Merge pull request #4342 from GabyCT/topic/updatesnapdoc
docs: Update configuration reference for snap documentation
2022-06-02 14:00:22 -05:00
Feng Wang
9d27c1fced agent: ignore ESRCH error when destroying containers
The destroy() method should ignore the ESRCH error from signal::kill
and continue the operation, as ESRCH is generally harmless here.

Fixes: #4359

Signed-off-by: Feng Wang <feng.wang@databricks.com>
2022-06-02 08:19:48 -07:00
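The fix itself is in the Rust agent (`signal::kill` from the nix crate); as a minimal Go sketch of the same "ESRCH is harmless" pattern, assuming a hypothetical helper that signals a container process:

```go
package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

// signalProcess sends sig to pid, treating ESRCH ("no such process")
// as success: if the process is already gone, destroy can proceed.
func signalProcess(pid int, sig unix.Signal) error {
	err := unix.Kill(pid, sig)
	if err == unix.ESRCH {
		// Process already exited; nothing left to kill.
		return nil
	}
	if err != nil {
		return fmt.Errorf("kill(%d, %v): %w", pid, sig, err)
	}
	return nil
}

func main() {
	// Hypothetical pid, for illustration only.
	fmt.Println(signalProcess(12345, unix.SIGKILL))
}
```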
Feng Wang
9726f56fdc runtime: force stop container after the container process exits
Set the stop container force flag to true so that the container state is always set to
"StateStopped" after the container wait goroutine is finished. This is necessary for
the following delete container step to succeed.

Fixes: #4359

Signed-off-by: Feng Wang <feng.wang@databricks.com>
2022-06-02 08:17:08 -07:00
Gabriela Cervantes
168f325c43 docs: Update configuration reference for snap documentation
This PR updates the url link for the kata containers configuration
for the general snap documentation.

Fixes #4341

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2022-06-02 14:55:06 +00:00
Chen Yiyang
38a3188206 runk: Support list sub-command
Support the list sub-command. It traverses the root directory, parses
status files, and prints basic information about containers. The behavior
and print format are consistent with runc. To handle races with runk delete
or modifications by system users, the loop continues traversing when errors
are encountered.

Fixes: #4362

Signed-off-by: Chen Yiyang <cyyzero@qq.com>
2022-06-02 18:24:51 +08:00
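runk itself is written in Rust; purely as an illustration of that traversal strategy (tolerate races by skipping entries that fail to load), here is a hedged Go sketch with a hypothetical root directory and status-file layout:

```go
// Sketch: list containers by scanning a root directory, tolerating
// races with a concurrent "delete" by skipping unreadable entries.
package main

import (
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
)

// Status is a hypothetical subset of runk's status file fields.
type Status struct {
	ID     string `json:"id"`
	Pid    int    `json:"pid"`
	Status string `json:"status"`
	Bundle string `json:"bundle"`
}

func listContainers(root string) []Status {
	entries, err := os.ReadDir(root)
	if err != nil {
		return nil
	}
	var out []Status
	for _, e := range entries {
		data, err := os.ReadFile(filepath.Join(root, e.Name(), "status.json"))
		if err != nil {
			continue // container deleted mid-walk: keep going
		}
		var s Status
		if err := json.Unmarshal(data, &s); err != nil {
			continue // malformed status file: keep going
		}
		out = append(out, s)
	}
	return out
}

func main() {
	// "/run/runk" is a hypothetical root directory.
	for _, s := range listContainers("/run/runk") {
		fmt.Printf("%s\t%d\t%s\t%s\n", s.ID, s.Pid, s.Status, s.Bundle)
	}
}
```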
snir911
a0805742d6 Merge pull request #4350 from snir911/fix_workflow
workflows: add workflow_dispatch triggering to test-kata-deploy
2022-06-02 13:19:13 +03:00
Fabiano Fidêncio
24182d72d9 Merge pull request #4322 from jodh-intel/snap-cleanup
snap: Use helper script and cleanup
2022-06-02 11:47:02 +02:00
Peng Tao
295a01f9b1 Merge pull request #4159 from egernst/topic/iptables
feature: add ability to interact with IPTables within the guest
2022-06-02 11:19:41 +08:00
Tim Zhang
b8e98b175c Merge pull request #4355 from liubin/fix/add-debug-info-for-parse-mount-error
agent: return mount file content if parse mountinfo failed
2022-06-02 10:31:46 +08:00
GabyCT
e8d0be364f Merge pull request #4375 from GabyCT/topic/updateqat
docs: Update Intel QAT documentation links
2022-06-01 15:52:02 -05:00
Chelsea Mafrica
25b1317ead Merge pull request #4357 from egernst/iptables-pkg
osbuilder: add iptables package
2022-06-01 09:28:38 -07:00
Snir Sheriber
b9fc24ff3a docs: update release process github token instructions
and fix the GPG key generation URL

Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2022-06-01 19:08:41 +03:00
Snir Sheriber
c1476a174b docs: update release process with latest workflow triggering
instructions

Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2022-06-01 19:08:25 +03:00
James O. D. Hunt
002f2cd109 snap: Use helper script and cleanup
Move the common shell code to a helper script that is sourced by all
parts.

Add extra quoting to some variables in the snap config file
and simplify.

Fixes: #4304.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-06-01 16:09:29 +01:00
Gabriela Cervantes
2e04833fb9 docs: Update Intel QAT documentation links
This PR updates some Intel QAT documentation url links.

Fixes #4374

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2022-06-01 14:41:00 +00:00
Snir Sheriber
8b57bf97ab workflows: add workflow_dispatch triggering to test-kata-deploy
This allows triggering the test-kata-deploy workflow manually from
any branch instead of always using the one defined on main.

See: https://github.blog/changelog/2020-07-06-github-actions-manual-triggers-with-workflow_dispatch/

Fixes: #4349
Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2022-06-01 16:21:01 +03:00
Zvonko Kaiser
6d0ff901ab docs: Update vGPU use-case
Now that #4213 is merged, we need updated documentation for time-sliced and MIG-backed vGPUs.

Fixes: #4343

Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
2022-06-01 05:58:46 -07:00
James O. D. Hunt
9b108d9937 docs: Improve snap formatting
Improve the snap docs by using more consistent formatting and proper
shell code in the shell example.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-06-01 12:00:40 +01:00
James O. D. Hunt
894f661cc4 docs: Add warning to snap build
Since we must build with `--destructive-mode`, add a warning that the
host environment could change the behaviour of the build, depending on
the packages installed on the system.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-06-01 12:00:40 +01:00
James O. D. Hunt
d759f6c3e5 snap: Fix CH architecture check
Correct the `cloud-hypervisor` part architecture check to use `x86_64`, not
`x64_64`.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-06-01 12:00:38 +01:00
Bin Liu
3e2817f7b5 Merge pull request #4325 from ManaSugi/runk/error-terminal
runk: Return error when tty is used without console socket
2022-06-01 13:58:38 +08:00
Bin Liu
a9a3074828 Merge pull request #4339 from ManaSugi/runk/add-podman-instruction
runk: Add Podman guide in README
2022-06-01 11:05:42 +08:00
Bin Liu
9f81c2dbf0 Merge pull request #4328 from ManaSugi/runk/output-stdout
agent: Pass standard I/O to container launched by runk
2022-06-01 11:00:26 +08:00
Manabu Sugimoto
5903815746 agent: Pass standard I/O to container launched by runk
The `kata-agent` passes its standard I/O file descriptors
through to the container process that will be launched
by `runk` without manipulation or modification, in order
to allow the container process to handle its own I/O operations.

Fixes: #4327

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2022-06-01 10:19:57 +09:00
Bin Liu
9658c6218e Merge pull request #4353 from ManaSugi/runk/enable-agent-unit-tests
agent, runk: Enable test for the agent built with standard-oci-runtime feature
2022-06-01 07:39:01 +08:00
Eric Ernst
d2df1209a5 docs: describe kata handling for core-scheduling
Add initial documentation for core-scheduling.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2022-05-31 16:17:00 -07:00
Michael Crosby
22b6a94a84 shim: add support for core scheduling
In Linux 5.14 (and hopefully some backports), core scheduling allows processes to
be co-scheduled within the same domain on SMT-enabled systems.

The containerd implementation sets the core scheduling domain when launching a
shim. This gives each shim (container/pod) a clean way to be in its own domain,
with any additional containers (v2 pods) launched in the same domain, as well as
any exec'd process added to the container.

kernel docs: https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/core-scheduling.html

For Kata specifically, we look for the SCHED_CORE environment variable
to be set, indicating we should create a new core scheduling domain.

This is equivalent to the containerd shim's PR: e48bbe8394

Fixes: #4309

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
Signed-off-by: Michael Crosby <michael@thepasture.io>
2022-05-31 10:10:40 -07:00
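A minimal sketch, in Go, of what creating such a domain looks like via prctl(2), gated on the SCHED_CORE environment variable mentioned above (constant names from golang.org/x/sys/unix; the shim's actual wiring may differ):

```go
package main

import (
	"fmt"
	"os"

	"golang.org/x/sys/unix"
)

// maybeCreateCoreSchedDomain puts the current process group into a new
// core scheduling domain when SCHED_CORE is set, so SMT siblings are
// only shared with trusted tasks (Linux >= 5.14).
func maybeCreateCoreSchedDomain() error {
	if os.Getenv("SCHED_CORE") == "" {
		return nil // feature not requested
	}
	// prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, pid=0 (self), scope, 0)
	return unix.Prctl(unix.PR_SCHED_CORE, unix.PR_SCHED_CORE_CREATE,
		0, unix.PR_SCHED_CORE_SCOPE_PROCESS_GROUP, 0)
}

func main() {
	if err := maybeCreateCoreSchedDomain(); err != nil {
		fmt.Fprintln(os.Stderr, "core sched:", err)
	}
}
```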
Eric Ernst
af2ef3f7a5 agent-ctl: introduce handle for iptables get/set
Add support for the updated agent API for iptables

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2022-05-31 09:27:58 -07:00
Eric Ernst
65f0cef16c kata-runtime: add iptables CLI to test http endpoint
While end users can connect directly to the shim, let's provide a way to
easily get/set iptables from kata-runtime itself.

Fixes: #4080
Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2022-05-31 09:27:58 -07:00
Eric Ernst
3201ad0830 shim-client: ensure we check resp status for Put/Post
Without this, potential errors are silently dropped. Let's ensure we
return the error code as well as potential data from the response.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2022-05-31 09:27:58 -07:00
Eric Ernst
0706fb28ac kata-runtime: shmgmt: make url usage consistent
Before, we had a mix of slash usage, etc. Unfortunately, when cleaning URL
paths, serve mux seems to mangle the request method, resulting in each
request arriving as a GET (instead of PUT or POST).

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2022-05-31 09:27:58 -07:00
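A small, self-contained Go illustration of the pitfall (the endpoint path here is made up): http.ServeMux answers un-clean paths with a 301 redirect, and Go's default HTTP client replays 301-redirected PUT/POST requests as GET, so client and server must agree on one canonical path:

```go
package main

import (
	"fmt"
	"net/http"
)

func main() {
	mux := http.NewServeMux()
	// Register one canonical form and make every client use exactly
	// this form. A request for "//iptables" (double slash) gets a 301
	// to "/iptables", and Go's http.Client re-issues the redirected
	// request as a GET -- silently downgrading a PUT.
	mux.HandleFunc("/iptables", func(w http.ResponseWriter, r *http.Request) {
		fmt.Fprintf(w, "method=%s\n", r.Method)
	})
	http.ListenAndServe("127.0.0.1:8090", mux)
}
```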
Eric Ernst
2a09378dd9 shim-client: add support for DoPut
While we're at it, make sure we check for nil in DoPost.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2022-05-31 09:27:58 -07:00
Eric Ernst
640173cfc2 shim-mgmt: Add endpoint handler for interacting with iptables
Add two endpoints: ip6tables, iptables.

Each URL handler supports GET and PUT operations. PUT expects the
request's data to be []byte containing iptables information in a
format consumable by iptables-restore.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2022-05-31 09:27:58 -07:00
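A hedged sketch of that handler shape using Go's net/http (the bridge functions are hypothetical; in Kata the shim forwards these calls to the guest agent rather than running iptables itself):

```go
package main

import (
	"io"
	"net/http"
)

// Hypothetical guest-agent bridge functions, stubbed for illustration.
var (
	getIPTables = func(ipv6 bool) ([]byte, error) { return []byte("*filter\nCOMMIT\n"), nil }
	setIPTables = func(ipv6 bool, data []byte) error { return nil }
)

func iptablesHandler(ipv6 bool) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		switch r.Method {
		case http.MethodGet:
			// GET returns the current table dump.
			data, err := getIPTables(ipv6)
			if err != nil {
				http.Error(w, err.Error(), http.StatusInternalServerError)
				return
			}
			w.Write(data)
		case http.MethodPut:
			// PUT body is expected in iptables-save format,
			// ready for iptables-restore inside the guest.
			data, err := io.ReadAll(r.Body)
			if err != nil {
				http.Error(w, err.Error(), http.StatusBadRequest)
				return
			}
			if err := setIPTables(ipv6, data); err != nil {
				http.Error(w, err.Error(), http.StatusInternalServerError)
				return
			}
		default:
			http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
		}
	}
}

func main() {
	http.Handle("/iptables", iptablesHandler(false))
	http.Handle("/ip6tables", iptablesHandler(true))
	http.ListenAndServe("127.0.0.1:8090", nil)
}
```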
Eric Ernst
0136be22ca virtcontainers: plumb iptable set/get from sandbox to agent
Introduce get/set iptables handling. We add a sandbox API for getting and
setting iptables within the guest. This routes the request from the sandbox
interface, through kata-agent, ultimately making requests to the guest
agent.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2022-05-31 09:27:58 -07:00
Eric Ernst
bd50d463b2 agent: iptables: get/set handling for iptables
Initial support for getting and setting iptables in the guest.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2022-05-31 09:27:58 -07:00
Eric Ernst
7c4049aabb osbuilder: add iptables package
Since we are introducing an agent API for interacting with guest
iptables, let's ensure that our example rootfs images have
iptables-save/restore installed.

Fixes: #4356

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2022-05-31 09:21:02 -07:00
Eric Ernst
03176a9e09 proto: update generated code based on proto update
Update the generated agent.pb.go code based on proto update.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2022-05-31 08:45:59 -07:00
Eric Ernst
38ebbc705b proto: update to add set/get iptables
Update the agent protocol definition to introduce support for setting
and getting iptables from the guest.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2022-05-31 08:45:59 -07:00
Bin Liu
78d45b434f agent: return mount file content if parse mountinfo failed
Include the mount file content in the error message when parsing
mountinfo fails, to aid debugging.

Fixes: #4246, #4103

Signed-off-by: Bin Liu <bin@hyper.sh>
2022-05-31 23:36:14 +08:00
Manabu Sugimoto
c7b3941c96 runk: Enable test for the agent built with standard-oci-runtime feature
This enables CI tests for the kata-agent for runk built
with the standard-oci-runtime feature.

Fixes: #4351

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2022-05-31 21:54:28 +09:00
Manabu Sugimoto
6dbce7c3de agent: Remove unused import in console test
Remove some unused imports in the console test module
used by runk's tests.

Fixes: #4351

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2022-05-31 21:54:02 +09:00
Xuewei Niu
6ecea84bc5 rustjail: get home dir using nix crate
Get the user's home dir using the `nix::unistd` crate instead of the
`utils` crate, and remove useless code from the agent.

Fixes: #4209

Signed-off-by: Xuewei Niu <justxuewei@apache.org>
2022-05-31 15:04:33 +08:00
Manabu Sugimoto
648b8d0aec runk: Return error when tty is used without console socket
runk always launches containers in detached mode, so users
have to use a console socket with the run or create operation
when a terminal is used.
If users set `terminal` to `true` in `config.json` and
try to launch a container without specifying a console
socket, runk now returns an error early with a clear message.

Fixes: #4324

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2022-05-31 09:55:39 +09:00
James O. D. Hunt
96c8df40b5 Merge pull request #4335 from ManaSugi/runk/fix-invalid-rootfs
runk: Handle rootfs path in config.json properly
2022-05-30 14:03:58 +01:00
Manabu Sugimoto
5205efd9b4 runk: Add Podman guide in README
runk can launch containers using Podman, so add a guide
to the README.

Fixes: #4338

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2022-05-30 19:06:46 +09:00
James O. D. Hunt
d157f9b71e Merge pull request #3871 from amshinde/update-containerd-docs
Update containerd docs
2022-05-30 08:38:07 +01:00
Manabu Sugimoto
d862ca0590 runk: Handle rootfs path in config.json properly
This commit enables runk to handle `root.path` in `config.json`
properly even if the path is specified by a relative path that
includes the single (`.`) or the double (`..`) dots.
For example, with a bundle at `/to/bundle` and a rootfs directly
under `/to/bundle` such as `/to/bundle/{bin,dev,etc,home,...}`,
the `root.path` value can be either `/to/bundle` or just `.`.
This behavior conforms to OCI runtime spec.
Accordingly, a bundle path managed by runk's status file
(`status.json`) always is statically stored as a canonical path.
Previously, a bundle path has been got by `oci_state()` of rustjail's
API that returns the path as the parent directory path of a rootfs
(`root.path`). In case of the kata-agent, this works properly because
the kata containers assume that the rootfs path is always
`/to/bundle/rootfs`. However in case of standard OCI runtimes,
a rootfs can be placed anywhere under a bundle, so the rootfs path
doesn't always have to be at a `/to/bundle/rootfs`.

Fixes: #4334

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2022-05-30 14:41:26 +09:00
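runk is written in Rust; as a minimal Go sketch of the canonicalization described above (resolve root.path against the bundle directory, then store an absolute, symlink-free path), with hypothetical inputs:

```go
package main

import (
	"fmt"
	"path/filepath"
)

// canonicalRootfs resolves an OCI root.path (".", "..", relative or
// absolute) against the bundle directory into one canonical form.
func canonicalRootfs(bundle, rootPath string) (string, error) {
	if !filepath.IsAbs(rootPath) {
		// Join also Cleans "." and ".." components.
		rootPath = filepath.Join(bundle, rootPath)
	}
	// EvalSymlinks yields the canonical path (it requires the path
	// to exist), suitable for storing in a status file.
	return filepath.EvalSymlinks(rootPath)
}

func main() {
	p, err := canonicalRootfs("/to/bundle", ".")
	fmt.Println(p, err) // "/to/bundle" if that directory exists
}
```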
snir911
d50937435d Merge pull request #4318 from fidencio/topic/update-clh-to-v24.0
clh: Update to v24.0
2022-05-29 15:06:17 +03:00
James O. D. Hunt
56591804b3 docs: Improve snap build instructions
Make it clearer how to build the snap package manually.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-05-26 15:56:36 +01:00
James O. D. Hunt
cb2b30970d snap: Build using destructive mode
Destructive mode is required to build the Kata Containers snap. See:

```
.github/workflows/snap-release.yaml
.github/workflows/snap.yaml
```

Hence, update the last file that we forgot to update with
`--destructive-mode`.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-05-26 15:56:36 +01:00
James O. D. Hunt
60823abb9c docs: Move snap README
Move the snap README to a subdirectory to resolve the warning given by
`snapcraft` (folded and reformatted slightly for clarity):

```
The 'snap' directory is meant specifically for snapcraft,
but it contains the following non-snapcraft-related paths,
which is unsupported and will cause unexpected behavior:

- README.md

If you must store these files within the 'snap' directory,
move them to 'snap/local', which is ignored by snapcraft.
```

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-05-26 15:56:36 +01:00
James O. D. Hunt
4134beee39 Merge pull request #4301 from jodh-intel/snap-package-rust-virtiofsd
snap: Build and package rust version of virtiofsd
2022-05-26 15:55:06 +01:00
Fabiano Fidêncio
fff832874e clh: Update to v24.0
This release has been tracked through the v24.0 project.

The virtio-iommu specification describes how a device can be attached by default
to a bypass domain. This feature is particularly helpful for booting a VM with
guest software which doesn't support virtio-iommu but still needs to access
the device. Now that Cloud Hypervisor supports this feature, it can boot a VM
with Rust Hypervisor Firmware or OVMF even if the virtio-block device exposing
the disk image is placed behind a virtual IOMMU.

Multiple checks have been added to the code to prevent devices with identical
identifiers from being created, and therefore avoid unexpected behaviors at boot
or whenever a device is hotplugged into the VM.

Sparse mmap support has been added to both VFIO and vfio-user devices. This
allows device regions that are not fully mappable to be partially mapped.
The more of a device region that can be mapped into the guest address space,
the fewer VM exits are generated when the device is accessed, which directly
improves the performance of that device.

A new serial_number option has been added to --platform, allowing a user to
set a specific serial number for the platform. This number is exposed to the
guest through the SMBIOS.

* Fix loading RAW firmware (#4072)
* Reject compressed QCOW images (#4055)
* Reject virtio-mem resize if device is not activated (#4003)
* Fix potential mmap leaks from VFIO/vfio-user MMIO regions (#4069)
* Fix algorithm finding HOB memory resources (#3983)

* Refactor interrupt handling (#4083)
* Load kernel asynchronously (#4022)
* Only create ACPI memory manager DSDT when resizable (#4013)

Deprecated features will be removed in a subsequent release; users should
plan to use alternatives:

* The mergeable option from the virtio-pmem support has been deprecated
(#3968)
* The dax option from the virtio-fs support has been deprecated (#3889)

Fixes: #4317

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-05-26 08:51:18 +00:00
James O. D. Hunt
49361749ed snap: Build and package rust version of virtiofsd
Update the snap config file to build the rust version of `virtiofsd` for
x86_64, but build QEMU's C version for other platforms.

Fixes: #4261.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-05-25 17:04:05 +01:00
James O. D. Hunt
27d903b76a snap: Put the yq binary in the staging bin directory
Rather than putting the `yq` binary in the staging directory itself,
put it in the `bin/` sub-directory.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-05-25 09:40:09 +01:00
James O. D. Hunt
d7b4ce049e snap: Remove unused variable
Remove the unused `kata_url` variable and use the value in the `website`
YAML metadata instead.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-05-25 09:40:09 +01:00
James O. D. Hunt
43de5440e5 snap: Fix unbound variable error
Don't assume `GITHUB_REF` is set.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-05-25 09:40:09 +01:00
James O. D. Hunt
c9b291509d snap: Fix whitespace
Remove trailing space.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-05-25 09:40:09 +01:00
Fupan Li
62d1ed0651 Merge pull request #4290 from Tim-Zhang/remove-oci-kata-agent
runk: merge oci-kata-agent into runk
2022-05-25 11:31:25 +08:00
Fabiano Fidêncio
8a2b82ff51 Merge pull request #4276 from jongwu/build_rust_virtiofsd
virtiofsd: static build virtiofsd from rust code for non-x86
2022-05-24 14:57:21 +02:00
Eric Ernst
6d00701ec9 Merge pull request #4298 from yibozhuang/fix-direct-volume
Fix issues with direct-volume stats feature
2022-05-23 15:23:51 -07:00
Tim Zhang
122a85e222 agent: remove bin oci-kata-agent
Fixes: #4291

Signed-off-by: Tim Zhang <tim@hyper.sh>
2022-05-23 16:55:16 +08:00
Tim Zhang
35619b45aa runk: merge oci-kata-agent into runk
Merge two bins into one.

Fixes: #4291

Signed-off-by: Tim Zhang <tim@hyper.sh>
2022-05-23 16:54:09 +08:00
Fabiano Fidêncio
b9315af092 Merge pull request #4294 from yibozhuang/direct-volume-stats
runtime: fix incorrect Action function for direct-volume stats
2022-05-23 10:22:29 +02:00
Jianyong Wu
10c13d719a qemu: remove virtiofsd option in qemu config
As virtiofsd will be built base on rust, "virtiofsd" option is no longer
needed in qemu.

Fixes: #4258
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2022-05-23 12:57:59 +08:00
Jianyong Wu
d20bc5a4d2 virtiofsd: build rust based virtiofsd from source for non-x86_64
Based on @fidencio's opoinon,
On Arm: static build virtiofsd using musl lib;
on ppc64 & s390: static build virtiofsd using gnu lib;

Fixes: #4258
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2022-05-23 12:57:59 +08:00
Archana Shinde
c95ba63c0c docs: Remove information related to Kata 1.x
Since Kata 2.x does not support the runtime CLI, remove information
related to it. Update the configuration snippet accordingly.

Fixes #3870

Signed-off-by: Archana Shinde <archana.m.shinde@intel.com>
2022-05-21 07:19:28 +05:30
Archana Shinde
34b80382b6 docs: Get rid of note related to networking.
One may want to use standalone containerd without k8s
and still have networking enabled for the container.
Get rid of the note due to its inaccuracy.

Signed-off-by: Archana Shinde <archana.m.shinde@intel.com>
2022-05-21 07:19:28 +05:30
Archana Shinde
dfad5728a7 docs: Mention --cni flag while invoking ctr
Specify that the `--cni` flag needs to be passed to the `ctr` tool
while starting a container in order to have networking enabled for the
container. This flag allows containerd to call into the configured
network plugin which in turn creates a network interface for the
container.

Signed-off-by: Archana Shinde <archana.m.shinde@intel.com>
2022-05-21 07:19:28 +05:30
Yibo Zhuang
8e7c5975c6 agent: fix direct-assigned volume stats
The current implementation of walking the
disks to match with the requested volume path
in agent doesn't work because the volume path
provided by the shim to the agent is the mount
path within the guest and not the device name.
The current logic is trying to match the
device name to the volume path which will never
match.

This change will simplify the
get_volume_capacity_stats and
get_volume_inode_stats to just call statfs and
get the bytes and inodes usage of the volume
path directly.

Fixes: #4297

Signed-off-by: Yibo Zhuang <yibzhuang@gmail.com>
2022-05-20 18:43:27 -07:00
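The agent change is in Rust; the core idea, in a hedged Go sketch, is to call statfs(2) on the guest mount path directly instead of walking block devices:

```go
package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

// volumeStats returns used/total bytes and inodes for the filesystem
// mounted at path, straight from statfs(2) on the mount point.
func volumeStats(path string) (usedBytes, totalBytes, usedInodes, totalInodes uint64, err error) {
	var st unix.Statfs_t
	if err = unix.Statfs(path, &st); err != nil {
		return
	}
	totalBytes = st.Blocks * uint64(st.Bsize)
	usedBytes = (st.Blocks - st.Bfree) * uint64(st.Bsize)
	totalInodes = st.Files
	usedInodes = st.Files - st.Ffree
	return
}

func main() {
	used, total, ui, ti, err := volumeStats("/")
	fmt.Println(used, total, ui, ti, err)
}
```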
Yibo Zhuang
4428ceae16 runtime: direct-volume stats use correct name
Today the shim does a translation when doing
direct-volume stats, where it takes the source and
returns the mount path within the guest.

The source for a direct-assigned volume is actually
the device path on the host, not the publish
volume path.

This change performs a lookup of the mount info
during direct-volume stats to ensure that the
device path is provided to the shim for querying
the volume stats.

Fixes: #4297

Signed-off-by: Yibo Zhuang <yibzhuang@gmail.com>
2022-05-20 18:42:47 -07:00
Yibo Zhuang
ffdc065b4c runtime: direct-volume stats update to use GET parameter
The default Go HTTP mux, AFAIK, doesn't support pattern
routing, so right now the client pads the direct-volume
stats URL with the volume path as a subpath, which always
results in the shim returning 404 Not Found.

This change updates the shim to take the volume
path as a GET query parameter instead of a subpath.
If the parameter is missing or empty, 400 Bad Request
is returned to the client.

Fixes: #4297

Signed-off-by: Yibo Zhuang <yibzhuang@gmail.com>
2022-05-20 18:41:51 -07:00
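A minimal Go sketch of the handler shape this describes, reading the volume path from a query parameter and rejecting empty values (the route and parameter names here are illustrative):

```go
package main

import (
	"fmt"
	"net/http"
)

func statsHandler(w http.ResponseWriter, r *http.Request) {
	// The volume path arrives as a GET query parameter, since the
	// default mux has no pattern routing for path suffixes.
	volPath := r.URL.Query().Get("path") // parameter name illustrative
	if volPath == "" {
		http.Error(w, "missing volume path", http.StatusBadRequest)
		return
	}
	fmt.Fprintf(w, "stats for %s\n", volPath)
}

func main() {
	http.HandleFunc("/direct-volume/stats", statsHandler)
	http.ListenAndServe("127.0.0.1:8090", nil)
}
```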
Yibo Zhuang
f295953183 runtime: fix incorrect Action function for direct-volume stats
The Action field expects a function that returns error,
but the current direct-volume stats Action returns
(string, error), which is invalid.

This change fixes the signature and prints the stats from
the command instead.

Fixes: #4293

Signed-off-by: Yibo Zhuang <yibzhuang@gmail.com>
2022-05-20 14:55:00 -07:00
Peng Tao
2c238c8504 Merge pull request #4213 from zvonkok/vfio
runtime: Adding the correct detection of mediated PCIe devices
2022-05-20 15:00:23 +08:00
Fabiano Fidêncio
811ac6a8ce Merge pull request #4282 from r4f4/runtime-dedup-types-import
runtime: remove duplicate 'types' import
2022-05-19 22:15:36 +02:00
Chelsea Mafrica
d8be0f8e9f Merge pull request #4281 from r4f4/runtime-qemu-comments
runtime: sync docstrings with function names
2022-05-19 09:17:38 -07:00
Rafael Fonseca
7a5ccd1264 runtime: sync docstrings with function names
The functions were renamed but their docstrings were not.

Fixes #4006

Signed-off-by: Rafael Fonseca <r4f4rfs@gmail.com>
2022-05-19 14:31:47 +02:00
Greg Kurz
fa61bd43ee Merge pull request #4238 from snir911/wip/legacy_console
qemu: allow using legacy serial device for the console
2022-05-19 14:30:59 +02:00
Rafael Fonseca
ce2e521a0f runtime: remove duplicate 'types' import
Fallout of 09f7962ff

Fixes #4285

Signed-off-by: Rafael Fonseca <r4f4rfs@gmail.com>
2022-05-19 13:49:47 +02:00
Snir Sheriber
834f93ce8a docs: fix annotations example
An annotation value should always be quoted, regardless of its type.

Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2022-05-19 09:52:30 +03:00
GabyCT
d7aded7238 Merge pull request #4279 from GabyCT/topic/updateosbuilderreadme
docs: Remove clear containers reference in README
2022-05-18 14:26:56 -05:00
Snir Sheriber
f4994e486b runtime: allow annotation configuration to use_legacy_serial
and update the docs and test

Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2022-05-18 18:58:21 +03:00
Gabriela Cervantes
24a2b0f6a2 docs: Remove clear containers reference in README
This PR removes the Clear Containers reference from the rootfs builder
README, as it is no longer used and is deprecated.

Fixes #4278

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2022-05-18 14:53:17 +00:00
Fabiano Fidêncio
c88a48be21 Merge pull request #4271 from r4f4/runtime-err-check-fix
runtime: do not check for EOF error in console watcher
2022-05-18 09:49:48 +02:00
GabyCT
9458cc0053 Merge pull request #4273 from GabyCT/topic/removenemuconf
kernel: Remove nemu.conf from packaging
2022-05-17 16:06:45 -05:00
Greg Kurz
42c64b3d2c Merge pull request #4269 from r4f4/remove-unused-param-get_kata_deps
tools: delete unused param from get_from_kata_deps callers
2022-05-17 18:54:47 +02:00
Gabriela Cervantes
abad33eba0 kernel: Remove nemu.conf from packaging
This PR removes nemu.conf from the kernel configurations, as we are
no longer using NEMU.

Fixes #4272

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2022-05-17 16:23:17 +00:00
Chelsea Mafrica
04bd8f16f0 Merge pull request #4252 from Champ-Goblem/patch/fix-is-signal-handled
agent: Fix is_signal_handled failing parsing str to u64
2022-05-17 08:31:48 -07:00
GabyCT
12f0ab120a Merge pull request #4191 from dgibson/go-test-script
Improve Go unit test script
2022-05-17 10:27:04 -05:00
Rafael Fonseca
e87eb13c4f tools: delete unused param from get_from_kata_deps callers
The param was deleted by a09e58fa80, so
update the callers not to use it.

Fixes #4245

Signed-off-by: Rafael Fonseca <r4f4rfs@gmail.com>
2022-05-17 15:18:41 +02:00
Rafael Fonseca
8052fe62fa runtime: do not check for EOF error in console watcher
The documentation of the bufio package explicitly says

"Err returns the first non-EOF error that was encountered by the
Scanner."

When io.EOF happens, `Err()` will return `nil` and `Scan()` will return
`false`.

Fixes #4079

Signed-off-by: Rafael Fonseca <r4f4rfs@gmail.com>
2022-05-17 15:14:33 +02:00
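The behavior being relied on, shown in a self-contained Go example: Scan() returns false at EOF while Err() stays nil, so EOF needs no special-casing in the console watcher loop:

```go
package main

import (
	"bufio"
	"fmt"
	"strings"
)

func main() {
	sc := bufio.NewScanner(strings.NewReader("line1\nline2\n"))
	for sc.Scan() {
		fmt.Println("console:", sc.Text())
	}
	// At EOF, Scan() has returned false and Err() is nil; only real
	// errors (e.g. a closed pty) surface here.
	if err := sc.Err(); err != nil {
		fmt.Println("console watcher error:", err)
	}
}
```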
Fabiano Fidêncio
5d43718494 Merge pull request #4267 from cmaf/packaging-config-add-numa
packaging: Add kernel config option for SGX in Gramine
2022-05-17 13:10:24 +02:00
Snir Sheriber
c67b9d2975 qemu: allow using legacy serial device for the console
This allows getting guest early boot logs, which are usually
missed when virtconsole is used.
- It utilizes previous work on the govmm side:
https://github.com/kata-containers/govmm/pull/203
- A unit test is added

Fixes: #4237
Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2022-05-17 12:06:11 +03:00
Snir Sheriber
44814dce19 qemu: treat console kernel params within appendConsole
as it is tightly coupled with the appended console device.
Additionally, have it tested.

Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2022-05-17 12:05:31 +03:00
Fupan Li
856c8e81f1 Merge pull request #4220 from liubin/fix/4219
ci: Don't run Docs URL Alive Check workflow on forks
2022-05-17 12:19:55 +08:00
Chelsea Mafrica
4f586d2a91 packaging: Add kernel config option for SGX in Gramine
For the Gramine Shielded Containers guest kernel, CONFIG_NUMA must be
enabled.

Fixes #4266

Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2022-05-16 16:58:26 -07:00
Champ-Goblem
4b437d91f0 agent: Fix is_signal_handled failing parsing str to u64
In the is_signal_handled function, when parsing the hex string returned
from `/proc/<pid>/status`, the space/tab character after the colon
is not removed.

This patch trims the result of SigCgt so that
all whitespace characters are removed. It also extends the existing
test cases to check for this scenario.

Fixes: #4250
Signed-off-by: Champ-Goblem <cameron@northflank.com>
2022-05-16 20:34:26 +02:00
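The agent is Rust; the essence of the fix (trim whitespace before parsing the hex mask) in a hedged Go sketch:

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// sigCgtMask parses a "SigCgt:\t0000000180014002" line from
// /proc/<pid>/status into a signal bitmask. The value must be
// trimmed first: the separator after the colon is a tab/space,
// and an untrimmed string fails hex parsing.
func sigCgtMask(line string) (uint64, error) {
	_, value, ok := strings.Cut(line, ":")
	if !ok {
		return 0, fmt.Errorf("malformed status line: %q", line)
	}
	return strconv.ParseUint(strings.TrimSpace(value), 16, 64)
}

func isSignalHandled(mask uint64, sig uint) bool {
	return mask&(1<<(sig-1)) != 0
}

func main() {
	mask, err := sigCgtMask("SigCgt:\t0000000180014002")
	fmt.Println(mask, err, isSignalHandled(mask, 15)) // 15 = SIGTERM
}
```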
Fabiano Fidêncio
6ffdebd202 Merge pull request #4255 from cmaf/tools-patch-qemu-sgx-numa
tools: Add QEMU patches for SGX numa support
2022-05-16 18:10:41 +02:00
Chelsea Mafrica
ee9ee77388 Merge pull request #4264 from GabyCT/topic/updatecontainerdrunt
docs: Update runc containerd runtime
2022-05-16 08:56:26 -07:00
Gabriela Cervantes
88fb9b72e2 docs: Update runc containerd runtime
As we are using a containerd version > 1.4, we need to update
the runc containerd runtime.

Fixes #4263

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2022-05-16 14:33:48 +00:00
Fabiano Fidêncio
d1f2852d8b tools: Stop building virtiofsd with qemu (for x86_64)
As we can finally move to using the Rust virtiofs daemon, let's stop
building and packaging the C version of virtiofsd for x86_64.

Fixes: #4249
Depends-on: github.com/kata-containers/tests#4785

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-05-16 09:30:24 +02:00
Fabiano Fidêncio
c39852e83f runtime: Use ${LIBEXEC}/virtiofsd as the default virtiofsd path
As we now build and ship the Rust version of virtiofsd, which is not
tied to QEMU, we need to update its default location to match where
we're installing this binary.

Fixes: #4249

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-05-16 09:30:24 +02:00
Chelsea Mafrica
b4b9068cb7 tools: Add QEMU patches for SGX numa support
There are a few patches for SGX numa support in QEMU added after the
6.2.0 release. Add them for SGX support in Kata.

Fixes #4254

Signed-off-by: Chelsea Mafrica <chelsea.e.mafrica@intel.com>
2022-05-13 16:34:57 -07:00
Fabiano Fidêncio
b780be99d7 Merge pull request #4233 from fidencio/topic/virtiofsd-switch-to-the-rust-version
Build and distribute the rust version of virtiofsd
2022-05-13 19:38:01 +02:00
Fabiano Fidêncio
a475956abd workflows: Add support for building virtiofsd
As already done for the other assets we rely on, let's build (well, pull,
in this very specific case) the virtiofsd binary, as we're relying on
its standalone Rust version from now on.

Fixes: #4234

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-05-13 11:37:36 +02:00
Fabiano Fidêncio
71f59f3a7b local-build: Add support for building virtiofsd
As done for the other binaries we release, let's add support for
"building" (or pulling down) the static binary we ship as part of the
kata-containers static tarball (the same one used by kata-deploy).

Right now the virtiofsd is installed in /opt/kata/libexec/virtiofsd, a
different path than the virtiofsd that comes with QEMU.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-05-13 11:37:36 +02:00
Fabiano Fidêncio
c7ac55b6d7 dockerbuild: Install unzip
As virtiofsd comes in `zip` format, let's install unzip in the
containers so we can access the virtiofsd binary.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-05-13 11:37:36 +02:00
Fabiano Fidêncio
8e2042d055 tools: add script to pull virtiofsd
Right now this is very much x86_64 specific, but I'd like to count on
the maintainers of the other architectures to expand it.

Also, the name as it stands may be misleading, as we're actually only
pulling the binary that's statically built using `musl` and released as
part of the official virtiofsd releases.  But we'll need to build it for
the other architectures, thus I'm following the naming of the scripts
used by the other components.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-05-13 11:37:21 +02:00
Fabiano Fidêncio
dbedea5086 versions: Add virtiofsd entry
As we're switching to using the rust version of the virtiofsd, let's
give it its own entry in the versions.yaml file, as it's no longer part
of QEMU.

It's important to mention that GitLab doesn't provide a well-formed URL
for the releases. Instead, it adds a hash there, leading us to have to
add the specific link for the tarball.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-05-13 11:23:39 +02:00
David Gibson
e73b70baff runtime: Don't run unit tests verbose by default
go-test.sh by default adds the -v option to 'go test' meaning that output
will be printed from all the passing tests as well as any failing ones.
This results in a lot of output in which it's often difficult to locate the
failing tests you're interested in.

So, remove -v from the default flags.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-05-13 13:22:31 +10:00
David Gibson
f24a6e761f runtime: Consolidate flags setting in unit tests script
One of the responsibilities of the go-test.sh script is setting up the
default flags for 'go test'.  This is constructed across several different
places in the script using several unneeded intermediate variables though.

Consolidate all the flag construction into one place.

fixes #4190

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-05-13 13:22:29 +10:00
David Gibson
cf465feb02 runtime: Don't change test behaviour based on $CI or $KATA_DEV_MODE
go-test.sh changes behaviour based on both the $CI and $KATA_DEV_MODE
variables, but not in a way that makes a lot of sense.

If either one is set it uses the test_coverage path, instead of the
test_local path.  That collects coverage information, as the name
suggests, but it also means it runs the tests twice as root and
non-root, which is very non-obvious.

It's not clear what use case the test_local path is for at all.
Developer local builds will typically have $KATA_DEV_MODE set and CI
builds will have $CI set.  There's essentially no downside to running
coverage all the time - it has little impact on the test runtime.

In addition, if *both* $CI and $KATA_DEV_MODE are set, the script
refuses to run things as root, considering it "unsafe".  While having
both set might be unwise in a general sense, there's not really any
way running sudo can be any more unsafe than it is with either one
set.

So, simplify everything by just always running the test_coverage path.
This leaves the test_local path unused, so we can remove it entirely.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-05-13 13:14:37 +10:00
David Gibson
34c4ac599c runtime: Remove redundant subcommands from go-test.sh
go-test.sh accepts subcommands, however invoking it in the usual way via
the Makefile doesn't use them.  In fact the only remaining subcommand is
"help" and we already have another way of getting the usage information
(-h or --help).  We don't need a second way, so just drop subcommand
handling.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-05-13 13:14:37 +10:00
David Gibson
0aff5aaa39 runtime: Simplify package listing in go-test.sh
go-test.sh defaults to testing all the packages listed by go list, except
for a number filtered out.  It turns out that none of those filters are
necessary any more:
  * We've long required a Go newer than 1.9 which means the vendor filter
    isn't needed
  * The agent filter doesn't do anything now that we've moved to the Kata
    2.x unified repo
  * The tests filters don't hit anything on the list of modules in
    src/runtime (which is the only user of the script)

But since we don't need to filter anything out any more, we don't even need
to iterate through a list ourselves.  We can simply pass "./..." directly
to go test and it will iterate through all the sub-packages itself.

Interestingly, this more than doubles the speed of "make test" for me - I
suspect because go test's internal parallelism works better over a larger
pool of tests.

This also lets us remove handling of non-existent coverage files from
test_go_package(), since with default options we no longer test packages
without tests. If the user explicitly requests testing of a package with
no tests, then failing makes sense.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-05-13 13:14:37 +10:00
David Gibson
557c4cfd00 runtime: Don't chmod coverage files in Go tests
The go-test.sh script has an explicit chmod command, run as root, to
set the mode of the temporary coverage files to 0644.  AFAICT the
point of this is specifically the 004 bit allowing world read access,
so that we can then merge the temporary coverage file into the main
coverage file.

That's a convoluted way of doing things.  Instead we can just run the tail
command which reads the temporary file as the same user that generated it.

In addition, go-test.sh became root to remove that temporary coverage
file.  This is not necessary, since deleting a regular file just requires
write access to the directory, not the file itself.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-05-13 13:14:37 +10:00
David Gibson
04c8b52e04 runtime: Remove HTML coverage option from go-test.sh
The html-coverage option to this script doesn't really alter behaviour
it just does the same thing as normal coverage, then converts the
report to HTML.  That conversion is a single command, plus a chmod to
make the final output mode 0644.  That overrides any umask the user
has set, which doesn't seem like a policy decision this script should
be making.

Nothing in the kata-containers or tests repository uses this, so it doesn't
really make sense to keep this logic inside this script.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-05-13 13:14:37 +10:00
David Gibson
7f76914422 runtime: Add coverage.txt.tmp to gitignore
In addition to coverage.txt, the go-test.sh script creates
coverage.txt.tmp files while running.  These are temporary and
certainly shouldn't be committed, so add them to the gitignore file.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-05-13 13:14:37 +10:00
David Gibson
13c2577004 runtime: Move go testing script locally
The go unit tests for the runtime are invoked by the helper script
ci/go-test.sh, which calls the run_go_test() function in ci/lib.sh, which
calls into .ci/go-test.sh from the tests repository.

But the runtime is the only user of this script, and stuff for unit tests
(rather than functional or integration tests) generally lives in the main
repository, not the tests repository.

So, just move the actual script into src/runtime.  A change to remove it
from the tests repo will follow.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-05-13 13:14:37 +10:00
Wainer Moschetta
97425a7fe6 Merge pull request #4240 from stevenhorsman/dev-guide-broken-link
doc: Update log parser link
2022-05-12 11:51:51 -03:00
stevenhorsman
4210646802 doc: Update log parser link
- Update log-parser link to reflect new location
- Also update the link to be relative

Fixes: #4239
Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2022-05-12 14:23:13 +01:00
snir911
51fa4ab671 Merge pull request #4165 from snir911/mv_parser
Move the kata-log-parser from the tests repo
2022-05-11 10:33:36 +03:00
Bo Chen
79fb4fc5cb Merge pull request #4223 from likebreath/0509/clh_v23.1
versions: Upgrade to Cloud Hypervisor v23.1
2022-05-10 10:40:22 -07:00
Snir Sheriber
271933fec0 log-parser: fix some of the documentation
minor fixes of links and text

Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2022-05-10 13:23:25 +03:00
Snir Sheriber
c7dacb1211 log-parser: move the kata-log-parser from the tests repo
to the kata-containers repo under the src/tools/log-parser folder
and vendor the modules

Fixes: #4100
Signed-off-by: Snir Sheriber <ssheribe@redhat.com>
2022-05-10 13:23:25 +03:00
GabyCT
61a167139c Merge pull request #4186 from liubin/fix/4185-skip-loop-by-user
agent: Add a macro to skip a loop easier
2022-05-09 16:58:29 -05:00
Bo Chen
82ea018281 versions: Upgrade to Cloud Hypervisor v23.1
The following issues have been addressed in the latest bug-fix release
v23.1 of Cloud Hypervisor: 1) Add some missing seccomp rules; 2) Remove
virtio-fs filesystem entries from config on removal; 3) Do not delete the
API socket on API server start; 4) Reject virtio-mem resize if the guest
hasn't activated the device; 5) Fix OpenAPI naming of I/O throttling
knobs.

Fixes: #4222

Signed-off-by: Bo Chen <chen.bo@intel.com>
2022-05-09 14:15:12 -07:00
Fupan Li
8aad2c59c5 Merge pull request #4184 from liubin/fix/4182-runk-kill-all
runk: use custom Kill command to support --all option
2022-05-09 17:56:10 +08:00
Zvonko Kaiser
2a1d394147 runtime: Adding the correct detection of mediated PCIe devices
Fixes #4212

Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
2022-05-09 00:57:06 -07:00
Bin Liu
7bc4ab68c3 ci: Don't run Docs URL Alive Check workflow on forks
This workflow is a scheduled job that runs at 23:00
every Sunday. It should only run on the main repo,
not on forks.

Fixes: #4219

Signed-off-by: Bin Liu <bin@hyper.sh>
2022-05-09 11:54:25 +08:00
James O. D. Hunt
79d93f1fe7 Merge pull request #4137 from Shensd/sandbox-tests-online_resources
agent: add test coverage for functions find_process and online_resources
2022-05-06 09:20:57 +01:00
Chelsea Mafrica
e2f68c6093 Merge pull request #4187 from fidencio/test-hook-grpc-to-oci
rustjail: Add tests for hook_grpc_to_oci
2022-05-04 09:25:45 -07:00
Fabiano Fidêncio
d16097a805 Merge pull request #4203 from fidencio/2.5.0-alpha1-branch-bump
# Kata Containers 2.5.0-alpha1
2022-05-04 17:53:48 +02:00
Fabiano Fidêncio
9b863b0e01 release: Kata Containers 2.5.0-alpha1
- agent watchers: ensure uid/gid is preserved on copy/mkdir
- clh: Rely on Cloud Hypervisor for generating the device ID
- agent: add tests for create_logger_task function
- runk: set BinaryName for runk for containerd
- tools: Add a Rust-based standard OCI container runtime based on Kata agent
- rustjail: add tests for parse_mount_table
- Virtcontainers: Enable hot plugging vhost-user-blk device on ARM
- docs: repropose direct-assigned volume
- versions: change qemu tdx url and tag
- doc: Update for NVIDIA GPUs
- agent-ctl: Fix abstract socket connections
- Implement network and disk rate limiter for Cloud Hypervisor
- kata-deploy: Add support to RKE2
- docs: Update containerd link to installation guide
- docs: remove pc machine type supports
- Agent: Unit tests for random.rs
- rustjail: Add tests for mount_grpc_to_oci
- packaging: Fix broken path in `build-static-clh.sh`
- Fix Go unit tests to clean up /tmp after themselves
- rustjail: add tests for mount_from function
- rustjail: Add tests for hooks_grpc_to_oci
- agent: modify the type of swappiness to u64
- libs/safe-path: add crate to safely resolve fs paths
- agent: move assert_result macro to test_utils file
- rustjail: Add tests for root_grpc_to_oci
- agent: add tests for mount_to_rootfs function
- agent: add tests for update_container_namespaces
- agent: add tests for is_signal_handled function
- Upgrade to Cloud Hypervisor v23.0
- agent: best-effort removing mount point
- test: Fix golangci-lint error for s390x
- fsGroup support for direct-assigned volume
- kata-monitor: add the README file
- kata-monitor: update the hrefs in the debug/pprof index page
- runtime: Base64 encode the direct volume mountInfo path
- runtime: no need to write virtiofsd error to log
- kata-monitor: add some links when generating pages for browsers
- agent: Avoid agent panic when reading empty stats
- docs: Update link to contributions guide
- agent: add tests for mount_storage
- agent: add test coverage for parse_mount_flags_and_options function
- agent: add tests for do_write_stream function
- runtime: delete debug option in virtiofsd
- rustjail: add test coverage for process_grpc_to_oci function
- agent: Allow the agent to be rebuilt with the change of Cargo features
- protocols: add src/csi.rs to .gitignore
- kata-runtime enable hugepage support
- docs: Add a firecracker installation guide
- runtime: Allow and require no initrd for SE
- test: use `T.TempDir` to create temporary test directory
- clh: Expose service offload configuration

33a8b705 clh: Rely on Cloud Hypervisor for generating the device ID
70eda2fa agent: watchers: ensure uid/gid is preserved on copy/mkdir
7772f7dd runk: set BinaryName for runk for containerd
7ffe5a16 docs: Direct-assigned volume design
081f6de8 versions: change qemu tdx url and tag
666aee54 docs: Add VSOCK localhost example for agent-ctl
86d348e0 docs: Use VM term in agent-ctl doc
4b9b62bb agent-ctl: Fix abstract socket connections
b6467ddd clh: Expose disk rate limiter config
7580bb5a clh: Expose net rate limiter config
a88adaba clh: Cloud Hypervisor has a built-in Rate Limiter
63c4da03 clh: Implement the Disk RateLimiter logic
511f7f82 config: Add DiskRateLimiter* to Cloud Hypervisor
5b18575d hypervisor: Add disk bandwidth and operations rate limiters
1cf94692 clh: Implement the Network RateLimiter logic
00a5b1bd utils: Define DefaultRateLimiterRefillTimeMilliSecs
be1bb7e3 utils: Move FC's function to revert bytes to utils
c9f6496d config: Add NetRateLimiter* to Cloud Hypervisor
2d35e606 hypervisor: Add network bandwidth and operations rate limiters
b0e439cb rustjail: add tests for parse_mount_table
ccb01839 kata-deploy: Add support to RKE2
9d39362e kata-deploy: Reestructure the installing section
18d27f79 kata-deploy: Add a missing `$` prefix in the README
6948b4b3 docs: Update containerd link to installation guide
b221a259 tools: Add runk
2c218a07 agent: Modify Kata agent for runk
dd4bd7f4 doc: Added initial doc update for NV GPUs
832c33d5 docs: remove pc machine type supports
b658dccc tools: fix typo in clh directory name
afbd60da packaging: Fix clh build from source fall-back
4b9e78b8 rustjail: Add tests for mount_grpc_to_oci
81f6b486 agent: add tests for create_logger_task function
96bc3ec2 rustjail: Add tests for hooks_grpc_to_oci
02395027 agent: modify the type of swappiness to u64
1b931f42 runtime: Allow mockfs storage to be placed in any directory
ef6d54a7 runtime: Let MockFSInit create a mock fs driver at any path
5d8438e9 runtime: Move mockfs control global into mockfs.go
963d03ea runtime: Export StoragePathSuffix
1719a8b4 runtime: Don't abuse MockStorageRootPath() for factory tests
bec59f9e runtime: Make bind mount tests better clean up after themselves
f7ba21c8 runtime: Clean up mock hook logs in tests
90b2f5b7 runtime: Make SetupOCIConfigFile clean up after itself
2eeb5dc2 runtime: Don't use fixed /tmp/mountPoint path
0ad89ebd safe-path: add more unit test cases
b63774ec libs/safe-path: add crate to safely resolve fs paths
f385b21b rustjail: add tests for mount_from function
0e7f1a5e agent: move assert_result macro to test_utils file
2256bcb6 rustjail: Add tests for root_grpc_to_oci
7b2ff026 kata-monitor: add a README file
29e569aa virtcontainers: clh: Re-generate the client code
6012c197 versions: Upgrade to Cloud Hypervisor v23.0
aabcebbf agent: best-effort removing mount point
d136c9c2 test: Fix golangci-lint error for s390x
86977ff7 kata-monitor: update the hrefs in the debug/pprof index page
78f30c33 agent: Avoid agent panic when reading empty stats
6e79042a runtime: no need to write virtiofsd error to log
9b6f24b2 agent: add tests for mount_to_rootfs function
c3776b17 agent: add tests for is_signal_handled function
9c22d955 agent: add tests for update_container_namespaces
92c00c7e agent: fsGroup support for direct-assigned volume
6e9e4e8c docs: Update link to contributions guide
532d5397 runtime: fsGroup support for direct-assigned volume
6a47b82c proto: fsGroup support for direct-assigned volume
9d5e7ee0 agent: add tests for mount_storage
f8cc5d1a kata-monitor: add some links when generating pages for browsers
c31cd0e8 rustjail: add test coverage for process_grpc_to_oci function
1118a3d2 agent: add test coverage for parse_mount_flags_and_options function
9d5b03a1 runtime: delete debug option in virtiofsd
eff7c7e0 agent: Allow the agent to be rebuilt with the change of Cargo features
b975f2e8 Virtcontainers: Enable hot plugging vhost-user-blk device on ARM
962d05ec protocols: add src/csi.rs to .gitignore
354cd3b9 runtime: Base64 encode the direct volume mountInfo path
485aeabb agent: add tests for do_write_stream function
4405b188 docs: Add a firecracker installation guide
98750d79 clh: Expose service offload configuration
59c7165e test: use `T.TempDir` to create temporary test directory
ff17c756 runtime: Allow and require no initrd for SE
1cad3a46 agent/random: Ensure data.len > 0
33c953ac agent: Add test_ressed_rng_not_root
39a35b69 agent: Add test to random::reseed_rng()
d8f39fb2 agent/random: Rename RNDRESEEDRNG to RNDRESEEDCRNG
a2f5c176 runtime/virtcontainers: Pass the hugepages resources to agent

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-05-04 16:16:53 +02:00
Fabiano Fidêncio
bd5da4a7d9 Merge pull request #4189 from yibozhuang/watchable-mount-permission
agent watchers: ensure uid/gid is preserved on copy/mkdir
2022-05-04 12:29:24 +02:00
Fabiano Fidêncio
ec250c10e9 Merge pull request #4197 from fidencio/topic/workaround-race-condition-on-removing-and-adding-device-with-clh
clh: Rely on Cloud Hypervisor for generating the device ID
2022-05-04 11:50:14 +02:00
Fabiano Fidêncio
33a8b70558 clh: Rely on Cloud Hypervisor for generating the device ID
We're currently hitting a race condition on the Cloud Hypervisor's
driver code when quickly removing and adding a block device.

This happens because the device removal is an asynchronous operation,
and we currently do *not* monitor events coming from Cloud Hypervisor to
know when the device was actually removed.  On top of this, the
sandbox code is unaware of the pending removal, and when a new device is
attached it may quickly assign the very same ID to the new device,
leading to the Cloud Hypervisor driver trying to hotplug a device with
the same ID as the device that has not yet been removed.

This is, in a nutshell, why the tests with Cloud Hypervisor and
devmapper have been failing every now and then.

The workaround taken to solve the issue is basically *not* passing down
the device ID to Cloud Hypervisor and simply letting Cloud Hypervisor
itself generate those, as Cloud Hypervisor does it in a manner that
avoids such conflicts.  With this addition we have then to keep a map of
the device ID and the Cloud Hypervisor's generated ID, so we can
properly remove the device.

This workaround will probably stay for a while, at least till someone
has enough cycles to implement a way to watch the device removal event
and then properly act on that.  Spoiler alert, this will be a complex
change that may not even be worth it considering the race can be avoided
with this commit.

Fixes: #4176

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-05-04 09:04:03 +02:00
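
The bookkeeping described above can be pictured as a small map keyed by the runtime's own device ID; the following is a minimal sketch with hypothetical names, not the actual driver code:

```go
// Illustrative sketch only: track the mapping between the ID the runtime
// uses for a block device and the ID Cloud Hypervisor generated for it.
package clh

import "sync"

type deviceIDMap struct {
	mu  sync.Mutex
	ids map[string]string // runtime device ID -> CLH-generated device ID
}

func newDeviceIDMap() *deviceIDMap {
	return &deviceIDMap{ids: make(map[string]string)}
}

// recordHotplug remembers the ID Cloud Hypervisor generated when a device
// was hotplugged without passing our own ID down.
func (m *deviceIDMap) recordHotplug(runtimeID, clhID string) {
	m.mu.Lock()
	defer m.mu.Unlock()
	m.ids[runtimeID] = clhID
}

// lookupForRemoval returns the CLH-generated ID to use with the
// remove-device API, since Cloud Hypervisor never saw our runtime ID.
func (m *deviceIDMap) lookupForRemoval(runtimeID string) (string, bool) {
	m.mu.Lock()
	defer m.mu.Unlock()
	id, ok := m.ids[runtimeID]
	return id, ok
}
```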
Jack Hance
475e3bf38f agent: add test coverage for functions find_process and online_resources
Add test coverage for the functions find_process and online_resources in src/sandbox.rs.

Fixes #4085
Fixes #4136

Signed-off-by: Jack Hance <jack.hance@ndsu.edu>
2022-05-03 16:00:24 -05:00
Yibo Zhuang
70eda2fa6c agent: watchers: ensure uid/gid is preserved on copy/mkdir
Today in agent watchers, when we copy files/symlinks
or create directories, the ownership of the source path
is not preserved which can lead to permission issues.

In copy, ensure that we chown the destination file/symlink
to the source path's uid/gid after copying, so that
ownership matches the source ownership.
fs::copy() takes care of setting the permissions.

For directory creation, ensure that we set the
permissions of the created directory to the source
directory's permissions and also perform a chown to the
source path's uid/gid, so that directory ownership
and permissions match the source.

Fixes: #4188

Signed-off-by: Yibo Zhuang <yibzhuang@gmail.com>
2022-05-03 09:57:31 -07:00
Garrett Mahin
4a1e13bd8f rustjail: Add tests for hook_grpc_to_oci
Add test coverage for hook_grpc_to_oci in rustjail/src/lib.rs

Fixes: #4125

Signed-off-by: Garrett Mahin <garrett.mahin@gmail.com>
2022-05-02 23:59:33 +02:00
Bin Liu
383be2203a agent: Add a macro to skip a loop easier
Add a macro to make skipping a loop iteration easier,
without using an if {} else {} condition check.

Fixes: #4185

Signed-off-by: Bin Liu <bin@hyper.sh>
2022-04-30 20:45:41 +08:00
Bin Liu
c633780ba7 Merge pull request #4119 from bradenrayhorn/test-create-logger-task
agent: add tests for create_logger_task function
2022-04-30 19:48:07 +08:00
Bin Liu
97d7b1845b runk: use custom Kill command to support --all option
runk uses the liboci-cli crate to parse command line options,
but liboci-cli does not support the --all option for the kill
command, even though this is runtime spec behavior.

However, crictl issues a kill --all command when stopping containers,
so as a workaround we use a custom kill command instead of the one
provided by liboci-cli.

Fixes: #4182

Signed-off-by: Bin Liu <bin@hyper.sh>
2022-04-30 19:34:18 +08:00
Fabiano Fidêncio
1dd6f85a17 Merge pull request #4178 from liubin/4177
runk: set BinaryName for runk for containerd
2022-04-29 21:17:37 +02:00
Bin Liu
7772f7dd99 runk: set BinaryName for runk for containerd
The default runtime for io.containerd.runc.v2 is runc.
To use runk, the containerd configuration should either set the
default runtime to runk or add a BinaryName option for the
runtime.

Fixes: #4177

Signed-off-by: Bin Liu <bin@hyper.sh>
2022-04-29 22:26:32 +08:00
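
For illustration, a containerd configuration entry along these lines sets the BinaryName option for a runk runtime class (the install path below is an assumption):

```toml
# Fragment of /etc/containerd/config.toml; the runk binary path is an
# assumption, adjust it to wherever runk is installed.
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runk]
  runtime_type = "io.containerd.runc.v2"
  [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runk.options]
    BinaryName = "/usr/local/bin/runk"
```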
James O. D. Hunt
cc839772d3 Merge pull request #2785 from ManaSugi/standard-container-runtime
tools: Add a Rust-based standard OCI container runtime based on Kata agent
2022-04-29 13:20:59 +01:00
James O. D. Hunt
2d5f11501c Merge pull request #4083 from bradenrayhorn/test-parse-mount-table
rustjail: add tests for parse_mount_table
2022-04-29 11:34:22 +01:00
Jianyong Wu
982c32358a Merge pull request #4031 from Jaylyn-Ren/kata-spdk
Virtcontainers: Enable hot plugging vhost-user-blk device on ARM
2022-04-29 12:16:38 +08:00
Feng Wang
da11c21b4a Merge pull request #3248 from fengwang666/direct-blk-design
docs: repropose direct-assigned volume
2022-04-28 16:55:50 -07:00
Feng Wang
7ffe5a16f2 docs: Direct-assigned volume design
Detail design description on direct-assigned volume

Fixes: #1468

Signed-off-by: Feng Wang <feng.wang@databricks.com>
2022-04-28 14:47:36 -07:00
Julio Montes
ea857bb1b8 Merge pull request #4172 from devimc/2022-04-28/fixQEMU
versions: change qemu tdx url and tag
2022-04-28 15:31:52 -05:00
Archana Shinde
9fdc88101f Merge pull request #3907 from zvonkok/nvidia
doc: Update for NVIDIA GPUs
2022-04-28 12:42:44 -07:00
Julio Montes
081f6de874 versions: change qemu tdx url and tag
https://github.com/intel/qemu-dcp is the new repo that supports
qemu with Intel TDX

fixes #4171

Signed-off-by: Julio Montes <julio.montes@intel.com>
2022-04-28 13:46:11 -05:00
Chelsea Mafrica
3f069c7acb Merge pull request #4166 from jodh-intel/agent-ctl-fix-abstract
agent-ctl: Fix abstract socket connections
2022-04-28 10:17:28 -07:00
James O. D. Hunt
666aee54d2 docs: Add VSOCK localhost example for agent-ctl
Update the `agent-ctl` docs to show how to use a VSOCK local address
when running the agent and the tool in the same environment. This is an
alternative to using a Unix socket.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-04-28 13:33:23 +01:00
James O. D. Hunt
86d348e065 docs: Use VM term in agent-ctl doc
Use the standard "VM" acronym to mean Virtual Machine.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-04-28 13:33:19 +01:00
James O. D. Hunt
4b9b62bb3e agent-ctl: Fix abstract socket connections
Unbreak the `agent-ctl` tool connecting to the agent with a Unix domain
socket.

It appears that [1] changed the behaviour of connecting to the agent
using a local Unix socket (which is not used by Kata under normal
operation).

The change can be seen by reverting to commit
72b8144b56 (the one before [1]) and
running the agent manually as:

```bash
$ sudo KATA_AGENT_SERVER_ADDR=unix:///tmp/foo.socket target/x86_64-unknown-linux-musl/release/kata-agent
```

Before [1], in another terminal we see this:

```bash
$ sudo lsof -U 2>/dev/null |grep foo|awk '{print $9}'
@/tmp/foo.socket@
```

But now, we see the following:

```bash
$ sudo lsof -U 2>/dev/null |grep foo|awk '{print $9}'
@/tmp/foo.socket
```

Note the last byte which represents a nul (`\0`) value.

The `agent-ctl` tool used to add that trailing nul, but it no longer seems
to be needed, so this change removes it, restoring functionality. No
external changes are necessary, so the `agent-ctl` tool can connect to
the agent like this:

```bash
$ cargo run -- -l debug connect --server-address "unix://@/tmp/foo.socket" --bundle-dir "$bundle_dir" -c Check -c GetGuestDetails
```

[1] - https://github.com/kata-containers/kata-containers/issues/3124

Fixes: #4164.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2022-04-28 13:33:09 +01:00
Fabiano Fidêncio
c4dd029566 Merge pull request #4135 from fidencio/topic/clh-net-rate-limitting
Implement network and disk rate limiter for Cloud Hypervisor
2022-04-28 13:33:10 +02:00
Fabiano Fidêncio
9fb9c80fd3 Merge pull request #4161 from fidencio/topic/kata-deploy-plus-rke2
kata-deploy: Add support to RKE2
2022-04-28 11:35:11 +02:00
Fabiano Fidêncio
b6467ddd73 clh: Expose disk rate limiter config
With everything implemented, let's now expose the disk rate limiter
configuration options in the Cloud Hypervisor configuration file.

Fixes: #4139

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-04-28 10:28:29 +02:00
Fabiano Fidêncio
7580bb5a78 clh: Expose net rate limiter config
With everything implemented, let's now expose the net rate limiter
configuration options in the Cloud Hypervisor configuration file.

Fixes: #4017

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-04-28 10:28:13 +02:00
Fabiano Fidêncio
a88adabaae clh: Cloud Hypervisor has a built-in Rate Limiter
The notion of "built-in rate limiter" was added as part of
bd8658e362, and that commit considered
that only Firecracker had a built-in rate limiter, which I think was the
case when that was introduced (mid 2020).

Nowadays, however, Cloud Hypervisor takes advantage of the very same crate
used by Firecracker to do I/O throttling.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-04-28 10:27:56 +02:00
Fabiano Fidêncio
63c4da03a9 clh: Implement the Disk RateLimiter logic
Let's take advantage of the newly added DiskRateLimiter* options and
apply those to the disk device configuration.

The logic here is identical to the one already present in the Network
part of Cloud Hypervisor's driver.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-04-28 10:27:53 +02:00
Fabiano Fidêncio
511f7f822d config: Add DiskRateLimiter* to Cloud Hypervisor
Let's add the new disk rate limiter configurations to the Cloud
Hypervisor's hypervisor configuration.

Right now those are not used anywhere, and there's absolutely no way the
users can set those up.  That's coming later in this very same series.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-04-28 10:27:15 +02:00
Fabiano Fidêncio
5b18575dfe hypervisor: Add disk bandwidth and operations rate limiters
This is the disk counterpart of what was introduced for the network
as part of the previous commits in this series.

The newly added fields are:
* DiskRateLimiterBwMaxRate, defined in bits per second, which is used to
  control the disk I/O bandwidth at the VM level.
* DiskRateLimiterBwOneTimeBurst, also defined in bits per second, which
  is used to define an *initial* max rate, which doesn't replenish.
* DiskRateLimiterOpsMaxRate, the operations per second equivalent of the
  DiskRateLimiterBwMaxRate.
* DiskRateLimiterOpsOneTimeBurst, the operations per second equivalent of
  the DiskRateLimiterBwOneTimeBurst.

For now those extra fields have only been added to the hypervisor's
configuration and they'll be used in the coming patches of this very
same series.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-04-28 10:27:11 +02:00
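
Sketched in Go, using the field names from the message above (the struct shape is illustrative, not the exact source):

```go
// Illustrative fragment of a hypervisor configuration carrying the four
// disk rate limiter knobs described above.
package hypervisor

type HypervisorConfig struct {
	// DiskRateLimiterBwMaxRate caps disk bandwidth at the VM level, in bits/s.
	DiskRateLimiterBwMaxRate int64
	// DiskRateLimiterBwOneTimeBurst is an initial, non-replenishing burst, in bits/s.
	DiskRateLimiterBwOneTimeBurst int64
	// DiskRateLimiterOpsMaxRate caps disk I/O at the VM level, in operations/s.
	DiskRateLimiterOpsMaxRate int64
	// DiskRateLimiterOpsOneTimeBurst is an initial, non-replenishing burst, in ops/s.
	DiskRateLimiterOpsOneTimeBurst int64
}
```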
Fabiano Fidêncio
1cf9469297 clh: Implement the Network RateLimiter logic
Let's take advantage of the newly added NetRateLimiter* options and
apply those to the network device configuration.

The logic here is quite similar to the one already present in the
Firecracker's driver, with the main difference being the single Inbound
/ Outbound MaxRate and the presence of both Bandwidth and Operations
rate limiter.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-04-28 10:26:38 +02:00
Fabiano Fidêncio
00a5b1bda9 utils: Define DefaultRateLimiterRefillTimeMilliSecs
Firecracker's driver doesn't expose the RefillTime option of the rate
limiter to the user.  Instead, it uses a constant value of 1000
milliseconds (1 second).

As we're following Firecracker's driver implementation, let's
create a new constant, use it as part of the Firecracker driver, and
later on re-use it as part of the Cloud Hypervisor driver.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-04-28 10:22:42 +02:00
Fabiano Fidêncio
be1bb7e39f utils: Move FC's function to revert bytes to utils
Firecracker's revertBytes function, now called "RevertBytes", can be
exposed as part of the virtcontainers' utils file, as this function will
be reused by Cloud Hypervisor, when adding the rate limiter logic there.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-04-28 10:22:42 +02:00
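
Put together, the two utilities above amount to something like the following sketch; the RevertBytes body is my reading of the decimal-to-binary conversion, so treat it as illustrative rather than the exact source:

```go
package utils

// DefaultRateLimiterRefillTimeMilliSecs mirrors the fixed 1000 ms refill
// window mentioned above, shared by the Firecracker and CLH drivers.
const DefaultRateLimiterRefillTimeMilliSecs = uint64(1000)

// RevertBytes converts a decimal (power-of-1000) quantity into its
// binary (power-of-1024) equivalent, e.g. 1000000 -> 1048576.
func RevertBytes(num uint64) uint64 {
	a := num / 1000
	b := num % 1000
	if a == 0 {
		return num
	}
	return 1024*RevertBytes(a) + b
}
```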
Fabiano Fidêncio
c9f6496d6d config: Add NetRateLimiter* to Cloud Hypervisor
Let's add the new network rate limiter configurations to the
Cloud Hypervisor's hypervisor configuration.

Right now those are not used anywhere, and there's absolutely no way the
users can set those up.  That's coming later in this very same series.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-04-28 10:22:42 +02:00
Fabiano Fidêncio
2d35e6066d hypervisor: Add network bandwidth and operations rate limiters
In a similar way to what's already exposed as RxRateLimiterMaxRate and
TxRateLimiterMaxRate, let's add four new fields to the Hypervisor's
configuration.

The values added are related to bandwidth and operations rate limiters,
which have to be added so we can expose I/O throttling configurations to
users using Cloud Hypervisor as their preferred VMM.

The reason we cannot simply re-use {Rx,Tx}RateLimiterMaxRate is because
Cloud Hypervisor exposes a single MaxRate to be used for both inbound
and outbound queues.

The newly added fields are:
* NetRateLimiterBwMaxRate, defined in bits per second, which is used to
  control the network I/O bandwidth at the VM level.
* NetRateLimiterBwOneTimeBurst, also defined in bits per second, which
  is used to define an *initial* max rate, which doesn't replenish.
* NetRateLimiterOpsMaxRate, the operations per second equivalent of the
  NetRateLimiterBwMaxRate.
* NetRateLimiterOpsOneTimeBurst, the operations per second equivalent of
  the NetRateLimiterBwOneTimeBurst.

For now those extra fields have only been added to the hypervisor's
configuration and they'll be used in the coming patches of this very
same series.

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-04-28 10:22:42 +02:00
Braden Rayhorn
b0e439cb66 rustjail: add tests for parse_mount_table
Add tests for parse_mount_table function in rustjail/src/mount.rs.
Includes some minor refactoring to improve the testability of the
function and improve its error values.

Fixes: #4082

Signed-off-by: Braden Rayhorn <bradenrayhorn@fastmail.com>
2022-04-27 20:06:01 -05:00
Chelsea Mafrica
ab067cf074 Merge pull request #4163 from GabyCT/topic/fixdoccontainerd
docs: Update containerd link to installation guide
2022-04-27 16:18:57 -07:00
Fabiano Fidêncio
ccb0183934 kata-deploy: Add support to RKE2
"RKE2 - Rancher's Next Generation Kuberentes Distribution" can easily be
supported by kata-deploy with some simple adjustments to what we've been
relying on for "k3s".

The main differences between k3s and RKE2 are, basically:
1. The location where the containerd configuration is stored
   - k3s: /var/lib/rancher/k3s/agent/etc/containerd/
   - rke2: /var/lib/rancher/rke2/agent/etc/containerd/
2. The name of the systemd services used:
   - k3s: k3s.service or k3s-agent.service
   - rke2: rke2-server.service or rke2-agent.service

Knowing this, let's add a new overlay for RKE2, adapt the kata-deploy
and the kata-cleanup scripts, and that's it.

Fixes: #4160

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-04-27 19:05:36 +02:00
Fabiano Fidêncio
9d39362e30 kata-deploy: Restructure the installing section
Let's move the specific installation instructions, such as those for k3s,
higher up in the document.

This makes it easier to read (or skip) sections according to what the user
is looking for.

Signed-off-by: Fabiano Fidêncio <fabiano@fidencio.org>
2022-04-27 19:05:36 +02:00
Fabiano Fidêncio
18d27f7949 kata-deploy: Add a missing $ prefix in the README
Commit short-log says it all.

Signed-off-by: Fabiano Fidêncio <fabiano@fidencio.org>
2022-04-27 19:05:36 +02:00
Gabriela Cervantes
6948b4b360 docs: Update containerd link to installation guide
This PR updates the containerd url link for the installation guide

Fixes #4162

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2022-04-27 16:52:53 +00:00
Manabu Sugimoto
b221a2590f tools: Add runk
Add a Rust-based standard OCI container runtime based on
Kata agent.

You can build and install runk as follows:

```sh
$ cd src/tools/runk
$ make
$ sudo make install
$ runk --help
```

Fixes: #2784

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2022-04-28 00:48:57 +09:00
Manabu Sugimoto
2c218a07b9 agent: Modify Kata agent for runk
Generate an oci-kata-agent which is a customized agent to be
called from runk which is a Rust-based standard OCI container
runtime based on Kata agent.

Fixes: #2784

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2022-04-28 00:48:57 +09:00
Zvonko Kaiser
dd4bd7f471 doc: Added initial doc update for NV GPUs
Fixed rpm vs deb references
Update to the shell portion

Fixes #3379

Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
2022-04-27 16:38:35 +02:00
James O. D. Hunt
d02db3a268 Merge pull request #4156 from Kvasscn/kata_dev_fix_docs_pc_machine
docs: remove pc machine type supports
2022-04-27 11:55:58 +01:00
James O. D. Hunt
0a6e7d443e Merge pull request #3910 from etrunko/agent_random
Agent: Unit tests for random.rs
2022-04-27 09:41:02 +01:00
James O. D. Hunt
7b20707197 Merge pull request #4107 from garrettmahin/test-mount-grpc-to-oci
rustjail: Add tests for mount_grpc_to_oci
2022-04-27 08:50:24 +01:00
Fabiano Fidêncio
411053e2bd Merge pull request #4152 from gkurz/fix-clh-build
packaging: Fix broken path in `build-static-clh.sh`
2022-04-27 08:59:43 +02:00
Jason Zhang
832c33d5b5 docs: remove pc machine type support
The 'pc' machine type is no longer supported in the kata configuration,
so remove it from the design docs.

Fixes: #4155

Signed-off-by: Jason Zhang <zhanghj.lc@inspur.com>
2022-04-27 11:28:03 +08:00
Greg Kurz
b658dccc5f tools: fix typo in clh directory name
This allows released binaries to be fetched again.

Fixes: #4151

Signed-off-by: Greg Kurz <groug@kaod.org>
2022-04-26 17:57:32 +02:00
Greg Kurz
afbd60da27 packaging: Fix clh build from source fall-back
If we fail to download the clh binary, we fall-back to build from source.
Unfortunately, `pull_clh_released_binary()` leaves a `cloud_hypervisor`
directory behind, which causes `build_clh_from_source()` not to clone
the git repo:

    [ -d "${repo_dir}" ] || git clone "${cloud_hypervisor_repo}"

When building from a kata-containers git repo, the subsequent calls
to `git` in this function thus apply to the kata-containers repo and
eventually fail, e.g.:

+ git checkout v23.0
error: pathspec 'v23.0' did not match any file(s) known to git

It doesn't quite make sense to keep an existing directory whose
content is arbitrary when we want it to contain a specific
version of clh. Just remove it instead.

Fixes: #4151

Signed-off-by: Greg Kurz <groug@kaod.org>
2022-04-26 17:57:32 +02:00
Peng Tao
5b6e45ed6c Merge pull request #4141 from dgibson/cleanup-tmp
Fix Go unit tests to clean up /tmp after themselves
2022-04-26 15:43:34 +08:00
Garrett Mahin
4b9e78b837 rustjail: Add tests for mount_grpc_to_oci
Add test coverage for mount_grpc_to_oci in rustjail/src/lib.rs

Fixes: #4106

Signed-off-by: Garrett Mahin <garrett.mahin@gmail.com>
2022-04-25 08:37:17 -05:00
James O. D. Hunt
bc919cc54c Merge pull request #4122 from bradenrayhorn/test-mount-from
rustjail: add tests for mount_from function
2022-04-25 11:55:21 +01:00
James O. D. Hunt
cb8dd0f4fc Merge pull request #4143 from garrettmahin/test-hooks-grpc-to-oci
rustjail: Add tests for hooks_grpc_to_oci
2022-04-25 10:50:52 +01:00
Braden Rayhorn
81f6b48626 agent: add tests for create_logger_task function
Add tests for create_logger_task function in src/main.rs.

Fixes: #4113

Signed-off-by: Braden Rayhorn <bradenrayhorn@fastmail.com>
2022-04-24 21:38:32 -05:00
Bin Liu
2629c9fc7b Merge pull request #4114 from yangfeiyu20102011/main
agent: modify the type of swappiness to u64
2022-04-24 13:35:18 +08:00
Garrett Mahin
96bc3ec2e9 rustjail: Add tests for hooks_grpc_to_oci
Add test coverage for hooks_grpc_to_oci in rustjail/src/lib.rs

Fixes: #4142

Signed-off-by: Garrett Mahin <garrett.mahin@gmail.com>
2022-04-22 19:20:04 -05:00
holyfei
0239502781 agent: modify the type of swappiness to u64
The type of MemorySwappiness in the runtime is uint64, while the type of swappiness in the
agent is int64. If we set the maximum uint64 value in the runtime and pass it to the agent,
the value will be read as -1. We should modify the type of swappiness to u64.

Fixes: #4123

Signed-off-by: holyfei <yangfeiyu20092010@163.com>
2022-04-22 16:55:37 +08:00
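
The overflow is easy to reproduce; in Go (the runtime's side of the boundary), reinterpreting the maximum uint64 as a signed 64-bit value yields -1:

```go
package main

import (
	"fmt"
	"math"
)

func main() {
	swappiness := uint64(math.MaxUint64)
	// All 64 bits set: read back as a signed int64 this is -1, which is
	// what the agent saw before swappiness was switched to u64.
	fmt.Println(int64(swappiness)) // prints -1
}
```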
David Gibson
1b931f4203 runtime: Allow mockfs storage to be placed in any directory
Currently EnableMockTesting() takes no arguments and will always place the
mock storage in the fixed location /tmp/vc/mockfs.  This means that one
test run can interfere with the next one if anything isn't cleaned up
(and there are other bugs which mean that happens).  Even if those were
fixed, this would still allow developers testing on the same machine to
interfere with each other.

So, allow the mockfs to be placed at an arbitrary place given as a
parameter to EnableMockTesting().  In TestMain() we place it under our
existing temporary directory, so we don't need any additional cleanup just
for the mockfs.

fixes #4140

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-04-22 14:47:59 +10:00
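
A minimal sketch of the TestMain wiring this describes, assuming EnableMockTesting now takes the storage path (the surrounding helper code is hypothetical):

```go
package vc_test

import (
	"os"
	"path/filepath"
	"testing"
)

func TestMain(m *testing.M) {
	// Hypothetical wiring: keep the mockfs inside this run's own temporary
	// directory instead of the shared, fixed /tmp/vc/mockfs location.
	dir, err := os.MkdirTemp("", "vc-tests-")
	if err != nil {
		panic(err)
	}
	EnableMockTesting(filepath.Join(dir, "mockfs"))

	ret := m.Run()
	os.RemoveAll(dir) // cannot rely on defer: os.Exit skips deferred calls
	os.Exit(ret)
}
```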
David Gibson
ef6d54a781 runtime: Let MockFSInit create a mock fs driver at any path
Currently MockFSInit always creates the mockfs at the fixed path
/tmp/vc/mockfs.  This change allows it to be initialized at any path
given as a parameter.  This allows the tests in fs_test.go to be
simplified: by using a temporary directory from
t.TempDir(), which is automatically cleaned up, we don't need to
manually trigger initTestDir() (which is misnamed; it's actually a
cleanup function).

For now we still use the fixed path when auto-creating the mockfs in
MockAutoInit(), but we'll change that later.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-04-22 14:23:36 +10:00
David Gibson
5d8438e939 runtime: Move mockfs control global into mockfs.go
virtcontainers/persist/fs/mockfs.go defines a mock filesystem type for
testing.  A global variable in virtcontainers/persist/manager.go is used to
force use of the mock fs rather than a normal one.

This patch moves the global, and the EnableMockTesting() function which
sets it, into mockfs.go.  This is slightly cleaner to begin with, and will
allow some further enhancements.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-04-22 14:23:36 +10:00
David Gibson
963d03ea8a runtime: Export StoragePathSuffix
storagePathSuffix defines the file path suffix - "vc" - used for
Kata's persistent storage information, as a private constant.  We
duplicate this information in fc.go which also needs it.

Export it from fs.go instead, so it can be used in fc.go.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-04-22 14:23:36 +10:00
David Gibson
1719a8b491 runtime: Don't abuse MockStorageRootPath() for factory tests
A number of unit tests under virtcontainers/factory use
MockStorageRootPath() as a general purpose temporary directory.  This
doesn't make sense: the mockfs driver isn't even in use here since we only
call EnableMockTesting for the pase virtcontainers package, not the
subpackages.

Instead use t.TempDir() which is for exactly this purpose.  As a bonus it
also handles the cleanup, so we don't need MockStorageDestroy any more.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-04-22 14:23:36 +10:00
David Gibson
bec59f9e39 runtime: Make bind mount tests better clean up after themselves
There are several tests in mount_test.go which perform a sample bind
mount.  These need a corresponding unmount to clean up afterwards or
attempting to delete the temporary files will fail due to the existing
mountpoint.  Most of them had such an unmount, but
TestBindMountInvalidPgtypes was missing one.

In addition, the existing unmounts were done inconsistently - one was
simply inline (so wouldn't be executed if the test fails too early) and one
was a defer.  Change them all to use the t.Cleanup mechanism.

For the dummy mountpoint files, rather than cleaning them up after the
test, the tests were removing them at the beginning of the test.  That
stops the test being messed up by a previous run, but messily.  Since
these are created in a private temporary directory anyway, if there's
something already there, that indicates a problem we shouldn't ignore.
In fact we don't need to explicitly remove these at all - they'll be
removed along with the rest of the private temporary directory.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-04-22 14:20:35 +10:00
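
The t.Cleanup pattern the commit converges on looks roughly like this generic skeleton (not the actual test code):

```go
package mount_test

import (
	"syscall"
	"testing"
)

func TestBindMountCleanupSketch(t *testing.T) {
	src := t.TempDir()
	dest := t.TempDir()

	if err := syscall.Mount(src, dest, "bind", syscall.MS_BIND, ""); err != nil {
		t.Fatalf("bind mount failed: %v", err)
	}
	// Registered immediately after the mount succeeds, so the unmount runs
	// even if a later assertion fails (unlike a plain inline unmount).
	t.Cleanup(func() {
		if err := syscall.Unmount(dest, 0); err != nil {
			t.Logf("unmount failed: %v", err)
		}
	})

	// ... assertions about the mount would go here ...
}
```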
David Gibson
f7ba21c86f runtime: Clean up mock hook logs in tests
The tests in hook_test.go run a mock hook binary, which does some debug
logging to /tmp/mock_hook.log.  Currently we don't clean up those logs
when the tests are done.  Use a test cleanup function to do this.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-04-22 14:14:52 +10:00
David Gibson
90b2f5b776 runtime: Make SetupOCIConfigFile clean up after itself
SetupOCIConfigFile creates a temporary directory with os.MkdirTemp().  This
means the callers need to register a deferred function to remove it again.
At least one of these was commented out, meaning that a /temp/katatest-
directory was left over after the unit tests ran.

Change to using t.TempDir(), which, as well as better matching other parts of
the tests, means the testing framework will handle cleaning it up.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-04-22 14:14:52 +10:00
David Gibson
2eeb5dc223 runtime: Don't use fixed /tmp/mountPoint path
Several tests in kata_agent_test.go create /tmp/mountPoint as a dummy
directory to mount.  This is not cleaned up after the test.  Although it
is in /tmp, that's still a little messy and can be confusing to a user.
In addition, because it uses the same name every time, it allows for one
run of the test to interfere with the next.

Use the built in t.TempDir() to use an automatically named and deleted
temporary directory instead.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2022-04-22 14:14:52 +10:00
Jiang Liu
83979ece18 Merge pull request #3462 from jiangliu/safe-path
libs/safe-path: add crate to safely resolve fs paths
2022-04-21 11:17:49 +08:00
Liu Jiang
0ad89ebd7c safe-path: add more unit test cases
Add more unit test cases to improve code coverage.

Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
2022-04-21 10:01:23 +08:00
Liu Jiang
b63774ec61 libs/safe-path: add crate to safely resolve fs paths
There are always path (symlink) based attacks, so the `safe-path` crate
tries to provide some mechanisms to harden path-resolution-related code.

Fixes: #3451

Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
2022-04-21 10:01:21 +08:00
Braden Rayhorn
f385b21b05 rustjail: add tests for mount_from function
Add tests for the mount_from function in rustjail mount.rs file.

Fixes: #4121

Signed-off-by: Braden Rayhorn <bradenrayhorn@fastmail.com>
2022-04-20 20:04:57 -05:00
Fabiano Fidêncio
baa67d8cc5 Merge pull request #4104 from bradenrayhorn/share-assert-result
agent: move assert_result macro to test_utils file
2022-04-20 17:51:12 +02:00
Braden Rayhorn
0e7f1a5e3a agent: move assert_result macro to test_utils file
Move the assert_result macro to the shared test_utils file
so that it is not duplicated in individual files.

Fixes: #4093

Signed-off-by: Braden Rayhorn <bradenrayhorn@fastmail.com>
2022-04-19 18:57:16 -05:00
Fabiano Fidêncio
604a795073 Merge pull request #4096 from garrettmahin/test-root-grpc-to-oci
rustjail: Add tests for root_grpc_to_oci
2022-04-19 21:38:58 +02:00
Fabiano Fidêncio
f619c65b6a Merge pull request #4074 from bradenrayhorn/test-mount-to-rootfs
agent: add tests for mount_to_rootfs function
2022-04-19 21:36:11 +02:00
Fabiano Fidêncio
7ec42951f2 Merge pull request #4035 from bradenrayhorn/test-update-container-namespaces
agent: add tests for update_container_namespaces
2022-04-19 21:36:02 +02:00
Fabiano Fidêncio
e6bc912439 Merge pull request #3940 from bradenrayhorn/test-is-signal-handled
agent: add tests for is_signal_handled function
2022-04-19 21:35:48 +02:00
Archana Shinde
33e244f284 Merge pull request #4102 from likebreath/0414/clh_v23.0
Upgrade to Cloud Hypervisor v23.0
2022-04-19 06:01:04 -07:00
Fabiano Fidêncio
dbb0c67523 Merge pull request #4072 from fengwang666/dv-bug
agent: best-effort removing mount point
2022-04-19 10:08:40 +02:00
Chelsea Mafrica
0af13b469d Merge pull request #4086 from BbolroC/s390x-fix
test: Fix golangci-lint error for s390x
2022-04-15 21:07:09 -07:00
Bin Liu
b19bfac7cd Merge pull request #4042 from yibozhuang/direct-assign-fsgroup
fsGroup support for direct-assigned volume
2022-04-16 10:23:15 +08:00
Bin Liu
4ec1967542 Merge pull request #4094 from fgiudici/kata-monitor_readme
kata-monitor: add the README file
2022-04-16 08:27:22 +08:00
Bin Liu
362201605e Merge pull request #4055 from fgiudici/kata-monitor_pprof
kata-monitor: update the hrefs in the debug/pprof index page
2022-04-16 08:12:18 +08:00
Garrett Mahin
2256bcb6ab rustjail: Add tests for root_grpc_to_oci
Add test coverage for root_grpc_to_oci in rustjail/src/lib.rs

Fixes: #4095

Signed-off-by: Garrett Mahin <garrett.mahin@gmail.com>
2022-04-15 11:09:18 -05:00
Francesco Giudici
7b2ff02647 kata-monitor: add a README file
Fixes: #3704

Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2022-04-15 18:03:23 +02:00
Bo Chen
29e569aa92 virtcontainers: clh: Re-generate the client code
This patch re-generates the client code for Cloud Hypervisor v23.0.
Note: The client code of cloud-hypervisor's (CLH) OpenAPI is
automatically generated by openapi-generator [1-2].

[1] https://github.com/OpenAPITools/openapi-generator
[2] https://github.com/kata-containers/kata-containers/blob/main/src/runtime/virtcontainers/pkg/cloud-hypervisor/README.md

Signed-off-by: Bo Chen <chen.bo@intel.com>
2022-04-14 12:56:01 -07:00
Bo Chen
6012c19707 versions: Upgrade to Cloud Hypervisor v23.0
Highlights from the Cloud Hypervisor release v23.0: 1) vDPA Support; 2)
Updated OS Support list (Jammy 22.04 added with EOLed versions removed);
3) AArch64 Memory Map Improvements; 4) AMX Support; 5) Bug Fixes;

Details can be found: https://github.com/cloud-hypervisor/cloud-hypervisor/releases/tag/v23.0

Fixes: #4101

Signed-off-by: Bo Chen <chen.bo@intel.com>
2022-04-14 12:52:35 -07:00
Feng Wang
aabcebbf58 agent: best-effort removing mount point
During container exit, the agent tries to remove all the mount point directories,
which can fail if it's a readonly filesystem (e.g. device mapper). This commit ignores
the removal failure and logs a warning message.

Fixes: #4043

Signed-off-by: Feng Wang <feng.wang@databricks.com>
2022-04-13 22:40:23 -07:00
Chelsea Mafrica
32f92e75cc Merge pull request #4021 from fengwang666/direct-volume-bug
runtime: Base64 encode the direct volume mountInfo path
2022-04-13 13:15:38 -07:00
Greg Kurz
4443bb68a4 Merge pull request #4064 from tiezhuoyu/4063/no-need-to-write-error-of-virtiofsd-to-kata-log
runtime: no need to write virtiofsd error to log
2022-04-13 11:59:19 +02:00
Hyounggyu Choi
d136c9c240 test: Fix golangci-lint error for s390x
This is to fix a test failure for the
kata-containers-2.0-ubuntu-20.04-s390x-main-baseline jenkins job

Fixes: #4088

Signed-off-by: Hyounggyu Choi <Hyounggyu.Choi@ibm.com>
2022-04-13 09:20:51 +02:00
Fupan Li
66aa07649b Merge pull request #4062 from liubin/fix/4061-add-links-for-kata-monitor
kata-monitor: add some links when generating pages for browsers
2022-04-13 11:30:21 +08:00
Peng Tao
8d8c0388fa Merge pull request #4078 from fidencio/wip/agent-avoid-panic-when-getting-empty-stats
agent: Avoid agent panic when reading empty stats
2022-04-12 23:07:17 +08:00
Francesco Giudici
86977ff780 kata-monitor: update the hrefs in the debug/pprof index page
kata-monitor allows getting data profiles from the kata shim
instances running on the same node by acting as a proxy
(e.g., http://$NODE_ADDRESS:8090/debug/pprof/?sandbox=$MYSANDBOXID).
In order to proxy the requests and the responses to the right shim,
kata-monitor requires the sandbox id to be passed via a query string
in the url.

The profiling index page proxied by kata-monitor contains the links to all
the available data profiles. None of the links, however, contain the
sandbox id included in the request: the links are therefore broken when
accessed through kata-monitor.
This happens because the profiling index page comes from the kata shim,
which will not include the query string provided in the http request.

Let's add on-the-fly the sandbox id in each href tag returned by the kata
shim index page before providing the proxied page.

Fixes: #4054

Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2022-04-12 15:53:59 +02:00
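
A minimal sketch of the on-the-fly rewrite described above (regex-based; the real proxy code may differ):

```go
package main

import (
	"fmt"
	"regexp"
)

// addSandboxID appends the sandbox id as a query string to every href in
// the proxied pprof index page. Illustrative only.
func addSandboxID(page, sandboxID string) string {
	re := regexp.MustCompile(`href="([^"?]+)"`)
	return re.ReplaceAllString(page, fmt.Sprintf(`href="${1}?sandbox=%s"`, sandboxID))
}

func main() {
	fmt.Println(addSandboxID(`<a href="goroutine">goroutine</a>`, "mysandboxid"))
	// Output: <a href="goroutine?sandbox=mysandboxid">goroutine</a>
}
```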
Fabiano Fidêncio
78f30c33c6 agent: Avoid agent panic when reading empty stats
This was seen in an issue report, where we'd try to unwrap a None value,
leading to a panic.

Fixes: #4077
Related: #4043

Full backtrace:
```
"thread 'tokio-runtime-worker' panicked at 'called `Option::unwrap()` on a `None` value', rustjail/src/cgroups/fs/mod.rs:593:31"
"stack backtrace:"
"   0:     0x7f0390edcc3a - std::backtrace_rs::backtrace::libunwind::trace::hd5eff4de16dbdd15"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/std/src/../../backtrace/src/backtrace/libunwind.rs:93:5"
"   1:     0x7f0390edcc3a - std::backtrace_rs::backtrace::trace_unsynchronized::h04a775b4c6ab90d6"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/std/src/../../backtrace/src/backtrace/mod.rs:66:5"
"   2:     0x7f0390edcc3a - std::sys_common::backtrace::_print_fmt::h3253c3db9f17d826"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/std/src/sys_common/backtrace.rs:67:5"
"   3:     0x7f0390edcc3a - <std::sys_common::backtrace::_print::DisplayBacktrace as core::fmt::Display>::fmt::h02bfc712fc868664"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/std/src/sys_common/backtrace.rs:46:22"
"   4:     0x7f0390a91fbc - core::fmt::write::hfd5090d1132106d8"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/core/src/fmt/mod.rs:1149:17"
"   5:     0x7f0390edb804 - std::io::Write::write_fmt::h34acb699c6d6f5a9"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/std/src/io/mod.rs:1697:15"
"   6:     0x7f0390edbee0 - std::sys_common::backtrace::_print::hfca761479e3d91ed"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/std/src/sys_common/backtrace.rs:49:5"
"   7:     0x7f0390edbee0 - std::sys_common::backtrace::print::hf666af0b87d2b5ba"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/std/src/sys_common/backtrace.rs:36:9"
"   8:     0x7f0390edbee0 - std::panicking::default_hook::{{closure}}::hb4617bd1d4a09097"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/std/src/panicking.rs:211:50"
"   9:     0x7f0390edb2da - std::panicking::default_hook::h84f684d9eff1eede"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/std/src/panicking.rs:228:9"
"  10:     0x7f0390edb2da - std::panicking::rust_panic_with_hook::h8e784f5c39f46346"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/std/src/panicking.rs:606:17"
"  11:     0x7f0390f0c416 - std::panicking::begin_panic_handler::{{closure}}::hef496869aa926670"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/std/src/panicking.rs:500:13"
"  12:     0x7f0390f0c3b6 - std::sys_common::backtrace::__rust_end_short_backtrace::h8e9b039b8ed3e70f"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/std/src/sys_common/backtrace.rs:139:18"
"  13:     0x7f0390f0c372 - rust_begin_unwind"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/std/src/panicking.rs:498:5"
"  14:     0x7f03909062c0 - core::panicking::panic_fmt::h568976b83a33ae59"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/core/src/panicking.rs:107:14"
"  15:     0x7f039090641c - core::panicking::panic::he2e71cfa6548cc2c"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/core/src/panicking.rs:48:5"
"  16:     0x7f0390eb443f - <rustjail::cgroups::fs::Manager as rustjail::cgroups::Manager>::get_stats::h85031fc1c59c53d9"
"  17:     0x7f03909c0138 - <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll::hfa6e6cd7516f8d11"
"  18:     0x7f0390d697e5 - <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll::hffbaa534cfa97d44"
"  19:     0x7f039099c0b3 - <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll::hae3ab083a06d0b4b"
"  20:     0x7f0390af9e1e - std::panic::catch_unwind::h1fdd25c8ebba32e1"
"  21:     0x7f0390b7c4e6 - tokio::runtime::task::raw::poll::hd3ebbd0717dac808"
"  22:     0x7f0390f49f3f - tokio::runtime::thread_pool::worker::Context::run_task::hfdd63cd1e0b17abf"
"  23:     0x7f0390f3a599 - tokio::runtime::task::raw::poll::h62954f6369b1d210"
"  24:     0x7f0390f37863 - std::sys_common::backtrace::__rust_begin_short_backtrace::h1c58f232c078bfe9"
"  25:     0x7f0390f4f3dd - core::ops::function::FnOnce::call_once{{vtable.shim}}::h2d329a84c0feed57"
"  26:     0x7f0390f0e535 - <alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once::h137e5243c6233a3b"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/alloc/src/boxed.rs:1694:9"
"  27:     0x7f0390f0e535 - <alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once::h7331c46863d912b7"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/alloc/src/boxed.rs:1694:9"
"  28:     0x7f0390f0e535 - std::sys::unix::thread::Thread::new::thread_start::h1fb20b966cb927ab"
"                               at /rustc/db9d1b20bba1968c1ec1fc49616d4742c1725b4b/library/std/src/sys/unix/thread.rs:106:17"
```

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-04-12 11:19:08 +02:00
Zhuoyu Tie
6e79042aa0 runtime: no need to write virtiofsd error to log
The scanner reads nothing from the virtiofsd stderr pipe, because the
'--syslog' param redirects stderr to syslog. So there is no need to write
scanner.Text() to the kata log.

Fixes: #4063

Signed-off-by: Zhuoyu Tie <tiezhuoyu@outlook.com>
2022-04-12 15:59:57 +08:00
Braden Rayhorn
9b6f24b2ee agent: add tests for mount_to_rootfs function
Add test coverage for mount_to_rootfs function in src/mount.rs.
Includes minor refactoring to make function more easily testable.

Fixes #4073

Signed-off-by: Braden Rayhorn <bradenrayhorn@fastmail.com>
2022-04-11 21:42:38 -05:00
Braden Rayhorn
c3776b1792 agent: add tests for is_signal_handled function
Add test coverage for is_signal_handled function in rpc.rs. Includes
refactors to make the function testable and handle additional cases.

Fixes #3939

Signed-off-by: Braden Rayhorn <bradenrayhorn@fastmail.com>
2022-04-11 21:23:55 -05:00
Braden Rayhorn
9c22d9554e agent: add tests for update_container_namespaces
Add test coverage for update_container_namespaces function
in src/rpc.rs. Includes minor refactor to make function easier
to test.

Fixes #4034

Signed-off-by: Braden Rayhorn <bradenrayhorn@fastmail.com>
2022-04-11 18:27:30 -05:00
Fabiano Fidêncio
c108bb7a2a Merge pull request #4071 from GabyCT/topic/updatelimidoc
docs: Update link to contributions guide
2022-04-11 18:37:31 +02:00
Chelsea Mafrica
bf98c99f14 Merge pull request #4069 from bradenrayhorn/test-mount-storage
agent: add tests for mount_storage
2022-04-11 09:14:05 -07:00
Yibo Zhuang
92c00c7e84 agent: fsGroup support for direct-assigned volume
Adding two functions set_ownership and
recursive_ownership_change to support changing group id
ownership for a mounted volume.

The set_ownership will be called in common_storage_handler
after mount_storage performs the mount for the volume.
set_ownership will be a noop if the FSGroup field in the
Storage struct is not set which indicates no chown will be
performed. If FSGroup field is specified, then it will
perform the recursive walk of the mounted volume path to
change ownership of all files and directories to the
desired group id. It will also configure the SetGid bit
so that files created in the directory will have a group
following the parent directory's group.

If the fsGroupChangePolicy is on root mismatch,
then the group ownership change will be skipped if the root
directory's group id already matches the desired group
id and if the SetGid bit is also set on the root directory.

This is the same behavior as what
Kubelet does today when performing the recursive walk
to change ownership.

Fixes #4018

Signed-off-by: Yibo Zhuang <yibzhuang@gmail.com>
2022-04-11 08:57:13 -07:00
Gabriela Cervantes
6e9e4e8ce5 docs: Update link to contributions guide
This PR updates the url link to the contributions guide
at the Limitations document.

Fixes #4070

Signed-off-by: Gabriela Cervantes <gabriela.cervantes.tellez@intel.com>
2022-04-11 15:49:57 +00:00
Yibo Zhuang
532d53977e runtime: fsGroup support for direct-assigned volume
The fsGroup will be specified by the fsGroup key in
the direct-assign mountinfo metadata field.
This will be set when invoking the kata-runtime
binary and providing the key, value pair in the metadata
field. Similarly, the fsGroupChangePolicy will also
be provided in the mountinfo metadata field.

Adding two extra fields, FSGroup and FSGroupChangePolicy,
in the Mount construct for container mounts, which will
be populated when creating block devices by parsing
out the mountInfo.json.

And in handleDeviceBlockVolume of the kata-agent client,
it checks if the mount FSGroup is not nil, which
indicates that fsGroup change is required in the guest,
and will provide the FSGroup field in the protobuf to
pass the value to the agent.

Fixes #4018

Signed-off-by: Yibo Zhuang <yibzhuang@gmail.com>
2022-04-11 08:41:13 -07:00
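
In Go terms, the Mount additions could look roughly like this (the shape and policy values are illustrative; the names come from the commit message and Kubernetes' fsGroupChangePolicy):

```go
package virtcontainers

// Illustrative fragment only: a container Mount carrying the direct-volume
// fsGroup metadata. A *uint32 distinguishes "not requested" from gid 0.
type Mount struct {
	Source      string
	Destination string
	// FSGroup, when non-nil, asks the agent to chown the mounted volume
	// to this group id once it is mounted in the guest.
	FSGroup *uint32
	// FSGroupChangePolicy mirrors Kubernetes' policy: "Always", or
	// "OnRootMismatch" to skip the walk when the root already matches.
	FSGroupChangePolicy string
}
```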
Yibo Zhuang
6a47b82c81 proto: fsGroup support for direct-assigned volume
This change adds two fields to the Storage pb

FSGroup which is a group id that the runtime
specifies to indicate to the agent to perform a
chown of the mounted volume to the specified
group id after mounting is complete in the guest.

FSGroupChangePolicy which is a policy to indicate
whether to always perform the group id ownership
change or only if the root directory group id
does not match with the desired group id.

These two fields will allow CSI plugins to indicate
to Kata that after the block device is mounted in
the guest, group id ownership change should be performed
on that volume.

Fixes #4018

Signed-off-by: Yibo Zhuang <yibzhuang@gmail.com>
2022-04-11 08:41:13 -07:00
Braden Rayhorn
9d5e7ee0d4 agent: add tests for mount_storage
Add test coverage for mount_storage function in src/mount.rs.

Fixes: #4068

Signed-off-by: Braden Rayhorn <bradenrayhorn@fastmail.com>
2022-04-10 21:42:20 -05:00
bin
f8cc5d1ad8 kata-monitor: add some links when generating pages for browsers
Add some links to the rendered web pages for a better user experience,
so users can jump between pages just by clicking links in their browsers.

Fixes: #4061

Signed-off-by: bin <bin@hyper.sh>
2022-04-11 09:29:56 +08:00
Fabiano Fidêncio
698e45f403 Merge pull request #4057 from bradenrayhorn/test-parse-mount-flags-and-options
agent: add test coverage for parse_mount_flags_and_options function
2022-04-08 14:42:18 +02:00
Fabiano Fidêncio
761e8313de Merge pull request #3985 from bradenrayhorn/test-do-write-stream
agent: add tests for do_write_stream function
2022-04-08 14:34:57 +02:00
Peng Tao
4f551e3428 Merge pull request #4048 from liubin/fix/3303-delete-virtiofsd-debug-option
runtime: delete debug option in virtiofsd
2022-04-08 15:42:38 +08:00
Peng Tao
a83a16e32c Merge pull request #4059 from garrettmahin/test-process-grpc-to-oci
rustjail: add test coverage for process_grpc_to_oci function
2022-04-08 15:39:28 +08:00
Peng Tao
95e45fab38 Merge pull request #4053 from ManaSugi/fix-makefile-for-features
agent: Allow the agent to be rebuilt with the change of Cargo features
2022-04-08 15:38:25 +08:00
garrettmahin
c31cd0e81a rustjail: add test coverage for process_grpc_to_oci function
Add test coverage for the process_grpc_to_oci function in src/rustjail/lib.rs

Fixes #4058

Signed-off-by: Garrett Mahin <garrett.mahin@gmail.com>
2022-04-07 20:50:48 -05:00
Bin Liu
9c1c219a3f Merge pull request #4007 from liubin/fix/3959-add-csi-rs-to-gitignore
protocols: add src/csi.rs to .gitignore
2022-04-08 09:33:04 +08:00
Braden Rayhorn
1118a3d2da agent: add test coverage for parse_mount_flags_and_options function
Add test coverage for the parse_mount_flags_and_options function
in src/mount.rs.

Fixes #4056

Signed-off-by: Braden Rayhorn <bradenrayhorn@fastmail.com>
2022-04-07 17:46:35 -05:00
bin
9d5b03a1b7 runtime: delete debug option in virtiofsd
virtiofsd's debug is enabled whenever the hypervisor's debug has been
enabled, which generates too many noisy logs from virtiofsd.

Unbind the log level of virtiofsd from that of the
hypervisor; if users want to see virtiofsd's debug log,
they can set it with:

  virtio_fs_extra_args = ["-o", "log_level=debug"]

Fixes: #3303

Signed-off-by: bin <bin@hyper.sh>
2022-04-07 19:55:22 +08:00
Manabu Sugimoto
eff7c7e0ff agent: Allow the agent to be rebuilt with the change of Cargo features
This allows the kata-agent to be rebuilt when the Cargo "features" set is
changed. The Makefile for the agent does not need to specify the
sources as prerequisites, since Cargo itself checks for source
changes.

Fixes: #4052

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
2022-04-07 20:09:20 +09:00
Greg Kurz
d0d3787233 Merge pull request #3696 from shippomx/main
kata-runtime enable hugepage support
2022-04-06 16:47:04 +02:00
Fabiano Fidêncio
465d3a5506 Merge pull request #4012 from nubificus/how-to-fc-guide
docs: Add a firecracker installation guide
2022-04-06 12:59:55 +02:00
Jaylyn Ren
b975f2e8d2 Virtcontainers: Enable hot plugging vhost-user-blk device on ARM
A vhost-user-blk device can be hotplugged on the PCI bridge successfully on
x86, but this fails on Arm. However, hotplugging it on a Root Port as a PCIe
device works well on ARM.
Enabling "pcie_root_port" in configuration.toml is needed.

Fixes: #4019

Signed-off-by: Jaylyn Ren <jaylyn.ren@arm.com>
2022-04-06 17:37:51 +08:00
bin
962d05ec86 protocols: add src/csi.rs to .gitignore
After running make in src/agent, the git working area will be changed:

Untracked files:
  (use "git add <file>..." to include in what will be committed)
	src/libs/protocols/src/csi.rs

The file generated by `build.rs` should be ignored by git.

Fixes: #3959

Signed-off-by: bin <bin@hyper.sh>
2022-04-06 09:55:38 +08:00
Fabiano Fidêncio
b39caf43f1 Merge pull request #3923 from Jakob-Naucke/no-initrd-se
runtime: Allow and require no initrd for SE
2022-04-05 09:26:07 +02:00
Feng Wang
354cd3b9b6 runtime: Base64 encode the direct volume mountInfo path
This is to avoid accidentally deleting multiple volumes.

Fixes #4020

Signed-off-by: Feng Wang <feng.wang@databricks.com>
2022-04-04 19:56:46 -07:00
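
A hedged illustration of why encoding helps (the exact base64 variant and the direct-volumes root below are assumptions): the encoded path contains no '/', so each volume maps to exactly one directory entry and removing it cannot touch a sibling volume's tree:

```go
package main

import (
	"encoding/base64"
	"fmt"
	"path/filepath"
)

func main() {
	// Hypothetical values: the mountInfo root and volume path are examples.
	const directVolumesRoot = "/run/kata-containers/shared/direct-volumes"
	volumePath := "/var/lib/kubelet/pods/uid/volumes/vol-1"

	// URL-safe base64 emits no '/', so the whole volume path collapses into
	// a single path component rather than a nested directory tree.
	encoded := base64.URLEncoding.EncodeToString([]byte(volumePath))
	fmt.Println(filepath.Join(directVolumesRoot, encoded))
}
```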
Braden Rayhorn
485aeabb6b agent: add tests for do_write_stream function
Add test coverage for do_write_stream function of AgentService
in src/rpc.rs. Includes minor refactoring to make function more
easily testable.

Fixes #3984

Signed-off-by: Braden Rayhorn <bradenrayhorn@fastmail.com>
2022-04-04 08:21:01 -05:00
George Ntoutsos
4405b188e8 docs: Add a firecracker installation guide
Add info on setting up kata with firecracker.

Fixes: #3555

Signed-off-by: George Ntoutsos <gntouts@nubificus.co.uk>
Signed-off-by: Anastassios Nanos <ananos@nubificus.co.uk>
2022-04-04 14:59:41 +03:00
Archana Shinde
e62bc8e7f3 Merge pull request #3915 from Juneezee/test/t.TempDir
test: use `T.TempDir` to create temporary test directory
2022-04-04 01:34:46 -07:00
Fabiano Fidêncio
8980d04e25 Merge pull request #4023 from fidencio/wip/expose-service-offload-option-to-clh
clh: Expose service offload configuration
2022-04-01 14:10:33 +02:00
Fabiano Fidêncio
98750d792b clh: Expose service offload configuration
This configuration option is valid for all the hypervisors that are going
to be used with the confidential containers effort, so let's expose the
configuration option for Cloud Hypervisor as well.

Fixes: #4022

Signed-off-by: Fabiano Fidêncio <fabiano.fidencio@intel.com>
2022-04-01 11:15:55 +02:00
Eng Zer Jun
59c7165ee1 test: use T.TempDir to create temporary test directory
The directory created by `T.TempDir` is automatically removed when the
test and all its subtests complete.

This commit also updates the unit test advice to use `T.TempDir` to
create temporary directory in tests.

Fixes: #3924

Reference: https://pkg.go.dev/testing#T.TempDir
Signed-off-by: Eng Zer Jun <engzerjun@gmail.com>
2022-03-31 09:31:36 +08:00
Jakob Naucke
ff17c756d2 runtime: Allow and require no initrd for SE
Previously, it was not permitted to have neither an initrd nor an image.
However, this is the exact config to use for Secure Execution, where the
initrd is part of the image to be specified as `-kernel`. Require the
configuration of no initrd for Secure Execution.

Also
- remove redundant code for image/initrd checking -- no need to check in
  `newQemuHypervisorConfig` (calling) when it is also checked in
  `getInitrdAndImage` (called)
- use `QemuCCWVirtio` constant when possible

Fixes: #3922
Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2022-03-25 18:36:12 +01:00
Eduardo Lima (Etrunko)
1cad3a4696 agent/random: Ensure data.len > 0
Also adds a test to cover this scenario

Signed-off-by: Eduardo Lima (Etrunko) <etrunko@redhat.com>
2022-03-18 15:13:51 -03:00
Eduardo Lima (Etrunko)
33c953ace4 agent: Add test_ressed_rng_not_root
Same as previous test, but does not skip if it is not running as root.

Signed-off-by: Eduardo Lima (Etrunko) <etrunko@redhat.com>
2022-03-18 15:13:51 -03:00
Wainer dos Santos Moschetta
39a35b693a agent: Add test to random::reseed_rng()
Introduced a unit test for the random::reseed_rng() function.

Fixes #291
Signed-off-by: Wainer dos Santos Moschetta <wainersm@redhat.com>
2022-03-18 10:23:22 -03:00
Eduardo Lima (Etrunko)
d8f39fb269 agent/random: Rename RNDRESEEDRNG to RNDRESEEDCRNG
Make this definition match the one in kernel:

5bfc75d92e/include/uapi/linux/random.h (L38-L39)

Signed-off-by: Eduardo Lima (Etrunko) <etrunko@redhat.com>
2022-03-18 10:23:22 -03:00
Miao Xia
a2f5c1768e runtime/virtcontainers: Pass the hugepages resources to agent
The hugepages resources claimed by containers should be limited
by cgroup in the guest OS.

Fixes: #3695

Signed-off-by: Miao Xia <xia.miao1@zte.com.cn>
2022-03-15 18:46:08 +08:00
755 changed files with 312684 additions and 2687 deletions

View File

@@ -14,31 +14,31 @@ jobs:
 target_branch: ${{ github.base_ref }}
 steps:
 - name: Install Go
-if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
+if: github.repository_owner == 'kata-containers'
 uses: actions/setup-go@v2
 with:
 go-version: ${{ matrix.go-version }}
 env:
 GOPATH: ${{ runner.workspace }}/kata-containers
 - name: Set env
-if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
+if: github.repository_owner == 'kata-containers'
 run: |
 echo "GOPATH=${{ github.workspace }}" >> $GITHUB_ENV
 echo "${{ github.workspace }}/bin" >> $GITHUB_PATH
 - name: Checkout code
-if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
+if: github.repository_owner == 'kata-containers'
 uses: actions/checkout@v2
 with:
 fetch-depth: 0
 path: ./src/github.com/${{ github.repository }}
 - name: Setup
-if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
+if: github.repository_owner == 'kata-containers'
 run: |
 cd ${GOPATH}/src/github.com/${{ github.repository }} && ./ci/setup.sh
 env:
 GOPATH: ${{ runner.workspace }}/kata-containers
 # docs url alive check
 - name: Docs URL Alive Check
-if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
+if: github.repository_owner == 'kata-containers'
 run: |
 cd ${GOPATH}/src/github.com/${{ github.repository }} && make docs-url-alive-check

View File

@@ -24,6 +24,7 @@ jobs:
 - firecracker
 - rootfs-image
 - rootfs-initrd
+- virtiofsd
 steps:
 - uses: actions/checkout@v2
 - name: Install docker

View File

@@ -1,4 +1,5 @@
 on:
+workflow_dispatch: # this is used to trigger the workflow on non-main branches
 issue_comment:
 types: [created, edited]
@@ -47,6 +48,7 @@ jobs:
 - rootfs-image
 - rootfs-initrd
 - shim-v2
+- virtiofsd
 steps:
 - name: get-PR-ref
 id: get-PR-ref

View File

@@ -17,6 +17,7 @@ jobs:
 - rootfs-image
 - rootfs-initrd
 - shim-v2
+- virtiofsd
 steps:
 - uses: actions/checkout@v2
 - name: Install docker

.gitignore vendored
View File

@@ -10,4 +10,5 @@ src/agent/kata-agent.service
 src/agent/protocols/src/*.rs
 !src/agent/protocols/src/lib.rs
 build
+src/tools/log-parser/kata-log-parser

View File

@@ -14,6 +14,8 @@ TOOLS =
 TOOLS += agent-ctl
 TOOLS += trace-forwarder
+TOOLS += runk
+TOOLS += log-parser
 STANDARD_TARGETS = build check clean install test vendor

View File

@@ -118,6 +118,7 @@ The table below lists the core parts of the project:
 | [runtime](src/runtime) | core | Main component run by a container manager and providing a containerd shimv2 runtime implementation. |
 | [agent](src/agent) | core | Management process running inside the virtual machine / POD that sets up the container environment. |
 | [documentation](docs) | documentation | Documentation common to all components (such as design and install documentation). |
+| [libraries](src/libs) | core | Library crates shared by multiple Kata Container components or published to [`crates.io`](https://crates.io/index.html) |
 | [tests](https://github.com/kata-containers/tests) | tests | Excludes unit tests which live with the main code. |
 ### Additional components
@@ -131,6 +132,7 @@ The table below lists the remaining parts of the project:
 | [osbuilder](tools/osbuilder) | infrastructure | Tool to create "mini O/S" rootfs and initrd images and kernel for the hypervisor. |
 | [`agent-ctl`](src/tools/agent-ctl) | utility | Tool that provides low-level access for testing the agent. |
 | [`trace-forwarder`](src/tools/trace-forwarder) | utility | Agent tracing helper. |
+| [`runk`](src/tools/runk) | utility | Standard OCI container runtime based on the agent. |
 | [`ci`](https://github.com/kata-containers/ci) | CI | Continuous Integration configuration files and scripts. |
 | [`katacontainers.io`](https://github.com/kata-containers/www.katacontainers.io) | Source for the [`katacontainers.io`](https://www.katacontainers.io) site. |
@@ -138,7 +140,7 @@ The table below lists the remaining parts of the project:
 Kata Containers is now
 [available natively for most distributions](docs/install/README.md#packaged-installation-methods).
-However, packaging scripts and metadata are still used to generate snap and GitHub releases. See
+However, packaging scripts and metadata are still used to generate [snap](snap/local) and GitHub releases. See
 the [components](#components) section for further details.
## Glossary of Terms

View File

@@ -1 +1 @@
-2.5.0-alpha0
+2.5.0-alpha2

View File

@@ -1,12 +0,0 @@
-#!/usr/bin/env bash
-#
-# Copyright (c) 2020 Intel Corporation
-#
-# SPDX-License-Identifier: Apache-2.0
-
-set -e
-
-cidir=$(dirname "$0")
-source "${cidir}/lib.sh"
-
-run_go_test

View File

@@ -39,12 +39,6 @@ run_static_checks()
 bash "$tests_repo_dir/.ci/static-checks.sh" "$@"
 }
-run_go_test()
-{
-clone_tests_repo
-bash "$tests_repo_dir/.ci/go-test.sh"
-}
-
 run_docs_url_alive_check()
 {
 clone_tests_repo

View File

@@ -465,7 +465,7 @@ script and paste its output directly into a
> [runtime](../src/runtime) repository.
To perform analysis on Kata logs, use the
[`kata-log-parser`](https://github.com/kata-containers/tests/tree/main/cmd/log-parser)
[`kata-log-parser`](../src/tools/log-parser)
tool, which can convert the logs into other formats (e.g. JSON, TOML, XML, and YAML).
See [Set up a debug console](#set-up-a-debug-console).
@@ -700,11 +700,11 @@ options to have the kernel boot messages logged into the system journal.
For generic information on enabling debug in the configuration file, see the
[Enable full debug](#enable-full-debug) section.
The kernel boot messages will appear in the `containerd` or `CRI-O` log appropriately,
The kernel boot messages will appear in the `kata` logs (and in the `containerd` or `CRI-O` log appropriately).
such as:
```bash
$ sudo journalctl -t containerd
$ sudo journalctl -t kata
-- Logs begin at Thu 2020-02-13 16:20:40 UTC, end at Thu 2020-02-13 16:30:23 UTC. --
...
time="2020-09-15T14:56:23.095113803+08:00" level=debug msg="reading guest console" console-protocol=unix console-url=/run/vc/vm/ab9f633385d4987828d342e47554fc6442445b32039023eeddaa971c1bb56791/console.sock pid=107642 sandbox=ab9f633385d4987828d342e47554fc6442445b32039023eeddaa971c1bb56791 source=virtcontainers subsystem=sandbox vmconsole="[ 0.395399] brd: module loaded"
@@ -714,3 +714,4 @@ time="2020-09-15T14:56:23.105268162+08:00" level=debug msg="reading guest consol
time="2020-09-15T14:56:23.121121598+08:00" level=debug msg="reading guest console" console-protocol=unix console-url=/run/vc/vm/ab9f633385d4987828d342e47554fc6442445b32039023eeddaa971c1bb56791/console.sock pid=107642 sandbox=ab9f633385d4987828d342e47554fc6442445b32039023eeddaa971c1bb56791 source=virtcontainers subsystem=sandbox vmconsole="[ 0.421324] memmap_init_zone_device initialised 32768 pages in 12ms"
...
```
Refer to the [kata-log-parser documentation](../src/tools/log-parser/README.md), which is useful for processing these entries.
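As a rough sketch (the exact parser flags are documented in its README; the capture command reuses the `journalctl` tag shown above):
```bash
# Capture the raw kata log entries from the systemd journal into a file.
$ sudo journalctl -q -o cat -a -t kata > kata.log
# Feed the captured file to the parser; consult the kata-log-parser README
# for the supported output formats (e.g. JSON, TOML, XML, YAML).
$ kata-log-parser kata.log
```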

View File

@@ -46,7 +46,7 @@ The following link shows the latest list of limitations:
# Contributing
If you would like to work on resolving a limitation, please refer to the
[contributors guide](https://github.com/kata-containers/community/blob/master/CONTRIBUTING.md).
[contributors guide](https://github.com/kata-containers/community/blob/main/CONTRIBUTING.md).
If you wish to raise an issue for a new limitation, either
[raise an issue directly on the runtime](https://github.com/kata-containers/kata-containers/issues/new)
or see the

View File

@@ -4,11 +4,11 @@
## Requirements
- [hub](https://github.com/github/hub)
* Using an [application token](https://github.com/settings/tokens) is required for hub.
* Using an [application token](https://github.com/settings/tokens) is required for hub (set it in the `GITHUB_TOKEN` environment variable).
- GitHub permissions to push tags and create releases in Kata repositories.
- GPG configured to sign git tags. https://help.github.com/articles/generating-a-new-gpg-key/
- GPG configured to sign git tags. https://docs.github.com/en/authentication/managing-commit-signature-verification/generating-a-new-gpg-key
- You should configure your GitHub to use your ssh keys (to push to branches). See https://help.github.com/articles/adding-a-new-ssh-key-to-your-github-account/.
* As an alternative, configure hub to push and fork with HTTPS: `git config --global hub.protocol https` (not tested yet).
@@ -48,7 +48,7 @@
### Merge all bump version Pull requests
- The above step will create a GitHub pull request in the Kata projects. Trigger the CI using the `/test` command on each bump pull request.
- Trigger the test-kata-deploy workflow on the kata-containers repository bump Pull request using `/test_kata_deploy` (monitor under the "action" tab).
- Trigger the `test-kata-deploy` workflow, which is under the `Actions` tab on the repository GitHub page (make sure to select the correct branch and validate that it passes).
- Check any failures and fix if needed.
- Work with the Kata approvers to verify that the CI works and the pull requests are merged.

View File

@@ -277,7 +277,9 @@ mod tests {
## Temporary files
Always delete temporary files on success.
Use `t.TempDir()` to create a temporary directory. The directory created by
`t.TempDir()` is automatically removed when the test and all its subtests
complete.
### Golang temporary files
@@ -286,11 +288,7 @@ func TestSomething(t *testing.T) {
assert := assert.New(t)
// Create a temporary directory
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
// Delete it at the end of the test
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
// Add test logic that will use the tmpdir here...
}

View File

@@ -11,7 +11,8 @@ Kata Containers design documents:
- [`Inotify` support](inotify.md)
- [Metrics(Kata 2.0)](kata-2-0-metrics.md)
- [Design for Kata Containers `Lazyload` ability with `nydus`](kata-nydus-design.md)
- [Design for direct-assigned volume](direct-blk-device-assignment.md)
- [Design for core-scheduling](core-scheduling.md)
---
- [Design proposals](proposals)

View File

@@ -20,7 +20,7 @@ For virtio-fs, the [runtime](README.md#runtime) starts one `virtiofsd` daemon
## Devicemapper
The
[devicemapper `snapshotter`](https://github.com/containerd/containerd/tree/master/snapshots/devmapper)
[devicemapper `snapshotter`](https://github.com/containerd/containerd/tree/main/snapshots/devmapper)
is a special case. The `snapshotter` uses dedicated block devices
rather than formatted filesystems, and operates at the block level
rather than the file level. This knowledge is used to directly use the

View File

@@ -0,0 +1,12 @@
# Core scheduling
Core scheduling is a Linux kernel feature that allows only trusted tasks to run concurrently on
CPUs sharing compute resources (for example, hyper-threads on a core).
Containerd versions >= 1.6.4 leverage this to treat all of the processes associated with a
given pod or container as a single group of trusted tasks. To indicate this should be carried
out, containerd sets the `SCHED_CORE` environment variable for each shim it spawns. When this is
set, the Kata Containers shim implementation uses the `prctl` syscall to create a new core scheduling
domain for the shim process itself as well as future VMM processes it will start.
For more details on the core scheduling feature, see the [Linux documentation](https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/core-scheduling.html).
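As a quick sanity check (a sketch, assuming your distribution ships the kernel build config under `/boot`), you can verify that the host kernel was built with core scheduling support:
```bash
# Core scheduling is gated behind the upstream CONFIG_SCHED_CORE kernel
# option (available since Linux 5.14). The config file path below is an
# assumption; some systems expose /proc/config.gz instead.
$ grep CONFIG_SCHED_CORE= "/boot/config-$(uname -r)"
CONFIG_SCHED_CORE=y
```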

View File

@@ -0,0 +1,253 @@
# Motivation
Today, there exist a few gaps between Container Storage Interface (CSI) and virtual machine (VM) based runtimes such as Kata Containers
that prevent them from working together smoothly.
First, it's cumbersome to use a persistent volume (PV) with Kata Containers. Today, for a PV with Filesystem volume mode, Virtio-fs
is the only way to surface it inside a Kata Container guest VM. But often mounting the filesystem (FS) within the guest operating system (OS) is
desired due to performance benefits, availability of native FS features and security benefits over the Virtio-fs mechanism.
Second, it's difficult, if not impossible, to resize a PV online with Kata Containers. While a PV can be expanded on the host OS,
the updated metadata needs to be propagated to the guest OS in order for the application container to use the expanded volume.
Currently, there is not a way to propagate the PV metadata from the host OS to the guest OS without restarting the Pod sandbox.
# Proposed Solution
Because of the OS boundary, these features cannot be implemented in the CSI node driver plugin running on the host OS
as is normally done in the runc container. Instead, they can be done by the Kata Containers agent inside the guest OS,
but it requires the CSI driver to pass the relevant information to the Kata Containers runtime.
An ideal long term solution would be to have the `kubelet` coordinating the communication between the CSI driver and
the container runtime, as described in [KEP-2857](https://github.com/kubernetes/enhancements/pull/2893/files).
However, as the KEP is still under review, we would like to propose a short/medium term solution to unblock our use case.
The proposed solution is built on top of a previous [proposal](https://github.com/egernst/kata-containers/blob/da-proposal/docs/design/direct-assign-volume.md)
described by Eric Ernst. The previous proposal has two gaps:
1. Writing a `csiPlugin.json` file to the volume root path introduced a security risk. A malicious user can gain unauthorized
access to a block device by writing their own `csiPlugin.json` to the above location through an ephemeral CSI plugin.
2. The proposal didn't describe how to establish a mapping between a volume and a kata sandbox, which is needed for
implementing CSI volume resize and volume stat collection APIs.
This document particularly focuses on how to address these two gaps.
## Assumptions and Limitations
1. The proposal assumes that a block device volume will only be used by one Pod on a node at a time, which we believe
is the most common pattern in Kata Containers use cases. It's also unsafe to have the same block device attached to more than
one Kata pod. In the context of Kubernetes, the `PersistentVolumeClaim` (PVC) needs to have the `accessMode` as `ReadWriteOncePod`.
2. More advanced Kubernetes volume features such as, `fsGroup`, `fsGroupChangePolicy`, and `subPath` are not supported.
## End User Interface
1. The user specifies a PV as a direct-assigned volume. How a PV is specified as a direct-assigned volume is left for each CSI implementation to decide.
There are a few options for reference:
1. A storage class parameter specifies whether it's a direct-assigned volume. This avoids any lookups of PVC
or Pod information from the CSI plugin (as the external provisioner takes care of these). However, all PVs in the storage class with the parameter set
will have host mounts skipped.
2. Use a PVC annotation. This approach requires that the CSI plugins have `--extra-create-metadata` [set](https://kubernetes-csi.github.io/docs/external-provisioner.html#persistentvolumeclaim-and-persistentvolume-parameters)
to be able to look up the PVC annotations from the API server. Pro: the API server lookup of annotations is only required during creation of the PV.
Con: the CSI plugin will always skip host mounting of the PV.
3. The CSI plugin can also look up the pod's `runtimeclass` during `NodePublish`. This approach can be found in the [ALIBABA CSI plugin](https://github.com/kubernetes-sigs/alibaba-cloud-csi-driver/blob/master/pkg/disk/nodeserver.go#L248).
2. The CSI node driver delegates the direct-assigned volume to the Kata Containers runtime. The CSI node driver APIs need to
be modified to pass volume mount information to, and collect volume information from, the Kata Containers runtime by invoking `kata-runtime` command line commands.
* **NodePublishVolume** -- It invokes `kata-runtime direct-volume add --volume-path [volumePath] --mount-info [mountInfo]`
to propagate the volume mount information to the Kata Containers runtime for it to carry out the filesystem mount operation.
The `volumePath` is the [target_path](https://github.com/container-storage-interface/spec/blob/master/csi.proto#L1364) in the CSI `NodePublishVolumeRequest`.
The `mountInfo` is a serialized JSON string.
* **NodeGetVolumeStats** -- It invokes `kata-runtime direct-volume stats --volume-path [volumePath]` to retrieve the filesystem stats of a direct-assigned volume.
* **NodeExpandVolume** -- It invokes `kata-runtime direct-volume resize --volume-path [volumePath] --size [size]` to send a resize request to the Kata Containers runtime to
resize the direct-assigned volume.
* **NodeStageVolume/NodeUnstageVolume** -- It invokes `kata-runtime direct-volume remove --volume-path [volumePath]` to remove the persisted metadata of a direct-assigned volume.
The `mountInfo` object is defined as follows:
```Golang
type MountInfo struct {
// The type of the volume (i.e. block)
VolumeType string `json:"volume-type"`
// The device backing the volume.
Device string `json:"device"`
// The filesystem type to be mounted on the volume.
FsType string `json:"fstype"`
// Additional metadata to pass to the agent regarding this volume.
Metadata map[string]string `json:"metadata,omitempty"`
// Additional mount options.
Options []string `json:"options,omitempty"`
}
```
Note: given that the `mountInfo` is persisted to disk by the Kata runtime, it shouldn't contain any secrets (such as an SMB mount password).
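As a hedged illustration, the serialized form follows the JSON tags of the struct above; the device and paths below are hypothetical values, not part of the proposal:
```bash
# Hypothetical example of handing a serialized MountInfo to the runtime.
# Field names follow the struct's JSON tags; values are illustrative only.
$ mount_info='{"volume-type":"block","device":"/dev/sdf","fstype":"ext4","options":["rw"]}'
$ kata-runtime direct-volume add --volume-path "/kubelet/a/b/c/d/sdf" --mount-info "$mount_info"
```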
## Implementation Details
### Kata runtime
Instead of the CSI node driver writing the mount info into a `csiPlugin.json` file under the volume root,
as described in the original proposal, here we propose that the CSI node driver passes the mount information to
the Kata Containers runtime through a new `kata-runtime` commandline command. The `kata-runtime` then writes the mount
information to a `mountInfo.json` file in a predefined location (`/run/kata-containers/shared/direct-volumes/[volume_path]/`).
When the Kata Containers runtime starts a container, it verifies whether a volume mount is a direct-assigned volume by checking
whether there is a `mountInfo.json` file under the computed Kata `direct-volumes` directory. If there is, the runtime parses the file
and updates the mount spec with its data. The updated mount spec is then passed to the Kata agent in the guest VM together
with other mounts. The Kata Containers runtime also creates a file named by the sandbox id under the `direct-volumes/[volume_path]/`
directory. The reason for adding a sandbox id file is to establish a mapping between the volume and the sandbox using it.
Later, when the Kata Containers runtime handles the `get-stats` and `resize` commands, it uses the sandbox id to identify
the endpoint of the corresponding `containerd-shim-kata-v2`.
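Under these conventions, the per-volume metadata directory might look like the following (a sketch; the sandbox id file name is hypothetical):
```bash
# Illustrative listing of a direct-assigned volume's metadata directory:
# the persisted mount information plus one file named after the sandbox id.
$ ls /run/kata-containers/shared/direct-volumes/kubelet/a/b/c/d/sdf
mountInfo.json  f083eec2f1b4
```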
### containerd-shim-kata-v2 changes
`containerd-shim-kata-v2` provides an API for sandbox management through a Unix domain socket. Two new handlers are proposed: `/direct-volume/stats` and `/direct-volume/resize`:
Example:
```bash
$ curl --unix-socket "$shim_socket_path" -I -X GET 'http://localhost/direct-volume/stats/[urlSafeVolumePath]'
$ curl --unix-socket "$shim_socket_path" -I -X POST 'http://localhost/direct-volume/resize' -d '{ "volumePath"": [volumePath], "Size": "123123" }'
```
The shim then forwards the corresponding request to the `kata-agent` to carry out the operations inside the guest VM. For the `resize` operation,
the Kata runtime also needs to notify the hypervisor to resize the block device (e.g. call `block_resize` in QEMU).
### Kata agent changes
The mount spec of a direct-assigned volume is passed to `kata-agent` through the existing `Storage` GRPC object.
Two new APIs and three new GRPC objects are added to the GRPC protocol between the shim and agent for resizing and getting volume stats:
```protobuf
rpc GetVolumeStats(VolumeStatsRequest) returns (VolumeStatsResponse);
rpc ResizeVolume(ResizeVolumeRequest) returns (google.protobuf.Empty);
message VolumeStatsRequest {
// The volume path on the guest outside the container
string volume_guest_path = 1;
}
message ResizeVolumeRequest {
// Full VM guest path of the volume (outside the container)
string volume_guest_path = 1;
uint64 size = 2;
}
// This should be kept in sync with CSI NodeGetVolumeStatsResponse (https://github.com/container-storage-interface/spec/blob/v1.5.0/csi.proto)
message VolumeStatsResponse {
// This field is OPTIONAL.
repeated VolumeUsage usage = 1;
// Information about the current condition of the volume.
// This field is OPTIONAL.
// This field MUST be specified if the VOLUME_CONDITION node
// capability is supported.
VolumeCondition volume_condition = 2;
}
message VolumeUsage {
enum Unit {
UNKNOWN = 0;
BYTES = 1;
INODES = 2;
}
// The available capacity in specified Unit. This field is OPTIONAL.
// The value of this field MUST NOT be negative.
uint64 available = 1;
// The total capacity in specified Unit. This field is REQUIRED.
// The value of this field MUST NOT be negative.
uint64 total = 2;
// The used capacity in specified Unit. This field is OPTIONAL.
// The value of this field MUST NOT be negative.
uint64 used = 3;
// Units by which values are measured. This field is REQUIRED.
Unit unit = 4;
}
// VolumeCondition represents the current condition of a volume.
message VolumeCondition {
// Normal volumes are available for use and operating optimally.
// An abnormal volume does not meet these criteria.
// This field is REQUIRED.
bool abnormal = 1;
// The message describing the condition of the volume.
// This field is REQUIRED.
string message = 2;
}
```
### Step by step walk-through
Given the following definition:
```YAML
---
apiVersion: v1
kind: Pod
metadata:
name: app
spec:
runtimeClassName: kata-qemu
containers:
- name: app
image: centos
command: ["/bin/sh"]
args: ["-c", "while true; do echo $(date -u) >> /data/out.txt; sleep 5; done"]
volumeMounts:
- name: persistent-storage
mountPath: /data
volumes:
- name: persistent-storage
persistentVolumeClaim:
claimName: ebs-claim
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
annotations:
skip-hostmount: "true"
name: ebs-claim
spec:
accessModes:
- ReadWriteOncePod
volumeMode: Filesystem
storageClassName: ebs-sc
resources:
requests:
storage: 4Gi
---
kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
name: ebs-sc
provisioner: ebs.csi.aws.com
volumeBindingMode: WaitForFirstConsumer
parameters:
csi.storage.k8s.io/fstype: ext4
```
Let's assume that changes have been made in the `aws-ebs-csi-driver` node driver.
**Node publish volume**
1. In the node CSI driver, the `NodePublishVolume` API invokes: `kata-runtime direct-volume add --volume-path "/kubelet/a/b/c/d/sdf" --mount-info "{\"Device\": \"/dev/sdf\", \"fstype\": \"ext4\"}"`.
2. The Kata runtime writes the mount-info JSON to a file called `mountInfo.json` under `/run/kata-containers/shared/direct-volumes/kubelet/a/b/c/d/sdf`.
**Node unstage volume**
1. In the node CSI driver, the `NodeUnstageVolume` API invokes: `kata-runtime direct-volume remove --volume-path "/kubelet/a/b/c/d/sdf"`.
2. The Kata runtime deletes the directory `/run/kata-containers/shared/direct-volumes/kubelet/a/b/c/d/sdf`.
**Use the volume in sandbox**
1. Upon the request to start a container, the `containerd-shim-kata-v2` examines the container spec,
and iterates through the mounts. For each mount, if there is a `mountInfo.json` file under `/run/kata-containers/shared/direct-volumes/[mount source path]`,
it generates a `storage` GRPC object after overwriting the mount spec with the information in `mountInfo.json`.
2. The shim sends the storage objects to kata-agent through TTRPC.
3. The shim writes a file with the sandbox id as the name under `/run/kata-containers/shared/direct-volumes/[mount source path]`.
4. The kata-agent mounts the storage objects for the container.
**Node expand volume**
1. In the node CSI driver, the `NodeExpandVolume` API invokes: `kata-runtime direct-volume resize --volume-path "/kubelet/a/b/c/d/sdf" --size 8Gi`.
2. The Kata runtime checks whether there is a sandbox id file under the directory `/run/kata-containers/shared/direct-volumes/kubelet/a/b/c/d/sdf`.
3. The Kata runtime identifies the shim instance through the sandbox id, and sends a GRPC request to resize the volume.
4. The shim handles the request, asks the hypervisor to resize the block device and sends a GRPC request to Kata agent to resize the filesystem.
5. Kata agent receives the request and resizes the filesystem.
**Node get volume stats**
1. In the node CSI driver, the `NodeGetVolumeStats` API invokes: `kata-runtime direct-volume stats --volume-path "/kubelet/a/b/c/d/sdf"`.
2. The Kata runtime checks whether there is a sandbox id file under the directory `/run/kata-containers/shared/direct-volumes/kubelet/a/b/c/d/sdf`.
3. The Kata runtime identifies the shim instance through the sandbox id, and sends a GRPC request to get the volume stats.
4. The shim handles the request and forwards it to the Kata agent.
5. Kata agent receives the request and returns the filesystem stats.

View File

@@ -51,6 +51,7 @@ The `kata-monitor` management agent should be started on each node where the Kat
> **Note**: a *node* running Kata containers will be either a single host system or a worker node belonging to a K8s cluster capable of running Kata pods.
- Aggregate the metrics of the sandboxes running on the node, adding the `sandbox_id` label to them.
- Attach the additional `cri_uid`, `cri_name` and `cri_namespace` labels to the sandbox metrics, tracking the `uid`, `name` and `namespace` Kubernetes pod metadata.
- Expose a new Prometheus target, allowing all node metrics coming from the Kata shim to be collected by Prometheus indirectly. This reduces the number of targets in Prometheus and avoids exposing the shim's metrics by `ip:port`.
Only one `kata-monitor` process runs on each node.
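For example, assuming `kata-monitor`'s default listen address of `127.0.0.1:8090` (adjustable on its command line), the aggregated metrics can be fetched like any Prometheus target:
```bash
# Scrape the aggregated node metrics exposed by kata-monitor.
# The address is an assumption based on the default; the grep is just an
# illustrative filter for Kata-prefixed metric names.
$ curl -s http://127.0.0.1:8090/metrics | grep '^kata'
```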

View File

@@ -39,7 +39,7 @@ Details of each solution and a summary are provided below.
Kata Containers with QEMU has complete compatibility with Kubernetes.
Depending on the host architecture, Kata Containers supports various machine types,
for example `pc` and `q35` on x86 systems, `virt` on ARM systems and `pseries` on IBM Power systems. The default Kata Containers
for example `q35` on x86 systems, `virt` on ARM systems and `pseries` on IBM Power systems. The default Kata Containers
machine type is `q35`. The machine type and its [`Machine accelerators`](#machine-accelerators) can
be changed by editing the runtime [`configuration`](architecture/README.md#configuration) file.
@@ -60,9 +60,8 @@ Machine accelerators are architecture specific and can be used to improve the pe
and enable specific features of the machine types. The following machine accelerators
are used in Kata Containers:
- NVDIMM: This machine accelerator is x86 specific and only supported by `pc` and
`q35` machine types. `nvdimm` is used to provide the root filesystem as a persistent
memory device to the Virtual Machine.
- NVDIMM: This machine accelerator is x86 specific and only supported by `q35` machine types.
`nvdimm` is used to provide the root filesystem as a persistent memory device to the Virtual Machine.
#### Hotplug devices

View File

@@ -15,6 +15,11 @@
- `qemu`
- `cloud-hypervisor`
- `firecracker`
In the case of `firecracker` the use of a block device `snapshotter` is needed
for the VM rootfs. Refer to the following guide for additional configuration
steps:
- [Setup Kata containers with `firecracker`](how-to-use-kata-containers-with-firecracker.md)
- `ACRN`
While `qemu`, `cloud-hypervisor` and `firecracker` work out of the box with an installation of Kata,

View File

@@ -72,7 +72,6 @@ $ command -v containerd
### Install CNI plugins
> **Note:** You do not need to install CNI plugins if you do not want to use containerd with Kubernetes.
> If you have installed Kubernetes with `kubeadm`, you might have already installed the CNI plugins.
You can manually install CNI plugins as follows:
@@ -131,74 +130,42 @@ For
The `RuntimeClass` is suggested.
The following configuration includes three runtime classes:
The following configuration includes two runtime classes:
- `plugins.cri.containerd.runtimes.runc`: the runc, and it is the default runtime.
- `plugins.cri.containerd.runtimes.kata`: The function in containerd (reference [the document here](https://github.com/containerd/containerd/tree/master/runtime/v2#binary-naming))
where the dot-connected string `io.containerd.kata.v2` is translated to `containerd-shim-kata-v2` (i.e. the
binary name of the Kata implementation of [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2)).
- `plugins.cri.containerd.runtimes.katacli`: the `containerd-shim-runc-v1` calls `kata-runtime`, which is the legacy process.
```toml
[plugins.cri.containerd]
no_pivot = false
[plugins.cri.containerd.runtimes]
[plugins.cri.containerd.runtimes.runc]
runtime_type = "io.containerd.runc.v1"
[plugins.cri.containerd.runtimes.runc.options]
NoPivotRoot = false
NoNewKeyring = false
ShimCgroup = ""
IoUid = 0
IoGid = 0
BinaryName = "runc"
Root = ""
CriuPath = ""
SystemdCgroup = false
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
privileged_without_host_devices = false
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
BinaryName = ""
CriuImagePath = ""
CriuPath = ""
CriuWorkPath = ""
IoGid = 0
[plugins.cri.containerd.runtimes.kata]
runtime_type = "io.containerd.kata.v2"
[plugins.cri.containerd.runtimes.katacli]
runtime_type = "io.containerd.runc.v1"
[plugins.cri.containerd.runtimes.katacli.options]
NoPivotRoot = false
NoNewKeyring = false
ShimCgroup = ""
IoUid = 0
IoGid = 0
BinaryName = "/usr/bin/kata-runtime"
Root = ""
CriuPath = ""
SystemdCgroup = false
```
Since containerd v1.2.4 and Kata v1.6.0, a new runtime option is supported, which allows you to specify a specific Kata configuration file as follows:
```toml
[plugins.cri.containerd.runtimes.kata]
runtime_type = "io.containerd.kata.v2"
privileged_without_host_devices = true
[plugins.cri.containerd.runtimes.kata.options]
ConfigPath = "/etc/kata-containers/config.toml"
privileged_without_host_devices = true
pod_annotations = ["io.katacontainers.*"]
container_annotations = ["io.katacontainers.*"]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.kata.options]
ConfigPath = "/opt/kata/share/defaults/kata-containers/configuration.toml"
```
`privileged_without_host_devices` tells containerd that a privileged Kata container should not have direct access to all host devices. If unset, containerd will pass all host devices to the Kata container, which may cause security issues.
`pod_annotations` is the list of pod annotations passed to both the pod sandbox as well as container through the OCI config.
`container_annotations` is the list of container annotations passed through to the OCI config of the containers.
This `ConfigPath` option is optional. If you do not specify it, shimv2 first tries to get the configuration file from the environment variable `KATA_CONF_FILE`. If neither is set, shimv2 will use the default Kata configuration file paths (`/etc/kata-containers/configuration.toml` and `/usr/share/defaults/kata-containers/configuration.toml`).
If you use containerd older than v1.2.4 or Kata older than v1.6.0 and also want to specify a configuration file, you can use the following workaround, since shimv2 accepts the environment variable `KATA_CONF_FILE` for the configuration file path: create a
shell script with the following contents:
```bash
#!/usr/bin/env bash
KATA_CONF_FILE=/etc/kata-containers/firecracker.toml containerd-shim-kata-v2 $@
```
Name it `/usr/local/bin/containerd-shim-katafc-v2` and reference it in the containerd configuration:
```toml
[plugins.cri.containerd.runtimes.kata-firecracker]
runtime_type = "io.containerd.katafc.v2"
```
#### Kata Containers as the runtime for untrusted workload
For cases without `RuntimeClass` support, we can use the legacy annotation method to support using Kata Containers
@@ -218,28 +185,8 @@ and then, run an untrusted workload with Kata Containers:
runtime_type = "io.containerd.kata.v2"
```
For the earlier versions of Kata Containers and containerd that do not support Runtime V2 (Shim API), you can use the following alternative configuration:
```toml
[plugins.cri.containerd]
# "plugins.cri.containerd.default_runtime" is the runtime to use in containerd.
[plugins.cri.containerd.default_runtime]
# runtime_type is the runtime type to use in containerd e.g. io.containerd.runtime.v1.linux
runtime_type = "io.containerd.runtime.v1.linux"
# "plugins.cri.containerd.untrusted_workload_runtime" is a runtime to run untrusted workloads on it.
[plugins.cri.containerd.untrusted_workload_runtime]
# runtime_type is the runtime type to use in containerd e.g. io.containerd.runtime.v1.linux
runtime_type = "io.containerd.runtime.v1.linux"
# runtime_engine is the name of the runtime engine used by containerd.
runtime_engine = "/usr/bin/kata-runtime"
```
You can find more information in the [containerd config documentation](https://github.com/containerd/cri/blob/master/docs/config.md).
#### Kata Containers as the default runtime
If you want to set Kata Containers as the only runtime in the deployment, you can simply configure as follows:
@@ -250,15 +197,6 @@ If you want to set Kata Containers as the only runtime in the deployment, you ca
runtime_type = "io.containerd.kata.v2"
```
Alternatively, for the earlier versions of Kata Containers and containerd that do not support Runtime V2 (Shim API), you can use the following alternative configuration:
```toml
[plugins.cri.containerd]
[plugins.cri.containerd.default_runtime]
runtime_type = "io.containerd.runtime.v1.linux"
runtime_engine = "/usr/bin/kata-runtime"
```
### Configuration for `cri-tools`
> **Note:** If you skipped the [Install `cri-tools`](#install-cri-tools) section, you can skip this section too.
@@ -312,10 +250,12 @@ To run a container with Kata Containers through the containerd command line, you
```bash
$ sudo ctr image pull docker.io/library/busybox:latest
$ sudo ctr run --runtime io.containerd.run.kata.v2 -t --rm docker.io/library/busybox:latest hello sh
$ sudo ctr run --cni --runtime io.containerd.run.kata.v2 -t --rm docker.io/library/busybox:latest hello sh
```
This launches a BusyBox container named `hello`, and it will be removed by `--rm` after it quits.
The `--cni` flag enables CNI networking for the container. Without this flag, a container with just a
loopback interface is created.
### Launch Pods with `crictl` command line

View File

@@ -91,6 +91,7 @@ There are several kinds of Kata configurations and they are listed below.
| `io.katacontainers.config.hypervisor.virtio_fs_daemon` | string | virtio-fs `vhost-user` daemon path |
| `io.katacontainers.config.hypervisor.virtio_fs_extra_args` | string | extra options passed to `virtiofs` daemon |
| `io.katacontainers.config.hypervisor.enable_guest_swap` | `boolean` | enable swap in the guest |
| `io.katacontainers.config.hypervisor.use_legacy_serial` | `boolean` | uses legacy serial device for guest's console (QEMU) |
## Container Options
| Key | Value Type | Comments |
@@ -172,7 +173,7 @@ kind: Pod
metadata:
name: pod2
annotations:
io.katacontainers.config.runtime.disable_guest_seccomp: false
io.katacontainers.config.runtime.disable_guest_seccomp: "false"
spec:
runtimeClassName: kata
containers:

View File

@@ -0,0 +1,254 @@
# Configure Kata Containers to use Firecracker
This document provides an overview on how to run Kata Containers with the AWS Firecracker hypervisor.
## Introduction
AWS Firecracker is an open source virtualization technology that is purpose-built for creating and managing secure, multi-tenant container and function-based services that provide serverless operational models. AWS Firecracker runs workloads in lightweight virtual machines, called `microVMs`, which combine the security and isolation properties provided by hardware virtualization technology with the speed and flexibility of Containers.
Please refer to AWS Firecracker [documentation](https://github.com/firecracker-microvm/firecracker/blob/main/docs/getting-started.md) for more details.
## Pre-requisites
This document requires the presence of Kata Containers on your system. Install using the instructions available through the following links:
- Kata Containers [automated installation](../install/README.md)
- Kata Containers manual installation: Automated installation does not seem to be supported for Clear Linux, so please use [manual installation](../Developer-Guide.md) steps.
> **Note:** Create rootfs image and not initrd image.
## Install AWS Firecracker
Kata Containers currently supports only AWS Firecracker v0.23.1 (newer versions are not supported [yet](https://github.com/kata-containers/kata-containers/pull/1519)).
To install Firecracker we need to get the `firecracker` and `jailer` binaries:
```bash
$ release_url="https://github.com/firecracker-microvm/firecracker/releases"
$ version="v0.23.1"
$ arch=`uname -m`
$ curl -L ${release_url}/download/${version}/firecracker-${version}-${arch} -o firecracker
$ curl -L ${release_url}/download/${version}/jailer-${version}-${arch} -o jailer
$ chmod +x jailer firecracker
```
To make the binaries available from the default system `PATH` it is recommended to move them to `/usr/local/bin` or add a symbolic link:
```bash
$ sudo ln -s $(pwd)/firecracker /usr/local/bin
$ sudo ln -s $(pwd)/jailer /usr/local/bin
```
More details can be found in [AWS Firecracker docs](https://github.com/firecracker-microvm/firecracker/blob/main/docs/getting-started.md)
In order to run Kata with AWS Firecracker, a block device as the backing store for the VM is required. To interact with `containerd` and Kata we use the `devmapper` `snapshotter`.
## Configure `devmapper`
To check support for your `containerd` installation, you can run:
```
$ ctr plugins ls |grep devmapper
```
If the output of the above command is:
```
io.containerd.snapshotter.v1 devmapper linux/amd64 ok
```
then you can skip this section and move on to `Configure Kata Containers with AWS Firecracker`.
If the output of the above command is:
```
io.containerd.snapshotter.v1 devmapper linux/amd64 error
```
then we need to set up the `devmapper` `snapshotter`. Based on a [very useful
guide](https://docs.docker.com/storage/storagedriver/device-mapper-driver/)
from Docker, we can set it up using the following scripts:
> **Note:** The following scripts assume a 100G sparse file for storing container images, a 10G sparse file for the thin-provisioning pool and 10G base image files for any sandboxed container created. This means that we will need at least 10GB free space.
```
#!/bin/bash
set -ex
DATA_DIR=/var/lib/containerd/devmapper
POOL_NAME=devpool
mkdir -p ${DATA_DIR}
# Create data file
sudo touch "${DATA_DIR}/data"
sudo truncate -s 100G "${DATA_DIR}/data"
# Create metadata file
sudo touch "${DATA_DIR}/meta"
sudo truncate -s 10G "${DATA_DIR}/meta"
# Allocate loop devices
DATA_DEV=$(sudo losetup --find --show "${DATA_DIR}/data")
META_DEV=$(sudo losetup --find --show "${DATA_DIR}/meta")
# Define thin-pool parameters.
# See https://www.kernel.org/doc/Documentation/device-mapper/thin-provisioning.txt for details.
SECTOR_SIZE=512
DATA_SIZE="$(sudo blockdev --getsize64 -q ${DATA_DEV})"
LENGTH_IN_SECTORS=$(bc <<< "${DATA_SIZE}/${SECTOR_SIZE}")
DATA_BLOCK_SIZE=128
LOW_WATER_MARK=32768
# Create a thin-pool device
sudo dmsetup create "${POOL_NAME}" \
--table "0 ${LENGTH_IN_SECTORS} thin-pool ${META_DEV} ${DATA_DEV} ${DATA_BLOCK_SIZE} ${LOW_WATER_MARK}"
cat << EOF
#
# Add this to your config.toml configuration file and restart `containerd` daemon
#
[plugins]
[plugins.devmapper]
pool_name = "${POOL_NAME}"
root_path = "${DATA_DIR}"
base_image_size = "10GB"
discard_blocks = true
EOF
```
Make it executable and run it:
```bash
$ sudo chmod +x ~/scripts/devmapper/create.sh
$ cd ~/scripts/devmapper/
$ sudo ./create.sh
```
Now, we can add the `devmapper` configuration provided by the script to `/etc/containerd/config.toml`.
> **Note:** If you are using the default `containerd` configuration (`containerd config default >> /etc/containerd/config.toml`), you may need to edit the existing `[plugins."io.containerd.snapshotter.v1.devmapper"]` configuration.
Save and restart `containerd`:
```bash
$ sudo systemctl restart containerd
```
We can use `dmsetup` to verify that the thin-pool was created successfully.
```bash
$ sudo dmsetup ls
```
We should also check that `devmapper` is registered and running:
```bash
$ sudo ctr plugins ls | grep devmapper
```
This script needs to be run only once, while setting up the `devmapper` `snapshotter` for `containerd`. Afterwards, make sure that on each reboot, the thin-pool is initialized from the same data directory. Otherwise, all the fetched containers (or the ones that you have created) will be re-initialized. A simple script that re-creates the thin-pool from the same data directory is shown below:
```
#!/bin/bash
set -ex
DATA_DIR=/var/lib/containerd/devmapper
POOL_NAME=devpool
# Allocate loop devices
DATA_DEV=$(sudo losetup --find --show "${DATA_DIR}/data")
META_DEV=$(sudo losetup --find --show "${DATA_DIR}/meta")
# Define thin-pool parameters.
# See https://www.kernel.org/doc/Documentation/device-mapper/thin-provisioning.txt for details.
SECTOR_SIZE=512
DATA_SIZE="$(sudo blockdev --getsize64 -q ${DATA_DEV})"
LENGTH_IN_SECTORS=$(bc <<< "${DATA_SIZE}/${SECTOR_SIZE}")
DATA_BLOCK_SIZE=128
LOW_WATER_MARK=32768
# Create a thin-pool device
sudo dmsetup create "${POOL_NAME}" \
--table "0 ${LENGTH_IN_SECTORS} thin-pool ${META_DEV} ${DATA_DEV} ${DATA_BLOCK_SIZE} ${LOW_WATER_MARK}"
```
We can create a systemd service to run the above script on each reboot:
```bash
$ sudo nano /lib/systemd/system/devmapper_reload.service
```
The service file:
```
[Unit]
Description=Devmapper reload script
[Service]
ExecStart=/path/to/script/reload.sh
[Install]
WantedBy=multi-user.target
```
Enable the newly created service:
```bash
$ sudo systemctl daemon-reload
$ sudo systemctl enable devmapper_reload.service
$ sudo systemctl start devmapper_reload.service
```
## Configure Kata Containers with AWS Firecracker
To configure Kata Containers with AWS Firecracker, copy the generated `configuration-fc.toml` file when building the `kata-runtime` to either `/etc/kata-containers/configuration-fc.toml` or `/usr/share/defaults/kata-containers/configuration-fc.toml`.
The following command shows full paths to the `configuration.toml` files that the runtime loads. It will use the first path that exists. (Please make sure the kernel and image paths are set correctly in the `configuration.toml` file)
```bash
$ sudo kata-runtime --show-default-config-paths
```
## Configure `containerd`
Next, we need to configure containerd. Add a file in your path (e.g. `/usr/local/bin/containerd-shim-kata-fc-v2`) with the following contents:
```
#!/bin/bash
KATA_CONF_FILE=/etc/kata-containers/configuration-fc.toml /usr/local/bin/containerd-shim-kata-v2 $@
```
> **Note:** You may need to edit the paths of the configuration file and the `containerd-shim-kata-v2` to correspond to your setup.
Make it executable:
```bash
$ sudo chmod +x /usr/local/bin/containerd-shim-kata-fc-v2
```
Add the relevant section in `containerd`'s `config.toml` file (`/etc/containerd/config.toml`):
```
[plugins.cri.containerd.runtimes]
[plugins.cri.containerd.runtimes.kata-fc]
runtime_type = "io.containerd.kata-fc.v2"
```
> **Note:** If you are using the default `containerd` configuration (`containerd config default >> /etc/containerd/config.toml`),
> the configuration should change to:
```
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.kata-fc]
runtime_type = "io.containerd.kata-fc.v2"
```
Restart `containerd`:
```bash
$ sudo systemctl restart containerd
```
## Verify the installation
We are now ready to launch a container using Kata with Firecracker to verify that everything worked:
```bash
$ sudo ctr images pull --snapshotter devmapper docker.io/library/ubuntu:latest
$ sudo ctr run --snapshotter devmapper --runtime io.containerd.run.kata-fc.v2 -t --rm docker.io/library/ubuntu:latest fc-test
```

View File

@@ -31,7 +31,7 @@ See below example config:
[plugins.cri]
[plugins.cri.containerd]
[plugins.cri.containerd.runtimes.runc]
runtime_type = "io.containerd.runc.v1"
runtime_type = "io.containerd.runc.v2"
privileged_without_host_devices = false
[plugins.cri.containerd.runtimes.kata]
runtime_type = "io.containerd.kata.v2"

View File

@@ -81,7 +81,7 @@
- Download the standard `systemd(1)` service file and install to
`/etc/systemd/system/`:
- https://raw.githubusercontent.com/containerd/containerd/master/containerd.service
- https://raw.githubusercontent.com/containerd/containerd/main/containerd.service
> **Notes:**
>

View File

@@ -3,4 +3,4 @@
Kata Containers supports passing certain GPUs from the host into the container. Select the GPU vendor for detailed information:
- [Intel](Intel-GPU-passthrough-and-Kata.md)
- [Nvidia](Nvidia-GPU-passthrough-and-Kata.md)
- [NVIDIA](NVIDIA-GPU-passthrough-and-Kata.md)

View File

@@ -0,0 +1,592 @@
# Using NVIDIA GPU device with Kata Containers
An NVIDIA GPU device can be passed to a Kata Containers container using GPU
passthrough (NVIDIA GPU pass-through mode) as well as GPU mediated passthrough
(NVIDIA `vGPU` mode).
In NVIDIA GPU pass-through mode, an entire physical GPU is directly assigned to one
VM, bypassing the NVIDIA Virtual GPU Manager. In this mode of operation, the GPU
is accessed exclusively by the NVIDIA driver running in the VM to which it is
assigned. The GPU is not shared among VMs.
NVIDIA Virtual GPU (`vGPU`) enables multiple virtual machines (VMs) to have
simultaneous, direct access to a single physical GPU, using the same NVIDIA
graphics drivers that are deployed on non-virtualized operating systems. By
doing this, NVIDIA `vGPU` provides VMs with unparalleled graphics performance,
compute performance, and application compatibility, together with the
cost-effectiveness and scalability brought about by sharing a GPU among multiple
workloads. A `vGPU` can be either time-sliced or Multi-Instance GPU (MIG)-backed
with [MIG-slices](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/).
| Technology | Description | Behavior | Detail |
| --- | --- | --- | --- |
| NVIDIA GPU pass-through mode | GPU passthrough | Physical GPU assigned to a single VM | Direct GPU assignment to VM without limitation |
| NVIDIA vGPU time-sliced | GPU time-sliced | Physical GPU time-sliced for multiple VMs | Mediated passthrough |
| NVIDIA vGPU MIG-backed | GPU with MIG-slices | Physical GPU MIG-sliced for multiple VMs | Mediated passthrough |
## Hardware Requirements
NVIDIA GPUs Recommended for Virtualization:
- NVIDIA Tesla (T4, M10, P6, V100 or newer)
- NVIDIA Quadro RTX 6000/8000
## Host BIOS Requirements
Some hardware requires a larger PCI BARs window, for example, NVIDIA Tesla P100 or
K40m:
```sh
$ lspci -s d0:00.0 -vv | grep Region
Region 0: Memory at e7000000 (32-bit, non-prefetchable) [size=16M]
Region 1: Memory at 222800000000 (64-bit, prefetchable) [size=32G] # Above 4G
Region 3: Memory at 223810000000 (64-bit, prefetchable) [size=32M]
```
For large BARs devices, MMIO mapping above 4G address space should be `enabled`
in the PCI configuration of the BIOS.
Some hardware vendors use a different name in BIOS, such as:
- Above 4G Decoding
- Memory Hole for PCI MMIO
- Memory Mapped I/O above 4GB
If you are using a GPU based on the Ampere architecture or later, SR-IOV
additionally needs to be enabled for the `vGPU` use-case.
The following steps outline the workflow for using an NVIDIA GPU with Kata.
## Host Kernel Requirements
The following configurations need to be enabled on your host kernel:
- `CONFIG_VFIO`
- `CONFIG_VFIO_IOMMU_TYPE1`
- `CONFIG_VFIO_MDEV`
- `CONFIG_VFIO_MDEV_DEVICE`
- `CONFIG_VFIO_PCI`
Your host kernel needs to be booted with `intel_iommu=on` on the kernel command
line.
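A quick way to sanity-check these prerequisites (a sketch; it assumes VFIO is built as loadable modules rather than into the kernel):
```bash
# Verify the IOMMU is enabled on the host kernel command line.
$ grep -o 'intel_iommu=on' /proc/cmdline
# Verify the VFIO modules are loaded (skip this if they are built-in).
$ lsmod | grep vfio
```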
## Install and configure Kata Containers
To use non-large BARs devices (for example, NVIDIA Tesla T4), you need Kata
version 1.3.0 or above. Follow the [Kata Containers setup
instructions](../install/README.md) to install the latest version of Kata.
To use large BARs devices (for example, NVIDIA Tesla P100), you need Kata
version 1.11.0 or above.
The following settings in the Kata `configuration.toml` file can work:
Hotplug for PCI devices with small BARs by `acpi_pcihp` (Linux's ACPI PCI
Hotplug driver):
```sh
machine_type = "q35"
hotplug_vfio_on_root_bus = false
```
Hotplug for PCIe devices with large BARs by `pciehp` (Linux's PCIe Hotplug
driver):
```sh
machine_type = "q35"
hotplug_vfio_on_root_bus = true
pcie_root_port = 1
```
## Build Kata Containers kernel with GPU support
The default guest kernel installed with Kata Containers does not provide GPU
support. To use an NVIDIA GPU with Kata Containers, you need to build a kernel
with the necessary GPU support.
The following kernel config options need to be enabled:
```sh
# Support PCI/PCIe device hotplug (Required for large BARs device)
CONFIG_HOTPLUG_PCI_PCIE=y
# Support for loading modules (Required for loading NVIDIA drivers)
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# Enable the MMIO access method for PCIe devices (Required for large BARs device)
CONFIG_PCI_MMCONFIG=y
```
The following kernel config options need to be disabled:
```sh
# Disable Open Source NVIDIA driver nouveau
# It conflicts with NVIDIA official driver
CONFIG_DRM_NOUVEAU=n
```
> **Note**: `CONFIG_DRM_NOUVEAU` is normally disabled by default.
It is worth checking that it is not enabled in your kernel configuration to
prevent any conflicts.
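Before building, you can confirm the generated `.config` reflects the options above (a sketch; the source directory name depends on the kernel version you prepared, e.g. the 5.15.23 example used later in this guide):
```bash
# Inspect the generated guest kernel config for the required options.
# The directory name is an assumption based on the 5.15.23 example below.
$ grep -E 'CONFIG_HOTPLUG_PCI_PCIE|CONFIG_MODULES|CONFIG_MODULE_UNLOAD|CONFIG_PCI_MMCONFIG|CONFIG_DRM_NOUVEAU' \
    kata-linux-5.15.23-89/.config
```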
Build the Kata Containers kernel with the previous config options, using the
instructions described in [Building Kata Containers
kernel](../../tools/packaging/kernel). For further details on building and
installing guest kernels, see [the developer
guide](../Developer-Guide.md#install-guest-kernel-images).
There is an easy way to build a guest kernel that supports NVIDIA GPU:
```sh
## Build guest kernel with ../../tools/packaging/kernel
# Prepare (download guest kernel source, generate .config)
$ ./build-kernel.sh -v 5.15.23 -g nvidia -f setup
# Build guest kernel
$ ./build-kernel.sh -v 5.15.23 -g nvidia build
# Install guest kernel
$ sudo -E ./build-kernel.sh -v 5.15.23 -g nvidia install
```
To build the NVIDIA driver in a Kata container, `linux-headers` are required.
This is one way to generate the deb packages for `linux-headers`:
> **Note**:
> Run `make rpm-pkg` to build the rpm package.
> Run `make deb-pkg` to build the deb package.
>
```sh
$ cd kata-linux-5.15.23-89
$ make deb-pkg
```
Before using the new guest kernel, please update the `kernel` parameters in
`configuration.toml`.
```sh
kernel = "/usr/share/kata-containers/vmlinuz-nvidia-gpu.container"
```
## NVIDIA GPU pass-through mode with Kata Containers
Use the following steps to pass an NVIDIA GPU device in pass-through mode with Kata:
1. Find the Bus-Device-Function (BDF) for the GPU device on the host:
```sh
$ sudo lspci -nn -D | grep -i nvidia
0000:d0:00.0 3D controller [0302]: NVIDIA Corporation Device [10de:20b9] (rev a1)
```
> PCI address `0000:d0:00.0` is assigned to the hardware GPU device.
> `10de:20b9` is the device ID of the hardware GPU device.
2. Find the IOMMU group for the GPU device:
```sh
$ BDF="0000:d0:00.0"
$ readlink -e /sys/bus/pci/devices/$BDF/iommu_group
/sys/kernel/iommu_groups/192
```
The previous output shows that the GPU belongs to IOMMU group 192. The next
step is to bind the GPU to the VFIO-PCI driver.
```sh
$ BDF="0000:d0:00.0"
$ DEV="/sys/bus/pci/devices/$BDF"
$ echo "vfio-pci" > $DEV/driver_override
$ echo $BDF > $DEV/driver/unbind
$ echo $BDF > /sys/bus/pci/drivers_probe
# To return the device to the standard driver, we simply clear the
# driver_override and reprobe the device, e.g.:
$ echo > $DEV/driver_override
$ echo $BDF > $DEV/driver/unbind
$ echo $BDF > /sys/bus/pci/drivers_probe
```
3. Check the IOMMU group number under `/dev/vfio`:
```sh
$ ls -l /dev/vfio
total 0
crw------- 1 zvonkok zvonkok 243, 0 Mar 18 03:06 192
crw-rw-rw- 1 root root 10, 196 Mar 18 02:27 vfio
```
4. Start a Kata container with the GPU device:
```sh
# You may need to `modprobe vhost-vsock` if you get
# host system doesn't support vsock: stat /dev/vhost-vsock
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch uname -r
```
5. Run `lspci` within the container to verify the GPU device is seen in the list
of the PCI devices. Note the vendor-device id of the GPU (`10de:20b9`) in the `lspci` output.
```sh
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch sh -c "lspci -nn | grep '10de:20b9'"
```
6. Additionally, you can check the PCI BARs space of the NVIDIA GPU device in the container:
```sh
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/library/archlinux:latest" arch sh -c "lspci -s 02:00.0 -vv | grep Region"
```
> **Note**: If the command lists `Region` entries for the device, the BAR space of the NVIDIA
> GPU has been successfully allocated.
## NVIDIA vGPU mode with Kata Containers
NVIDIA vGPU is a licensed product on all supported GPU boards. A software license
is required to enable all vGPU features within the guest VM. NVIDIA vGPU manager
needs to be installed on the host to configure GPUs in vGPU mode. See [NVIDIA Virtual GPU Software Documentation v14.0 through 14.1](https://docs.nvidia.com/grid/14.0/) for more details.
### NVIDIA vGPU time-sliced
In the time-sliced mode, the GPU is not partitioned and the workload uses the
whole GPU and shares access to the GPU engines. Processes are scheduled in
series. The best effort scheduler is the default and can be exchanged for
other scheduling policies; see the documentation above for how to do that.
Beware: if you had `MIG` enabled before, disable `MIG` on the GPU if you want
to use `time-sliced` `vGPU`.
```sh
$ sudo nvidia-smi -mig 0
```
Enable the virtual functions for the physical GPU in the `sysfs` file system.
```sh
$ sudo /usr/lib/nvidia/sriov-manage -e 0000:41:00.0
```
Get the `BDF` of the available virtual function on the GPU, and choose one for the
following steps.
```sh
$ cd /sys/bus/pci/devices/0000:41:00.0/
$ ls -l | grep virtfn
```
#### List all available vGPU instances
The following shell snippet walks the `sysfs` and only prints instances
that are available, i.e. that can be created.
```sh
# 00.0 is usually the PF of the device; the VFs have the function number in
# the BDF incremented by some value, so e.g. the very first VF is 0000:41:00.4
cd /sys/bus/pci/devices/0000:41:00.0/
for vf in $(ls -d virtfn*)
do
BDF=$(basename $(readlink -f $vf))
for md in $(ls -d $vf/mdev_supported_types/*)
do
AVAIL=$(cat $md/available_instances)
NAME=$(cat $md/name)
DIR=$(basename $md)
if [ $AVAIL -gt 0 ]; then
echo "| BDF | INSTANCES | NAME | DIR |"
echo "+--------------+-----------+----------------+------------+"
printf "| %12s |%10d |%15s | %10s |\n\n" "$BDF" "$AVAIL" "$NAME" "$DIR"
fi
done
done
```
If there are available instances you get something like this (for the first VF).
Beware that the output is highly dependent on the GPU you have; if there is no
output, check again whether `MIG` is really disabled.
```sh
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-4C | nvidia-692 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-8C | nvidia-693 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-10C | nvidia-694 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-16C | nvidia-695 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-20C | nvidia-696 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-40C | nvidia-697 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 | GRID A100D-80C | nvidia-698 |
```
Change to the `mdev_supported_types` directory for the virtual function on which
you want to create the `vGPU`. Taking the first output as an example:
```sh
$ cd virtfn0/mdev_supported_types/nvidia-692
$ UUIDGEN=$(uuidgen)
$ sudo bash -c "echo $UUIDGEN > create"
```
Confirm that the `vGPU` was created. You should see the `UUID` pointing to a
subdirectory of the `sysfs` space.
```sh
$ ls -l /sys/bus/mdev/devices/
```
Get the `IOMMU` group number and verify there is a `VFIO` device created to use
with Kata.
```sh
$ ls -l /sys/bus/mdev/devices/*/
$ ls -l /dev/vfio
```
Use the `VFIO` device created in the same way as in the pass-through use-case.
Beware that the guest needs the NVIDIA guest drivers, so one would need to build
a new guest `OS` image.
### NVIDIA vGPU MIG-backed
We will not go into detail about what `MIG` is; briefly, it is a technology to
partition the hardware into independent instances with guaranteed quality of
service. For more details see the [NVIDIA Multi-Instance GPU User Guide](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/).
First enable `MIG` mode for a GPU. Depending on the platform you are running,
a reboot may be necessary; some platforms support a GPU reset instead.
```sh
$ sudo nvidia-smi -mig 1
```
If the platform supports a GPU reset you can run the following; otherwise you
will get a warning to reboot the server.
```sh
$ sudo nvidia-smi --gpu-reset
```
By default, the driver provides a number of profiles that users can opt into
when configuring the MIG feature.
```sh
$ sudo nvidia-smi mig -lgip
+-----------------------------------------------------------------------------+
| GPU instance profiles: |
| GPU Name ID Instances Memory P2P SM DEC ENC |
| Free/Total GiB CE JPEG OFA |
|=============================================================================|
| 0 MIG 1g.10gb 19 7/7 9.50 No 14 0 0 |
| 1 0 0 |
+-----------------------------------------------------------------------------+
| 0 MIG 1g.10gb+me 20 1/1 9.50 No 14 1 0 |
| 1 1 1 |
+-----------------------------------------------------------------------------+
| 0 MIG 2g.20gb 14 3/3 19.50 No 28 1 0 |
| 2 0 0 |
+-----------------------------------------------------------------------------+
...
```
Create the GPU instances that correspond to the `vGPU` types of the `MIG-backed`
`vGPUs` that you will create; see [NVIDIA A100 PCIe 80GB Virtual GPU Types](https://docs.nvidia.com/grid/13.0/grid-vgpu-user-guide/index.html#vgpu-types-nvidia-a100-pcie-80gb).
```sh
# MIG 1g.10gb --> vGPU A100D-1-10C
$ sudo nvidia-smi mig -cgi 19
```
List the GPU instances and get the GPU instance id to create the compute
instance.
```sh
$ sudo nvidia-smi mig -lgi # list the created GPU instances
$ sudo nvidia-smi mig -cci -gi 9 # each GPU instance can have several compute
# instances. Instance -> Workload
```
Verify that the compute instances were created within the GPU instance:
```sh
$ nvidia-smi
... snip ...
+-----------------------------------------------------------------------------+
| MIG devices: |
+------------------+----------------------+-----------+-----------------------+
| GPU GI CI MIG | Memory-Usage | Vol| Shared |
| ID ID Dev | BAR1-Usage | SM Unc| CE ENC DEC OFA JPG|
| | | ECC| |
|==================+======================+===========+=======================|
| 0 9 0 0 | 0MiB / 9728MiB | 14 0 | 1 0 0 0 0 |
| | 0MiB / 4095MiB | | |
+------------------+----------------------+-----------+-----------------------+
... snip ...
```
We can use the [snippet](#list-all-available-vgpu-instances) from before to list
the available `vGPU` instances, this time `MIG-backed` ones.
```sh
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.4 | 1 |GRID A100D-1-10C | nvidia-699 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:00.5 | 1 |GRID A100D-1-10C | nvidia-699 |
| BDF | INSTANCES | NAME | DIR |
+--------------+-----------+----------------+------------+
| 0000:41:01.6 | 1 |GRID A100D-1-10C | nvidia-699 |
... snip ...
```
Repeat the steps after the [snippet](#list-all-available-vgpu-instances) listing
to create the corresponding `mdev` device and use the guest `OS` created in the
previous section with `time-sliced` `vGPUs`.
## Install NVIDIA Driver + Toolkit in Kata Containers Guest OS
Consult the [Developer-Guide](https://github.com/kata-containers/kata-containers/blob/main/docs/Developer-Guide.md#create-a-rootfs-image) on how to create a
rootfs base image for a distribution of your choice. This is going to be used as
the base for an NVIDIA-enabled guest OS. Use the `EXTRA_PKGS` variable to install
all the packages needed to compile the drivers. Also copy the kernel development
packages from the previous `make deb-pkg` into `$ROOTFS_DIR` (see the sketch below).
```sh
export EXTRA_PKGS="gcc make curl gnupg"
```
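Copying the kernel development packages produced earlier might look like this (a sketch; the package file names depend on your `make deb-pkg` run):
```bash
# Copy the linux-headers packages built by `make deb-pkg` into the rootfs
# so they can be installed inside the chroot later. File names are
# illustrative; adjust them to match your build output.
$ sudo cp linux-headers-5.15.23-nvidia-gpu*.deb "${ROOTFS_DIR}/"
```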
Having `$ROOTFS_DIR` exported from the previous step, we can now install all the
needed parts in the guest OS. In this case, we have an Ubuntu based rootfs.
First of all, mount the special filesystems into the rootfs:
```sh
$ sudo mount -t sysfs -o ro none ${ROOTFS_DIR}/sys
$ sudo mount -t proc -o ro none ${ROOTFS_DIR}/proc
$ sudo mount -t tmpfs none ${ROOTFS_DIR}/tmp
$ sudo mount -o bind,ro /dev ${ROOTFS_DIR}/dev
$ sudo mount -t devpts none ${ROOTFS_DIR}/dev/pts
```
Now we can enter the `chroot`:
```sh
$ sudo chroot ${ROOTFS_DIR}
```
Inside the rootfs we install the drivers and the toolkit to enable easy
creation of GPU containers with Kata. The same rootfs can still be used for
any other container; it is not limited to GPU workloads.
As a prerequisite, install the kernel development packages copied earlier
```sh
$ sudo dpkg -i *.deb
```
Get the driver run file. Since we need to build the driver against a kernel
that is not running on the host, we must be able to specify the exact version
to build against: use the kernel version from building the NVIDIA kernel
(`5.15.23-nvidia-gpu`).
```sh
$ wget https://us.download.nvidia.com/XFree86/Linux-x86_64/510.54/NVIDIA-Linux-x86_64-510.54.run
$ chmod +x NVIDIA-Linux-x86_64-510.54.run
# Extract the source files so we can run the installer with arguments
$ ./NVIDIA-Linux-x86_64-510.54.run -x
$ cd NVIDIA-Linux-x86_64-510.54
$ ./nvidia-installer -k 5.15.23-nvidia-gpu
```
With the drivers installed, we install the toolkit, which takes care of
providing the right bits inside the container.
```sh
$ distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
$ curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
$ curl -s -L https://nvidia.github.io/libnvidia-container/$distribution/libnvidia-container.list | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
$ apt update
$ apt install nvidia-container-toolkit
```
Create the hook execution file for Kata:
```
# Content of $ROOTFS_DIR/usr/share/oci/hooks/prestart/nvidia-container-toolkit.sh
#!/bin/bash -x
/usr/bin/nvidia-container-toolkit -debug "$@"
```
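The OCI runtime only executes hooks that are executable, so mark the script
accordingly; a quick sketch, assuming we are still inside the `chroot`:
```sh
$ chmod +x /usr/share/oci/hooks/prestart/nvidia-container-toolkit.sh
```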
Before leaving the `chroot`, one can do some cleanup of files and package
caches. Build the rootfs and configure it for use with Kata according to the
development guide. Enable the `guest_hook_path` in Kata's `configuration.toml`:
```toml
guest_hook_path = "/usr/share/oci/hooks"
```
With an NVIDIA rootfs and kernel built, we can now run any GPU container
without installing the drivers into the container image. Check the NVIDIA
device status with `nvidia-smi`
```sh
$ sudo ctr --debug run --runtime "io.containerd.kata.v2" --device /dev/vfio/192 --rm -t "docker.io/nvidia/cuda:11.6.0-base-ubuntu20.04" cuda nvidia-smi
Fri Mar 18 10:36:59 2022
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.54 Driver Version: 510.54 CUDA Version: 11.6 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 NVIDIA A30X Off | 00000000:02:00.0 Off | 0 |
| N/A 38C P0 67W / 230W | 0MiB / 24576MiB | 0% Default |
| | | Disabled |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| No running processes found |
+-----------------------------------------------------------------------------+
```
As the last step, remove the additional packages and files that were added to
the `$ROOTFS_DIR` to keep it as small as possible, and unmount the special
filesystems mounted earlier, as sketched below.
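A minimal sketch of the unmounts (note that `dev/pts` must be unmounted before
`dev`):
```sh
$ sudo umount ${ROOTFS_DIR}/dev/pts ${ROOTFS_DIR}/dev \
    ${ROOTFS_DIR}/tmp ${ROOTFS_DIR}/proc ${ROOTFS_DIR}/sys
```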
## References
- [Configuring a VM for GPU Pass-Through by Using the QEMU Command Line](https://docs.nvidia.com/grid/latest/grid-vgpu-user-guide/index.html#using-gpu-pass-through-red-hat-el-qemu-cli)
- https://gitlab.com/nvidia/container-images/driver/-/tree/master
- https://github.com/NVIDIA/nvidia-docker/wiki/Driver-containers


@@ -1,293 +0,0 @@
# Using Nvidia GPU device with Kata Containers
An Nvidia GPU device can be passed to a Kata Containers container using GPU passthrough
(Nvidia GPU pass-through mode) as well as GPU mediated passthrough (Nvidia vGPU mode). 
In Nvidia GPU pass-through mode, an entire physical GPU is directly assigned to one VM,
bypassing the Nvidia Virtual GPU Manager. In this mode of operation, the GPU is accessed
exclusively by the Nvidia driver running in the VM to which it is assigned.
The GPU is not shared among VMs.
Nvidia Virtual GPU (vGPU) enables multiple virtual machines (VMs) to have simultaneous,
direct access to a single physical GPU, using the same Nvidia graphics drivers that are
deployed on non-virtualized operating systems. By doing this, Nvidia vGPU provides VMs
with unparalleled graphics performance, compute performance, and application compatibility,
together with the cost-effectiveness and scalability brought about by sharing a GPU
among multiple workloads.
| Technology | Description | Behaviour | Detail |
| --- | --- | --- | --- |
| Nvidia GPU pass-through mode | GPU passthrough | Physical GPU assigned to a single VM | Direct GPU assignment to VM without limitation |
| Nvidia vGPU mode | GPU sharing | Physical GPU shared by multiple VMs | Mediated passthrough |
## Hardware Requirements
Nvidia GPUs Recommended for Virtualization:
- Nvidia Tesla (T4, M10, P6, V100 or newer)
- Nvidia Quadro RTX 6000/8000
## Host BIOS Requirements
Some hardware requires a larger PCI BARs window, for example, Nvidia Tesla P100, K40m
```
$ lspci -s 04:00.0 -vv | grep Region
Region 0: Memory at c6000000 (32-bit, non-prefetchable) [size=16M]
Region 1: Memory at 383800000000 (64-bit, prefetchable) [size=16G] #above 4G
Region 3: Memory at 383c00000000 (64-bit, prefetchable) [size=32M]
```
For large BARs devices, MMIO mapping above 4G address space should be `enabled`
in the PCI configuration of the BIOS.
Some hardware vendors use a different name for this setting in the BIOS, such as:
- Above 4G Decoding
- Memory Hole for PCI MMIO
- Memory Mapped I/O above 4GB
The following steps outline the workflow for using an Nvidia GPU with Kata.
## Host Kernel Requirements
The following configurations need to be enabled on your host kernel:
- `CONFIG_VFIO`
- `CONFIG_VFIO_IOMMU_TYPE1`
- `CONFIG_VFIO_MDEV`
- `CONFIG_VFIO_MDEV_DEVICE`
- `CONFIG_VFIO_PCI`
Your host kernel needs to be booted with `intel_iommu=on` on the kernel command line.
## Install and configure Kata Containers
To use non-large BARs devices (for example, Nvidia Tesla T4), you need Kata version 1.3.0 or above.
Follow the [Kata Containers setup instructions](../install/README.md)
to install the latest version of Kata.
To use large BARs devices (for example, Nvidia Tesla P100), you need Kata version 1.11.0 or above.
The following settings in the Kata `configuration.toml` file are known to work:
Hotplug for PCI devices by `acpi_pcihp` (Linux's ACPI PCI Hotplug driver):
```
machine_type = "q35"
hotplug_vfio_on_root_bus = false
```
Hotplug for PCIe devices by `pciehp` (Linux's PCIe Hotplug driver):
```
machine_type = "q35"
hotplug_vfio_on_root_bus = true
pcie_root_port = 1
```
## Build Kata Containers kernel with GPU support
The default guest kernel installed with Kata Containers does not provide GPU support.
To use an Nvidia GPU with Kata Containers, you need to build a kernel with the
necessary GPU support.
The following kernel config options need to be enabled:
```
# Support PCI/PCIe device hotplug (Required for large BARs device)
CONFIG_HOTPLUG_PCI_PCIE=y
# Support for loading modules (Required for loading Nvidia drivers)
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# Enable the MMIO access method for PCIe devices (Required for large BARs device)
CONFIG_PCI_MMCONFIG=y
```
The following kernel config options need to be disabled:
```
# Disable Open Source Nvidia driver nouveau
# It conflicts with Nvidia official driver
CONFIG_DRM_NOUVEAU=n
```
> **Note**: `CONFIG_DRM_NOUVEAU` is normally disabled by default.
> It is worth checking that it is not enabled in your kernel configuration to prevent any conflicts.
Build the Kata Containers kernel with the previous config options,
using the instructions described in [Building Kata Containers kernel](../../tools/packaging/kernel).
For further details on building and installing guest kernels,
see [the developer guide](../Developer-Guide.md#install-guest-kernel-images).
There is an easy way to build a guest kernel that supports Nvidia GPU:
```
## Build guest kernel with ../../tools/packaging/kernel
# Prepare (download guest kernel source, generate .config)
$ ./build-kernel.sh -v 4.19.86 -g nvidia -f setup
# Build guest kernel
$ ./build-kernel.sh -v 4.19.86 -g nvidia build
# Install guest kernel
$ sudo -E ./build-kernel.sh -v 4.19.86 -g nvidia install
/usr/share/kata-containers/vmlinux-nvidia-gpu.container -> vmlinux-4.19.86-70-nvidia-gpu
/usr/share/kata-containers/vmlinuz-nvidia-gpu.container -> vmlinuz-4.19.86-70-nvidia-gpu
```
To build Nvidia Driver in Kata container, `kernel-devel` is required.
This is a way to generate rpm packages for `kernel-devel`:
```
$ cd kata-linux-4.19.86-68
$ make rpm-pkg
Output RPMs:
~/rpmbuild/RPMS/x86_64/kernel-devel-4.19.86_nvidia_gpu-1.x86_64.rpm
```
> **Note**:
> - `kernel-devel` should be installed in the Kata container before running the Nvidia driver installer.
> - Run `make deb-pkg` to build the deb package.
Before using the new guest kernel, please update the `kernel` parameter in `configuration.toml`.
```
kernel = "/usr/share/kata-containers/vmlinuz-nvidia-gpu.container"
```
## Nvidia GPU pass-through mode with Kata Containers
Use the following steps to pass an Nvidia GPU device in pass-through mode with Kata:
1. Find the Bus-Device-Function (BDF) for GPU device on host:
```
$ sudo lspci -nn -D | grep -i nvidia
0000:04:00.0 3D controller [0302]: NVIDIA Corporation Device [10de:15f8] (rev a1)
0000:84:00.0 3D controller [0302]: NVIDIA Corporation Device [10de:15f8] (rev a1)
```
> PCI address `0000:04:00.0` is assigned to the hardware GPU device.
> `10de:15f8` is the device ID of the hardware GPU device.
2. Find the IOMMU group for the GPU device:
```
$ BDF="0000:04:00.0"
$ readlink -e /sys/bus/pci/devices/$BDF/iommu_group
/sys/kernel/iommu_groups/45
```
The previous output shows that the GPU belongs to IOMMU group 45.
3. Check the IOMMU group number under `/dev/vfio`:
```
$ ls -l /dev/vfio
total 0
crw------- 1 root root 248, 0 Feb 28 09:57 45
crw------- 1 root root 248, 1 Feb 28 09:57 54
crw-rw-rw- 1 root root 10, 196 Feb 28 09:57 vfio
```
4. Start a Kata container with GPU device:
```
$ sudo docker run -it --runtime=kata-runtime --cap-add=ALL --device /dev/vfio/45 centos /bin/bash
```
5. Run `lspci` within the container to verify the GPU device is seen in the list
of the PCI devices. Note the vendor-device id of the GPU (`10de:15f8`) in the `lspci` output.
```
$ lspci -nn -D | grep '10de:15f8'
0000:01:01.0 3D controller [0302]: NVIDIA Corporation GP100GL [Tesla P100 PCIe 16GB] [10de:15f8] (rev a1)
```
6. Additionally, you can check the PCI BARs space of the Nvidia GPU device in the container:
```
$ lspci -s 01:01.0 -vv | grep Region
Region 0: Memory at c0000000 (32-bit, non-prefetchable) [disabled] [size=16M]
Region 1: Memory at 4400000000 (64-bit, prefetchable) [disabled] [size=16G]
Region 3: Memory at 4800000000 (64-bit, prefetchable) [disabled] [size=32M]
```
> **Note**: If you see a message similar to the above, the BAR space of the Nvidia
> GPU has been successfully allocated.
## Nvidia vGPU mode with Kata Containers
Nvidia vGPU is a licensed product on all supported GPU boards. A software license
is required to enable all vGPU features within the guest VM.
> **Note**: No suitable test environment was available, so this mode is not documented here.
## Install Nvidia Driver in Kata Containers
Download the official Nvidia driver from
[https://www.nvidia.com/Download/index.aspx](https://www.nvidia.com/Download/index.aspx),
for example `NVIDIA-Linux-x86_64-418.87.01.run`.
Install the `kernel-devel` package (generated in the previous steps) for the guest kernel:
```
$ sudo rpm -ivh kernel-devel-4.19.86_gpu-1.x86_64.rpm
```
Here is an example to extract, compile and install Nvidia driver:
```
## Extract
$ sh ./NVIDIA-Linux-x86_64-418.87.01.run -x
## Compile and install (It will take some time)
$ cd NVIDIA-Linux-x86_64-418.87.01
$ sudo ./nvidia-installer -a -q --ui=none \
--no-cc-version-check \
--no-opengl-files --no-install-libglvnd \
--kernel-source-path=/usr/src/kernels/`uname -r`
```
Or just run one command line:
```
$ sudo sh ./NVIDIA-Linux-x86_64-418.87.01.run -a -q --ui=none \
--no-cc-version-check \
--no-opengl-files --no-install-libglvnd \
--kernel-source-path=/usr/src/kernels/`uname -r`
```
To view detailed logs of the installer:
```
$ tail -f /var/log/nvidia-installer.log
```
Load Nvidia driver module manually
```
# Optional: generate modules.dep and map files for Nvidia driver
$ sudo depmod
# Load module
$ sudo modprobe nvidia-drm
# Check module
$ lsmod | grep nvidia
nvidia_drm 45056 0
nvidia_modeset 1093632 1 nvidia_drm
nvidia 18202624 1 nvidia_modeset
drm_kms_helper 159744 1 nvidia_drm
drm 364544 3 nvidia_drm,drm_kms_helper
i2c_core 65536 3 nvidia,drm_kms_helper,drm
ipmi_msghandler 49152 1 nvidia
```
Check Nvidia device status with `nvidia-smi`
```
$ nvidia-smi
Tue Mar 3 00:03:49 2020
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.87.01 Driver Version: 418.87.01 CUDA Version: 10.1 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
|===============================+======================+======================|
| 0 Tesla P100-PCIE... Off | 00000000:01:01.0 Off | 0 |
| N/A 27C P0 25W / 250W | 0MiB / 16280MiB | 0% Default |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: GPU Memory |
| GPU PID Type Process name Usage |
|=============================================================================|
| No running processes found |
+-----------------------------------------------------------------------------+
```
## References
- [Configuring a VM for GPU Pass-Through by Using the QEMU Command Line](https://docs.nvidia.com/grid/latest/grid-vgpu-user-guide/index.html#using-gpu-pass-through-red-hat-el-qemu-cli)
- https://gitlab.com/nvidia/container-images/driver/-/tree/master
- https://github.com/NVIDIA/nvidia-docker/wiki/Driver-containers


@@ -312,7 +312,7 @@ working properly with the Kata Containers VM.
### Build OpenSSL Intel® QAT engine container
Use the OpenSSL Intel® QAT [Dockerfile](https://github.com/intel/intel-device-plugins-for-kubernetes/tree/master/demo/openssl-qat-engine)
Use the OpenSSL Intel® QAT [Dockerfile](https://github.com/intel/intel-device-plugins-for-kubernetes/tree/main/demo/openssl-qat-engine)
to build a container image with an optimized OpenSSL engine for
Intel® QAT. Using `docker build` with the Kata Containers runtime can sometimes
have issues. Therefore, make sure that `runc` is the default Docker container
@@ -444,7 +444,7 @@ $ sudo docker save -o openssl-qat-engine.tar openssl-qat-engine:latest
$ sudo ctr -n=k8s.io images import openssl-qat-engine.tar
```
The [Intel® QAT Plugin](https://github.com/intel/intel-device-plugins-for-kubernetes/blob/master/cmd/qat_plugin/README.md)
The [Intel® QAT Plugin](https://github.com/intel/intel-device-plugins-for-kubernetes/blob/main/cmd/qat_plugin/README.md)
needs to be started so that the virtual functions can be discovered and
used by Kubernetes.


@@ -22,21 +22,35 @@ $ sudo snap install kata-containers --classic
## Build and install snap image
Run next command at the root directory of the packaging repository.
Run the command below which will use the packaging Makefile to build the snap image:
```sh
$ make snap
$ make -C tools/packaging snap
```
> **Warning:**
>
> By default, `snapcraft` will create a clean virtual machine
> environment to build the snap in using the `multipass` tool.
>
> However, `multipass` is silently disabled when `--destructive-mode` is
> used.
>
> Since building the Kata Containers package currently requires
> `--destructive-mode`, the snap will be built using the host
> environment. To avoid parts of the build auto-detecting additional
> features to enable (for example for QEMU), we recommend that you
> only run the snap build in a minimal host environment.
To install the resulting snap image, snap must be put in [classic mode][3] and the
security confinement must be disabled (*--classic*). Also since the resulting snap
has not been signed the verification of signature must be omitted (*--dangerous*).
security confinement must be disabled (`--classic`). Also since the resulting snap
has not been signed the verification of signature must be omitted (`--dangerous`).
```sh
$ sudo snap install --classic --dangerous kata-containers_[VERSION]_[ARCH].snap
$ sudo snap install --classic --dangerous "kata-containers_${version}_${arch}.snap"
```
Replace `VERSION` with the current version of Kata Containers and `ARCH` with
Replace `${version}` with the current version of Kata Containers and `${arch}` with
the system architecture.
## Configure Kata Containers
@@ -76,12 +90,12 @@ then a new configuration file can be [created](#configure-kata-containers)
and [configured][7].
[1]: https://docs.snapcraft.io/snaps/intro
[2]: ../docs/design/architecture/README.md#root-filesystem-image
[2]: ../../docs/design/architecture/README.md#root-filesystem-image
[3]: https://docs.snapcraft.io/reference/confinement#classic
[4]: https://github.com/kata-containers/runtime#configuration
[4]: https://github.com/kata-containers/kata-containers/tree/main/src/runtime#configuration
[5]: https://docs.docker.com/engine/reference/commandline/dockerd
[6]: ../docs/install/docker/ubuntu-docker-install.md
[7]: ../docs/Developer-Guide.md#configure-to-use-initrd-or-rootfs-image
[6]: ../../docs/install/docker/ubuntu-docker-install.md
[7]: ../../docs/Developer-Guide.md#configure-to-use-initrd-or-rootfs-image
[8]: https://snapcraft.io/kata-containers
[9]: ../docs/Developer-Guide.md#run-kata-containers-with-docker
[10]: ../docs/Developer-Guide.md#run-kata-containers-with-kubernetes
[9]: ../../docs/Developer-Guide.md#run-kata-containers-with-docker
[10]: ../../docs/Developer-Guide.md#run-kata-containers-with-kubernetes

snap/local/snap-common.sh (new file, 114 lines)

@@ -0,0 +1,114 @@
#!/usr/bin/env bash
#
# Copyright (c) 2022 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Description: Idempotent script to be sourced by all parts in a
# snapcraft config file.
set -o errexit
set -o nounset
set -o pipefail
# XXX: Bash-specific code. zsh doesn't support this option and that *does*
# matter if this script is run sourced... since it'll be using zsh! ;)
[ -n "$BASH_VERSION" ] && set -o errtrace
[ -n "${DEBUG:-}" ] && set -o xtrace
die()
{
# Print the error and abort: callers rely on die to stop a broken build
echo >&2 "ERROR: $0: $*"
exit 1
}
[ -n "${SNAPCRAFT_STAGE:-}" ] ||\
die "must be sourced from a snapcraft config file"
snap_yq_version=3.4.1
snap_common_install_yq()
{
export yq="${SNAPCRAFT_STAGE}/bin/yq"
local yq_pkg
yq_pkg="github.com/mikefarah/yq"
local yq_url
yq_url="https://${yq_pkg}/releases/download/${snap_yq_version}/yq_${goos}_${goarch}"
curl -o "${yq}" -L "${yq_url}"
chmod +x "${yq}"
}
# Function that should be called for each snap "part" in
# snapcraft.yaml.
snap_common_main()
{
# Architecture
arch="$(uname -m)"
case "${arch}" in
aarch64)
goarch="arm64"
qemu_arch="${arch}"
;;
ppc64le)
goarch="ppc64le"
qemu_arch="ppc64"
;;
s390x)
goarch="${arch}"
qemu_arch="${arch}"
;;
x86_64)
goarch="amd64"
qemu_arch="${arch}"
;;
*) die "unsupported architecture: ${arch}" ;;
esac
dpkg_arch=$(dpkg --print-architecture)
# golang
#
# We need the O/S name in golang format, but since we don't
# know if the godeps part has run, we don't know if golang is
# available yet, hence fall back to a standard system command.
goos="$(go env GOOS &>/dev/null || true)"
[ -z "$goos" ] && goos=$(uname -s|tr '[A-Z]' '[a-z]')
export GOROOT="${SNAPCRAFT_STAGE}"
export GOPATH="${GOROOT}/gopath"
export GO111MODULE="auto"
mkdir -p "${GOPATH}/bin"
export PATH="${GOPATH}/bin:${PATH}"
# Proxy
export http_proxy="${http_proxy:-}"
export https_proxy="${https_proxy:-}"
# Binaries
mkdir -p "${SNAPCRAFT_STAGE}/bin"
export PATH="$PATH:${SNAPCRAFT_STAGE}/bin"
# YAML query tool
export yq="${SNAPCRAFT_STAGE}/bin/yq"
# Kata paths
export kata_dir=$(printf "%s/src/github.com/%s/%s" \
"${GOPATH}" \
"${SNAPCRAFT_PROJECT_NAME}" \
"${SNAPCRAFT_PROJECT_NAME}")
export versions_file="${kata_dir}/versions.yaml"
[ -n "${yq:-}" ] && [ -x "${yq:-}" ] || snap_common_install_yq
}
snap_common_main


@@ -1,4 +1,5 @@
name: kata-containers
website: https://github.com/kata-containers/kata-containers
summary: Build lightweight VMs that seamlessly plug into the containers ecosystem
description: |
Kata Containers is an open source project and community working to build a
@@ -18,20 +19,18 @@ parts:
- git
- git-extras
override-pull: |
version="9999"
kata_url="https://github.com/kata-containers/kata-containers"
source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh"
if echo "${GITHUB_REF}" | grep -q -E "^refs/tags"; then
version=$(echo ${GITHUB_REF} | cut -d/ -f3)
version="9999"
if echo "${GITHUB_REF:-}" | grep -q -E "^refs/tags"; then
version=$(echo ${GITHUB_REF:-} | cut -d/ -f3)
git checkout ${version}
fi
snapcraftctl set-grade "stable"
snapcraftctl set-version "${version}"
# setup GOPATH - this repo dir should be there
export GOPATH=${SNAPCRAFT_STAGE}/gopath
kata_dir=${GOPATH}/src/github.com/${SNAPCRAFT_PROJECT_NAME}/${SNAPCRAFT_PROJECT_NAME}
mkdir -p $(dirname ${kata_dir})
ln -sf $(realpath "${SNAPCRAFT_STAGE}/..") ${kata_dir}
@@ -43,27 +42,12 @@ parts:
build-packages:
- curl
override-build: |
source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh"
# put everything in stage
cd ${SNAPCRAFT_STAGE}
cd "${SNAPCRAFT_STAGE}"
yq_path="./yq"
yq_pkg="github.com/mikefarah/yq"
goos="linux"
case "$(uname -m)" in
aarch64) goarch="arm64";;
ppc64le) goarch="ppc64le";;
x86_64) goarch="amd64";;
s390x) goarch="s390x";;
*) echo "unsupported architecture: $(uname -m)"; exit 1;;
esac
yq_version=3.4.1
yq_url="https://${yq_pkg}/releases/download/${yq_version}/yq_${goos}_${goarch}"
curl -o "${yq_path}" -L "${yq_url}"
chmod +x "${yq_path}"
kata_dir=gopath/src/github.com/${SNAPCRAFT_PROJECT_NAME}/${SNAPCRAFT_PROJECT_NAME}
version="$(${yq_path} r ${kata_dir}/versions.yaml languages.golang.meta.newest-version)"
version="$(${yq} r ${kata_dir}/versions.yaml languages.golang.meta.newest-version)"
tarfile="go${version}.${goos}-${goarch}.tar.gz"
curl -LO https://golang.org/dl/${tarfile}
tar -xf ${tarfile} --strip-components=1
@@ -80,28 +64,17 @@ parts:
- uidmap
- gnupg2
override-build: |
[ "$(uname -m)" = "ppc64le" ] || [ "$(uname -m)" = "s390x" ] && sudo apt-get --no-install-recommends install -y protobuf-compiler
source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh"
yq=${SNAPCRAFT_STAGE}/yq
[ "${arch}" = "ppc64le" ] || [ "${arch}" = "s390x" ] && sudo apt-get --no-install-recommends install -y protobuf-compiler
# set GOPATH
export GOPATH=${SNAPCRAFT_STAGE}/gopath
kata_dir=${GOPATH}/src/github.com/${SNAPCRAFT_PROJECT_NAME}/${SNAPCRAFT_PROJECT_NAME}
export GOROOT=${SNAPCRAFT_STAGE}
export PATH="${GOROOT}/bin:${PATH}"
export GO111MODULE="auto"
http_proxy=${http_proxy:-""}
https_proxy=${https_proxy:-""}
if [ -n "$http_proxy" ]; then
echo "Setting proxy $http_proxy"
sudo -E systemctl set-environment http_proxy=$http_proxy || true
sudo -E systemctl set-environment https_proxy=$https_proxy || true
sudo -E systemctl set-environment http_proxy="$http_proxy" || true
sudo -E systemctl set-environment https_proxy="$https_proxy" || true
fi
# Copy yq binary. It's used in the container
mkdir -p "${GOPATH}/bin/"
cp -a "${yq}" "${GOPATH}/bin/"
echo "Unmasking docker service"
@@ -112,63 +85,54 @@ parts:
echo "Starting docker"
sudo -E systemctl start docker || true
cd ${kata_dir}/tools/osbuilder
cd "${kata_dir}/tools/osbuilder"
# build image
export AGENT_INIT=yes
export USE_DOCKER=1
export DEBUG=1
arch="$(uname -m)"
initrd_distro=$(${yq} r -X ${kata_dir}/versions.yaml assets.initrd.architecture.${arch}.name)
image_distro=$(${yq} r -X ${kata_dir}/versions.yaml assets.image.architecture.${arch}.name)
case "$arch" in
x86_64)
# In some build systems it's impossible to build a rootfs image, try with the initrd image
sudo -E PATH=$PATH make image DISTRO=${image_distro} || sudo -E PATH=$PATH make initrd DISTRO=${initrd_distro}
sudo -E PATH=$PATH make image DISTRO="${image_distro}" || sudo -E PATH="$PATH" make initrd DISTRO="${initrd_distro}"
;;
aarch64|ppc64le|s390x)
sudo -E PATH=$PATH make initrd DISTRO=${initrd_distro}
sudo -E PATH="$PATH" make initrd DISTRO="${initrd_distro}"
;;
*) echo "unsupported architecture: $(uname -m)"; exit 1;;
*) die "unsupported architecture: ${arch}" ;;
esac
# Install image
kata_image_dir=${SNAPCRAFT_PART_INSTALL}/usr/share/kata-containers
mkdir -p ${kata_image_dir}
cp kata-containers*.img ${kata_image_dir}
kata_image_dir="${SNAPCRAFT_PART_INSTALL}/usr/share/kata-containers"
mkdir -p "${kata_image_dir}"
cp kata-containers*.img "${kata_image_dir}"
runtime:
after: [godeps, image, cloud-hypervisor]
plugin: nil
build-attributes: [no-patchelf]
override-build: |
# set GOPATH
export GOPATH=${SNAPCRAFT_STAGE}/gopath
export GOROOT=${SNAPCRAFT_STAGE}
export PATH="${GOROOT}/bin:${PATH}"
export GO111MODULE="auto"
kata_dir=${GOPATH}/src/github.com/${SNAPCRAFT_PROJECT_NAME}/${SNAPCRAFT_PROJECT_NAME}
source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh"
cd ${kata_dir}/src/runtime
cd "${kata_dir}/src/runtime"
# setup arch
arch=$(uname -m)
if [ ${arch} = "ppc64le" ]; then
arch="ppc64"
fi
qemu_cmd="qemu-system-${qemu_arch}"
# build and install runtime
make \
PREFIX=/snap/${SNAPCRAFT_PROJECT_NAME}/current/usr \
PREFIX="/snap/${SNAPCRAFT_PROJECT_NAME}/current/usr" \
SKIP_GO_VERSION_CHECK=1 \
QEMUCMD=qemu-system-$arch
QEMUCMD="${qemu_cmd}"
make install \
PREFIX=/usr \
DESTDIR=${SNAPCRAFT_PART_INSTALL} \
DESTDIR="${SNAPCRAFT_PART_INSTALL}" \
SKIP_GO_VERSION_CHECK=1 \
QEMUCMD=qemu-system-$arch
QEMUCMD="${qemu_cmd}"
if [ ! -f ${SNAPCRAFT_PART_INSTALL}/../../image/install/usr/share/kata-containers/kata-containers.img ]; then
sed -i -e "s|^image =.*|initrd = \"/snap/${SNAPCRAFT_PROJECT_NAME}/current/usr/share/kata-containers/kata-containers-initrd.img\"|" \
@@ -185,44 +149,37 @@ parts:
- bison
- flex
override-build: |
yq=${SNAPCRAFT_STAGE}/yq
export PATH="${PATH}:${SNAPCRAFT_STAGE}"
export GOPATH=${SNAPCRAFT_STAGE}/gopath
kata_dir=${GOPATH}/src/github.com/${SNAPCRAFT_PROJECT_NAME}/${SNAPCRAFT_PROJECT_NAME}
versions_file="${kata_dir}/versions.yaml"
source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh"
kernel_version="$(${yq} r $versions_file assets.kernel.version)"
#Remove extra 'v'
kernel_version=${kernel_version#v}
kernel_version="${kernel_version#v}"
[ "$(uname -m)" = "s390x" ] && sudo apt-get --no-install-recommends install -y libssl-dev
[ "${arch}" = "s390x" ] && sudo apt-get --no-install-recommends install -y libssl-dev
export GOPATH=${SNAPCRAFT_STAGE}/gopath
export GO111MODULE="auto"
kata_dir=${GOPATH}/src/github.com/${SNAPCRAFT_PROJECT_NAME}/${SNAPCRAFT_PROJECT_NAME}
cd ${kata_dir}/tools/packaging/kernel
cd "${kata_dir}/tools/packaging/kernel"
kernel_dir_prefix="kata-linux-"
# Setup and build kernel
./build-kernel.sh -v ${kernel_version} -d setup
./build-kernel.sh -v "${kernel_version}" -d setup
cd ${kernel_dir_prefix}*
make -j $(($(nproc)-1)) EXTRAVERSION=".container"
kernel_suffix=${kernel_version}.container
kata_kernel_dir=${SNAPCRAFT_PART_INSTALL}/usr/share/kata-containers
mkdir -p ${kata_kernel_dir}
kernel_suffix="${kernel_version}.container"
kata_kernel_dir="${SNAPCRAFT_PART_INSTALL}/usr/share/kata-containers"
mkdir -p "${kata_kernel_dir}"
# Install bz kernel
make install INSTALL_PATH=${kata_kernel_dir} EXTRAVERSION=".container" || true
vmlinuz_name=vmlinuz-${kernel_suffix}
ln -sf ${vmlinuz_name} ${kata_kernel_dir}/vmlinuz.container
make install INSTALL_PATH="${kata_kernel_dir}" EXTRAVERSION=".container" || true
vmlinuz_name="vmlinuz-${kernel_suffix}"
ln -sf "${vmlinuz_name}" "${kata_kernel_dir}/vmlinuz.container"
# Install raw kernel
vmlinux_path=vmlinux
[ "$(uname -m)" = "s390x" ] && vmlinux_path=arch/s390/boot/compressed/vmlinux
vmlinux_name=vmlinux-${kernel_suffix}
cp ${vmlinux_path} ${kata_kernel_dir}/${vmlinux_name}
ln -sf ${vmlinux_name} ${kata_kernel_dir}/vmlinux.container
vmlinux_path="vmlinux"
[ "${arch}" = "s390x" ] && vmlinux_path="arch/s390/boot/compressed/vmlinux"
vmlinux_name="vmlinux-${kernel_suffix}"
cp "${vmlinux_path}" "${kata_kernel_dir}/${vmlinux_name}"
ln -sf "${vmlinux_name}" "${kata_kernel_dir}/vmlinux.container"
qemu:
plugin: make
@@ -249,12 +206,8 @@ parts:
- libselinux1-dev
- ninja-build
override-build: |
yq=${SNAPCRAFT_STAGE}/yq
export GOPATH=${SNAPCRAFT_STAGE}/gopath
export GO111MODULE="auto"
kata_dir=${GOPATH}/src/github.com/${SNAPCRAFT_PROJECT_NAME}/${SNAPCRAFT_PROJECT_NAME}
source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh"
versions_file="${kata_dir}/versions.yaml"
branch="$(${yq} r ${versions_file} assets.hypervisor.qemu.version)"
url="$(${yq} r ${versions_file} assets.hypervisor.qemu.url)"
commit=""
@@ -262,11 +215,11 @@ parts:
patches_version_dir="${kata_dir}/tools/packaging/qemu/patches/tag_patches/${branch}"
# download source
qemu_dir=${SNAPCRAFT_STAGE}/qemu
qemu_dir="${SNAPCRAFT_STAGE}/qemu"
rm -rf "${qemu_dir}"
git clone --depth 1 --branch ${branch} --single-branch ${url} "${qemu_dir}"
cd ${qemu_dir}
[ -z "${commit}" ] || git checkout ${commit}
cd "${qemu_dir}"
[ -z "${commit}" ] || git checkout "${commit}"
[ -n "$(ls -A ui/keycodemapdb)" ] || git clone --depth 1 https://github.com/qemu/keycodemapdb ui/keycodemapdb/
[ -n "$(ls -A capstone)" ] || git clone --depth 1 https://github.com/qemu/capstone capstone
@@ -277,10 +230,10 @@ parts:
${kata_dir}/tools/packaging/scripts/apply_patches.sh "${patches_version_dir}"
# Only x86_64 supports libpmem
[ "$(uname -m)" = "x86_64" ] && sudo apt-get --no-install-recommends install -y apt-utils ca-certificates libpmem-dev
[ "${arch}" = "x86_64" ] && sudo apt-get --no-install-recommends install -y apt-utils ca-certificates libpmem-dev
configure_hypervisor=${kata_dir}/tools/packaging/scripts/configure-hypervisor.sh
chmod +x ${configure_hypervisor}
configure_hypervisor="${kata_dir}/tools/packaging/scripts/configure-hypervisor.sh"
chmod +x "${configure_hypervisor}"
# static build. The --prefix, --libdir, --libexecdir, --datadir arguments are
# based on PREFIX and set by configure-hypervisor.sh
echo "$(PREFIX=/snap/${SNAPCRAFT_PROJECT_NAME}/current/usr ${configure_hypervisor} -s kata-qemu) \
@@ -290,17 +243,17 @@ parts:
# Copy QEMU configurations (Kconfigs)
case "${branch}" in
"v5.1.0")
cp -a ${kata_dir}/tools/packaging/qemu/default-configs/* default-configs
cp -a "${kata_dir}"/tools/packaging/qemu/default-configs/* default-configs
;;
*)
cp -a ${kata_dir}/tools/packaging/qemu/default-configs/* configs/devices/
cp -a "${kata_dir}"/tools/packaging/qemu/default-configs/* configs/devices/
;;
esac
# build and install
make -j $(($(nproc)-1))
make install DESTDIR=${SNAPCRAFT_PART_INSTALL}
make install DESTDIR="${SNAPCRAFT_PART_INSTALL}"
prime:
- -snap/
- -usr/bin/qemu-ga
@@ -316,26 +269,66 @@ parts:
# Hack: move qemu to /
"snap/kata-containers/current/": "./"
virtiofsd:
plugin: nil
after: [godeps]
override-build: |
source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh"
# Currently, only one platform uses the new rust virtiofsd. The
# others make use of QEMU's C implementation.
#
# See "tools/packaging/scripts/configure-hypervisor.sh".
if [ "${arch}" = 'x86_64' ]
then
echo "INFO: Building rust version of virtiofsd"
else
echo "INFO: Building QEMU's C version of virtiofsd"
# Handled by the 'qemu' part, so nothing more to do here.
exit 0
fi
cd "${kata_dir}"
# Download the rust implementation of virtiofsd
tools/packaging/static-build/virtiofsd/build-static-virtiofsd.sh
sudo install \
--owner='root' \
--group='root' \
--mode=0755 \
-D \
--target-directory="${SNAPCRAFT_PART_INSTALL}/usr/libexec/" \
virtiofsd/virtiofsd
cloud-hypervisor:
plugin: nil
after: [godeps]
override-build: |
arch=$(uname -m)
if [ "{$arch}" == "aarch64" ] || [ "${arch}" == "x64_64" ]; then
source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh"
if [ "${arch}" == "aarch64" ] || [ "${arch}" == "x86_64" ]; then
sudo apt-get -y update
sudo apt-get -y install ca-certificates curl gnupg lsb-release
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --batch --yes --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
curl -fsSL https://download.docker.com/linux/ubuntu/gpg |\
sudo gpg --batch --yes --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
distro_codename=$(lsb_release -cs)
echo "deb [arch=${dpkg_arch} signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu ${distro_codename} stable" |\
sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
sudo apt-get -y update
sudo apt-get -y install docker-ce docker-ce-cli containerd.io
sudo systemctl start docker.socket
export GOPATH=${SNAPCRAFT_STAGE}/gopath
kata_dir=${GOPATH}/src/github.com/${SNAPCRAFT_PROJECT_NAME}/${SNAPCRAFT_PROJECT_NAME}
cd ${kata_dir}
cd "${SNAPCRAFT_PROJECT_DIR}"
sudo -E NO_TTY=true make cloud-hypervisor-tarball
tar xvJpf build/kata-static-cloud-hypervisor.tar.xz -C /tmp/
install -D /tmp/opt/kata/bin/cloud-hypervisor ${SNAPCRAFT_PART_INSTALL}/usr/bin/cloud-hypervisor
tarfile="${SNAPCRAFT_PROJECT_DIR}/tools/packaging/kata-deploy/local-build/build/kata-static-cloud-hypervisor.tar.xz"
tmpdir=$(mktemp -d)
tar -xvJpf "${tarfile}" -C "${tmpdir}"
install -D "${tmpdir}/opt/kata/bin/cloud-hypervisor" "${SNAPCRAFT_PART_INSTALL}/usr/bin/cloud-hypervisor"
rm -rf "${tmpdir}"
fi
apps:

src/agent/Cargo.lock (generated)

@@ -1370,6 +1370,7 @@ dependencies = [
"async-trait",
"capctl",
"caps",
"cfg-if 0.1.10",
"cgroups-rs",
"futures",
"inotify",


@@ -76,3 +76,8 @@ lto = true
[features]
seccomp = ["rustjail/seccomp"]
standard-oci-runtime = ["rustjail/standard-oci-runtime"]
[[bin]]
name = "kata-agent"
path = "src/main.rs"


@@ -14,10 +14,6 @@ PROJECT_COMPONENT = kata-agent
TARGET = $(PROJECT_COMPONENT)
SOURCES := \
$(shell find . 2>&1 | grep -E '.*\.rs$$') \
Cargo.toml
VERSION_FILE := ./VERSION
VERSION := $(shell grep -v ^\# $(VERSION_FILE))
COMMIT_NO := $(shell git rev-parse HEAD 2>/dev/null || true)
@@ -37,8 +33,16 @@ ifeq ($(SECCOMP),yes)
override EXTRA_RUSTFEATURES += seccomp
endif
##VAR STANDARD_OCI_RUNTIME=yes|no define if agent enables standard oci runtime feature
STANDARD_OCI_RUNTIME := no
# Enable standard oci runtime feature of rust build
ifeq ($(STANDARD_OCI_RUNTIME),yes)
override EXTRA_RUSTFEATURES += standard-oci-runtime
endif
ifneq ($(EXTRA_RUSTFEATURES),)
override EXTRA_RUSTFEATURES := --features $(EXTRA_RUSTFEATURES)
override EXTRA_RUSTFEATURES := --features "$(EXTRA_RUSTFEATURES)"
endif
include ../../utils.mk
@@ -108,14 +112,14 @@ $(TARGET): $(GENERATED_CODE) logging-crate-tests $(TARGET_PATH)
logging-crate-tests:
make -C $(CWD)/../libs/logging
$(TARGET_PATH): $(SOURCES) | show-summary
$(TARGET_PATH): show-summary
@RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE) $(EXTRA_RUSTFEATURES)
$(GENERATED_FILES): %: %.in
@sed $(foreach r,$(GENERATED_REPLACEMENTS),-e 's|@$r@|$($r)|g') "$<" > "$@"
##TARGET optimize: optimized build
optimize: $(SOURCES) | show-summary show-header
optimize: show-summary show-header
@RUSTFLAGS="-C link-arg=-s $(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE) $(EXTRA_RUSTFEATURES)
##TARGET install: install agent


@@ -25,6 +25,7 @@ path-absolutize = "1.2.0"
anyhow = "1.0.32"
cgroups = { package = "cgroups-rs", version = "0.2.8" }
rlimit = "0.5.3"
cfg-if = "0.1.0"
tokio = { version = "1.2.0", features = ["sync", "io-util", "process", "time", "macros"] }
futures = "0.3.17"
@@ -38,3 +39,4 @@ tempfile = "3.1.0"
[features]
seccomp = ["libseccomp"]
standard-oci-runtime = []


@@ -391,7 +391,7 @@ fn set_memory_resources(cg: &cgroups::Cgroup, memory: &LinuxMemory, update: bool
if let Some(swappiness) = memory.swappiness {
if (0..=100).contains(&swappiness) {
mem_controller.set_swappiness(swappiness as u64)?;
mem_controller.set_swappiness(swappiness)?;
} else {
return Err(anyhow!(
"invalid value:{}. valid memory swappiness range is 0-100",
@@ -590,9 +590,9 @@ fn get_cpuacct_stats(cg: &cgroups::Cgroup) -> SingularPtrField<CpuUsage> {
let h = lines_to_map(&cpuacct.stat);
let usage_in_usermode =
(((*h.get("user").unwrap() * NANO_PER_SECOND) as f64) / *CLOCK_TICKS) as u64;
(((*h.get("user").unwrap_or(&0) * NANO_PER_SECOND) as f64) / *CLOCK_TICKS) as u64;
let usage_in_kernelmode =
(((*h.get("system").unwrap() * NANO_PER_SECOND) as f64) / *CLOCK_TICKS) as u64;
(((*h.get("system").unwrap_or(&0) * NANO_PER_SECOND) as f64) / *CLOCK_TICKS) as u64;
let total_usage = cpuacct.usage;
@@ -623,9 +623,9 @@ fn get_cpuacct_stats(cg: &cgroups::Cgroup) -> SingularPtrField<CpuUsage> {
let cpu_controller: &CpuController = get_controller_or_return_singular_none!(cg);
let stat = cpu_controller.cpu().stat;
let h = lines_to_map(&stat);
let usage_in_usermode = *h.get("user_usec").unwrap();
let usage_in_kernelmode = *h.get("system_usec").unwrap();
let total_usage = *h.get("usage_usec").unwrap();
let usage_in_usermode = *h.get("user_usec").unwrap_or(&0);
let usage_in_kernelmode = *h.get("system_usec").unwrap_or(&0);
let total_usage = *h.get("usage_usec").unwrap_or(&0);
let percpu_usage = vec![];
SingularPtrField::some(CpuUsage {


@@ -0,0 +1,79 @@
// SPDX-License-Identifier: Apache-2.0
//
// Copyright 2021 Sony Group Corporation
//
use anyhow::{anyhow, Result};
use nix::errno::Errno;
use nix::pty;
use nix::sys::{socket, uio};
use nix::unistd::{self, dup2};
use std::os::unix::io::{AsRawFd, RawFd};
use std::path::Path;
pub fn setup_console_socket(csocket_path: &str) -> Result<Option<RawFd>> {
if csocket_path.is_empty() {
return Ok(None);
}
let socket_fd = socket::socket(
socket::AddressFamily::Unix,
socket::SockType::Stream,
socket::SockFlag::empty(),
None,
)?;
match socket::connect(
socket_fd,
&socket::SockAddr::Unix(socket::UnixAddr::new(Path::new(csocket_path))?),
) {
Ok(()) => Ok(Some(socket_fd)),
Err(errno) => Err(anyhow!("failed to open console fd: {}", errno)),
}
}
pub fn setup_master_console(socket_fd: RawFd) -> Result<()> {
let pseudo = pty::openpty(None, None)?;
let pty_name: &[u8] = b"/dev/ptmx";
let iov = [uio::IoVec::from_slice(pty_name)];
let fds = [pseudo.master];
let cmsg = socket::ControlMessage::ScmRights(&fds);
socket::sendmsg(socket_fd, &iov, &[cmsg], socket::MsgFlags::empty(), None)?;
unistd::setsid()?;
let ret = unsafe { libc::ioctl(pseudo.slave, libc::TIOCSCTTY) };
Errno::result(ret).map_err(|e| anyhow!(e).context("ioctl TIOCSCTTY"))?;
dup2(pseudo.slave, std::io::stdin().as_raw_fd())?;
dup2(pseudo.slave, std::io::stdout().as_raw_fd())?;
dup2(pseudo.slave, std::io::stderr().as_raw_fd())?;
unistd::close(socket_fd)?;
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use std::os::unix::net::UnixListener;
use tempfile::{self, tempdir};
const CONSOLE_SOCKET: &str = "console-socket";
#[test]
fn test_setup_console_socket() {
let dir = tempdir()
.map_err(|e| anyhow!(e).context("tempdir failed"))
.unwrap();
let socket_path = dir.path().join(CONSOLE_SOCKET);
let _listener = UnixListener::bind(&socket_path).unwrap();
let ret = setup_console_socket(socket_path.to_str().unwrap());
assert!(ret.is_ok());
}
}


@@ -23,6 +23,8 @@ use crate::cgroups::fs::Manager as FsManager;
#[cfg(test)]
use crate::cgroups::mock::Manager as FsManager;
use crate::cgroups::Manager;
#[cfg(feature = "standard-oci-runtime")]
use crate::console;
use crate::log_child;
use crate::process::Process;
#[cfg(feature = "seccomp")]
@@ -40,7 +42,7 @@ use nix::pty;
use nix::sched::{self, CloneFlags};
use nix::sys::signal::{self, Signal};
use nix::sys::stat::{self, Mode};
use nix::unistd::{self, fork, ForkResult, Gid, Pid, Uid};
use nix::unistd::{self, fork, ForkResult, Gid, Pid, Uid, User};
use std::os::unix::fs::MetadataExt;
use std::os::unix::io::AsRawFd;
@@ -62,9 +64,7 @@ use rlimit::{setrlimit, Resource, Rlim};
use tokio::io::AsyncBufReadExt;
use tokio::sync::Mutex;
use crate::utils;
const EXEC_FIFO_FILENAME: &str = "exec.fifo";
pub const EXEC_FIFO_FILENAME: &str = "exec.fifo";
const INIT: &str = "INIT";
const NO_PIVOT: &str = "NO_PIVOT";
@@ -74,6 +74,7 @@ const CLOG_FD: &str = "CLOG_FD";
const FIFO_FD: &str = "FIFO_FD";
const HOME_ENV_KEY: &str = "HOME";
const PIDNS_FD: &str = "PIDNS_FD";
const CONSOLE_SOCKET_FD: &str = "CONSOLE_SOCKET_FD";
#[derive(Debug)]
pub struct ContainerStatus {
@@ -82,7 +83,7 @@ pub struct ContainerStatus {
}
impl ContainerStatus {
fn new() -> Self {
pub fn new() -> Self {
ContainerStatus {
pre_status: ContainerState::Created,
cur_status: ContainerState::Created,
@@ -99,6 +100,12 @@ impl ContainerStatus {
}
}
impl Default for ContainerStatus {
fn default() -> Self {
Self::new()
}
}
pub type Config = CreateOpts;
type NamespaceType = String;
@@ -106,7 +113,7 @@ lazy_static! {
// This locker ensures the child exit signal will be received by the right receiver.
pub static ref WAIT_PID_LOCKER: Arc<Mutex<bool>> = Arc::new(Mutex::new(false));
static ref NAMESPACES: HashMap<&'static str, CloneFlags> = {
pub static ref NAMESPACES: HashMap<&'static str, CloneFlags> = {
let mut m = HashMap::new();
m.insert("user", CloneFlags::CLONE_NEWUSER);
m.insert("ipc", CloneFlags::CLONE_NEWIPC);
@@ -119,7 +126,7 @@ lazy_static! {
};
// type to name hashmap, better to be in NAMESPACES
static ref TYPETONAME: HashMap<&'static str, &'static str> = {
pub static ref TYPETONAME: HashMap<&'static str, &'static str> = {
let mut m = HashMap::new();
m.insert("ipc", "ipc");
m.insert("user", "user");
@@ -236,6 +243,8 @@ pub struct LinuxContainer {
pub status: ContainerStatus,
pub created: SystemTime,
pub logger: Logger,
#[cfg(feature = "standard-oci-runtime")]
pub console_socket: PathBuf,
}
#[derive(Serialize, Deserialize, Debug)]
@@ -359,7 +368,6 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
)));
}
}
log_child!(cfd_log, "child process start run");
let buf = read_sync(crfd)?;
let spec_str = std::str::from_utf8(&buf)?;
@@ -379,6 +387,9 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
let cm: FsManager = serde_json::from_str(cm_str)?;
#[cfg(feature = "standard-oci-runtime")]
let csocket_fd = console::setup_console_socket(&std::env::var(CONSOLE_SOCKET_FD)?)?;
let p = if spec.process.is_some() {
spec.process.as_ref().unwrap()
} else {
@@ -649,12 +660,17 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
}
}
// set the "HOME" env getting from "/etc/passwd", if
// there's no uid entry in /etc/passwd, set "/" as the
// home env.
if env::var_os(HOME_ENV_KEY).is_none() {
let home_dir = utils::home_dir(guser.uid).unwrap_or_else(|_| String::from("/"));
env::set_var(HOME_ENV_KEY, home_dir);
// try to set "HOME" env by uid
if let Ok(Some(user)) = User::from_uid(Uid::from_raw(guser.uid)) {
if let Ok(user_home_dir) = user.dir.into_os_string().into_string() {
env::set_var(HOME_ENV_KEY, user_home_dir);
}
}
// set default home dir as "/" if "HOME" env is still empty
if env::var_os(HOME_ENV_KEY).is_none() {
env::set_var(HOME_ENV_KEY, String::from("/"));
}
}
let exec_file = Path::new(&args[0]);
@@ -670,10 +686,19 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
let _ = unistd::close(crfd);
let _ = unistd::close(cwfd);
unistd::setsid().context("create a new session")?;
if oci_process.terminal {
unsafe {
libc::ioctl(0, libc::TIOCSCTTY);
cfg_if::cfg_if! {
if #[cfg(feature = "standard-oci-runtime")] {
if let Some(csocket_fd) = csocket_fd {
console::setup_master_console(csocket_fd)?;
} else {
return Err(anyhow!("failed to get console master socket fd"));
}
}
else {
unistd::setsid().context("create a new session")?;
unsafe { libc::ioctl(0, libc::TIOCSCTTY) };
}
}
}
@@ -928,6 +953,14 @@ impl BaseContainer for LinuxContainer {
let exec_path = std::env::current_exe()?;
let mut child = std::process::Command::new(exec_path);
#[allow(unused_mut)]
let mut console_name = PathBuf::from("");
#[cfg(feature = "standard-oci-runtime")]
if !self.console_socket.as_os_str().is_empty() {
console_name = self.console_socket.clone();
}
let mut child = child
.arg("init")
.stdin(child_stdin)
@@ -937,7 +970,8 @@ impl BaseContainer for LinuxContainer {
.env(NO_PIVOT, format!("{}", self.config.no_pivot_root))
.env(CRFD_FD, format!("{}", crfd))
.env(CWFD_FD, format!("{}", cwfd))
.env(CLOG_FD, format!("{}", cfd_log));
.env(CLOG_FD, format!("{}", cfd_log))
.env(CONSOLE_SOCKET_FD, console_name);
if p.init {
child = child.env(FIFO_FD, format!("{}", fifofd));
@@ -1032,7 +1066,19 @@ impl BaseContainer for LinuxContainer {
let st = self.oci_state()?;
for pid in self.processes.keys() {
signal::kill(Pid::from_raw(*pid), Some(Signal::SIGKILL))?;
match signal::kill(Pid::from_raw(*pid), Some(Signal::SIGKILL)) {
Err(Errno::ESRCH) => {
info!(
self.logger,
"kill encounters ESRCH, pid: {}, container: {}",
pid,
self.id.clone()
);
continue;
}
Err(err) => return Err(anyhow!(err)),
Ok(_) => continue,
}
}
if spec.hooks.is_some() {
@@ -1419,8 +1465,16 @@ impl LinuxContainer {
.unwrap()
.as_secs(),
logger: logger.new(o!("module" => "rustjail", "subsystem" => "container", "cid" => id)),
#[cfg(feature = "standard-oci-runtime")]
console_socket: Path::new("").to_path_buf(),
})
}
#[cfg(feature = "standard-oci-runtime")]
pub fn set_console_socket(&mut self, console_socket: &Path) -> Result<()> {
self.console_socket = console_socket.to_path_buf();
Ok(())
}
}
fn setgroups(grps: &[libc::gid_t]) -> Result<()> {
@@ -1460,7 +1514,7 @@ use std::process::Stdio;
use std::time::Duration;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
async fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> {
pub async fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> {
let logger = logger.new(o!("action" => "execute-hook"));
let binary = PathBuf::from(h.path.as_str());


@@ -30,6 +30,8 @@ extern crate regex;
pub mod capabilities;
pub mod cgroups;
#[cfg(feature = "standard-oci-runtime")]
pub mod console;
pub mod container;
pub mod mount;
pub mod pipestream;
@@ -39,7 +41,6 @@ pub mod seccomp;
pub mod specconv;
pub mod sync;
pub mod sync_with_async;
pub mod utils;
pub mod validator;
use std::collections::HashMap;
@@ -265,7 +266,7 @@ pub fn resources_grpc_to_oci(res: &grpc::LinuxResources) -> oci::LinuxResources
swap: Some(mem.Swap),
kernel: Some(mem.Kernel),
kernel_tcp: Some(mem.KernelTCP),
swappiness: Some(mem.Swappiness as i64),
swappiness: Some(mem.Swappiness),
disable_oom_killer: Some(mem.DisableOOMKiller),
})
} else {
@@ -512,6 +513,7 @@ pub fn grpc_to_oci(grpc: &grpc::Spec) -> oci::Spec {
#[cfg(test)]
mod tests {
use super::*;
#[macro_export]
macro_rules! skip_if_not_root {
() => {
@@ -521,4 +523,595 @@ mod tests {
}
};
}
// Parameters:
//
// 1: expected Result
// 2: actual Result
// 3: string used to identify the test on error
#[macro_export]
macro_rules! assert_result {
($expected_result:expr, $actual_result:expr, $msg:expr) => {
if $expected_result.is_ok() {
let expected_value = $expected_result.as_ref().unwrap();
let actual_value = $actual_result.unwrap();
assert!(*expected_value == actual_value, "{}", $msg);
} else {
assert!($actual_result.is_err(), "{}", $msg);
let expected_error = $expected_result.as_ref().unwrap_err();
let expected_error_msg = format!("{:?}", expected_error);
let actual_error_msg = format!("{:?}", $actual_result.unwrap_err());
assert!(expected_error_msg == actual_error_msg, "{}", $msg);
}
};
}
#[test]
fn test_process_grpc_to_oci() {
#[derive(Debug)]
struct TestData {
grpcproc: grpc::Process,
result: oci::Process,
}
let tests = &[
TestData {
// All fields specified
grpcproc: grpc::Process {
Terminal: true,
ConsoleSize: protobuf::SingularPtrField::<grpc::Box>::some(grpc::Box {
Height: 123,
Width: 456,
..Default::default()
}),
User: protobuf::SingularPtrField::<grpc::User>::some(grpc::User {
UID: 1234,
GID: 5678,
AdditionalGids: Vec::from([910, 1112]),
Username: String::from("username"),
..Default::default()
}),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([String::from("env")])),
Cwd: String::from("cwd"),
Capabilities: protobuf::SingularPtrField::some(grpc::LinuxCapabilities {
Bounding: protobuf::RepeatedField::from(Vec::from([String::from("bnd")])),
Effective: protobuf::RepeatedField::from(Vec::from([String::from("eff")])),
Inheritable: protobuf::RepeatedField::from(Vec::from([String::from(
"inher",
)])),
Permitted: protobuf::RepeatedField::from(Vec::from([String::from("perm")])),
Ambient: protobuf::RepeatedField::from(Vec::from([String::from("amb")])),
..Default::default()
}),
Rlimits: protobuf::RepeatedField::from(Vec::from([
grpc::POSIXRlimit {
Type: String::from("r#type"),
Hard: 123,
Soft: 456,
..Default::default()
},
grpc::POSIXRlimit {
Type: String::from("r#type2"),
Hard: 789,
Soft: 1011,
..Default::default()
},
])),
NoNewPrivileges: true,
ApparmorProfile: String::from("apparmor profile"),
OOMScoreAdj: 123456,
SelinuxLabel: String::from("Selinux Label"),
..Default::default()
},
result: oci::Process {
terminal: true,
console_size: Some(oci::Box {
height: 123,
width: 456,
}),
user: oci::User {
uid: 1234,
gid: 5678,
additional_gids: Vec::from([910, 1112]),
username: String::from("username"),
},
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env")]),
cwd: String::from("cwd"),
capabilities: Some(oci::LinuxCapabilities {
bounding: Vec::from([String::from("bnd")]),
effective: Vec::from([String::from("eff")]),
inheritable: Vec::from([String::from("inher")]),
permitted: Vec::from([String::from("perm")]),
ambient: Vec::from([String::from("amb")]),
}),
rlimits: Vec::from([
oci::PosixRlimit {
r#type: String::from("r#type"),
hard: 123,
soft: 456,
},
oci::PosixRlimit {
r#type: String::from("r#type2"),
hard: 789,
soft: 1011,
},
]),
no_new_privileges: true,
apparmor_profile: String::from("apparmor profile"),
oom_score_adj: Some(123456),
selinux_label: String::from("Selinux Label"),
},
},
TestData {
// None ConsoleSize
grpcproc: grpc::Process {
ConsoleSize: protobuf::SingularPtrField::<grpc::Box>::none(),
OOMScoreAdj: 0,
..Default::default()
},
result: oci::Process {
console_size: None,
oom_score_adj: Some(0),
..Default::default()
},
},
TestData {
// None User
grpcproc: grpc::Process {
User: protobuf::SingularPtrField::<grpc::User>::none(),
OOMScoreAdj: 0,
..Default::default()
},
result: oci::Process {
user: oci::User {
uid: 0,
gid: 0,
additional_gids: vec![],
username: String::from(""),
},
oom_score_adj: Some(0),
..Default::default()
},
},
TestData {
// None Capabilities
grpcproc: grpc::Process {
Capabilities: protobuf::SingularPtrField::none(),
OOMScoreAdj: 0,
..Default::default()
},
result: oci::Process {
capabilities: None,
oom_score_adj: Some(0),
..Default::default()
},
},
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
let result = process_grpc_to_oci(&d.grpcproc);
let msg = format!("{}, result: {:?}", msg, result);
assert_eq!(d.result, result, "{}", msg);
}
}
#[test]
fn test_root_grpc_to_oci() {
#[derive(Debug)]
struct TestData {
grpcroot: grpc::Root,
result: oci::Root,
}
let tests = &[
TestData {
// Default fields
grpcroot: grpc::Root {
..Default::default()
},
result: oci::Root {
..Default::default()
},
},
TestData {
// Specified fields, readonly false
grpcroot: grpc::Root {
Path: String::from("path"),
Readonly: false,
..Default::default()
},
result: oci::Root {
path: String::from("path"),
readonly: false,
..Default::default()
},
},
TestData {
// Specified fields, readonly true
grpcroot: grpc::Root {
Path: String::from("path"),
Readonly: true,
..Default::default()
},
result: oci::Root {
path: String::from("path"),
readonly: true,
..Default::default()
},
},
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
let result = root_grpc_to_oci(&d.grpcroot);
let msg = format!("{}, result: {:?}", msg, result);
assert_eq!(d.result, result, "{}", msg);
}
}
#[test]
fn test_hooks_grpc_to_oci() {
#[derive(Debug)]
struct TestData {
grpchooks: grpc::Hooks,
result: oci::Hooks,
}
let tests = &[
TestData {
// Default fields
grpchooks: grpc::Hooks {
..Default::default()
},
result: oci::Hooks {
..Default::default()
},
},
TestData {
// All specified
grpchooks: grpc::Hooks {
Prestart: protobuf::RepeatedField::from(Vec::from([
grpc::Hook {
Path: String::from("prestartpath"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
},
grpc::Hook {
Path: String::from("prestartpath2"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg3"),
String::from("arg4"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env3"),
String::from("env4"),
])),
Timeout: 25,
..Default::default()
},
])),
Poststart: protobuf::RepeatedField::from(Vec::from([grpc::Hook {
Path: String::from("poststartpath"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
}])),
Poststop: protobuf::RepeatedField::from(Vec::from([grpc::Hook {
Path: String::from("poststoppath"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
}])),
..Default::default()
},
result: oci::Hooks {
prestart: Vec::from([
oci::Hook {
path: String::from("prestartpath"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
},
oci::Hook {
path: String::from("prestartpath2"),
args: Vec::from([String::from("arg3"), String::from("arg4")]),
env: Vec::from([String::from("env3"), String::from("env4")]),
timeout: Some(25),
},
]),
poststart: Vec::from([oci::Hook {
path: String::from("poststartpath"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
poststop: Vec::from([oci::Hook {
path: String::from("poststoppath"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
},
},
TestData {
// Prestart empty
grpchooks: grpc::Hooks {
Prestart: protobuf::RepeatedField::from(Vec::from([])),
Poststart: protobuf::RepeatedField::from(Vec::from([grpc::Hook {
Path: String::from("poststartpath"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
}])),
Poststop: protobuf::RepeatedField::from(Vec::from([grpc::Hook {
Path: String::from("poststoppath"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
}])),
..Default::default()
},
result: oci::Hooks {
prestart: Vec::from([]),
poststart: Vec::from([oci::Hook {
path: String::from("poststartpath"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
poststop: Vec::from([oci::Hook {
path: String::from("poststoppath"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
},
},
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
let result = hooks_grpc_to_oci(&d.grpchooks);
let msg = format!("{}, result: {:?}", msg, result);
assert_eq!(d.result, result, "{}", msg);
}
}
#[test]
fn test_mount_grpc_to_oci() {
#[derive(Debug)]
struct TestData {
grpcmount: grpc::Mount,
result: oci::Mount,
}
let tests = &[
TestData {
// Default fields
grpcmount: grpc::Mount {
..Default::default()
},
result: oci::Mount {
..Default::default()
},
},
TestData {
grpcmount: grpc::Mount {
destination: String::from("destination"),
source: String::from("source"),
field_type: String::from("fieldtype"),
options: protobuf::RepeatedField::from(Vec::from([
String::from("option1"),
String::from("option2"),
])),
..Default::default()
},
result: oci::Mount {
destination: String::from("destination"),
source: String::from("source"),
r#type: String::from("fieldtype"),
options: Vec::from([String::from("option1"), String::from("option2")]),
},
},
TestData {
grpcmount: grpc::Mount {
destination: String::from("destination"),
source: String::from("source"),
field_type: String::from("fieldtype"),
options: protobuf::RepeatedField::from(Vec::new()),
..Default::default()
},
result: oci::Mount {
destination: String::from("destination"),
source: String::from("source"),
r#type: String::from("fieldtype"),
options: Vec::new(),
},
},
TestData {
grpcmount: grpc::Mount {
destination: String::new(),
source: String::from("source"),
field_type: String::from("fieldtype"),
options: protobuf::RepeatedField::from(Vec::from([String::from("option1")])),
..Default::default()
},
result: oci::Mount {
destination: String::new(),
source: String::from("source"),
r#type: String::from("fieldtype"),
options: Vec::from([String::from("option1")]),
},
},
TestData {
grpcmount: grpc::Mount {
destination: String::from("destination"),
source: String::from("source"),
field_type: String::new(),
options: protobuf::RepeatedField::from(Vec::from([String::from("option1")])),
..Default::default()
},
result: oci::Mount {
destination: String::from("destination"),
source: String::from("source"),
r#type: String::new(),
options: Vec::from([String::from("option1")]),
},
},
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
let result = mount_grpc_to_oci(&d.grpcmount);
let msg = format!("{}, result: {:?}", msg, result);
assert_eq!(d.result, result, "{}", msg);
}
}
#[test]
fn test_hook_grpc_to_oci() {
#[derive(Debug)]
struct TestData<'a> {
grpchook: &'a [grpc::Hook],
result: Vec<oci::Hook>,
}
let tests = &[
TestData {
// Default fields
grpchook: &[
grpc::Hook {
Timeout: 0,
..Default::default()
},
grpc::Hook {
Timeout: 0,
..Default::default()
},
],
result: vec![
oci::Hook {
timeout: Some(0),
..Default::default()
},
oci::Hook {
timeout: Some(0),
..Default::default()
},
],
},
TestData {
// Specified fields
grpchook: &[
grpc::Hook {
Path: String::from("path"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
},
grpc::Hook {
Path: String::from("path2"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg3"),
String::from("arg4"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env3"),
String::from("env4"),
])),
Timeout: 20,
..Default::default()
},
],
result: vec![
oci::Hook {
path: String::from("path"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
},
oci::Hook {
path: String::from("path2"),
args: Vec::from([String::from("arg3"), String::from("arg4")]),
env: Vec::from([String::from("env3"), String::from("env4")]),
timeout: Some(20),
},
],
},
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
let result = hook_grpc_to_oci(d.grpchook);
let msg = format!("{}, result: {:?}", msg, result);
assert_eq!(d.result, result, "{}", msg);
}
}
}


@@ -32,16 +32,21 @@ use crate::log_child;
// Info reveals information about a particular mounted filesystem. This
// struct is populated from the content in the /proc/<pid>/mountinfo file.
#[derive(std::fmt::Debug)]
#[derive(std::fmt::Debug, PartialEq)]
pub struct Info {
mount_point: String,
optional: String,
fstype: String,
}
const MOUNTINFOFORMAT: &str = "{d} {d} {d}:{d} {} {} {} {}";
const MOUNTINFO_FORMAT: &str = "{d} {d} {d}:{d} {} {} {} {}";
const MOUNTINFO_PATH: &str = "/proc/self/mountinfo";
const PROC_PATH: &str = "/proc";
const ERR_FAILED_PARSE_MOUNTINFO: &str = "failed to parse mountinfo file";
const ERR_FAILED_PARSE_MOUNTINFO_FINAL_FIELDS: &str =
"failed to parse final fields in mountinfo file";
// Since libc doesn't define this constant for musl, redefine it here.
#[cfg(all(target_os = "linux", target_env = "gnu", not(target_arch = "s390x")))]
const PROC_SUPER_MAGIC: libc::c_long = 0x00009fa0;
@@ -518,7 +523,7 @@ pub fn pivot_rootfs<P: ?Sized + NixPath + std::fmt::Debug>(path: &P) -> Result<(
}
fn rootfs_parent_mount_private(path: &str) -> Result<()> {
let mount_infos = parse_mount_table()?;
let mount_infos = parse_mount_table(MOUNTINFO_PATH)?;
let mut max_len = 0;
let mut mount_point = String::from("");
@@ -546,8 +551,8 @@ fn rootfs_parent_mount_private(path: &str) -> Result<()> {
// Parse /proc/self/mountinfo because comparing Dev and ino does not work from
// bind mounts
fn parse_mount_table() -> Result<Vec<Info>> {
let file = File::open("/proc/self/mountinfo")?;
fn parse_mount_table(mountinfo_path: &str) -> Result<Vec<Info>> {
let file = File::open(mountinfo_path)?;
let reader = BufReader::new(file);
let mut infos = Vec::new();
@@ -569,7 +574,7 @@ fn parse_mount_table() -> Result<Vec<Info>> {
let (_id, _parent, _major, _minor, _root, mount_point, _opts, optional) = scan_fmt!(
&line,
MOUNTINFOFORMAT,
MOUNTINFO_FORMAT,
i32,
i32,
i32,
@@ -578,12 +583,17 @@ fn parse_mount_table() -> Result<Vec<Info>> {
String,
String,
String
)?;
)
.map_err(|_| anyhow!(ERR_FAILED_PARSE_MOUNTINFO))?;
let fields: Vec<&str> = line.split(" - ").collect();
if fields.len() == 2 {
let (fstype, _source, _vfs_opts) =
scan_fmt!(fields[1], "{} {} {}", String, String, String)?;
let final_fields: Vec<&str> = fields[1].split_whitespace().collect();
if final_fields.len() != 3 {
return Err(anyhow!(ERR_FAILED_PARSE_MOUNTINFO_FINAL_FIELDS));
}
let fstype = final_fields[0].to_string();
let mut optional_new = String::new();
if optional != "-" {
@@ -598,7 +608,7 @@ fn parse_mount_table() -> Result<Vec<Info>> {
infos.push(info);
} else {
return Err(anyhow!("failed to parse mount info file".to_string()));
return Err(anyhow!(ERR_FAILED_PARSE_MOUNTINFO));
}
}
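For reference, a mountinfo line looks like "36 35 98:0 / /mnt rw,noatime shared:1 - ext4 /dev/sda1 rw": the fields before the " - " separator are positional, and exactly three fields (filesystem type, mount source, per-superblock options) follow it. A minimal sketch of the separator handling the rewritten parse_mount_table() relies on (illustrative values only):

fn main() {
    let line = "36 35 98:0 / /mnt rw,noatime shared:1 - ext4 /dev/sda1 rw";
    // Split at the " - " separator mandated by the mountinfo format.
    let fields: Vec<&str> = line.split(" - ").collect();
    assert_eq!(fields.len(), 2);
    // Exactly three fields must follow the separator.
    let final_fields: Vec<&str> = fields[1].split_whitespace().collect();
    assert_eq!(final_fields, ["ext4", "/dev/sda1", "rw"]);
}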
@@ -619,7 +629,7 @@ fn chroot<P: ?Sized + NixPath>(_path: &P) -> Result<(), nix::Error> {
pub fn ms_move_root(rootfs: &str) -> Result<bool> {
unistd::chdir(rootfs)?;
let mount_infos = parse_mount_table()?;
let mount_infos = parse_mount_table(MOUNTINFO_PATH)?;
let root_path = Path::new(rootfs);
let abs_root_buf = root_path.absolutize()?;
@@ -1046,10 +1056,12 @@ fn readonly_path(path: &str) -> Result<()> {
#[cfg(test)]
mod tests {
use super::*;
use crate::assert_result;
use crate::skip_if_not_root;
use std::fs::create_dir;
use std::fs::create_dir_all;
use std::fs::remove_dir_all;
use std::io;
use std::os::unix::fs;
use std::os::unix::io::AsRawFd;
use tempfile::tempdir;
@@ -1286,6 +1298,113 @@ mod tests {
let ret = stat::stat(path);
assert!(ret.is_ok(), "Should pass. Got: {:?}", ret);
}
#[test]
fn test_mount_from() {
#[derive(Debug)]
struct TestData<'a> {
source: &'a str,
destination: &'a str,
r#type: &'a str,
flags: MsFlags,
error_contains: &'a str,
// if true, a directory will be created at path in source
make_source_directory: bool,
// if true, a file will be created at path in source
make_source_file: bool,
}
impl Default for TestData<'_> {
fn default() -> Self {
TestData {
source: "tmp",
destination: "dest",
r#type: "tmpfs",
flags: MsFlags::empty(),
error_contains: "",
make_source_directory: true,
make_source_file: false,
}
}
}
let tests = &[
TestData {
..Default::default()
},
TestData {
flags: MsFlags::MS_BIND,
..Default::default()
},
TestData {
r#type: "bind",
..Default::default()
},
TestData {
r#type: "cgroup2",
..Default::default()
},
TestData {
r#type: "bind",
make_source_directory: false,
error_contains: &format!("{}", std::io::Error::from_raw_os_error(libc::ENOENT)),
..Default::default()
},
TestData {
r#type: "bind",
make_source_directory: false,
make_source_file: true,
..Default::default()
},
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
let tempdir = tempdir().unwrap();
let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC).unwrap();
defer!({
unistd::close(rfd).unwrap();
unistd::close(wfd).unwrap();
});
let source_path = tempdir.path().join(d.source).to_str().unwrap().to_string();
if d.make_source_directory {
std::fs::create_dir_all(&source_path).unwrap();
} else if d.make_source_file {
std::fs::write(&source_path, []).unwrap();
}
let mount = Mount {
source: source_path,
destination: d.destination.to_string(),
r#type: d.r#type.to_string(),
options: vec![],
};
let result = mount_from(
wfd,
&mount,
tempdir.path().to_str().unwrap(),
d.flags,
"",
"",
);
let msg = format!("{}: result: {:?}", msg, result);
if d.error_contains.is_empty() {
assert!(result.is_ok(), "{}", msg);
} else {
assert!(result.is_err(), "{}", msg);
let error_msg = format!("{}", result.unwrap_err());
assert!(error_msg.contains(d.error_contains), "{}", msg);
}
}
}
#[test]
fn test_check_proc_mount() {
let mount = oci::Mount {
@@ -1401,6 +1520,121 @@ mod tests {
}
}
#[test]
fn test_parse_mount_table() {
#[derive(Debug)]
struct TestData<'a> {
mountinfo_data: Option<&'a str>,
result: Result<Vec<Info>>,
}
let tests = &[
TestData {
mountinfo_data: Some(
"22 933 0:20 / /sys rw,nodev shared:2 - sysfs sysfs rw,noexec",
),
result: Ok(vec![Info {
mount_point: "/sys".to_string(),
optional: "shared:2".to_string(),
fstype: "sysfs".to_string(),
}]),
},
TestData {
mountinfo_data: Some(
r#"22 933 0:20 / /sys rw,nodev - sysfs sysfs rw,noexec
81 13 1:2 / /tmp/dir rw shared:2 - tmpfs tmpfs rw"#,
),
result: Ok(vec![
Info {
mount_point: "/sys".to_string(),
optional: "".to_string(),
fstype: "sysfs".to_string(),
},
Info {
mount_point: "/tmp/dir".to_string(),
optional: "shared:2".to_string(),
fstype: "tmpfs".to_string(),
},
]),
},
TestData {
mountinfo_data: Some(
"22 933 0:20 /foo\040-\040bar /sys rw,nodev shared:2 - sysfs sysfs rw,noexec",
),
result: Ok(vec![Info {
mount_point: "/sys".to_string(),
optional: "shared:2".to_string(),
fstype: "sysfs".to_string(),
}]),
},
TestData {
mountinfo_data: Some(""),
result: Ok(vec![]),
},
TestData {
mountinfo_data: Some("invalid line data - sysfs sysfs rw"),
result: Err(anyhow!(ERR_FAILED_PARSE_MOUNTINFO)),
},
TestData {
mountinfo_data: Some("22 96 0:21 / /sys rw,noexec - sysfs"),
result: Err(anyhow!(ERR_FAILED_PARSE_MOUNTINFO_FINAL_FIELDS)),
},
TestData {
mountinfo_data: Some("22 96 0:21 / /sys rw,noexec - sysfs sysfs rw rw"),
result: Err(anyhow!(ERR_FAILED_PARSE_MOUNTINFO_FINAL_FIELDS)),
},
TestData {
mountinfo_data: Some("22 96 0:21 / /sys rw,noexec shared:2 - x - x"),
result: Err(anyhow!(ERR_FAILED_PARSE_MOUNTINFO)),
},
TestData {
mountinfo_data: Some("-"),
result: Err(anyhow!(ERR_FAILED_PARSE_MOUNTINFO)),
},
TestData {
mountinfo_data: Some("--"),
result: Err(anyhow!(ERR_FAILED_PARSE_MOUNTINFO)),
},
TestData {
mountinfo_data: Some("- -"),
result: Err(anyhow!(ERR_FAILED_PARSE_MOUNTINFO)),
},
TestData {
mountinfo_data: Some(" - "),
result: Err(anyhow!(ERR_FAILED_PARSE_MOUNTINFO)),
},
TestData {
mountinfo_data: Some(
r#"22 933 0:20 / /sys rw,nodev - sysfs sysfs rw,noexec
invalid line
81 13 1:2 / /tmp/dir rw shared:2 - tmpfs tmpfs rw"#,
),
result: Err(anyhow!(ERR_FAILED_PARSE_MOUNTINFO)),
},
TestData {
mountinfo_data: None,
result: Err(anyhow!(io::Error::from_raw_os_error(libc::ENOENT))),
},
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
let tempdir = tempdir().unwrap();
let mountinfo_path = tempdir.path().join("mountinfo");
if let Some(mountinfo_data) = d.mountinfo_data {
std::fs::write(&mountinfo_path, mountinfo_data).unwrap();
}
let result = parse_mount_table(mountinfo_path.to_str().unwrap());
let msg = format!("{}: result: {:?}", msg, result);
assert_result!(d.result, result, msg);
}
}
#[test]
fn test_dev_rel_path() {
// Valid device paths


@@ -5,7 +5,7 @@
use libc::pid_t;
use std::fs::File;
use std::os::unix::io::RawFd;
use std::os::unix::io::{AsRawFd, RawFd};
use tokio::sync::mpsc::Sender;
use nix::errno::Errno;
@@ -137,19 +137,25 @@ impl Process {
info!(logger, "before create console socket!");
if !p.tty {
info!(logger, "created console socket!");
if cfg!(feature = "standard-oci-runtime") {
p.stdin = Some(std::io::stdin().as_raw_fd());
p.stdout = Some(std::io::stdout().as_raw_fd());
p.stderr = Some(std::io::stderr().as_raw_fd());
} else {
info!(logger, "created console socket!");
let (stdin, pstdin) = unistd::pipe2(OFlag::O_CLOEXEC)?;
p.parent_stdin = Some(pstdin);
p.stdin = Some(stdin);
let (stdin, pstdin) = unistd::pipe2(OFlag::O_CLOEXEC)?;
p.parent_stdin = Some(pstdin);
p.stdin = Some(stdin);
let (pstdout, stdout) = create_extended_pipe(OFlag::O_CLOEXEC, pipe_size)?;
p.parent_stdout = Some(pstdout);
p.stdout = Some(stdout);
let (pstdout, stdout) = create_extended_pipe(OFlag::O_CLOEXEC, pipe_size)?;
p.parent_stdout = Some(pstdout);
p.stdout = Some(stdout);
let (pstderr, stderr) = create_extended_pipe(OFlag::O_CLOEXEC, pipe_size)?;
p.parent_stderr = Some(pstderr);
p.stderr = Some(stderr);
let (pstderr, stderr) = create_extended_pipe(OFlag::O_CLOEXEC, pipe_size)?;
p.parent_stderr = Some(pstderr);
p.stderr = Some(stderr);
}
}
Ok(p)
}
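A note on the feature check above: cfg!(feature = "standard-oci-runtime") expands to a compile-time true/false constant, so both branches are always type-checked regardless of which features are enabled, whereas a #[cfg(...)] attribute would compile the non-matching item out entirely. A minimal sketch of the difference (function names are illustrative):

// cfg! is a boolean expression; both arms must compile.
fn wire_stdio_runtime_check() {
    if cfg!(feature = "standard-oci-runtime") {
        // inherit the caller's stdin/stdout/stderr
    } else {
        // create pipes, as before
    }
}

// #[cfg] removes the non-matching item before type checking.
#[cfg(feature = "standard-oci-runtime")]
fn wire_stdio() { /* inherit stdio */ }
#[cfg(not(feature = "standard-oci-runtime"))]
fn wire_stdio() { /* create pipes */ }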
@@ -284,5 +290,11 @@ mod tests {
// group of the calling process.
process.pid = 0;
assert!(process.signal(libc::SIGCONT).is_ok());
if cfg!(feature = "standard-oci-runtime") {
assert_eq!(process.stdin.unwrap(), std::io::stdin().as_raw_fd());
assert_eq!(process.stdout.unwrap(), std::io::stdout().as_raw_fd());
assert_eq!(process.stderr.unwrap(), std::io::stderr().as_raw_fd());
}
}
}


@@ -5,7 +5,7 @@
use oci::Spec;
#[derive(Debug)]
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
pub struct CreateOpts {
pub cgroup_name: String,
pub use_systemd_cgroup: bool,


@@ -1,120 +0,0 @@
// Copyright (c) 2021 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use anyhow::{anyhow, Context, Result};
use libc::gid_t;
use libc::uid_t;
use std::fs::File;
use std::io::{BufRead, BufReader};
const PASSWD_FILE: &str = "/etc/passwd";
// An entry from /etc/passwd
#[derive(Debug, PartialEq, PartialOrd)]
pub struct PasswdEntry {
// username
pub name: String,
// user password
pub passwd: String,
// user id
pub uid: uid_t,
// group id
pub gid: gid_t,
// user Information
pub gecos: String,
// home directory
pub dir: String,
// User's Shell
pub shell: String,
}
// get an entry for a given `uid` from `/etc/passwd`
fn get_entry_by_uid(uid: uid_t, path: &str) -> Result<PasswdEntry> {
let file = File::open(path).with_context(|| format!("open file {}", path))?;
let mut reader = BufReader::new(file);
let mut line = String::new();
loop {
line.clear();
match reader.read_line(&mut line) {
Ok(0) => return Err(anyhow!(format!("file {} is empty", path))),
Ok(_) => (),
Err(e) => {
return Err(anyhow!(format!(
"failed to read file {} with {:?}",
path, e
)))
}
}
if line.starts_with('#') {
continue;
}
let parts: Vec<&str> = line.split(':').map(|part| part.trim()).collect();
if parts.len() != 7 {
continue;
}
match parts[2].parse() {
Err(_e) => continue,
Ok(new_uid) => {
if uid != new_uid {
continue;
}
let entry = PasswdEntry {
name: parts[0].to_string(),
passwd: parts[1].to_string(),
uid: new_uid,
gid: parts[3].parse().unwrap_or(0),
gecos: parts[4].to_string(),
dir: parts[5].to_string(),
shell: parts[6].to_string(),
};
return Ok(entry);
}
}
}
}
pub fn home_dir(uid: uid_t) -> Result<String> {
get_entry_by_uid(uid, PASSWD_FILE).map(|entry| entry.dir)
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
use tempfile::Builder;
#[test]
fn test_get_entry_by_uid() {
let tmpdir = Builder::new().tempdir().unwrap();
let tmpdir_path = tmpdir.path().to_str().unwrap();
let temp_passwd = format!("{}/passwd", tmpdir_path);
let mut tempf = File::create(temp_passwd.as_str()).unwrap();
let passwd_entries = "root:x:0:0:root:/root0:/bin/bash
root:x:1:0:root:/root1:/bin/bash
#root:x:1:0:root:/rootx:/bin/bash
root:x:2:0:root:/root2:/bin/bash
root:x:3:0:root:/root3
root:x:3:0:root:/root3:/bin/bash";
writeln!(tempf, "{}", passwd_entries).unwrap();
let entry = get_entry_by_uid(0, temp_passwd.as_str()).unwrap();
assert_eq!(entry.dir.as_str(), "/root0");
let entry = get_entry_by_uid(1, temp_passwd.as_str()).unwrap();
assert_eq!(entry.dir.as_str(), "/root1");
let entry = get_entry_by_uid(2, temp_passwd.as_str()).unwrap();
assert_eq!(entry.dir.as_str(), "/root2");
let entry = get_entry_by_uid(3, temp_passwd.as_str()).unwrap();
assert_eq!(entry.dir.as_str(), "/root3");
}
}
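The hand-rolled /etc/passwd parser above is removed outright. A minimal sketch of an equivalent lookup via the nix crate, assuming its User::from_uid API (illustrative; the actual replacement call site is outside this diff):

use anyhow::{anyhow, Result};
use nix::unistd::{Uid, User};

// Resolve the home directory through libc's getpwuid_r rather than
// parsing /etc/passwd by hand.
pub fn home_dir(uid: u32) -> Result<String> {
    let user = User::from_uid(Uid::from_raw(uid))?
        .ok_or_else(|| anyhow!("no passwd entry for uid {}", uid))?;
    user.dir
        .to_str()
        .map(String::from)
        .ok_or_else(|| anyhow!("home directory for uid {} is not valid UTF-8", uid))
}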


@@ -432,6 +432,8 @@ fn get_container_pipe_size(param: &str) -> Result<i32> {
#[cfg(test)]
mod tests {
use crate::assert_result;
use super::*;
use anyhow::anyhow;
use std::fs::File;
@@ -439,32 +441,6 @@ mod tests {
use std::time;
use tempfile::tempdir;
// Parameters:
//
// 1: expected Result
// 2: actual Result
// 3: string used to identify the test on error
macro_rules! assert_result {
($expected_result:expr, $actual_result:expr, $msg:expr) => {
if $expected_result.is_ok() {
let expected_level = $expected_result.as_ref().unwrap();
let actual_level = $actual_result.unwrap();
assert!(*expected_level == actual_level, "{}", $msg);
} else {
let expected_error = $expected_result.as_ref().unwrap_err();
let expected_error_msg = format!("{:?}", expected_error);
if let Err(actual_error) = $actual_result {
let actual_error_msg = format!("{:?}", actual_error);
assert!(expected_error_msg == actual_error_msg, "{}", $msg);
} else {
assert!(expected_error_msg == "expected error, got OK", "{}", $msg);
}
}
};
}
#[test]
fn test_new() {
let config: AgentConfig = Default::default();


@@ -416,3 +416,59 @@ fn reset_sigpipe() {
use crate::config::AgentConfig;
use std::os::unix::io::{FromRawFd, RawFd};
#[cfg(test)]
mod tests {
use super::*;
use crate::test_utils::test_utils::TestUserType;
#[tokio::test]
async fn test_create_logger_task() {
#[derive(Debug)]
struct TestData {
vsock_port: u32,
test_user: TestUserType,
result: Result<()>,
}
let tests = &[
TestData {
// non-root user cannot use privileged vsock port
vsock_port: 1,
test_user: TestUserType::NonRootOnly,
result: Err(anyhow!(nix::errno::Errno::from_i32(libc::EACCES))),
},
TestData {
// passing vsock_port 0 causes logger task to write to stdout
vsock_port: 0,
test_user: TestUserType::Any,
result: Ok(()),
},
];
for (i, d) in tests.iter().enumerate() {
if d.test_user == TestUserType::RootOnly {
skip_if_not_root!();
} else if d.test_user == TestUserType::NonRootOnly {
skip_if_root!();
}
let msg = format!("test[{}]: {:?}", i, d);
let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC).unwrap();
defer!({
// rfd is closed by the use of PipeStream in the create_logger_task function,
// but we will attempt to close in case of a failure
let _ = unistd::close(rfd);
unistd::close(wfd).unwrap();
});
let (shutdown_tx, shutdown_rx) = channel(true);
shutdown_tx.send(true).unwrap();
let result = create_logger_task(rfd, d.vsock_port, shutdown_rx).await;
let msg = format!("{}, result: {:?}", msg, result);
assert_result!(d.result, result, msg);
}
}
}


@@ -16,7 +16,7 @@ use std::sync::Arc;
use tokio::sync::Mutex;
use nix::mount::MsFlags;
use nix::unistd::Gid;
use nix::unistd::{Gid, Uid};
use regex::Regex;
@@ -29,6 +29,7 @@ use crate::device::{
use crate::linux_abi::*;
use crate::pci;
use crate::protocols::agent::Storage;
use crate::protocols::types::FSGroupChangePolicy;
use crate::Sandbox;
#[cfg(target_arch = "s390x")]
use crate::{ccw, device::get_virtio_blk_ccw_device_name};
@@ -43,6 +44,11 @@ pub const MOUNT_GUEST_TAG: &str = "kataShared";
// Allocating an FSGroup that owns the pod's volumes
const FS_GID: &str = "fsgid";
const RW_MASK: u32 = 0o660;
const RO_MASK: u32 = 0o440;
const EXEC_MASK: u32 = 0o110;
const MODE_SETGID: u32 = 0o2000;
#[rustfmt::skip]
lazy_static! {
pub static ref FLAGS: HashMap<&'static str, (bool, MsFlags)> = {
@@ -85,11 +91,11 @@ lazy_static! {
}
#[derive(Debug, PartialEq)]
pub struct InitMount {
fstype: &'static str,
src: &'static str,
dest: &'static str,
options: Vec<&'static str>,
pub struct InitMount<'a> {
fstype: &'a str,
src: &'a str,
dest: &'a str,
options: Vec<&'a str>,
}
#[rustfmt::skip]
@@ -115,7 +121,7 @@ lazy_static!{
#[rustfmt::skip]
lazy_static! {
pub static ref INIT_ROOTFS_MOUNTS: Vec<InitMount> = vec![
pub static ref INIT_ROOTFS_MOUNTS: Vec<InitMount<'static>> = vec![
InitMount{fstype: "proc", src: "proc", dest: "/proc", options: vec!["nosuid", "nodev", "noexec"]},
InitMount{fstype: "sysfs", src: "sysfs", dest: "/sys", options: vec!["nosuid", "nodev", "noexec"]},
InitMount{fstype: "devtmpfs", src: "dev", dest: "/dev", options: vec!["nosuid"]},
@@ -222,7 +228,7 @@ async fn ephemeral_storage_handler(
let meta = fs::metadata(&storage.mount_point)?;
let mut permission = meta.permissions();
let o_mode = meta.mode() | 0o2000;
let o_mode = meta.mode() | MODE_SETGID;
permission.set_mode(o_mode);
fs::set_permissions(&storage.mount_point, permission)?;
}
@@ -272,7 +278,7 @@ async fn local_storage_handler(
if need_set_fsgid {
// set SetGid mode mask.
o_mode |= 0o2000;
o_mode |= MODE_SETGID;
}
permission.set_mode(o_mode);
@@ -489,7 +495,9 @@ fn common_storage_handler(logger: &Logger, storage: &Storage) -> Result<String>
// Mount the storage device.
let mount_point = storage.mount_point.to_string();
mount_storage(logger, storage).and(Ok(mount_point))
mount_storage(logger, storage)?;
set_ownership(logger, storage)?;
Ok(mount_point)
}
// nvdimm_storage_handler handles the storage for NVDIMM driver.
@@ -573,6 +581,91 @@ fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
)
}
#[instrument]
pub fn set_ownership(logger: &Logger, storage: &Storage) -> Result<()> {
let logger = logger.new(o!("subsystem" => "mount", "fn" => "set_ownership"));
// If fsGroup is not set, skip performing ownership change
if storage.fs_group.is_none() {
return Ok(());
}
let fs_group = storage.get_fs_group();
let mut read_only = false;
let opts_vec: Vec<String> = storage.options.to_vec();
if opts_vec.contains(&String::from("ro")) {
read_only = true;
}
let mount_path = Path::new(&storage.mount_point);
let metadata = mount_path.metadata().map_err(|err| {
error!(logger, "failed to obtain metadata for mount path";
"mount-path" => mount_path.to_str(),
"error" => err.to_string(),
);
err
})?;
if fs_group.group_change_policy == FSGroupChangePolicy::OnRootMismatch
&& metadata.gid() == fs_group.group_id
{
let mut mask = if read_only { RO_MASK } else { RW_MASK };
mask |= EXEC_MASK;
// With the fsGroup change policy set to OnRootMismatch, the ownership
// change is skipped when the root of the mount path already has the
// desired gid and the expected permission bits.
let current_mode = metadata.permissions().mode();
if (mask & current_mode == mask) && (current_mode & MODE_SETGID != 0) {
info!(logger, "skipping ownership change for volume";
"mount-path" => mount_path.to_str(),
"fs-group" => fs_group.group_id.to_string(),
);
return Ok(());
}
}
info!(logger, "performing recursive ownership change";
"mount-path" => mount_path.to_str(),
"fs-group" => fs_group.group_id.to_string(),
);
recursive_ownership_change(
mount_path,
None,
Some(Gid::from_raw(fs_group.group_id)),
read_only,
)
}
#[instrument]
pub fn recursive_ownership_change(
path: &Path,
uid: Option<Uid>,
gid: Option<Gid>,
read_only: bool,
) -> Result<()> {
let mut mask = if read_only { RO_MASK } else { RW_MASK };
if path.is_dir() {
for entry in fs::read_dir(&path)? {
recursive_ownership_change(entry?.path().as_path(), uid, gid, read_only)?;
}
mask |= EXEC_MASK;
mask |= MODE_SETGID;
}
nix::unistd::chown(path, uid, gid)?;
if gid.is_some() {
let metadata = path.metadata()?;
let mut permission = metadata.permissions();
let target_mode = metadata.mode() | mask;
permission.set_mode(target_mode);
fs::set_permissions(path, permission)?;
}
Ok(())
}
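To make the mask arithmetic concrete: a directory on a read-write volume gains RW_MASK | EXEC_MASK | MODE_SETGID, a read-only one gains RO_MASK | EXEC_MASK | MODE_SETGID, and plain files only gain RW_MASK or RO_MASK. A quick check of the values (illustrative):

fn main() {
    // Read-write directory: owner/group rw, owner/group x, setgid.
    assert_eq!(0o660 | 0o110 | 0o2000, 0o2770);
    // Read-only directory: owner/group r, owner/group x, setgid.
    assert_eq!(0o440 | 0o110 | 0o2000, 0o2550);
    // Files: just 0o660 (rw) or 0o440 (ro), OR-ed into the existing mode.
}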
/// Looks for `mount_point` entry in the /proc/mounts.
#[instrument]
pub fn is_mounted(mount_point: &str) -> Result<bool> {
@@ -766,8 +859,9 @@ pub fn get_mount_fs_type_from_file(mount_file: &str, mount_point: &str) -> Resul
}
Err(anyhow!(
"failed to find FS type for mount point {}",
mount_point
"failed to find FS type for mount point {}, mount file content: {:?}",
mount_point,
fs::read_to_string(mount_file)
))
}
@@ -776,7 +870,7 @@ pub fn get_cgroup_mounts(
logger: &Logger,
cg_path: &str,
unified_cgroup_hierarchy: bool,
) -> Result<Vec<InitMount>> {
) -> Result<Vec<InitMount<'static>>> {
// cgroup v2
// https://github.com/kata-containers/agent/blob/8c9bbadcd448c9a67690fbe11a860aaacc69813c/agent.go#L1249
if unified_cgroup_hierarchy {
@@ -924,20 +1018,16 @@ fn parse_options(option_list: Vec<String>) -> HashMap<String, String> {
#[cfg(test)]
mod tests {
use super::*;
use crate::{skip_if_not_root, skip_loop_if_not_root, skip_loop_if_root};
use crate::test_utils::test_utils::TestUserType;
use crate::{skip_if_not_root, skip_loop_by_user, skip_loop_if_not_root, skip_loop_if_root};
use protobuf::RepeatedField;
use protocols::agent::FSGroup;
use std::fs::File;
use std::fs::OpenOptions;
use std::io::Write;
use std::path::PathBuf;
use tempfile::tempdir;
#[derive(Debug, PartialEq)]
enum TestUserType {
RootOnly,
NonRootOnly,
Any,
}
#[test]
fn test_mount() {
#[derive(Debug)]
@@ -1023,11 +1113,7 @@ mod tests {
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
if d.test_user == TestUserType::RootOnly {
skip_loop_if_not_root!(msg);
} else if d.test_user == TestUserType::NonRootOnly {
skip_loop_if_root!(msg);
}
skip_loop_by_user!(msg, d.test_user);
let src: PathBuf;
let dest: PathBuf;
@@ -1497,6 +1583,226 @@ mod tests {
assert!(testfile.is_file());
}
#[test]
fn test_mount_storage() {
#[derive(Debug)]
struct TestData<'a> {
test_user: TestUserType,
storage: Storage,
error_contains: &'a str,
make_source_dir: bool,
make_mount_dir: bool,
deny_mount_permission: bool,
}
impl Default for TestData<'_> {
fn default() -> Self {
TestData {
test_user: TestUserType::Any,
storage: Storage {
mount_point: "mnt".to_string(),
source: "src".to_string(),
fstype: "tmpfs".to_string(),
..Default::default()
},
make_source_dir: true,
make_mount_dir: false,
deny_mount_permission: false,
error_contains: "",
}
}
}
let tests = &[
TestData {
test_user: TestUserType::NonRootOnly,
error_contains: "EPERM: Operation not permitted",
..Default::default()
},
TestData {
test_user: TestUserType::RootOnly,
..Default::default()
},
TestData {
storage: Storage {
mount_point: "mnt".to_string(),
source: "src".to_string(),
fstype: "bind".to_string(),
..Default::default()
},
make_source_dir: false,
make_mount_dir: true,
error_contains: "Could not create mountpoint",
..Default::default()
},
TestData {
test_user: TestUserType::NonRootOnly,
deny_mount_permission: true,
error_contains: "Could not create mountpoint",
..Default::default()
},
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
skip_loop_by_user!(msg, d.test_user);
let drain = slog::Discard;
let logger = slog::Logger::root(drain, o!());
let tempdir = tempdir().unwrap();
let source = tempdir.path().join(&d.storage.source);
let mount_point = tempdir.path().join(&d.storage.mount_point);
let storage = Storage {
source: source.to_str().unwrap().to_string(),
mount_point: mount_point.to_str().unwrap().to_string(),
..d.storage.clone()
};
if d.make_source_dir {
fs::create_dir_all(&storage.source).unwrap();
}
if d.make_mount_dir {
fs::create_dir_all(&storage.mount_point).unwrap();
}
if d.deny_mount_permission {
fs::set_permissions(
mount_point.parent().unwrap(),
fs::Permissions::from_mode(0o000),
)
.unwrap();
}
let result = mount_storage(&logger, &storage);
// restore permissions so tempdir can be cleaned up
if d.deny_mount_permission {
fs::set_permissions(
mount_point.parent().unwrap(),
fs::Permissions::from_mode(0o755),
)
.unwrap();
}
if result.is_ok() {
nix::mount::umount(&mount_point).unwrap();
}
let msg = format!("{}: result: {:?}", msg, result);
if d.error_contains.is_empty() {
assert!(result.is_ok(), "{}", msg);
} else {
assert!(result.is_err(), "{}", msg);
let error_msg = format!("{}", result.unwrap_err());
assert!(error_msg.contains(d.error_contains), "{}", msg);
}
}
}
#[test]
fn test_mount_to_rootfs() {
#[derive(Debug)]
struct TestData<'a> {
test_user: TestUserType,
src: &'a str,
options: Vec<&'a str>,
error_contains: &'a str,
deny_mount_dir_permission: bool,
// if true, src will be placed under a temporary directory
mask_src: bool,
}
impl Default for TestData<'_> {
fn default() -> Self {
TestData {
test_user: TestUserType::Any,
src: "src",
options: vec![],
error_contains: "",
deny_mount_dir_permission: false,
mask_src: true,
}
}
}
let tests = &[
TestData {
test_user: TestUserType::NonRootOnly,
error_contains: "EPERM: Operation not permitted",
..Default::default()
},
TestData {
test_user: TestUserType::NonRootOnly,
src: "dev",
mask_src: false,
..Default::default()
},
TestData {
test_user: TestUserType::RootOnly,
..Default::default()
},
TestData {
test_user: TestUserType::NonRootOnly,
deny_mount_dir_permission: true,
error_contains: "could not create directory",
..Default::default()
},
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
skip_loop_by_user!(msg, d.test_user);
let drain = slog::Discard;
let logger = slog::Logger::root(drain, o!());
let tempdir = tempdir().unwrap();
let src = if d.mask_src {
tempdir.path().join(&d.src)
} else {
Path::new(d.src).to_path_buf()
};
let dest = tempdir.path().join("mnt");
let init_mount = InitMount {
fstype: "tmpfs",
src: src.to_str().unwrap(),
dest: dest.to_str().unwrap(),
options: d.options.clone(),
};
if d.deny_mount_dir_permission {
fs::set_permissions(dest.parent().unwrap(), fs::Permissions::from_mode(0o000))
.unwrap();
}
let result = mount_to_rootfs(&logger, &init_mount);
// restore permissions so tempdir can be cleaned up
if d.deny_mount_dir_permission {
fs::set_permissions(dest.parent().unwrap(), fs::Permissions::from_mode(0o755))
.unwrap();
}
if result.is_ok() && d.mask_src {
nix::mount::umount(&dest).unwrap();
}
let msg = format!("{}: result: {:?}", msg, result);
if d.error_contains.is_empty() {
assert!(result.is_ok(), "{}", msg);
} else {
assert!(result.is_err(), "{}", msg);
let error_msg = format!("{}", result.unwrap_err());
assert!(error_msg.contains(d.error_contains), "{}", msg);
}
}
}
#[test]
fn test_get_pagesize_and_size_from_option() {
let expected_pagesize = 2048;
@@ -1552,4 +1858,263 @@ mod tests {
}
}
}
#[test]
fn test_parse_mount_flags_and_options() {
#[derive(Debug)]
struct TestData<'a> {
options_vec: Vec<&'a str>,
result: (MsFlags, &'a str),
}
let tests = &[
TestData {
options_vec: vec![],
result: (MsFlags::empty(), ""),
},
TestData {
options_vec: vec!["ro"],
result: (MsFlags::MS_RDONLY, ""),
},
TestData {
options_vec: vec!["rw"],
result: (MsFlags::empty(), ""),
},
TestData {
options_vec: vec!["ro", "rw"],
result: (MsFlags::empty(), ""),
},
TestData {
options_vec: vec!["ro", "nodev"],
result: (MsFlags::MS_RDONLY | MsFlags::MS_NODEV, ""),
},
TestData {
options_vec: vec!["option1", "nodev", "option2"],
result: (MsFlags::MS_NODEV, "option1,option2"),
},
TestData {
options_vec: vec!["rbind", "", "ro"],
result: (MsFlags::MS_BIND | MsFlags::MS_REC | MsFlags::MS_RDONLY, ""),
},
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
let result = parse_mount_flags_and_options(d.options_vec.clone());
let msg = format!("{}: result: {:?}", msg, result);
let expected_result = (d.result.0, d.result.1.to_owned());
assert_eq!(expected_result, result, "{}", msg);
}
}
#[test]
fn test_set_ownership() {
skip_if_not_root!();
let logger = slog::Logger::root(slog::Discard, o!());
#[derive(Debug)]
struct TestData<'a> {
mount_path: &'a str,
fs_group: Option<FSGroup>,
read_only: bool,
expected_group_id: u32,
expected_permission: u32,
}
let tests = &[
TestData {
mount_path: "foo",
fs_group: None,
read_only: false,
expected_group_id: 0,
expected_permission: 0,
},
TestData {
mount_path: "rw_mount",
fs_group: Some(FSGroup {
group_id: 3000,
group_change_policy: FSGroupChangePolicy::Always,
unknown_fields: Default::default(),
cached_size: Default::default(),
}),
read_only: false,
expected_group_id: 3000,
expected_permission: RW_MASK | EXEC_MASK | MODE_SETGID,
},
TestData {
mount_path: "ro_mount",
fs_group: Some(FSGroup {
group_id: 3000,
group_change_policy: FSGroupChangePolicy::OnRootMismatch,
unknown_fields: Default::default(),
cached_size: Default::default(),
}),
read_only: true,
expected_group_id: 3000,
expected_permission: RO_MASK | EXEC_MASK | MODE_SETGID,
},
];
let tempdir = tempdir().expect("failed to create tmpdir");
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
let mount_dir = tempdir.path().join(d.mount_path);
fs::create_dir(&mount_dir)
.unwrap_or_else(|_| panic!("{}: failed to create root directory", msg));
let directory_mode = mount_dir.as_path().metadata().unwrap().permissions().mode();
let mut storage_data = Storage::new();
if d.read_only {
storage_data.set_options(RepeatedField::from_slice(&[
"foo".to_string(),
"ro".to_string(),
]));
}
if let Some(fs_group) = d.fs_group.clone() {
storage_data.set_fs_group(fs_group);
}
storage_data.mount_point = mount_dir.clone().into_os_string().into_string().unwrap();
let result = set_ownership(&logger, &storage_data);
assert!(result.is_ok());
assert_eq!(
mount_dir.as_path().metadata().unwrap().gid(),
d.expected_group_id
);
assert_eq!(
mount_dir.as_path().metadata().unwrap().permissions().mode(),
(directory_mode | d.expected_permission)
);
}
}
#[test]
fn test_recursive_ownership_change() {
skip_if_not_root!();
const COUNT: usize = 5;
#[derive(Debug)]
struct TestData<'a> {
// Directory where the recursive ownership change should be performed on
path: &'a str,
// User ID for ownership change
uid: u32,
// Group ID for ownership change
gid: u32,
// Set when the permission should be read-only
read_only: bool,
// The expected permission of all directories after ownership change
expected_permission_directory: u32,
// The expected permission of all files after ownership change
expected_permission_file: u32,
}
let tests = &[
TestData {
path: "no_gid_change",
uid: 0,
gid: 0,
read_only: false,
expected_permission_directory: 0,
expected_permission_file: 0,
},
TestData {
path: "rw_gid_change",
uid: 0,
gid: 3000,
read_only: false,
expected_permission_directory: RW_MASK | EXEC_MASK | MODE_SETGID,
expected_permission_file: RW_MASK,
},
TestData {
path: "ro_gid_change",
uid: 0,
gid: 3000,
read_only: true,
expected_permission_directory: RO_MASK | EXEC_MASK | MODE_SETGID,
expected_permission_file: RO_MASK,
},
];
let tempdir = tempdir().expect("failed to create tmpdir");
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
let mount_dir = tempdir.path().join(d.path);
fs::create_dir(&mount_dir)
.unwrap_or_else(|_| panic!("{}: failed to create root directory", msg));
let directory_mode = mount_dir.as_path().metadata().unwrap().permissions().mode();
let mut file_mode: u32 = 0;
// create testing directories and files
for n in 1..COUNT {
let nest_dir = mount_dir.join(format!("nested{}", n));
fs::create_dir(&nest_dir)
.unwrap_or_else(|_| panic!("{}: failed to create nest directory", msg));
for f in 1..COUNT {
let filename = nest_dir.join(format!("file{}", f));
File::create(&filename)
.unwrap_or_else(|_| panic!("{}: failed to create file", msg));
file_mode = filename.as_path().metadata().unwrap().permissions().mode();
}
}
let uid = if d.uid > 0 {
Some(Uid::from_raw(d.uid))
} else {
None
};
let gid = if d.gid > 0 {
Some(Gid::from_raw(d.gid))
} else {
None
};
let result = recursive_ownership_change(&mount_dir, uid, gid, d.read_only);
assert!(result.is_ok());
assert_eq!(mount_dir.as_path().metadata().unwrap().gid(), d.gid);
assert_eq!(
mount_dir.as_path().metadata().unwrap().permissions().mode(),
(directory_mode | d.expected_permission_directory)
);
for n in 1..COUNT {
let nest_dir = mount_dir.join(format!("nested{}", n));
for f in 1..COUNT {
let filename = nest_dir.join(format!("file{}", f));
let file = Path::new(&filename);
assert_eq!(file.metadata().unwrap().gid(), d.gid);
assert_eq!(
file.metadata().unwrap().permissions().mode(),
(file_mode | d.expected_permission_file)
);
}
let dir = Path::new(&nest_dir);
assert_eq!(dir.metadata().unwrap().gid(), d.gid);
assert_eq!(
dir.metadata().unwrap().permissions().mode(),
(directory_mode | d.expected_permission_directory)
);
}
}
}
}


@@ -3,7 +3,7 @@
// SPDX-License-Identifier: Apache-2.0
//
use anyhow::Result;
use anyhow::{ensure, Result};
use nix::errno::Errno;
use nix::fcntl::{self, OFlag};
use nix::sys::stat::Mode;
@@ -13,7 +13,7 @@ use tracing::instrument;
pub const RNGDEV: &str = "/dev/random";
pub const RNDADDTOENTCNT: libc::c_int = 0x40045201;
pub const RNDRESEEDRNG: libc::c_int = 0x5207;
pub const RNDRESEEDCRNG: libc::c_int = 0x5207;
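// 0x5207 is the kernel's RNDRESEEDCRNG ioctl (_IO('R', 0x07)); the old
// RNDRESEEDRNG identifier was a local misnomer, hence the rename.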
// Handle the differing ioctl(2) request types for different targets
#[cfg(target_env = "musl")]
@@ -24,6 +24,9 @@ type IoctlRequestType = libc::c_ulong;
#[instrument]
pub fn reseed_rng(data: &[u8]) -> Result<()> {
let len = data.len() as libc::c_long;
ensure!(len > 0, "missing entropy data");
fs::write(RNGDEV, data)?;
let f = {
@@ -41,8 +44,52 @@ pub fn reseed_rng(data: &[u8]) -> Result<()> {
};
Errno::result(ret).map(drop)?;
let ret = unsafe { libc::ioctl(f.as_raw_fd(), RNDRESEEDRNG as IoctlRequestType, 0) };
let ret = unsafe { libc::ioctl(f.as_raw_fd(), RNDRESEEDCRNG as IoctlRequestType, 0) };
Errno::result(ret).map(drop)?;
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use crate::skip_if_not_root;
use std::fs::File;
use std::io::prelude::*;
#[test]
fn test_reseed_rng() {
skip_if_not_root!();
const POOL_SIZE: usize = 512;
let mut f = File::open("/dev/urandom").unwrap();
let mut seed = [0; POOL_SIZE];
let n = f.read(&mut seed).unwrap();
// Ensure the buffer was filled.
assert!(n == POOL_SIZE);
let ret = reseed_rng(&seed);
assert!(ret.is_ok());
}
#[test]
fn test_reseed_rng_not_root() {
const POOL_SIZE: usize = 512;
let mut f = File::open("/dev/urandom").unwrap();
let mut seed = [0; POOL_SIZE];
let n = f.read(&mut seed).unwrap();
// Ensure the buffer was filled.
assert!(n == POOL_SIZE);
let ret = reseed_rng(&seed);
if nix::unistd::Uid::effective().is_root() {
assert!(ret.is_ok());
} else {
assert!(!ret.is_ok());
}
}
#[test]
fn test_reseed_rng_zero_data() {
let seed = [];
let ret = reseed_rng(&seed);
assert!(!ret.is_ok());
}
}

File diff suppressed because it is too large


@@ -32,6 +32,8 @@ use tokio::sync::oneshot;
use tokio::sync::Mutex;
use tracing::instrument;
pub const ERR_INVALID_CONTAINER_ID: &str = "Invalid container id";
type UeventWatcher = (Box<dyn UeventMatcher>, oneshot::Sender<Uevent>);
#[derive(Debug)]
@@ -149,7 +151,12 @@ impl Sandbox {
pub fn remove_sandbox_storage(&self, path: &str) -> Result<()> {
let mounts = vec![path.to_string()];
remove_mounts(&mounts)?;
fs::remove_dir_all(path).context(format!("failed to remove dir {:?}", path))?;
// "remove_dir" will fail if the mount point is backed by a read-only filesystem.
// This is the case with the device mapper snapshotter, where the block device is mounted
// directly at the sandbox path, which sits under the read-only kataShared path provided by the host.
if let Err(err) = fs::remove_dir(path) {
warn!(self.logger, "failed to remove dir {}, {:?}", path, err);
}
Ok(())
}
@@ -232,7 +239,7 @@ impl Sandbox {
pub fn find_container_process(&mut self, cid: &str, eid: &str) -> Result<&mut Process> {
let ctr = self
.get_container(cid)
.ok_or_else(|| anyhow!("Invalid container id"))?;
.ok_or_else(|| anyhow!(ERR_INVALID_CONTAINER_ID))?;
if eid.is_empty() {
return ctr
@@ -463,7 +470,7 @@ fn online_memory(logger: &Logger) -> Result<()> {
#[cfg(test)]
mod tests {
use super::Sandbox;
use super::*;
use crate::{mount::baremount, skip_if_not_root};
use anyhow::{anyhow, Error};
use nix::mount::MsFlags;
@@ -473,6 +480,7 @@ mod tests {
use rustjail::specconv::CreateOpts;
use slog::Logger;
use std::fs::{self, File};
use std::io::prelude::*;
use std::os::unix::fs::PermissionsExt;
use std::path::Path;
use tempfile::{tempdir, Builder, TempDir};
@@ -562,19 +570,8 @@ mod tests {
.remove_sandbox_storage(invalid_dir.to_str().unwrap())
.is_err());
// Now, create a double mount as this guarantees the directory cannot
// be deleted after the first umount.
for _i in 0..2 {
assert!(bind_mount(srcdir_path, destdir_path, &logger).is_ok());
}
assert!(bind_mount(srcdir_path, destdir_path, &logger).is_ok());
assert!(
s.remove_sandbox_storage(destdir_path).is_err(),
"Expect fail as deletion cannot happen due to the second mount."
);
// This time it should work as the previous two calls have undone the double
// mount.
assert!(s.remove_sandbox_storage(destdir_path).is_ok());
}
@@ -851,4 +848,259 @@ mod tests {
let p = s.find_container_process("not-exist-cid", "");
assert!(p.is_err(), "Expecting Error, Got {:?}", p);
}
#[tokio::test]
async fn test_find_process() {
let logger = slog::Logger::root(slog::Discard, o!());
let test_pids = [std::i32::MIN, -1, 0, 1, std::i32::MAX];
for test_pid in test_pids {
let mut s = Sandbox::new(&logger).unwrap();
let (mut linux_container, _root) = create_linuxcontainer();
let mut test_process = Process::new(
&logger,
&oci::Process::default(),
"this_is_a_test_process",
true,
1,
)
.unwrap();
// processes internally only have pids when manually set
test_process.pid = test_pid;
linux_container.processes.insert(test_pid, test_process);
s.add_container(linux_container);
let find_result = s.find_process(test_pid);
// test first if it finds anything
assert!(find_result.is_some(), "Should be able to find a process");
let found_process = find_result.unwrap();
// then test if it finds the correct process
assert_eq!(
found_process.pid, test_pid,
"Should be able to find correct process"
);
}
// to test for nonexistent pids, any pid that isn't the one set
// above should work, as linuxcontainer starts with no processes
let mut s = Sandbox::new(&logger).unwrap();
let nonexistent_test_pid = 1234;
let find_result = s.find_process(nonexistent_test_pid);
assert!(
find_result.is_none(),
"Shouldn't find a process for non existent pid"
);
}
#[tokio::test]
async fn test_online_resources() {
#[derive(Debug, Default)]
struct TestFile {
name: String,
content: String,
}
#[derive(Debug, Default)]
struct TestDirectory<'a> {
name: String,
files: &'a [TestFile],
}
#[derive(Debug)]
struct TestData<'a> {
directory_autogen_name: String,
number_autogen_directories: u32,
extra_directories: &'a [TestDirectory<'a>],
pattern: String,
to_enable: i32,
result: Result<i32>,
}
impl Default for TestData<'_> {
fn default() -> Self {
TestData {
directory_autogen_name: Default::default(),
number_autogen_directories: Default::default(),
extra_directories: Default::default(),
pattern: Default::default(),
to_enable: Default::default(),
result: Ok(Default::default()),
}
}
}
let tests = &[
// 4 well formed directories, request enabled 4,
// correct result 4 enabled, should pass
TestData {
directory_autogen_name: String::from("cpu"),
number_autogen_directories: 4,
pattern: String::from(r"cpu[0-9]+"),
to_enable: 4,
result: Ok(4),
..Default::default()
},
// 0 well formed directories, request enabled 4,
// correct result 0 enabled, should pass
TestData {
number_autogen_directories: 0,
to_enable: 4,
result: Ok(0),
..Default::default()
},
// 10 well formed directories, request enabled 4,
// correct result 4 enabled, should pass
TestData {
directory_autogen_name: String::from("cpu"),
number_autogen_directories: 10,
pattern: String::from(r"cpu[0-9]+"),
to_enable: 4,
result: Ok(4),
..Default::default()
},
// 0 well formed directories, request enabled 0,
// correct result 0 enabled, should pass
TestData {
number_autogen_directories: 0,
pattern: String::from(r"cpu[0-9]+"),
to_enable: 0,
result: Ok(0),
..Default::default()
},
// 4 well formed directories, 1 malformed (no online file),
// request enable 5, correct result 4
TestData {
directory_autogen_name: String::from("cpu"),
number_autogen_directories: 4,
pattern: String::from(r"cpu[0-9]+"),
extra_directories: &[TestDirectory {
name: String::from("cpu4"),
files: &[],
}],
to_enable: 5,
result: Ok(4),
},
// 3 malformed directories (no online files),
// request enable 3, correct result 0
TestData {
pattern: String::from(r"cpu[0-9]+"),
extra_directories: &[
TestDirectory {
name: String::from("cpu0"),
files: &[],
},
TestDirectory {
name: String::from("cpu1"),
files: &[],
},
TestDirectory {
name: String::from("cpu2"),
files: &[],
},
],
to_enable: 3,
result: Ok(0),
..Default::default()
},
// 1 malformed directories (online file with content "1"),
// request enable 1, correct result 0
TestData {
pattern: String::from(r"cpu[0-9]+"),
extra_directories: &[TestDirectory {
name: String::from("cpu0"),
files: &[TestFile {
name: SYSFS_ONLINE_FILE.to_string(),
content: String::from("1"),
}],
}],
to_enable: 1,
result: Ok(0),
..Default::default()
},
// 2 well formed directories, 1 malformed (online file with content "1"),
// request enable 3, correct result 2
TestData {
directory_autogen_name: String::from("cpu"),
number_autogen_directories: 2,
pattern: String::from(r"cpu[0-9]+"),
extra_directories: &[TestDirectory {
name: String::from("cpu2"),
files: &[TestFile {
name: SYSFS_ONLINE_FILE.to_string(),
content: String::from("1"),
}],
}],
to_enable: 3,
result: Ok(2),
},
];
let logger = slog::Logger::root(slog::Discard, o!());
let tmpdir = Builder::new().tempdir().unwrap();
let tmpdir_path = tmpdir.path().to_str().unwrap();
for (i, d) in tests.iter().enumerate() {
let current_test_dir_path = format!("{}/test_{}", tmpdir_path, i);
fs::create_dir(&current_test_dir_path).unwrap();
// create numbered directories and fill using root name
for j in 0..d.number_autogen_directories {
let subdir_path = format!(
"{}/{}{}",
current_test_dir_path, d.directory_autogen_name, j
);
let subfile_path = format!("{}/{}", subdir_path, SYSFS_ONLINE_FILE);
fs::create_dir(&subdir_path).unwrap();
let mut subfile = File::create(subfile_path).unwrap();
subfile.write_all(b"0").unwrap();
}
// create extra directories and fill to specification
for j in d.extra_directories {
let subdir_path = format!("{}/{}", current_test_dir_path, j.name);
fs::create_dir(&subdir_path).unwrap();
for file in j.files {
let subfile_path = format!("{}/{}", subdir_path, file.name);
let mut subfile = File::create(&subfile_path).unwrap();
subfile.write_all(file.content.as_bytes()).unwrap();
}
}
// run created directory structure against online_resources
let result = online_resources(&logger, &current_test_dir_path, &d.pattern, d.to_enable);
let mut msg = format!(
"test[{}]: {:?}, expected {}, actual {}",
i,
d,
d.result.is_ok(),
result.is_ok()
);
assert_eq!(result.is_ok(), d.result.is_ok(), "{}", msg);
if d.result.is_ok() {
let test_result_val = *d.result.as_ref().ok().unwrap();
let result_val = result.ok().unwrap();
msg = format!(
"test[{}]: {:?}, expected {}, actual {}",
i, d, test_result_val, result_val
);
assert_eq!(test_result_val, result_val, "{}", msg);
}
}
}
}


@@ -5,7 +5,14 @@
#![allow(clippy::module_inception)]
#[cfg(test)]
mod test_utils {
pub mod test_utils {
#[derive(Debug, PartialEq)]
pub enum TestUserType {
RootOnly,
NonRootOnly,
Any,
}
#[macro_export]
macro_rules! skip_if_root {
() => {
@@ -53,4 +60,40 @@ mod test_utils {
}
};
}
// Parameters:
//
// 1: expected Result
// 2: actual Result
// 3: string used to identify the test on error
#[macro_export]
macro_rules! assert_result {
($expected_result:expr, $actual_result:expr, $msg:expr) => {
if $expected_result.is_ok() {
let expected_value = $expected_result.as_ref().unwrap();
let actual_value = $actual_result.unwrap();
assert!(*expected_value == actual_value, "{}", $msg);
} else {
assert!($actual_result.is_err(), "{}", $msg);
let expected_error = $expected_result.as_ref().unwrap_err();
let expected_error_msg = format!("{:?}", expected_error);
let actual_error_msg = format!("{:?}", $actual_result.unwrap_err());
assert!(expected_error_msg == actual_error_msg, "{}", $msg);
}
};
}
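Usage mirrors the call sites elsewhere in this changeset (a hedged sketch; values are illustrative):

let expected: anyhow::Result<i32> = Ok(1);
let actual: anyhow::Result<i32> = Ok(1);
assert_result!(expected, actual, "test[0]: values should match");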
#[macro_export]
macro_rules! skip_loop_by_user {
($msg:expr, $user:expr) => {
if $user == TestUserType::RootOnly {
skip_loop_if_not_root!($msg);
} else if $user == TestUserType::NonRootOnly {
skip_loop_if_root!($msg);
}
};
}
}


@@ -6,6 +6,7 @@
#![allow(unknown_lints)]
use std::collections::HashMap;
use std::os::unix::fs::MetadataExt;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::SystemTime;
@@ -13,6 +14,7 @@ use std::time::SystemTime;
use anyhow::{ensure, Context, Result};
use async_recursion::async_recursion;
use nix::mount::{umount, MsFlags};
use nix::unistd::{Gid, Uid};
use slog::{debug, error, info, warn, Logger};
use thiserror::Error;
use tokio::fs;
@@ -80,7 +82,8 @@ impl Drop for Storage {
}
async fn copy(from: impl AsRef<Path>, to: impl AsRef<Path>) -> Result<()> {
if fs::symlink_metadata(&from).await?.file_type().is_symlink() {
let metadata = fs::symlink_metadata(&from).await?;
if metadata.file_type().is_symlink() {
// if source is a symlink, create new symlink with same link source. If
// the symlink exists, remove and create new one:
if fs::symlink_metadata(&to).await.is_ok() {
@@ -88,8 +91,15 @@ async fn copy(from: impl AsRef<Path>, to: impl AsRef<Path>) -> Result<()> {
}
fs::symlink(fs::read_link(&from).await?, &to).await?;
} else {
fs::copy(from, to).await?;
fs::copy(&from, &to).await?;
}
// preserve the source uid and gid to the destination.
nix::unistd::chown(
to.as_ref(),
Some(Uid::from_raw(metadata.uid())),
Some(Gid::from_raw(metadata.gid())),
)?;
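// Note: chown(2) follows symlinks, so in the symlink branch above this
// applies to the link target rather than to the new link itself.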
Ok(())
}
@@ -106,14 +116,29 @@ impl Storage {
async fn update_target(&self, logger: &Logger, source_path: impl AsRef<Path>) -> Result<()> {
let source_file_path = source_path.as_ref();
let metadata = source_file_path.symlink_metadata()?;
// if we are creating a directory: just create it, nothing more to do
if source_file_path.symlink_metadata()?.file_type().is_dir() {
if metadata.file_type().is_dir() {
let dest_file_path = self.make_target_path(&source_file_path)?;
fs::create_dir_all(&dest_file_path)
.await
.with_context(|| format!("Unable to mkdir all for {}", dest_file_path.display()))?;
// set the directory permissions to match the source directory permissions
fs::set_permissions(&dest_file_path, metadata.permissions())
.await
.with_context(|| {
format!("Unable to set permissions for {}", dest_file_path.display())
})?;
// preserve the source directory uid and gid to the destination.
nix::unistd::chown(
&dest_file_path,
Some(Uid::from_raw(metadata.uid())),
Some(Gid::from_raw(metadata.gid())),
)
.with_context(|| format!("Unable to set ownership for {}", dest_file_path.display()))?;
return Ok(());
}
@@ -504,6 +529,7 @@ mod tests {
use super::*;
use crate::mount::is_mounted;
use crate::skip_if_not_root;
use nix::unistd::{Gid, Uid};
use std::fs;
use std::thread;
@@ -895,20 +921,28 @@ mod tests {
#[tokio::test]
async fn test_copy() {
skip_if_not_root!();
// prepare tmp src/destination
let source_dir = tempfile::tempdir().unwrap();
let dest_dir = tempfile::tempdir().unwrap();
let uid = Uid::from_raw(10);
let gid = Gid::from_raw(200);
// verify copy of a regular file
let src_file = source_dir.path().join("file.txt");
let dst_file = dest_dir.path().join("file.txt");
fs::write(&src_file, "foo").unwrap();
nix::unistd::chown(&src_file, Some(uid), Some(gid)).unwrap();
copy(&src_file, &dst_file).await.unwrap();
// verify destination:
assert!(!fs::symlink_metadata(dst_file)
assert!(!fs::symlink_metadata(&dst_file)
.unwrap()
.file_type()
.is_symlink());
assert_eq!(fs::metadata(&dst_file).unwrap().uid(), uid.as_raw());
assert_eq!(fs::metadata(&dst_file).unwrap().gid(), gid.as_raw());
// verify copy of a symlink
let src_symlink_file = source_dir.path().join("symlink_file.txt");
@@ -916,7 +950,7 @@ mod tests {
tokio::fs::symlink(&src_file, &src_symlink_file)
.await
.unwrap();
copy(src_symlink_file, &dst_symlink_file).await.unwrap();
copy(&src_symlink_file, &dst_symlink_file).await.unwrap();
// verify destination:
assert!(fs::symlink_metadata(&dst_symlink_file)
.unwrap()
@@ -924,6 +958,8 @@ mod tests {
.is_symlink());
assert_eq!(fs::read_link(&dst_symlink_file).unwrap(), src_file);
assert_eq!(fs::read_to_string(&dst_symlink_file).unwrap(), "foo");
assert_ne!(fs::metadata(&dst_symlink_file).unwrap().uid(), uid.as_raw());
assert_ne!(fs::metadata(&dst_symlink_file).unwrap().gid(), gid.as_raw());
}
#[tokio::test]
@@ -1069,6 +1105,8 @@ mod tests {
#[tokio::test]
async fn watch_directory() {
skip_if_not_root!();
// Prepare source directory:
// ./tmp/1.txt
// ./tmp/A/B/2.txt
@@ -1079,7 +1117,9 @@ mod tests {
// A/C is an empty directory
let empty_dir = "A/C";
fs::create_dir_all(source_dir.path().join(empty_dir)).unwrap();
let path = source_dir.path().join(empty_dir);
fs::create_dir_all(&path).unwrap();
nix::unistd::chown(&path, Some(Uid::from_raw(10)), Some(Gid::from_raw(200))).unwrap();
// delay 20 ms between writes to files in order to ensure filesystem timestamps are unique
thread::sleep(Duration::from_millis(20));
@@ -1123,7 +1163,9 @@ mod tests {
// create another empty directory A/C/D
let empty_dir = "A/C/D";
fs::create_dir_all(source_dir.path().join(empty_dir)).unwrap();
let path = source_dir.path().join(empty_dir);
fs::create_dir_all(&path).unwrap();
nix::unistd::chown(&path, Some(Uid::from_raw(10)), Some(Gid::from_raw(200))).unwrap();
assert_eq!(entry.scan(&logger).await.unwrap(), 1);
assert!(dest_dir.path().join(empty_dir).exists());
}

src/libs/Cargo.lock (generated, 897 new lines)

@@ -0,0 +1,897 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "anyhow"
version = "1.0.57"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08f9b8508dccb7687a1d6c4ce66b2b0ecef467c94667de27d8d7fe1f8d2a9cdc"
[[package]]
name = "arc-swap"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c5d78ce20460b82d3fa150275ed9d55e21064fc7951177baacf86a145c4a4b1f"
[[package]]
name = "async-trait"
version = "0.1.53"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed6aa3524a2dfcf9fe180c51eae2b58738348d819517ceadf95789c51fff7600"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "autocfg"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
[[package]]
name = "bitflags"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
[[package]]
name = "byteorder"
version = "1.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
[[package]]
name = "bytes"
version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "206fdffcfa2df7cbe15601ef46c813fce0965eb3286db6b56c583b814b51c81c"
dependencies = [
"byteorder",
"iovec",
]
[[package]]
name = "bytes"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8"
[[package]]
name = "cc"
version = "1.0.73"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73"
dependencies = [
"libc",
"num-integer",
"num-traits",
"time",
"winapi",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e54ea8bc3fb1ee042f5aace6e3c6e025d3874866da222930f70ce62aceba0bfa"
dependencies = [
"cfg-if",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfcae03edb34f947e64acdb1c33ec169824e20657e9ecb61cef6c8c74dcb8120"
dependencies = [
"cfg-if",
"lazy_static",
]
[[package]]
name = "derive-new"
version = "0.5.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3418329ca0ad70234b9735dc4ceed10af4df60eff9c8e7b06cb5e520d92c3535"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "either"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
[[package]]
name = "fastrand"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "779d043b6a0b90cc4c0ed7ee380a6504394cee7efd7db050e3774eee387324b2"
dependencies = [
"instant",
]
[[package]]
name = "fixedbitset"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d"
[[package]]
name = "futures"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f73fe65f54d1e12b726f517d3e2135ca3125a437b6d998caf1962961f7172d9e"
dependencies = [
"futures-channel",
"futures-core",
"futures-executor",
"futures-io",
"futures-sink",
"futures-task",
"futures-util",
]
[[package]]
name = "futures-channel"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010"
dependencies = [
"futures-core",
"futures-sink",
]
[[package]]
name = "futures-core"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3"
[[package]]
name = "futures-executor"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9420b90cfa29e327d0429f19be13e7ddb68fa1cccb09d65e5706b8c7a749b8a6"
dependencies = [
"futures-core",
"futures-task",
"futures-util",
]
[[package]]
name = "futures-io"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b"
[[package]]
name = "futures-macro"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33c1e13800337f4d4d7a316bf45a567dbcb6ffe087f16424852d97e97a91f512"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "futures-sink"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21163e139fa306126e6eedaf49ecdb4588f939600f0b1e770f4205ee4b7fa868"
[[package]]
name = "futures-task"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a"
[[package]]
name = "futures-util"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a"
dependencies = [
"futures-channel",
"futures-core",
"futures-io",
"futures-macro",
"futures-sink",
"futures-task",
"memchr",
"pin-project-lite",
"pin-utils",
"slab",
]
[[package]]
name = "hashbrown"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
[[package]]
name = "heck"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c"
dependencies = [
"unicode-segmentation",
]
[[package]]
name = "indexmap"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f647032dfaa1f8b6dc29bd3edb7bbef4861b8b8007ebb118d6db284fd59f6ee"
dependencies = [
"autocfg",
"hashbrown",
]
[[package]]
name = "instant"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
dependencies = [
"cfg-if",
]
[[package]]
name = "iovec"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2b3ea6ff95e175473f8ffe6a7eb7c00d054240321b84c57051175fe3c1e075e"
dependencies = [
"libc",
]
[[package]]
name = "itertools"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35"
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.124"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21a41fed9d98f27ab1c6d161da622a4fa35e8a54a8adc24bbf3ddd0ef70b0e50"
[[package]]
name = "log"
version = "0.4.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6389c490849ff5bc16be905ae24bc913a9c8892e19b2341dbc175e14c341c2b8"
dependencies = [
"cfg-if",
]
[[package]]
name = "logging"
version = "0.1.0"
dependencies = [
"serde_json",
"slog",
"slog-async",
"slog-json",
"slog-scope",
"tempfile",
]
[[package]]
name = "memchr"
version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
[[package]]
name = "memoffset"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce"
dependencies = [
"autocfg",
]
[[package]]
name = "mio"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52da4364ffb0e4fe33a9841a98a3f3014fb964045ce4f7a45a398243c8d6b0c9"
dependencies = [
"libc",
"log",
"miow",
"ntapi",
"wasi 0.11.0+wasi-snapshot-preview1",
"winapi",
]
[[package]]
name = "miow"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9f1c5b025cda876f66ef43a113f91ebc9f4ccef34843000e0adf6ebbab84e21"
dependencies = [
"winapi",
]
[[package]]
name = "multimap"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a"
[[package]]
name = "nix"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f5e06129fb611568ef4e868c14b326274959aa70ff7776e9d55323531c374945"
dependencies = [
"bitflags",
"cc",
"cfg-if",
"libc",
"memoffset",
]
[[package]]
name = "nix"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f866317acbd3a240710c63f065ffb1e4fd466259045ccb504130b7f668f35c6"
dependencies = [
"bitflags",
"cc",
"cfg-if",
"libc",
"memoffset",
]
[[package]]
name = "ntapi"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c28774a7fd2fbb4f0babd8237ce554b73af68021b5f695a3cebd6c59bac0980f"
dependencies = [
"winapi",
]
[[package]]
name = "num-integer"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db"
dependencies = [
"autocfg",
"num-traits",
]
[[package]]
name = "num-traits"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"
dependencies = [
"autocfg",
]
[[package]]
name = "once_cell"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5"
[[package]]
name = "petgraph"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "467d164a6de56270bd7c4d070df81d07beace25012d5103ced4e9ff08d6afdb7"
dependencies = [
"fixedbitset",
"indexmap",
]
[[package]]
name = "pin-project-lite"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e280fbe77cc62c91527259e9442153f4688736748d24660126286329742b4c6c"
[[package]]
name = "pin-utils"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "proc-macro2"
version = "1.0.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec757218438d5fda206afc041538b2f6d889286160d649a86a24d37e1235afd1"
dependencies = [
"unicode-xid",
]
[[package]]
name = "prost"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de5e2533f59d08fcf364fd374ebda0692a70bd6d7e66ef97f306f45c6c5d8020"
dependencies = [
"bytes 1.1.0",
"prost-derive",
]
[[package]]
name = "prost-build"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "355f634b43cdd80724ee7848f95770e7e70eefa6dcf14fea676216573b8fd603"
dependencies = [
"bytes 1.1.0",
"heck",
"itertools",
"log",
"multimap",
"petgraph",
"prost",
"prost-types",
"tempfile",
"which",
]
[[package]]
name = "prost-derive"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "600d2f334aa05acb02a755e217ef1ab6dea4d51b58b7846588b747edec04efba"
dependencies = [
"anyhow",
"itertools",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "prost-types"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "603bbd6394701d13f3f25aada59c7de9d35a6a5887cfc156181234a44002771b"
dependencies = [
"bytes 1.1.0",
"prost",
]
[[package]]
name = "protobuf"
version = "2.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e86d370532557ae7573551a1ec8235a0f8d6cb276c7c9e6aa490b511c447485"
dependencies = [
"serde",
"serde_derive",
]
[[package]]
name = "protobuf-codegen"
version = "2.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de113bba758ccf2c1ef816b127c958001b7831136c9bc3f8e9ec695ac4e82b0c"
dependencies = [
"protobuf",
]
[[package]]
name = "protobuf-codegen-pure"
version = "2.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d1a4febc73bf0cada1d77c459a0c8e5973179f1cfd5b0f1ab789d45b17b6440"
dependencies = [
"protobuf",
"protobuf-codegen",
]
[[package]]
name = "protocols"
version = "0.1.0"
dependencies = [
"async-trait",
"protobuf",
"serde",
"serde_json",
"ttrpc",
"ttrpc-codegen",
]
[[package]]
name = "quote"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1"
dependencies = [
"proc-macro2",
]
[[package]]
name = "redox_syscall"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff"
dependencies = [
"bitflags",
]
[[package]]
name = "remove_dir_all"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
dependencies = [
"winapi",
]
[[package]]
name = "ryu"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f"
[[package]]
name = "safe-path"
version = "0.1.0"
dependencies = [
"libc",
"tempfile",
]
[[package]]
name = "serde"
version = "1.0.133"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97565067517b60e2d1ea8b268e59ce036de907ac523ad83a0475da04e818989a"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.133"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed201699328568d8d08208fdd080e3ff594e6c422e438b6705905da01005d537"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.75"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c059c05b48c5c0067d4b4b2b4f0732dd65feb52daf7e0ea09cd87e7dadc1af79"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "slab"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32"
[[package]]
name = "slog"
version = "2.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8347046d4ebd943127157b94d63abb990fcf729dc4e9978927fdf4ac3c998d06"
[[package]]
name = "slog-async"
version = "2.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "766c59b252e62a34651412870ff55d8c4e6d04df19b43eecb2703e417b097ffe"
dependencies = [
"crossbeam-channel",
"slog",
"take_mut",
"thread_local",
]
[[package]]
name = "slog-json"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52e9b96fb6b5e80e371423b4aca6656eb537661ce8f82c2697e619f8ca85d043"
dependencies = [
"chrono",
"serde",
"serde_json",
"slog",
]
[[package]]
name = "slog-scope"
version = "4.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f95a4b4c3274cd2869549da82b57ccc930859bdbf5bcea0424bc5f140b3c786"
dependencies = [
"arc-swap",
"lazy_static",
"slog",
]
[[package]]
name = "socket2"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "66d72b759436ae32898a2af0a14218dbf55efde3feeb170eb623637db85ee1e0"
dependencies = [
"libc",
"winapi",
]
[[package]]
name = "syn"
version = "1.0.91"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b683b2b825c8eef438b77c36a06dc262294da3d5a5813fac20da149241dcd44d"
dependencies = [
"proc-macro2",
"quote",
"unicode-xid",
]
[[package]]
name = "take_mut"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f764005d11ee5f36500a149ace24e00e3da98b0158b3e2d53a7495660d3f4d60"
[[package]]
name = "tempfile"
version = "3.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4"
dependencies = [
"cfg-if",
"fastrand",
"libc",
"redox_syscall",
"remove_dir_all",
"winapi",
]
[[package]]
name = "thiserror"
version = "1.0.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "thread_local"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8018d24e04c95ac8790716a5987d0fec4f8b27249ffa0f7d33f1369bdfb88cbd"
dependencies = [
"once_cell",
]
[[package]]
name = "time"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255"
dependencies = [
"libc",
"wasi 0.10.0+wasi-snapshot-preview1",
"winapi",
]
[[package]]
name = "tokio"
version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2af73ac49756f3f7c01172e34a23e5d0216f6c32333757c2c61feb2bbff5a5ee"
dependencies = [
"bytes 1.1.0",
"libc",
"memchr",
"mio",
"pin-project-lite",
"socket2",
"tokio-macros",
"winapi",
]
[[package]]
name = "tokio-macros"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b557f72f448c511a979e2564e55d74e6c4432fc96ff4f6241bc6bded342643b7"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "tokio-vsock"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e0723fc001950a3b018947b05eeb45014fd2b7c6e8f292502193ab74486bdb6"
dependencies = [
"bytes 0.4.12",
"futures",
"libc",
"tokio",
"vsock",
]
[[package]]
name = "ttrpc"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "66a973ce6d5eaa20c173635b29ffb660dafbc7ef109172c0015ba44e47a23711"
dependencies = [
"async-trait",
"byteorder",
"futures",
"libc",
"log",
"nix 0.20.2",
"protobuf",
"protobuf-codegen-pure",
"thiserror",
"tokio",
"tokio-vsock",
]
[[package]]
name = "ttrpc-codegen"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "809eda4e459820237104e4b61d6b41bbe6c9e1ce6adf4057955e6e6722a90408"
dependencies = [
"protobuf",
"protobuf-codegen",
"protobuf-codegen-pure",
"ttrpc-compiler",
]
[[package]]
name = "ttrpc-compiler"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2978ed3fa047d8fd55cbeb4d4a61d461fb3021a90c9618519c73ce7e5bb66c15"
dependencies = [
"derive-new",
"prost",
"prost-build",
"prost-types",
"protobuf",
"protobuf-codegen",
"tempfile",
]
[[package]]
name = "unicode-segmentation"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99"
[[package]]
name = "unicode-xid"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
[[package]]
name = "vsock"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e32675ee2b3ce5df274c0ab52d19b28789632406277ca26bffee79a8e27dc133"
dependencies = [
"libc",
"nix 0.23.1",
]
[[package]]
name = "wasi"
version = "0.10.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "which"
version = "4.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c4fb54e6113b6a8772ee41c3404fb0301ac79604489467e0a9ce1f3e97c24ae"
dependencies = [
"either",
"lazy_static",
"libc",
]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

src/libs/Cargo.toml Normal file

@@ -0,0 +1,7 @@
[workspace]
members = [
"logging",
"safe-path",
"protocols",
]
resolver = "2"

src/libs/README.md Normal file

@@ -0,0 +1,10 @@
The `src/libs` directory hosts library crates which may be shared by multiple Kata Containers components
or published to [`crates.io`](https://crates.io/index.html).
### Library Crates
Currently it provides the following library crates:
| Library | Description |
|-|-|
| [logging](logging/) | Facilities to set up the logging subsystem, based on slog. |
| [safe-path](safe-path/) | Utilities to safely resolve filesystem paths. |


@@ -1,321 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "arc-swap"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c5d78ce20460b82d3fa150275ed9d55e21064fc7951177baacf86a145c4a4b1f"
[[package]]
name = "autocfg"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73"
dependencies = [
"libc",
"num-integer",
"num-traits",
"time",
"winapi",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4"
dependencies = [
"cfg-if",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db"
dependencies = [
"cfg-if",
"lazy_static",
]
[[package]]
name = "getrandom"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "itoa"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35"
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.112"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b03d17f364a3a042d5e5d46b053bbbf82c92c9430c592dd4c064dc6ee997125"
[[package]]
name = "logging"
version = "0.1.0"
dependencies = [
"serde_json",
"slog",
"slog-async",
"slog-json",
"slog-scope",
"tempfile",
]
[[package]]
name = "num-integer"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db"
dependencies = [
"autocfg",
"num-traits",
]
[[package]]
name = "num-traits"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"
dependencies = [
"autocfg",
]
[[package]]
name = "once_cell"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5"
[[package]]
name = "ppv-lite86"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed0cfbc8191465bed66e1718596ee0b0b35d5ee1f41c5df2189d0fe8bde535ba"
[[package]]
name = "rand"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
"rand_hc",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7"
dependencies = [
"getrandom",
]
[[package]]
name = "rand_hc"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d51e9f596de227fda2ea6c84607f5558e196eeaf43c986b724ba4fb8fdf497e7"
dependencies = [
"rand_core",
]
[[package]]
name = "redox_syscall"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff"
dependencies = [
"bitflags",
]
[[package]]
name = "remove_dir_all"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
dependencies = [
"winapi",
]
[[package]]
name = "ryu"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f"
[[package]]
name = "serde"
version = "1.0.131"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4ad69dfbd3e45369132cc64e6748c2d65cdfb001a2b1c232d128b4ad60561c1"
[[package]]
name = "serde_json"
version = "1.0.73"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bcbd0344bc6533bc7ec56df11d42fb70f1b912351c0825ccb7211b59d8af7cf5"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "slog"
version = "2.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8347046d4ebd943127157b94d63abb990fcf729dc4e9978927fdf4ac3c998d06"
[[package]]
name = "slog-async"
version = "2.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "766c59b252e62a34651412870ff55d8c4e6d04df19b43eecb2703e417b097ffe"
dependencies = [
"crossbeam-channel",
"slog",
"take_mut",
"thread_local",
]
[[package]]
name = "slog-json"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52e9b96fb6b5e80e371423b4aca6656eb537661ce8f82c2697e619f8ca85d043"
dependencies = [
"chrono",
"serde",
"serde_json",
"slog",
]
[[package]]
name = "slog-scope"
version = "4.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f95a4b4c3274cd2869549da82b57ccc930859bdbf5bcea0424bc5f140b3c786"
dependencies = [
"arc-swap",
"lazy_static",
"slog",
]
[[package]]
name = "take_mut"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f764005d11ee5f36500a149ace24e00e3da98b0158b3e2d53a7495660d3f4d60"
[[package]]
name = "tempfile"
version = "3.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22"
dependencies = [
"cfg-if",
"libc",
"rand",
"redox_syscall",
"remove_dir_all",
"winapi",
]
[[package]]
name = "thread_local"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8018d24e04c95ac8790716a5987d0fec4f8b27249ffa0f7d33f1369bdfb88cbd"
dependencies = [
"once_cell",
]
[[package]]
name = "time"
version = "0.1.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438"
dependencies = [
"libc",
"winapi",
]
[[package]]
name = "wasi"
version = "0.10.2+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"


@@ -381,7 +381,7 @@ pub struct LinuxMemory {
#[serde(default, skip_serializing_if = "Option::is_none", rename = "kernelTCP")]
pub kernel_tcp: Option<i64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
- pub swappiness: Option<i64>,
+ pub swappiness: Option<u64>,
#[serde(
default,
skip_serializing_if = "Option::is_none",


@@ -1,6 +1,7 @@
Cargo.lock
src/agent.rs
src/agent_ttrpc.rs
src/csi.rs
src/empty.rs
src/health.rs
src/health_ttrpc.rs


@@ -51,6 +51,8 @@ service AgentService {
rpc ListInterfaces(ListInterfacesRequest) returns(Interfaces);
rpc ListRoutes(ListRoutesRequest) returns (Routes);
rpc AddARPNeighbors(AddARPNeighborsRequest) returns (google.protobuf.Empty);
rpc GetIPTables(GetIPTablesRequest) returns (GetIPTablesResponse);
rpc SetIPTables(SetIPTablesRequest) returns (SetIPTablesResponse);
// observability
rpc GetMetrics(GetMetricsRequest) returns (Metrics);
@@ -328,6 +330,28 @@ message AddARPNeighborsRequest {
ARPNeighbors neighbors = 1;
}
message GetIPTablesRequest {
bool is_ipv6 = 1;
}
message GetIPTablesResponse{
// raw stdout from iptables-save or ip6tables-save
bytes data = 1;
}
message SetIPTablesRequest {
bool is_ipv6 = 1;
// iptables, in raw format expected to be passed to stdin
// of iptables-restore or ip6tables-restore
bytes data = 2;
}
message SetIPTablesResponse{
// raw stdout from iptables-restore or ip6tables-restore
bytes data = 1;
}
message OnlineCPUMemRequest {
// Wait specifies if the caller waits for the agent to online all resources.
// If true the agent returns once all resources have been connected, otherwise all
@@ -399,6 +423,17 @@ message SetGuestDateTimeRequest {
int64 Usec = 2;
}
// FSGroup consists of the group id and group ownership change policy
// that a volume should have its ownership changed to.
message FSGroup {
// GroupID is the ID that the group ownership of the
// files in the mounted volume will need to be changed to.
uint32 group_id = 2;
// GroupChangePolicy specifies the policy for applying group id
// ownership change on a mounted volume.
types.FSGroupChangePolicy group_change_policy = 3;
}
// Storage represents both the rootfs of the container, and any volume that
// could have been defined through the Mount list of the OCI specification.
message Storage {
@@ -422,11 +457,14 @@ message Storage {
// device, "9p" for shared filesystem, or "tmpfs" for shared /dev/shm.
string fstype = 4;
// Options describes the additional options that might be needed to
- // mount properly the storage filesytem.
+ // mount properly the storage filesystem.
repeated string options = 5;
// MountPoint refers to the path where the storage should be mounted
// inside the VM.
string mount_point = 6;
// FSGroup consists of the group ID and group ownership change policy
// that the mounted volume must have its group ID changed to when specified.
FSGroup fs_group = 7;
}
// Device represents only the devices that could have been defined through the

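The new RPCs map onto the standard `iptables-save`/`iptables-restore` tools: `GetIPTables` returns the raw stdout of a `-save` invocation, while `SetIPTables` feeds the request bytes to the stdin of a `-restore` invocation. A minimal sketch of that mapping (illustrative helper names only, not the actual agent handler):

```rust
use std::io::Write;
use std::process::{Command, Stdio};

// Illustrative helper: fetch the current tables, as GetIPTables does.
fn get_iptables(is_ipv6: bool) -> std::io::Result<Vec<u8>> {
    let bin = if is_ipv6 { "ip6tables-save" } else { "iptables-save" };
    // GetIPTablesResponse.data carries the raw stdout of the save tool.
    Ok(Command::new(bin).output()?.stdout)
}

// Illustrative helper: apply tables, as SetIPTables does.
fn set_iptables(is_ipv6: bool, data: &[u8]) -> std::io::Result<Vec<u8>> {
    let bin = if is_ipv6 { "ip6tables-restore" } else { "iptables-restore" };
    let mut child = Command::new(bin)
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .spawn()?;
    // SetIPTablesRequest.data is written to the restore tool's stdin.
    child.stdin.take().unwrap().write_all(data)?;
    // SetIPTablesResponse.data carries the raw stdout of the restore tool.
    Ok(child.wait_with_output()?.stdout)
}

fn main() -> std::io::Result<()> {
    // Needs the iptables tools installed (and typically root) to run.
    let saved = get_iptables(false)?;
    set_iptables(false, &saved).map(|_| ())
}
```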

@@ -16,6 +16,15 @@ enum IPFamily {
v6 = 1;
}
// FSGroupChangePolicy defines the policy for applying group id ownership change on a mounted volume.
enum FSGroupChangePolicy {
// Always indicates that the volume ownership will always be changed.
Always = 0;
// OnRootMismatch indicates that the volume ownership will be changed only
// when the ownership of the root directory does not match with the expected group id for the volume.
OnRootMismatch = 1;
}
message IPAddress {
IPFamily family = 1;
string address = 2;

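To make the two policy values concrete, here is a hypothetical sketch of the check an agent-side handler could perform before recursively changing group ownership of a mounted volume (`needs_group_change` is an illustrative helper, not part of the agent):

```rust
use std::os::unix::fs::MetadataExt;
use std::path::Path;

// Illustrative only: decide whether group ownership must be changed,
// mirroring the FSGroupChangePolicy semantics above.
fn needs_group_change(
    volume_root: &Path,
    group_id: u32,
    on_root_mismatch: bool,
) -> std::io::Result<bool> {
    if !on_root_mismatch {
        // Always: ownership is changed unconditionally.
        return Ok(true);
    }
    // OnRootMismatch: change only when the root directory's gid differs.
    Ok(std::fs::metadata(volume_root)?.gid() != group_id)
}

fn main() -> std::io::Result<()> {
    println!("{}", needs_group_change(Path::new("/tmp"), 0, true)?);
    Ok(())
}
```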

@@ -0,0 +1,18 @@
[package]
name = "safe-path"
version = "0.1.0"
description = "A library to safely handle file system paths for container runtimes"
keywords = ["kata", "container", "path", "securejoin"]
categories = ["parser-implementations", "filesystem"]
authors = ["The Kata Containers community <kata-dev@lists.katacontainers.io>"]
repository = "https://github.com/kata-containers/kata-containers.git"
homepage = "https://katacontainers.io/"
readme = "README.md"
license = "Apache-2.0"
edition = "2018"
[dependencies]
libc = "0.2.100"
[dev-dependencies]
tempfile = "3.2.0"


@@ -0,0 +1,21 @@
Safe Path
====================
A library to safely handle filesystem paths, typically for container runtimes.
Path handling is exposed to a range of attacks, such as symlink-based attacks and
TOCTTOU (time-of-check to time-of-use) attacks. The `safe-path` crate provides several
functions and utility structures to protect against path-resolution attacks.
## Support
**Operating Systems**:
- Linux
## Reference
- [`filepath-securejoin`](https://github.com/cyphar/filepath-securejoin): secure_join() written in Go.
- [CVE-2021-30465](https://github.com/advisories/GHSA-c3xm-pvg7-gh7r): symlink related TOCTOU flaw in `runC`.
## License
This code is licensed under [Apache-2.0](../../../LICENSE).

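A minimal usage sketch (assuming an existing `/mnt/rootfs` directory; the path is a placeholder):

```rust
use safe_path::scoped_join;

fn main() -> std::io::Result<()> {
    // A hostile "../../../" prefix cannot escape the rootfs directory:
    // the result is still scoped under /mnt/rootfs.
    let path = scoped_join("/mnt/rootfs", "../../../etc/passwd")?;
    assert!(path.starts_with("/mnt/rootfs"));
    Ok(())
}
```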

@@ -0,0 +1,65 @@
// Copyright (c) 2022 Alibaba Cloud
//
// SPDX-License-Identifier: Apache-2.0
//
//! A library to safely handle filesystem paths, typically for container runtimes.
//!
//! Linux [mount namespace](https://man7.org/linux/man-pages/man7/mount_namespaces.7.html)
//! provides isolation of the list of mounts seen by the processes in each
//! [namespace](https://man7.org/linux/man-pages/man7/namespaces.7.html) instance.
//! Thus, the processes in each of the mount namespace instances will see distinct single-directory
//! hierarchies.
//!
//! Containers are used to isolate workloads from the host system. Containers on Linux systems
//! depend on the mount namespace to build an isolated root filesystem for each container,
//! thus protecting the host and containers from each other. When creating containers, the container
//! runtime needs to set up filesystem mounts for container rootfs/volumes. Configuration for
//! mounts/paths may be indirectly controlled by end users through:
//! - container images
//! - Kubernetes pod specifications
//! - hook command line arguments
//!
//! This volume configuration information may be controlled by end users or malicious attackers,
//! so it must not be trusted by container runtimes. When the container runtime is preparing the
//! mount namespace for a container, it must be very careful to validate user-supplied
//! configuration information and ensure that data outside the container rootfs directory cannot
//! be affected by the container. There are several types of attacks related to the container
//! mount namespace:
//! - symlink based attack
//! - Time of check to time of use (TOCTTOU)
//!
//! This crate provides several mechanisms for container runtimes to safely handle filesystem paths
//! when preparing mount namespace for containers.
//! - [scoped_join()](crate::scoped_join()): safely join `unsafe_path` to `root`, and ensure
//! `unsafe_path` is scoped under `root`.
//! - [scoped_resolve()](crate::scoped_resolve()): resolve `unsafe_path` to a relative path,
//! rooted at and constrained by `root`.
//! - [struct PinnedPathBuf](crate::PinnedPathBuf): safe version of `PathBuf` to protect from
//! TOCTTOU style of attacks, which ensures:
//! - the value of [`PinnedPathBuf::as_path()`] never changes.
//! - the path returned by [`PinnedPathBuf::as_path()`] is always a symlink.
//! - the filesystem object referenced by the symlink [`PinnedPathBuf::as_path()`] never changes.
//! - the value of [`PinnedPathBuf::target()`] never changes.
//! - [struct ScopedDirBuilder](crate::ScopedDirBuilder): safe version of `DirBuilder` to protect
//! from symlink race and TOCTTOU style of attacks, which enhances security by:
//! - ensuring the new directories are created under a specified `root` directory.
//! - avoiding symlink race attacks during making directories.
//! - returning a [PinnedPathBuf] for the last level of directory, so it could be used for other
//! operations safely.
//!
//! The work is inspired by:
//! - [`filepath-securejoin`](https://github.com/cyphar/filepath-securejoin): secure_join() written
//! in Go.
//! - [CVE-2021-30465](https://github.com/advisories/GHSA-c3xm-pvg7-gh7r): symlink related TOCTOU
//! flaw in `runC`.
#![deny(missing_docs)]
mod pinned_path_buf;
pub use pinned_path_buf::PinnedPathBuf;
mod scoped_dir_builder;
pub use scoped_dir_builder::ScopedDirBuilder;
mod scoped_path_resolver;
pub use scoped_path_resolver::{scoped_join, scoped_resolve};

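To illustrate the difference between the two resolver entry points, a small sketch (illustrative paths; the printed outputs assume the semantics described in the module docs above):

```rust
use safe_path::{scoped_join, scoped_resolve};

fn main() -> std::io::Result<()> {
    std::fs::create_dir_all("/tmp/rootfs/etc")?;
    // scoped_join: an absolute path, guaranteed to stay under the root.
    let joined = scoped_join("/tmp/rootfs", "/etc/../../etc/hosts")?;
    println!("joined   = {}", joined.display()); // e.g. /tmp/rootfs/etc/hosts
    // scoped_resolve: the same result, expressed relative to the root.
    let resolved = scoped_resolve("/tmp/rootfs", "/etc/../../etc/hosts")?;
    println!("resolved = {}", resolved.display()); // e.g. etc/hosts
    Ok(())
}
```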

@@ -0,0 +1,444 @@
// Copyright (c) 2022 Alibaba Cloud
//
// SPDX-License-Identifier: Apache-2.0
//
use std::ffi::{CString, OsStr};
use std::fs::{self, File, Metadata, OpenOptions};
use std::io::{Error, ErrorKind, Result};
use std::ops::Deref;
use std::os::unix::ffi::OsStrExt;
use std::os::unix::fs::OpenOptionsExt;
use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
use std::path::{Component, Path, PathBuf};
use crate::scoped_join;
/// A safe version of [`PathBuf`] pinned to an underlying filesystem object to protect from
/// `TOCTTOU` style of attacks.
///
/// A [`PinnedPathBuf`] is a resolved path buffer pinned to an underlying filesystem object, which
/// guarantees:
/// - the value of [`PinnedPathBuf::as_path()`] never changes.
/// - the path returned by [`PinnedPathBuf::as_path()`] is always a symlink.
/// - the filesystem object referenced by the symlink [`PinnedPathBuf::as_path()`] never changes.
/// - the value of [`PinnedPathBuf::target()`] never changes.
///
/// Note:
/// - Though the filesystem object referenced by the symlink [`PinnedPathBuf::as_path()`] never
/// changes, the value of `fs::read_link(PinnedPathBuf::as_path())` may change due to filesystem
/// operations.
/// - The value of [`PinnedPathBuf::target()`] is a cached version of
/// `fs::read_link(PinnedPathBuf::as_path())` generated when creating the `PinnedPathBuf` object.
/// - It's a sign of possible attacks if [`PinnedPathBuf::target()`] doesn't match
/// `fs::read_link(PinnedPathBuf::as_path())`.
/// - Once the [`PinnedPathBuf`] object gets dropped, the [`Path`] returned by
/// [`PinnedPathBuf::as_path()`] becomes invalid.
///
/// With normal [`PathBuf`], there's a race window for attackers between time to validate a path and
/// time to use the path. An attacker may maliciously change filesystem object referenced by the
/// path by using symlinks to compose an attack.
///
/// The [`PinnedPathBuf`] is introduced to protect from such attacks, by using the
/// `/proc/self/fd/xxx` files on Linux. The `/proc/self/fd/xxx` file on Linux is a symlink to the
/// real target corresponding to the process's file descriptor `xxx`. And the target filesystem
/// object referenced by the symlink will be kept stable until the file descriptor has been closed.
/// Combined with `O_PATH`, a safe version of `PathBuf` could be built by:
/// - Generate a safe path from `root` and `path` by using [`crate::scoped_join()`].
/// - Open the safe path with O_PATH | O_CLOEXEC flags, say the fd number is `fd_num`.
/// - Read the symlink target of `/proc/self/fd/fd_num`.
/// - Compare the symlink target with the safe path; it's safe if the two paths are equal.
/// - Use the proc file path as a safe version of [`PathBuf`].
/// - Close the `fd_num` when dropping the [`PinnedPathBuf`] object.
#[derive(Debug)]
pub struct PinnedPathBuf {
handle: File,
path: PathBuf,
target: PathBuf,
}
impl PinnedPathBuf {
/// Create a [`PinnedPathBuf`] object from `root` and `path`.
///
/// The `path` must be a subdirectory of `root`, otherwise an error will be returned.
pub fn new<R: AsRef<Path>, U: AsRef<Path>>(root: R, path: U) -> Result<Self> {
let path = scoped_join(root, path)?;
Self::from_path(path)
}
/// Create a `PinnedPathBuf` from `path`.
///
/// If the resolved value of `path` doesn't equal `path`, an error will be returned.
pub fn from_path<P: AsRef<Path>>(orig_path: P) -> Result<Self> {
let orig_path = orig_path.as_ref();
let handle = Self::open_by_path(orig_path)?;
Self::new_from_file(handle, orig_path)
}
/// Try to clone the [`PinnedPathBuf`] object.
pub fn try_clone(&self) -> Result<Self> {
let fd = unsafe { libc::dup(self.path_fd()) };
if fd < 0 {
Err(Error::last_os_error())
} else {
Ok(Self {
handle: unsafe { File::from_raw_fd(fd) },
path: Self::get_proc_path(fd),
target: self.target.clone(),
})
}
}
/// Return the underlying file descriptor representing the pinned path.
///
/// Following operations are supported by the returned `RawFd`:
/// - fchdir
/// - fstat/fstatfs
/// - openat/linkat/fchownat/fstatat/readlinkat/mkdirat/*at
/// - fcntl(F_GETFD, F_SETFD, F_GETFL)
pub fn path_fd(&self) -> RawFd {
self.handle.as_raw_fd()
}
/// Get the symlink path referring the target filesystem object.
pub fn as_path(&self) -> &Path {
self.path.as_path()
}
/// Get the cached real path of the target filesystem object.
///
/// The target path is cached version of `fs::read_link(PinnedPathBuf::as_path())` generated
/// when creating the `PinnedPathBuf` object. On the other hand, the value of
/// `fs::read_link(PinnedPathBuf::as_path())` may change due to underlying filesystem operations.
/// So it's a sign of possible attacks if `PinnedPathBuf::target()` does not match
/// `fs::read_link(PinnedPathBuf::as_path())`.
pub fn target(&self) -> &Path {
&self.target
}
/// Get [`Metadata`] about the path handle.
pub fn metadata(&self) -> Result<Metadata> {
self.handle.metadata()
}
/// Open a direct child of the filesystem object referenced by the `PinnedPathBuf` object.
pub fn open_child(&self, path_comp: &OsStr) -> Result<Self> {
let name = Self::prepare_path_component(path_comp)?;
let oflags = libc::O_PATH | libc::O_CLOEXEC;
let res = unsafe { libc::openat(self.path_fd(), name.as_ptr(), oflags, 0) };
if res < 0 {
Err(Error::last_os_error())
} else {
let handle = unsafe { File::from_raw_fd(res) };
Self::new_from_file(handle, self.target.join(path_comp))
}
}
/// Create or open a child directory if the current object is a directory.
pub fn mkdir(&self, path_comp: &OsStr, mode: libc::mode_t) -> Result<Self> {
let path_name = Self::prepare_path_component(path_comp)?;
let res = unsafe { libc::mkdirat(self.handle.as_raw_fd(), path_name.as_ptr(), mode) };
if res < 0 {
Err(Error::last_os_error())
} else {
self.open_child(path_comp)
}
}
/// Open a directory/file by path.
///
/// Obtain a file descriptor that can be used for two purposes:
/// - indicate a location in the filesystem tree
/// - perform operations that act purely at the file descriptor level
fn open_by_path<P: AsRef<Path>>(path: P) -> Result<File> {
// When O_PATH is specified in flags, flag bits other than O_CLOEXEC, O_DIRECTORY, and
// O_NOFOLLOW are ignored.
let o_flags = libc::O_PATH | libc::O_CLOEXEC;
OpenOptions::new()
.read(true)
.custom_flags(o_flags)
.open(path.as_ref())
}
fn get_proc_path<F: AsRawFd>(file: F) -> PathBuf {
PathBuf::from(format!("/proc/self/fd/{}", file.as_raw_fd()))
}
fn new_from_file<P: AsRef<Path>>(handle: File, orig_path: P) -> Result<Self> {
let path = Self::get_proc_path(handle.as_raw_fd());
let link_path = fs::read_link(path.as_path())?;
if link_path != orig_path.as_ref() {
Err(Error::new(
ErrorKind::Other,
format!(
"Path changed from {} to {} on open, possible attack",
orig_path.as_ref().display(),
link_path.display()
),
))
} else {
Ok(PinnedPathBuf {
handle,
path,
target: link_path,
})
}
}
#[inline]
fn prepare_path_component(path_comp: &OsStr) -> Result<CString> {
let path = Path::new(path_comp);
let mut comps = path.components();
let name = comps.next();
if !matches!(name, Some(Component::Normal(_))) || comps.next().is_some() {
return Err(Error::new(
ErrorKind::Other,
format!("Path component {} is invalid", path_comp.to_string_lossy()),
));
}
let name = name.unwrap();
if name.as_os_str() != path_comp {
return Err(Error::new(
ErrorKind::Other,
format!("Path component {} is invalid", path_comp.to_string_lossy()),
));
}
CString::new(path_comp.as_bytes()).map_err(|_e| {
Error::new(
ErrorKind::Other,
format!("Path component {} is invalid", path_comp.to_string_lossy()),
)
})
}
}
impl Deref for PinnedPathBuf {
type Target = PathBuf;
fn deref(&self) -> &Self::Target {
&self.path
}
}
impl AsRef<Path> for PinnedPathBuf {
fn as_ref(&self) -> &Path {
self.path.as_path()
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::ffi::OsString;
use std::fs::DirBuilder;
use std::io::Write;
use std::os::unix::fs::{symlink, MetadataExt};
use std::sync::{Arc, Barrier};
use std::thread;
#[test]
fn test_pinned_path_buf() {
// Create a root directory, which itself contains symlinks.
let rootfs_dir = tempfile::tempdir().expect("failed to create tmpdir");
DirBuilder::new()
.create(rootfs_dir.path().join("b"))
.unwrap();
symlink(rootfs_dir.path().join("b"), rootfs_dir.path().join("a")).unwrap();
let rootfs_path = &rootfs_dir.path().join("a");
// Create a file and a symlink to it.
fs::create_dir(rootfs_path.join("symlink_dir")).unwrap();
symlink("/endpoint", rootfs_path.join("symlink_dir/endpoint")).unwrap();
fs::write(rootfs_path.join("endpoint"), "test").unwrap();
// Pin the target and validate the path/content.
let path = PinnedPathBuf::new(rootfs_path.to_path_buf(), "symlink_dir/endpoint").unwrap();
assert!(!path.is_dir());
let path_ref = path.deref();
let target = fs::read_link(path_ref).unwrap();
assert_eq!(target, rootfs_path.join("endpoint").canonicalize().unwrap());
let content = fs::read_to_string(&path).unwrap();
assert_eq!(&content, "test");
// Remove the target file and validate that we could still read data from the pinned path.
fs::remove_file(&target).unwrap();
fs::read_to_string(&target).unwrap_err();
let content = fs::read_to_string(&path).unwrap();
assert_eq!(&content, "test");
}
#[test]
fn test_pinned_path_buf_race() {
let root_dir = tempfile::tempdir().expect("failed to create tmpdir");
let root_path = root_dir.path();
let barrier = Arc::new(Barrier::new(2));
fs::write(root_path.join("a"), b"a").unwrap();
fs::write(root_path.join("b"), b"b").unwrap();
fs::write(root_path.join("c"), b"c").unwrap();
symlink("a", root_path.join("s")).unwrap();
let root_path2 = root_path.to_path_buf();
let barrier2 = barrier.clone();
let thread = thread::spawn(move || {
// step 1
barrier2.wait();
fs::remove_file(root_path2.join("a")).unwrap();
symlink("b", root_path2.join("a")).unwrap();
barrier2.wait();
// step 2
barrier2.wait();
fs::remove_file(root_path2.join("b")).unwrap();
symlink("c", root_path2.join("b")).unwrap();
barrier2.wait();
});
let path = scoped_join(&root_path, "s").unwrap();
let data = fs::read_to_string(&path).unwrap();
assert_eq!(&data, "a");
assert!(path.is_file());
barrier.wait();
barrier.wait();
// Verify the target has been redirected.
let data = fs::read_to_string(&path).unwrap();
assert_eq!(&data, "b");
PinnedPathBuf::from_path(&path).unwrap_err();
let pinned_path = PinnedPathBuf::new(&root_path, "s").unwrap();
let data = fs::read_to_string(&pinned_path).unwrap();
assert_eq!(&data, "b");
// step2
barrier.wait();
barrier.wait();
// Verify it still points to the old target.
let data = fs::read_to_string(&pinned_path).unwrap();
assert_eq!(&data, "b");
thread.join().unwrap();
}
#[test]
fn test_new_pinned_path_buf() {
let rootfs_dir = tempfile::tempdir().expect("failed to create tmpdir");
let rootfs_path = rootfs_dir.path();
let path = PinnedPathBuf::from_path(rootfs_path).unwrap();
let _ = OpenOptions::new().read(true).open(&path).unwrap();
}
#[test]
fn test_pinned_path_try_clone() {
let rootfs_dir = tempfile::tempdir().expect("failed to create tmpdir");
let rootfs_path = rootfs_dir.path();
let path = PinnedPathBuf::from_path(rootfs_path).unwrap();
let path2 = path.try_clone().unwrap();
assert_ne!(path.as_path(), path2.as_path());
}
#[test]
fn test_new_pinned_path_buf_from_nonexist_file() {
let rootfs_dir = tempfile::tempdir().expect("failed to create tmpdir");
let rootfs_path = rootfs_dir.path();
PinnedPathBuf::new(rootfs_path, "does_not_exist").unwrap_err();
}
#[test]
fn test_new_pinned_path_buf_without_read_perm() {
let rootfs_dir = tempfile::tempdir().expect("failed to create tmpdir");
let rootfs_path = rootfs_dir.path();
let path = rootfs_path.join("write_only_file");
let mut file = OpenOptions::new()
.read(false)
.write(true)
.create(true)
.mode(0o200)
.open(&path)
.unwrap();
file.write_all(&[0xa5u8]).unwrap();
let md = fs::metadata(&path).unwrap();
let umask = unsafe { libc::umask(0022) };
unsafe { libc::umask(umask) };
assert_eq!(md.mode() & 0o700, 0o200 & !umask);
PinnedPathBuf::from_path(&path).unwrap();
}
#[test]
fn test_pinned_path_buf_path_fd() {
let rootfs_dir = tempfile::tempdir().expect("failed to create tmpdir");
let rootfs_path = rootfs_dir.path();
let path = rootfs_path.join("write_only_file");
let mut file = OpenOptions::new()
.read(false)
.write(true)
.create(true)
.mode(0o200)
.open(&path)
.unwrap();
file.write_all(&[0xa5u8]).unwrap();
let handle = PinnedPathBuf::from_path(&path).unwrap();
// Check that `fstat()` etc works with the fd returned by `path_fd()`.
let fd = handle.path_fd();
let mut stat: libc::stat = unsafe { std::mem::zeroed() };
let res = unsafe { libc::fstat(fd, &mut stat as *mut _) };
assert_eq!(res, 0);
}
#[test]
fn test_pinned_path_buf_open_child() {
let rootfs_dir = tempfile::tempdir().expect("failed to create tmpdir");
let rootfs_path = rootfs_dir.path();
let path = PinnedPathBuf::from_path(rootfs_path).unwrap();
fs::write(path.join("child"), "test").unwrap();
let path = path.open_child(OsStr::new("child")).unwrap();
let content = fs::read_to_string(&path).unwrap();
assert_eq!(&content, "test");
path.open_child(&OsString::from("__does_not_exist__"))
.unwrap_err();
path.open_child(&OsString::from("test/a")).unwrap_err();
}
#[test]
fn test_prepare_path_component() {
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("")).is_err());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from(".")).is_err());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("..")).is_err());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("/")).is_err());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("//")).is_err());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("a/b")).is_err());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("./b")).is_err());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("a/.")).is_err());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("a/..")).is_err());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("a/./")).is_err());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("a/../")).is_err());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("a/./a")).is_err());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("a/../a")).is_err());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("a")).is_ok());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("a.b")).is_ok());
assert!(PinnedPathBuf::prepare_path_component(&OsString::from("a..b")).is_ok());
}
#[test]
fn test_target_fs_object_changed() {
let rootfs_dir = tempfile::tempdir().expect("failed to create tmpdir");
let rootfs_path = rootfs_dir.path();
let file = rootfs_path.join("child");
fs::write(&file, "test").unwrap();
let path = PinnedPathBuf::from_path(&file).unwrap();
let path3 = fs::read_link(path.as_path()).unwrap();
assert_eq!(&path3, path.target());
fs::rename(file, rootfs_path.join("child2")).unwrap();
let path4 = fs::read_link(path.as_path()).unwrap();
assert_ne!(&path4, path.target());
fs::remove_file(rootfs_path.join("child2")).unwrap();
let path5 = fs::read_link(path.as_path()).unwrap();
assert_ne!(&path4, &path5);
}
}

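A condensed sketch of the pinning behavior exercised by the tests above: once a path is pinned, its data remains reachable through the `/proc/self/fd/...` symlink even after the target file is unlinked (`tempfile` is the crate's own dev-dependency):

```rust
use safe_path::PinnedPathBuf;
use std::fs;

fn main() -> std::io::Result<()> {
    let dir = tempfile::tempdir()?;
    fs::write(dir.path().join("endpoint"), "test")?;
    let pinned = PinnedPathBuf::new(dir.path(), "endpoint")?;
    // Unlinking the target does not invalidate the pinned handle:
    fs::remove_file(dir.path().join("endpoint"))?;
    assert_eq!(fs::read_to_string(&pinned)?, "test");
    Ok(())
}
```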

@@ -0,0 +1,294 @@
// Copyright (c) 2022 Alibaba Cloud
//
// SPDX-License-Identifier: Apache-2.0
//
use std::io::{Error, ErrorKind, Result};
use std::path::Path;
use crate::{scoped_join, scoped_resolve, PinnedPathBuf};
const DIRECTORY_MODE_DEFAULT: u32 = 0o777;
const DIRECTORY_MODE_MASK: u32 = 0o777;
/// Safe version of `DirBuilder` to protect from TOCTOU style of attacks.
///
/// The `ScopedDirBuilder` is a counterpart for `DirBuilder`, with safety enhancements of:
/// - ensuring the new directories are created under a specified `root` directory.
/// - ensuring all created directories are still scoped under `root` even under symlink based
/// attacks.
/// - returning a [PinnedPathBuf] for the last level of directory, so it could be used for other
/// operations safely.
#[derive(Debug)]
pub struct ScopedDirBuilder {
root: PinnedPathBuf,
mode: u32,
recursive: bool,
}
impl ScopedDirBuilder {
/// Create a new instance of `ScopedDirBuilder` with default mode/security settings.
pub fn new<P: AsRef<Path>>(root: P) -> Result<Self> {
let root = root.as_ref().canonicalize()?;
let root = PinnedPathBuf::from_path(root)?;
if !root.metadata()?.is_dir() {
return Err(Error::new(
ErrorKind::Other,
format!("Invalid root path: {}", root.display()),
));
}
Ok(ScopedDirBuilder {
root,
mode: DIRECTORY_MODE_DEFAULT,
recursive: false,
})
}
/// Indicates that directories should be created recursively, creating all parent directories.
///
/// Parents that do not exist are created with the same security and permissions settings.
pub fn recursive(&mut self, recursive: bool) -> &mut Self {
self.recursive = recursive;
self
}
/// Sets the mode to create new directories with. This option defaults to 0o777.
pub fn mode(&mut self, mode: u32) -> &mut Self {
self.mode = mode & DIRECTORY_MODE_MASK;
self
}
/// Creates the specified directory with the options configured in this builder.
///
/// This is a helper to create a subdirectory from an absolute path, without stripping off
/// `self.root`. So an error will be returned if the path does not start with `self.root`.
/// It is considered an error if the directory already exists unless recursive mode is enabled.
pub fn create_with_unscoped_path<P: AsRef<Path>>(&self, path: P) -> Result<PinnedPathBuf> {
if !path.as_ref().is_absolute() {
return Err(Error::new(
ErrorKind::Other,
format!(
"Expected absolute directory path: {}",
path.as_ref().display()
),
));
}
// Partially canonicalize `path` so we can strip the `root` part.
let scoped_path = scoped_join("/", path)?;
let stripped_path = scoped_path.strip_prefix(self.root.target()).map_err(|_| {
Error::new(
ErrorKind::Other,
format!(
"Path {} is not under {}",
scoped_path.display(),
self.root.target().display()
),
)
})?;
self.do_mkdir(&stripped_path)
}
/// Creates sub-directory with the options configured in this builder.
///
/// It is considered an error if the directory already exists unless recursive mode is enabled.
pub fn create<P: AsRef<Path>>(&self, path: P) -> Result<PinnedPathBuf> {
let path = scoped_resolve(&self.root, path)?;
self.do_mkdir(&path)
}
fn do_mkdir(&self, path: &Path) -> Result<PinnedPathBuf> {
assert!(path.is_relative());
if path.file_name().is_none() {
if !self.recursive {
return Err(Error::new(
ErrorKind::AlreadyExists,
"directory already exists",
));
} else {
return self.root.try_clone();
}
}
// Safe because `path` has at least one level.
let levels = path.iter().count() - 1;
let mut dir = self.root.try_clone()?;
for (idx, comp) in path.iter().enumerate() {
match dir.open_child(comp) {
Ok(v) => {
if !v.metadata()?.is_dir() {
return Err(Error::new(
ErrorKind::Other,
format!("Path {} is not a directory", v.display()),
));
} else if !self.recursive && idx == levels {
return Err(Error::new(
ErrorKind::AlreadyExists,
"directory already exists",
));
}
dir = v;
}
Err(_e) => {
if !self.recursive && idx != levels {
return Err(Error::new(
ErrorKind::NotFound,
format!("parent directory does not exist"),
));
}
dir = dir.mkdir(comp, self.mode)?;
}
}
}
Ok(dir)
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use std::fs::DirBuilder;
use std::os::unix::fs::{symlink, MetadataExt};
use tempfile::tempdir;
#[test]
fn test_scoped_dir_builder() {
// create temporary directory to emulate container rootfs with symlink
let rootfs_dir = tempdir().expect("failed to create tmpdir");
DirBuilder::new()
.create(rootfs_dir.path().join("b"))
.unwrap();
symlink(rootfs_dir.path().join("b"), rootfs_dir.path().join("a")).unwrap();
let rootfs_path = &rootfs_dir.path().join("a");
// root directory doesn't exist
ScopedDirBuilder::new(rootfs_path.join("__does_not_exist__")).unwrap_err();
ScopedDirBuilder::new("__does_not_exist__").unwrap_err();
// root is a file
fs::write(rootfs_path.join("txt"), "test").unwrap();
ScopedDirBuilder::new(rootfs_path.join("txt")).unwrap_err();
let mut builder = ScopedDirBuilder::new(&rootfs_path).unwrap();
// file with the same name already exists.
builder
.create_with_unscoped_path(rootfs_path.join("txt"))
.unwrap_err();
// parent is a file
builder.create("/txt/a").unwrap_err();
// Not starting with root
builder.create_with_unscoped_path("/txt/a").unwrap_err();
// creating "." without recursive mode should fail
builder
.create_with_unscoped_path(rootfs_path.join("."))
.unwrap_err();
// parent doesn't exist
builder
.create_with_unscoped_path(rootfs_path.join("a/b"))
.unwrap_err();
builder.create("a/b/c").unwrap_err();
let path = builder.create("a").unwrap();
assert!(rootfs_path.join("a").is_dir());
assert_eq!(path.target(), rootfs_path.join("a").canonicalize().unwrap());
// Creating an existing directory without recursive mode should fail.
builder
.create_with_unscoped_path(rootfs_path.join("a"))
.unwrap_err();
// Creating an existing directory with recursive mode should succeed.
builder.recursive(true);
let path = builder
.create_with_unscoped_path(rootfs_path.join("a"))
.unwrap();
assert_eq!(path.target(), rootfs_path.join("a").canonicalize().unwrap());
let path = builder.create(".").unwrap();
assert_eq!(path.target(), rootfs_path.canonicalize().unwrap());
let umask = unsafe { libc::umask(0022) };
unsafe { libc::umask(umask) };
builder.mode(0o740);
let path = builder.create("a/b/c/d").unwrap();
assert_eq!(
path.target(),
rootfs_path.join("a/b/c/d").canonicalize().unwrap()
);
assert!(rootfs_path.join("a/b/c/d").is_dir());
assert_eq!(
rootfs_path.join("a").metadata().unwrap().mode() & 0o777,
DIRECTORY_MODE_DEFAULT & !umask,
);
assert_eq!(
rootfs_path.join("a/b").metadata().unwrap().mode() & 0o777,
0o740 & !umask
);
assert_eq!(
rootfs_path.join("a/b/c").metadata().unwrap().mode() & 0o777,
0o740 & !umask
);
assert_eq!(
rootfs_path.join("a/b/c/d").metadata().unwrap().mode() & 0o777,
0o740 & !umask
);
// Creation should fail if some path component is not a directory.
builder.create("txt/e/f").unwrap_err();
fs::write(rootfs_path.join("a/b/txt"), "test").unwrap();
builder.create("a/b/txt/h/i").unwrap_err();
}
#[test]
fn test_create_root() {
let mut builder = ScopedDirBuilder::new("/").unwrap();
builder.recursive(true);
builder.create("/").unwrap();
builder.create(".").unwrap();
builder.create("..").unwrap();
builder.create("../../.").unwrap();
builder.create("").unwrap();
builder.create_with_unscoped_path("/").unwrap();
builder.create_with_unscoped_path("/..").unwrap();
builder.create_with_unscoped_path("/../.").unwrap();
}
#[test]
fn test_create_with_absolute_path() {
// create temporary directory to emulate container rootfs with symlink
let rootfs_dir = tempdir().expect("failed to create tmpdir");
DirBuilder::new()
.create(rootfs_dir.path().join("b"))
.unwrap();
symlink(rootfs_dir.path().join("b"), rootfs_dir.path().join("a")).unwrap();
let rootfs_path = &rootfs_dir.path().join("a");
let mut builder = ScopedDirBuilder::new(&rootfs_path).unwrap();
builder.create_with_unscoped_path("/").unwrap_err();
builder
.create_with_unscoped_path(rootfs_path.join("../__xxxx___xxx__"))
.unwrap_err();
builder
.create_with_unscoped_path(rootfs_path.join("c/d"))
.unwrap_err();
// Returns `AlreadyExists` when recursive mode is false
builder.create_with_unscoped_path(&rootfs_path).unwrap_err();
builder
.create_with_unscoped_path(rootfs_path.join("."))
.unwrap_err();
builder.recursive(true);
builder.create_with_unscoped_path(&rootfs_path).unwrap();
builder
.create_with_unscoped_path(rootfs_path.join("."))
.unwrap();
builder
.create_with_unscoped_path(rootfs_path.join("c/d"))
.unwrap();
}
}


@@ -0,0 +1,415 @@
// Copyright (c) 2022 Alibaba Cloud
//
// SPDX-License-Identifier: Apache-2.0
//
use std::io::{Error, ErrorKind, Result};
use std::path::{Component, Path, PathBuf};
// Follow the same configuration as
// [secure_join](https://github.com/cyphar/filepath-securejoin/blob/master/join.go#L51)
const MAX_SYMLINK_DEPTH: u32 = 255;
fn do_scoped_resolve<R: AsRef<Path>, U: AsRef<Path>>(
root: R,
unsafe_path: U,
) -> Result<(PathBuf, PathBuf)> {
let root = root.as_ref().canonicalize()?;
let mut nlinks = 0u32;
let mut curr_path = unsafe_path.as_ref().to_path_buf();
'restart: loop {
let mut subpath = PathBuf::new();
let mut iter = curr_path.components();
'next_comp: while let Some(comp) = iter.next() {
match comp {
// Linux paths don't have prefixes.
Component::Prefix(_) => {
return Err(Error::new(
ErrorKind::Other,
format!("Invalid path prefix in: {}", unsafe_path.as_ref().display()),
));
}
// `RootDir` should always be the first component, and Path::components() ensures
// that.
Component::RootDir | Component::CurDir => {
continue 'next_comp;
}
Component::ParentDir => {
subpath.pop();
}
Component::Normal(n) => {
let path = root.join(&subpath).join(n);
if let Ok(v) = path.read_link() {
nlinks += 1;
if nlinks > MAX_SYMLINK_DEPTH {
return Err(Error::new(
ErrorKind::Other,
format!(
"Too many levels of symlinks: {}",
unsafe_path.as_ref().display()
),
));
}
curr_path = if v.is_absolute() {
v.join(iter.as_path())
} else {
subpath.join(v).join(iter.as_path())
};
continue 'restart;
} else {
subpath.push(n);
}
}
}
}
return Ok((root, subpath));
}
}
/// Resolve `unsafe_path` to a relative path, rooted at and constrained by `root`.
///
/// The `scoped_resolve()` function assumes `root` exists and is an absolute path. It processes
/// each path component in `unsafe_path` as follows:
/// - if the component doesn't exist yet, assume it's not a symlink and emit it as-is.
/// - if it's "/" or ".", ignore it.
/// - if it's "..", go to the parent directory, but never above `root`.
/// - if it's a symlink, recursively resolve it to the real path. All symlink resolutions are
///   constrained by `root`.
/// - otherwise, emit the path component unchanged.
///
/// # Arguments
/// - `root`: the absolute path to constrain the symlink resolution.
/// - `unsafe_path`: the path to resolve.
///
/// Note that the guarantees provided by this function only apply if the path components in the
/// returned PathBuf are not modified (in other words are not replaced with symlinks on the
/// filesystem) after this function has returned. You may use [crate::PinnedPathBuf] to protect
/// from such TOCTOU attacks.
pub fn scoped_resolve<R: AsRef<Path>, U: AsRef<Path>>(root: R, unsafe_path: U) -> Result<PathBuf> {
do_scoped_resolve(root, unsafe_path).map(|(_root, path)| path)
}
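A sketch of the contract (assuming `scoped_resolve` from this module is in scope; the root must exist since it is canonicalized, and the components under it are assumed not to be existing symlinks here):

```rust
use std::path::PathBuf;

fn main() -> std::io::Result<()> {
    // The leading "/" is ignored and ".." cannot climb above the root,
    // so the result is a root-relative path.
    let rel = scoped_resolve("/tmp", "/usr/bin/../../bin/ls")?;
    assert_eq!(rel, PathBuf::from("bin/ls"));
    Ok(())
}
```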
/// Safely join `unsafe_path` to `root`, and ensure `unsafe_path` is scoped under `root`.
///
/// The `scoped_join()` function assumes `root` exists and is an absolute path. It safely joins the
/// two given paths and ensures:
/// - The returned path is guaranteed to be scoped inside `root`.
/// - Any symbolic links in the path are evaluated with the given `root` treated as the root of the
/// filesystem, similar to a chroot.
///
/// It's modelled after [secure_join](https://github.com/cyphar/filepath-securejoin), but only
/// for Linux systems.
///
/// # Arguments
/// - `root`: the absolute path to scope the symlink evaluation.
/// - `unsafe_path`: the path to evaluate and join with `root`. It is unsafe since it may try to
///   escape from `root` by using "../" or symlinks.
///
/// # Security
/// On success return, the `scoped_join()` function guarantees that:
/// - The resulting PathBuf must be a child path of `root` and will not contain any symlink path
/// components (they will all get expanded).
/// - When expanding symlinks, all symlink path components must be resolved relative to the provided
/// `root`. In particular, this can be considered a userspace implementation of how chroot(2)
/// operates on file paths.
/// - Non-existent path components are unaffected.
///
/// Note that the guarantees provided by this function only apply if the path components in the
/// returned string are not modified (in other words are not replaced with symlinks on the
/// filesystem) after this function has returned. You may use [crate::PinnedPathBuf] to protect
/// from such TOCTOU attacks.
pub fn scoped_join<R: AsRef<Path>, U: AsRef<Path>>(root: R, unsafe_path: U) -> Result<PathBuf> {
do_scoped_resolve(root, unsafe_path).map(|(root, path)| root.join(path))
}
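And the corresponding sketch for `scoped_join` (same assumptions as above):

```rust
use std::path::PathBuf;

fn main() -> std::io::Result<()> {
    // A hostile "../.." is clamped at the root, so the result stays inside /tmp.
    let joined = scoped_join("/tmp", "../../etc/shadow")?;
    assert_eq!(joined, PathBuf::from("/tmp/etc/shadow"));
    Ok(())
}
```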
#[cfg(test)]
mod tests {
use super::*;
use std::fs::DirBuilder;
use std::os::unix::fs;
use tempfile::tempdir;
#[allow(dead_code)]
#[derive(Debug)]
struct TestData<'a> {
name: &'a str,
rootfs: &'a Path,
unsafe_path: &'a str,
result: &'a str,
}
fn exec_tests(tests: &[TestData]) {
for (i, t) in tests.iter().enumerate() {
// Create a string containing details of the test
let msg = format!("test[{}]: {:?}", i, t);
let result = scoped_resolve(t.rootfs, t.unsafe_path).unwrap();
let msg = format!("{}, result: {:?}", msg, result);
// Perform the checks
assert_eq!(&result, Path::new(t.result), "{}", msg);
}
}
#[test]
fn test_scoped_resolve() {
// create temporary directory to emulate container rootfs with symlink
let rootfs_dir = tempdir().expect("failed to create tmpdir");
DirBuilder::new()
.create(rootfs_dir.path().join("b"))
.unwrap();
fs::symlink(rootfs_dir.path().join("b"), rootfs_dir.path().join("a")).unwrap();
let rootfs_path = &rootfs_dir.path().join("a");
let tests = [
TestData {
name: "normal path",
rootfs: rootfs_path,
unsafe_path: "a/b/c",
result: "a/b/c",
},
TestData {
name: "path with .. at beginning",
rootfs: rootfs_path,
unsafe_path: "../../../a/b/c",
result: "a/b/c",
},
TestData {
name: "path with complex .. pattern",
rootfs: rootfs_path,
unsafe_path: "../../../a/../../b/../../c",
result: "c",
},
TestData {
name: "path with .. in middle",
rootfs: rootfs_path,
unsafe_path: "/usr/bin/../../bin/ls",
result: "bin/ls",
},
TestData {
name: "path with . and ..",
rootfs: rootfs_path,
unsafe_path: "/usr/./bin/../../bin/./ls",
result: "bin/ls",
},
TestData {
name: "path with . at end",
rootfs: rootfs_path,
unsafe_path: "/usr/./bin/../../bin/./ls/.",
result: "bin/ls",
},
TestData {
name: "path try to escape by ..",
rootfs: rootfs_path,
unsafe_path: "/usr/./bin/../../../../bin/./ls/../ls",
result: "bin/ls",
},
TestData {
name: "path with .. at the end",
rootfs: rootfs_path,
unsafe_path: "/usr/./bin/../../bin/./ls/..",
result: "bin",
},
TestData {
name: "path ..",
rootfs: rootfs_path,
unsafe_path: "..",
result: "",
},
TestData {
name: "path .",
rootfs: rootfs_path,
unsafe_path: ".",
result: "",
},
TestData {
name: "path /",
rootfs: rootfs_path,
unsafe_path: "/",
result: "",
},
TestData {
name: "empty path",
rootfs: rootfs_path,
unsafe_path: "",
result: "",
},
];
exec_tests(&tests);
}
#[test]
fn test_scoped_resolve_invalid() {
scoped_resolve("./root_is_not_absolute_path", ".").unwrap_err();
scoped_resolve("C:", ".").unwrap_err();
scoped_resolve(r#"\\server\test"#, ".").unwrap_err();
scoped_resolve(r#"http://localhost/test"#, ".").unwrap_err();
// Chinese Unicode characters
scoped_resolve(r#"您好"#, ".").unwrap_err();
}
#[test]
fn test_scoped_resolve_symlink() {
// create temporary directory to emulate container rootfs with symlink
let rootfs_dir = tempdir().expect("failed to create tmpdir");
let rootfs_path = &rootfs_dir.path();
std::fs::create_dir(rootfs_path.join("symlink_dir")).unwrap();
fs::symlink("../../../", rootfs_path.join("1")).unwrap();
let tests = [TestData {
name: "relative symlink beyond root",
rootfs: rootfs_path,
unsafe_path: "1",
result: "",
}];
exec_tests(&tests);
fs::symlink("/dddd", rootfs_path.join("2")).unwrap();
let tests = [TestData {
name: "abs symlink pointing to non-existent directory",
rootfs: rootfs_path,
unsafe_path: "2",
result: "dddd",
}];
exec_tests(&tests);
fs::symlink("/", rootfs_path.join("3")).unwrap();
let tests = [TestData {
name: "abs symlink pointing to /",
rootfs: rootfs_path,
unsafe_path: "3",
result: "",
}];
exec_tests(&tests);
fs::symlink("usr/bin/../bin/ls", rootfs_path.join("4")).unwrap();
let tests = [TestData {
name: "symlink with one ..",
rootfs: rootfs_path,
unsafe_path: "4",
result: "usr/bin/ls",
}];
exec_tests(&tests);
fs::symlink("usr/bin/../../bin/ls", rootfs_path.join("5")).unwrap();
let tests = [TestData {
name: "symlink with two ..",
rootfs: rootfs_path,
unsafe_path: "5",
result: "bin/ls",
}];
exec_tests(&tests);
fs::symlink(
"../usr/bin/../../../bin/ls",
rootfs_path.join("symlink_dir/6"),
)
.unwrap();
let tests = [TestData {
name: "symlink try to escape",
rootfs: rootfs_path,
unsafe_path: "symlink_dir/6",
result: "bin/ls",
}];
exec_tests(&tests);
// Detect symlink loop.
fs::symlink("/endpoint_b", rootfs_path.join("endpoint_a")).unwrap();
fs::symlink("/endpoint_a", rootfs_path.join("endpoint_b")).unwrap();
scoped_resolve(rootfs_path, "endpoint_a").unwrap_err();
}
#[test]
fn test_scoped_join() {
// create temporary directory to emulate container rootfs with symlink
let rootfs_dir = tempdir().expect("failed to create tmpdir");
let rootfs_path = &rootfs_dir.path();
assert_eq!(
scoped_join(&rootfs_path, "a").unwrap(),
rootfs_path.join("a")
);
assert_eq!(
scoped_join(&rootfs_path, "./a").unwrap(),
rootfs_path.join("a")
);
assert_eq!(
scoped_join(&rootfs_path, "././a").unwrap(),
rootfs_path.join("a")
);
assert_eq!(
scoped_join(&rootfs_path, "c/d/../../a").unwrap(),
rootfs_path.join("a")
);
assert_eq!(
scoped_join(&rootfs_path, "c/d/../../../.././a").unwrap(),
rootfs_path.join("a")
);
assert_eq!(
scoped_join(&rootfs_path, "../../a").unwrap(),
rootfs_path.join("a")
);
assert_eq!(
scoped_join(&rootfs_path, "./../a").unwrap(),
rootfs_path.join("a")
);
}
#[test]
fn test_scoped_join_symlink() {
// create temporary directory to emulate container rootfs with symlink
let rootfs_dir = tempdir().expect("failed to create tmpdir");
let rootfs_path = &rootfs_dir.path();
DirBuilder::new()
.recursive(true)
.create(rootfs_dir.path().join("b/c"))
.unwrap();
fs::symlink("b/c", rootfs_dir.path().join("a")).unwrap();
let target = rootfs_path.join("b/c");
assert_eq!(scoped_join(&rootfs_path, "a").unwrap(), target);
assert_eq!(scoped_join(&rootfs_path, "./a").unwrap(), target);
assert_eq!(scoped_join(&rootfs_path, "././a").unwrap(), target);
assert_eq!(scoped_join(&rootfs_path, "b/c/../../a").unwrap(), target);
assert_eq!(
scoped_join(&rootfs_path, "b/c/../../../.././a").unwrap(),
target
);
assert_eq!(scoped_join(&rootfs_path, "../../a").unwrap(), target);
assert_eq!(scoped_join(&rootfs_path, "./../a").unwrap(), target);
assert_eq!(scoped_join(&rootfs_path, "a/../../../a").unwrap(), target);
assert_eq!(scoped_join(&rootfs_path, "a/../../../b/c").unwrap(), target);
}
#[test]
fn test_scoped_join_symlink_loop() {
// create temporary directory to emulate container rootfs with symlink
let rootfs_dir = tempdir().expect("failed to create tmpdir");
let rootfs_path = &rootfs_dir.path();
fs::symlink("/endpoint_b", rootfs_path.join("endpoint_a")).unwrap();
fs::symlink("/endpoint_a", rootfs_path.join("endpoint_b")).unwrap();
scoped_join(rootfs_path, "endpoint_a").unwrap_err();
}
#[test]
fn test_scoped_join_unicode_character() {
// create temporary directory to emulate container rootfs with symlink
let rootfs_dir = tempdir().expect("failed to create tmpdir");
let rootfs_path = &rootfs_dir.path().canonicalize().unwrap();
let path = scoped_join(rootfs_path, "您好").unwrap();
assert_eq!(path, rootfs_path.join("您好"));
let path = scoped_join(rootfs_path, "../../../您好").unwrap();
assert_eq!(path, rootfs_path.join("您好"));
let path = scoped_join(rootfs_path, "。。/您好").unwrap();
assert_eq!(path, rootfs_path.join("。。/您好"));
let path = scoped_join(rootfs_path, "您好/../../test").unwrap();
assert_eq!(path, rootfs_path.join("test"));
}
}


@@ -2,6 +2,7 @@
*.patch
*.swp
coverage.txt
coverage.txt.tmp
coverage.html
.git-commit
.git-commit.tmp


@@ -176,6 +176,9 @@ DEFDISABLEBLOCK := false
DEFSHAREDFS_CLH_VIRTIOFS := virtio-fs
DEFSHAREDFS_QEMU_VIRTIOFS := virtio-fs
DEFVIRTIOFSDAEMON := $(LIBEXECDIR)/kata-qemu/virtiofsd
ifeq ($(ARCH),amd64)
DEFVIRTIOFSDAEMON := $(LIBEXECDIR)/virtiofsd
endif
DEFVALIDVIRTIOFSDAEMONPATHS := [\"$(DEFVIRTIOFSDAEMON)\"]
# Default DAX mapping cache size in MiB
# If the value is 0, DAX is not enabled
@@ -596,7 +599,7 @@ hook:
go-test: $(GENERATED_FILES)
go clean -testcache
$(QUIET_TEST)../../ci/go-test.sh
$(QUIET_TEST)./go-test.sh
fast-test: $(GENERATED_FILES)
go clean -testcache


@@ -10,6 +10,7 @@ This repository contains the following components:
|-|-|
| `containerd-shim-kata-v2` | The [shimv2 runtime](../../docs/design/architecture/README.md#runtime) |
| `kata-runtime` | [utility program](../../docs/design/architecture/README.md#utility-program) |
| `kata-monitor` | [metrics collector daemon](cmd/kata-monitor/README.md) |
For details of the other Kata Containers repositories, see the
[repository summary](https://github.com/kata-containers/kata-containers).


@@ -0,0 +1,68 @@
# Kata monitor
## Overview
`kata-monitor` is a daemon able to collect and expose metrics related to all the Kata Containers workloads running on the same host.
Once started, it detects all the Kata Containers runtimes (`containerd-shim-kata-v2`) running on the system and exposes a few HTTP endpoints to allow retrieval of the available data.
The main endpoint is `/metrics`, which aggregates metrics from all the Kata workloads.
Available metrics include:
* Kata runtime metrics
* Kata agent metrics
* Kata guest OS metrics
* Hypervisor metrics
* Firecracker metrics
* Kata monitor metrics
All the provided metrics are in Prometheus format. While `kata-monitor` can run as a standalone daemon on any host running Kata Containers workloads, and can be used to retrieve profiling data from the running Kata runtimes, it is mainly expected to be deployed as a DaemonSet on a Kubernetes cluster, where Prometheus scrapes the metrics from the kata-monitor endpoints.
For more information on the Kata Containers metrics architecture and a detailed list of the available metrics provided by Kata monitor check the [Kata 2.0 Metrics Design](../../../../docs/design/kata-2-0-metrics.md) document.
## Usage
Each `kata-monitor` instance detects and monitors the Kata Containers workloads running on the same node.
### Kata monitor arguments
The `kata-monitor` binary accepts the following arguments:
* `--listen-address` _IP:PORT_
* `--runtime-endpoint` _PATH_TO_THE_CONTAINER_MANAGER_CRI_INTERFACE_
* `--log-level` _[ trace | debug | info | warn | error | fatal | panic ]_
The **listen-address** specifies the IP and TCP port where the kata-monitor HTTP endpoints will be exposed. It defaults to `127.0.0.1:8090`.
The **runtime-endpoint** is the CRI interface of a CRI-compliant container manager: it is used to retrieve the CRI `PodSandboxMetadata` (`uid`, `name` and `namespace`), which is attached to the Kata metrics through the labels `cri_uid`, `cri_name` and `cri_namespace`. It defaults to the containerd socket: `/run/containerd/containerd.sock`.
The **log-level** controls how verbose the logs should be. The default is `info`.
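For example, to expose the endpoints on all interfaces with debug logging:
```bash
$ kata-monitor --listen-address 0.0.0.0:8090 \
    --runtime-endpoint /run/containerd/containerd.sock \
    --log-level debug
```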
### Kata monitor HTTP endpoints
`kata-monitor` exposes the following endpoints:
* `/metrics` : get metrics from the Kata sandboxes.
* `/sandboxes` : list all the Kata sandboxes running on the host.
* `/agent-url` : get the agent URL of a Kata sandbox.
* `/debug/vars` : Internal data of the Kata runtime shim.
* `/debug/pprof/` : Golang profiling data of the Kata runtime shim: index page.
* `/debug/pprof/cmdline` : Golang profiling data of the Kata runtime shim: `cmdline` endpoint.
* `/debug/pprof/profile` : Golang profiling data of the Kata runtime shim: `profile` endpoint (CPU profiling).
* `/debug/pprof/symbol` : Golang profiling data of the Kata runtime shim: `symbol` endpoint.
* `/debug/pprof/trace` : Golang profiling data of the Kata runtime shim: `trace` endpoint.
**NOTE: The debug endpoints are available only if the [Kata Containers configuration file](https://github.com/kata-containers/kata-containers/blob/9d5b03a1b70bbd175237ec4b9f821d6ccee0a1f6/src/runtime/config/configuration-qemu.toml.in#L590-L592) includes** `enable_pprof = true` **in the** `[runtime]` **section**.
The `/sandboxes` endpoint lists the _sandbox IDs_ of all the detected Kata runtimes. If accessed via a web browser, it provides HTML links to the endpoints available for each sandbox.
In order to retrieve data for a specific Kata workload, the _sandbox ID_ should be passed in the query string using the _sandbox_ key. The `/agent-url` endpoint and all the `/debug/*` endpoints require the sandbox ID to be specified in the query string.
#### Examples
Retrieve the IDs of the available sandboxes:
```bash
$ curl 127.0.0.1:8090/sandboxes
```
output:
```
6fcf0a90b01e90d8747177aa466c3462d02e02a878bc393649df83d4c314af0c
df96b24bd49ec437c872c1a758edc084121d607ce1242ff5d2263a0e1b693343
```
Retrieve the `agent-url` of the sandbox with ID _df96b24bd49ec437c872c1a758edc084121d607ce1242ff5d2263a0e1b693343_:
```bash
$ curl 127.0.0.1:8090/agent-url?sandbox=df96b24bd49ec437c872c1a758edc084121d607ce1242ff5d2263a0e1b693343
```
output:
```
vsock://830455376:1024
```
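The debug endpoints can be queried the same way. For example, to grab a CPU profile from a sandbox's shim (this assumes `enable_pprof = true` is set, as noted above):
```bash
$ curl -o profile.out "127.0.0.1:8090/debug/pprof/profile?sandbox=df96b24bd49ec437c872c1a758edc084121d607ce1242ff5d2263a0e1b693343"
```
The downloaded profile can then be inspected with `go tool pprof profile.out`.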


@@ -175,6 +175,15 @@ func main() {
}
func indexPage(w http.ResponseWriter, r *http.Request) {
htmlResponse := kataMonitor.IfReturnHTMLResponse(w, r)
if htmlResponse {
indexPageHTML(w, r)
} else {
indexPageText(w, r)
}
}
func indexPageText(w http.ResponseWriter, r *http.Request) {
w.Write([]byte("Available HTTP endpoints:\n"))
spacing := 0
@@ -184,13 +193,35 @@ func indexPage(w http.ResponseWriter, r *http.Request) {
}
}
spacing = spacing + 3
formatter := fmt.Sprintf("%%-%ds: %%s\n", spacing)
formattedString := fmt.Sprintf("%%-%ds: %%s\n", spacing)
for _, endpoint := range endpoints {
w.Write([]byte(fmt.Sprintf(formattedString, endpoint.path, endpoint.desc)))
w.Write([]byte(fmt.Sprintf(formatter, endpoint.path, endpoint.desc)))
}
}
func indexPageHTML(w http.ResponseWriter, r *http.Request) {
w.Write([]byte("<h1>Available HTTP endpoints:</h1>\n"))
var formattedString string
needLinkPaths := []string{"/metrics", "/sandboxes"}
w.Write([]byte("<ul>"))
for _, endpoint := range endpoints {
formattedString = fmt.Sprintf("<b>%s</b>: %s\n", endpoint.path, endpoint.desc)
for _, linkPath := range needLinkPaths {
if linkPath == endpoint.path {
formattedString = fmt.Sprintf("<b><a href='%s'>%s</a></b>: %s\n", endpoint.path, endpoint.path, endpoint.desc)
break
}
}
formattedString = fmt.Sprintf("<li>%s</li>", formattedString)
w.Write([]byte(formattedString))
}
w.Write([]byte("</ul>"))
}
// initLog setup logger
func initLog() {
kataMonitorLog := logrus.WithFields(logrus.Fields{


@@ -8,7 +8,6 @@ package main
import (
"context"
"flag"
"os"
"testing"
"github.com/stretchr/testify/assert"
@@ -43,9 +42,7 @@ func TestFactoryCLIFunctionNoRuntimeConfig(t *testing.T) {
func TestFactoryCLIFunctionInit(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true)
assert.NoError(err)
@@ -92,9 +89,7 @@ func TestFactoryCLIFunctionInit(t *testing.T) {
func TestFactoryCLIFunctionDestroy(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true)
assert.NoError(err)
@@ -126,9 +121,7 @@ func TestFactoryCLIFunctionDestroy(t *testing.T) {
func TestFactoryCLIFunctionStatus(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true)
assert.NoError(err)
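The pattern applied throughout these test files: `testing.T.TempDir` creates a per-test directory and registers its removal automatically, so the `os.MkdirTemp` error handling and the `defer os.RemoveAll` cleanup can both be dropped. A minimal before/after sketch:

```go
package example

import (
	"os"
	"testing"
)

// Before: manual creation, error check, and cleanup.
func TestWithMkdirTemp(t *testing.T) {
	tmpdir, err := os.MkdirTemp("", "")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(tmpdir)
	_ = tmpdir
}

// After: the testing package owns the directory's lifecycle.
func TestWithTempDir(t *testing.T) {
	tmpdir := t.TempDir()
	_ = tmpdir
}
```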


@@ -71,11 +71,7 @@ func TestCCCheckCLIFunction(t *testing.T) {
func TestCheckCheckKernelModulesNoNesting(t *testing.T) {
assert := assert.New(t)
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
savedSysModuleDir := sysModuleDir
savedProcCPUInfo := procCPUInfo
@@ -91,7 +87,7 @@ func TestCheckCheckKernelModulesNoNesting(t *testing.T) {
procCPUInfo = savedProcCPUInfo
}()
err = os.MkdirAll(sysModuleDir, testDirMode)
err := os.MkdirAll(sysModuleDir, testDirMode)
if err != nil {
t.Fatal(err)
}
@@ -156,11 +152,7 @@ func TestCheckCheckKernelModulesNoNesting(t *testing.T) {
func TestCheckCheckKernelModulesNoUnrestrictedGuest(t *testing.T) {
assert := assert.New(t)
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
savedSysModuleDir := sysModuleDir
savedProcCPUInfo := procCPUInfo
@@ -176,7 +168,7 @@ func TestCheckCheckKernelModulesNoUnrestrictedGuest(t *testing.T) {
procCPUInfo = savedProcCPUInfo
}()
err = os.MkdirAll(sysModuleDir, testDirMode)
err := os.MkdirAll(sysModuleDir, testDirMode)
if err != nil {
t.Fatal(err)
}
@@ -255,11 +247,7 @@ func TestCheckHostIsVMContainerCapable(t *testing.T) {
assert := assert.New(t)
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
savedSysModuleDir := sysModuleDir
savedProcCPUInfo := procCPUInfo
@@ -275,7 +263,7 @@ func TestCheckHostIsVMContainerCapable(t *testing.T) {
procCPUInfo = savedProcCPUInfo
}()
err = os.MkdirAll(sysModuleDir, testDirMode)
err := os.MkdirAll(sysModuleDir, testDirMode)
if err != nil {
t.Fatal(err)
}
@@ -405,11 +393,7 @@ func TestArchKernelParamHandler(t *testing.T) {
func TestKvmIsUsable(t *testing.T) {
assert := assert.New(t)
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
savedKvmDevice := kvmDevice
fakeKVMDevice := filepath.Join(dir, "kvm")
@@ -419,7 +403,7 @@ func TestKvmIsUsable(t *testing.T) {
kvmDevice = savedKvmDevice
}()
err = kvmIsUsable()
err := kvmIsUsable()
assert.Error(err)
err = createEmptyFile(fakeKVMDevice)
@@ -457,9 +441,7 @@ foo : bar
func TestSetCPUtype(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
savedArchRequiredCPUFlags := archRequiredCPUFlags
savedArchRequiredCPUAttribs := archRequiredCPUAttribs


@@ -67,11 +67,7 @@ foo : bar
{validContents, validNormalizeVendorName, validNormalizeModelName, false},
}
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
savedProcCPUInfo := procCPUInfo
@@ -84,7 +80,7 @@ foo : bar
procCPUInfo = savedProcCPUInfo
}()
_, _, err = getCPUDetails()
_, _, err := getCPUDetails()
// ENOENT
assert.Error(t, err)
assert.True(t, os.IsNotExist(err))


@@ -9,7 +9,6 @@
package main
import (
"os"
"testing"
"github.com/stretchr/testify/assert"
@@ -18,9 +17,7 @@ import (
func testSetCPUTypeGeneric(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
savedArchRequiredCPUFlags := archRequiredCPUFlags
savedArchRequiredCPUAttribs := archRequiredCPUAttribs


@@ -7,7 +7,6 @@ package main
import (
"fmt"
"os"
"path/filepath"
"testing"
@@ -118,11 +117,7 @@ func TestArchKernelParamHandler(t *testing.T) {
func TestKvmIsUsable(t *testing.T) {
assert := assert.New(t)
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
savedKvmDevice := kvmDevice
fakeKVMDevice := filepath.Join(dir, "kvm")
@@ -132,7 +127,7 @@ func TestKvmIsUsable(t *testing.T) {
kvmDevice = savedKvmDevice
}()
err = kvmIsUsable()
err := kvmIsUsable()
assert.Error(err)
err = createEmptyFile(fakeKVMDevice)


@@ -7,7 +7,6 @@ package main
import (
"fmt"
"os"
"path/filepath"
"testing"
@@ -117,11 +116,7 @@ func TestArchKernelParamHandler(t *testing.T) {
func TestKvmIsUsable(t *testing.T) {
assert := assert.New(t)
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
savedKvmDevice := kvmDevice
fakeKVMDevice := filepath.Join(dir, "kvm")
@@ -131,7 +126,7 @@ func TestKvmIsUsable(t *testing.T) {
kvmDevice = savedKvmDevice
}()
err = kvmIsUsable()
err := kvmIsUsable()
assert.Error(err)
err = createEmptyFile(fakeKVMDevice)


@@ -155,11 +155,7 @@ func makeCPUInfoFile(path, vendorID, flags string) error {
// nolint: unused, deadcode
func genericTestGetCPUDetails(t *testing.T, validVendor string, validModel string, validContents string, data []testCPUDetail) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
savedProcCPUInfo := procCPUInfo
@@ -172,7 +168,7 @@ func genericTestGetCPUDetails(t *testing.T, validVendor string, validModel strin
procCPUInfo = savedProcCPUInfo
}()
_, _, err = getCPUDetails()
_, _, err := getCPUDetails()
// ENOENT
assert.Error(t, err)
assert.True(t, os.IsNotExist(err))
@@ -197,11 +193,7 @@ func genericTestGetCPUDetails(t *testing.T, validVendor string, validModel strin
func genericCheckCLIFunction(t *testing.T, cpuData []testCPUData, moduleData []testModuleData) {
assert := assert.New(t)
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
_, config, err := makeRuntimeConfig(dir)
assert.NoError(err)
@@ -307,15 +299,11 @@ func TestCheckGetCPUInfo(t *testing.T) {
{"foo\n\nbar\nbaz\n\n", "foo", false},
}
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
file := filepath.Join(dir, "cpuinfo")
// file doesn't exist
_, err = getCPUInfo(file)
_, err := getCPUInfo(file)
assert.Error(err)
for _, d := range data {
@@ -527,11 +515,7 @@ func TestCheckHaveKernelModule(t *testing.T) {
assert := assert.New(t)
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
savedModProbeCmd := modProbeCmd
savedSysModuleDir := sysModuleDir
@@ -545,7 +529,7 @@ func TestCheckHaveKernelModule(t *testing.T) {
sysModuleDir = savedSysModuleDir
}()
err = os.MkdirAll(sysModuleDir, testDirMode)
err := os.MkdirAll(sysModuleDir, testDirMode)
if err != nil {
t.Fatal(err)
}
@@ -577,11 +561,7 @@ func TestCheckHaveKernelModule(t *testing.T) {
func TestCheckCheckKernelModules(t *testing.T) {
assert := assert.New(t)
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
savedModProbeCmd := modProbeCmd
savedSysModuleDir := sysModuleDir
@@ -595,7 +575,7 @@ func TestCheckCheckKernelModules(t *testing.T) {
sysModuleDir = savedSysModuleDir
}()
err = os.MkdirAll(sysModuleDir, testDirMode)
err := os.MkdirAll(sysModuleDir, testDirMode)
if err != nil {
t.Fatal(err)
}
@@ -662,11 +642,7 @@ func TestCheckCheckKernelModulesUnreadableFile(t *testing.T) {
t.Skip(ktu.TestDisabledNeedNonRoot)
}
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
testData := map[string]kernelModule{
"foo": {
@@ -691,7 +667,7 @@ func TestCheckCheckKernelModulesUnreadableFile(t *testing.T) {
}()
modPath := filepath.Join(sysModuleDir, "foo/parameters")
err = os.MkdirAll(modPath, testDirMode)
err := os.MkdirAll(modPath, testDirMode)
assert.NoError(err)
modParamFile := filepath.Join(modPath, "param1")
@@ -710,11 +686,7 @@ func TestCheckCheckKernelModulesUnreadableFile(t *testing.T) {
func TestCheckCheckKernelModulesInvalidFileContents(t *testing.T) {
assert := assert.New(t)
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
testData := map[string]kernelModule{
"foo": {
@@ -739,7 +711,7 @@ func TestCheckCheckKernelModulesInvalidFileContents(t *testing.T) {
}()
modPath := filepath.Join(sysModuleDir, "foo/parameters")
err = os.MkdirAll(modPath, testDirMode)
err := os.MkdirAll(modPath, testDirMode)
assert.NoError(err)
modParamFile := filepath.Join(modPath, "param1")
@@ -755,11 +727,7 @@ func TestCheckCheckKernelModulesInvalidFileContents(t *testing.T) {
func TestCheckCLIFunctionFail(t *testing.T) {
assert := assert.New(t)
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
_, config, err := makeRuntimeConfig(dir)
assert.NoError(err)
@@ -788,11 +756,7 @@ func TestCheckCLIFunctionFail(t *testing.T) {
func TestCheckKernelParamHandler(t *testing.T) {
assert := assert.New(t)
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
savedModProbeCmd := modProbeCmd
savedSysModuleDir := sysModuleDir
@@ -870,9 +834,7 @@ func TestCheckKernelParamHandler(t *testing.T) {
func TestArchRequiredKernelModules(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
_, config, err := makeRuntimeConfig(tmpdir)
assert.NoError(err)
@@ -885,11 +847,7 @@ func TestArchRequiredKernelModules(t *testing.T) {
return
}
dir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
savedModProbeCmd := modProbeCmd
savedSysModuleDir := sysModuleDir


@@ -6,7 +6,6 @@
package main
import (
"os"
"testing"
"github.com/stretchr/testify/assert"
@@ -22,9 +21,7 @@ func getExpectedHostDetails(tmpdir string) (HostInfo, error) {
func TestEnvGetEnvInfoSetsCPUType(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
savedArchRequiredCPUFlags := archRequiredCPUFlags
savedArchRequiredCPUAttribs := archRequiredCPUAttribs


@@ -9,7 +9,6 @@
package main
import (
"os"
"testing"
"github.com/stretchr/testify/assert"
@@ -18,9 +17,7 @@ import (
func testEnvGetEnvInfoSetsCPUTypeGeneric(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
savedArchRequiredCPUFlags := archRequiredCPUFlags
savedArchRequiredCPUAttribs := archRequiredCPUAttribs


@@ -364,11 +364,7 @@ func TestEnvGetMetaInfo(t *testing.T) {
}
func TestEnvGetHostInfo(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
expectedHostDetails, err := getExpectedHostDetails(tmpdir)
assert.NoError(t, err)
@@ -389,13 +385,9 @@ func TestEnvGetHostInfo(t *testing.T) {
}
func TestEnvGetHostInfoNoProcCPUInfo(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
_, err = getExpectedHostDetails(tmpdir)
_, err := getExpectedHostDetails(tmpdir)
assert.NoError(t, err)
err = os.Remove(procCPUInfo)
@@ -406,13 +398,9 @@ func TestEnvGetHostInfoNoProcCPUInfo(t *testing.T) {
}
func TestEnvGetHostInfoNoOSRelease(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
_, err = getExpectedHostDetails(tmpdir)
_, err := getExpectedHostDetails(tmpdir)
assert.NoError(t, err)
err = os.Remove(osRelease)
@@ -423,13 +411,9 @@ func TestEnvGetHostInfoNoOSRelease(t *testing.T) {
}
func TestEnvGetHostInfoNoProcVersion(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
_, err = getExpectedHostDetails(tmpdir)
_, err := getExpectedHostDetails(tmpdir)
assert.NoError(t, err)
err = os.Remove(procVersion)
@@ -440,11 +424,7 @@ func TestEnvGetHostInfoNoProcVersion(t *testing.T) {
}
func TestEnvGetEnvInfo(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
// Run test twice to ensure the individual component debug+trace
// options are tested.
@@ -474,9 +454,7 @@ func TestEnvGetEnvInfo(t *testing.T) {
func TestEnvGetEnvInfoNoHypervisorVersion(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
configFile, config, err := makeRuntimeConfig(tmpdir)
assert.NoError(err)
@@ -501,20 +479,14 @@ func TestEnvGetEnvInfoNoHypervisorVersion(t *testing.T) {
func TestEnvGetEnvInfoAgentError(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
_, _, err = makeRuntimeConfig(tmpdir)
_, _, err := makeRuntimeConfig(tmpdir)
assert.NoError(err)
}
func TestEnvGetEnvInfoNoOSRelease(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
configFile, config, err := makeRuntimeConfig(tmpdir)
assert.NoError(t, err)
@@ -530,11 +502,7 @@ func TestEnvGetEnvInfoNoOSRelease(t *testing.T) {
}
func TestEnvGetEnvInfoNoProcCPUInfo(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
configFile, config, err := makeRuntimeConfig(tmpdir)
assert.NoError(t, err)
@@ -550,11 +518,7 @@ func TestEnvGetEnvInfoNoProcCPUInfo(t *testing.T) {
}
func TestEnvGetEnvInfoNoProcVersion(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
configFile, config, err := makeRuntimeConfig(tmpdir)
assert.NoError(t, err)
@@ -570,11 +534,7 @@ func TestEnvGetEnvInfoNoProcVersion(t *testing.T) {
}
func TestEnvGetRuntimeInfo(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
configFile, config, err := makeRuntimeConfig(tmpdir)
assert.NoError(t, err)
@@ -587,11 +547,7 @@ func TestEnvGetRuntimeInfo(t *testing.T) {
}
func TestEnvGetAgentInfo(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
_, config, err := makeRuntimeConfig(tmpdir)
assert.NoError(t, err)
@@ -726,11 +682,7 @@ func testEnvShowJSONSettings(t *testing.T, tmpdir string, tmpfile *os.File) erro
}
func TestEnvShowSettings(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
tmpfile, err := os.CreateTemp("", "envShowSettings-")
assert.NoError(t, err)
@@ -747,11 +699,7 @@ func TestEnvShowSettings(t *testing.T) {
}
func TestEnvShowSettingsInvalidFile(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
tmpfile, err := os.CreateTemp("", "envShowSettings-")
assert.NoError(t, err)
@@ -771,11 +719,7 @@ func TestEnvShowSettingsInvalidFile(t *testing.T) {
}
func TestEnvHandleSettings(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
configFile, config, err := makeRuntimeConfig(tmpdir)
assert.NoError(t, err)
@@ -805,9 +749,7 @@ func TestEnvHandleSettings(t *testing.T) {
func TestEnvHandleSettingsInvalidParams(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
configFile, _, err := makeRuntimeConfig(tmpdir)
assert.NoError(err)
@@ -859,11 +801,7 @@ func TestEnvHandleSettingsInvalidRuntimeConfigType(t *testing.T) {
}
func TestEnvCLIFunction(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
configFile, config, err := makeRuntimeConfig(tmpdir)
assert.NoError(t, err)
@@ -904,11 +842,7 @@ func TestEnvCLIFunction(t *testing.T) {
}
func TestEnvCLIFunctionFail(t *testing.T) {
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
configFile, config, err := makeRuntimeConfig(tmpdir)
assert.NoError(t, err)
@@ -940,9 +874,7 @@ func TestEnvCLIFunctionFail(t *testing.T) {
func TestGetHypervisorInfo(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
_, config, err := makeRuntimeConfig(tmpdir)
assert.NoError(err)
@@ -962,9 +894,7 @@ func TestGetHypervisorInfo(t *testing.T) {
func TestGetHypervisorInfoSocket(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
_, config, err := makeRuntimeConfig(tmpdir)
assert.NoError(err)


@@ -0,0 +1,122 @@
// Copyright (c) 2022 Apple Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
package main
import (
"fmt"
"io/ioutil"
containerdshim "github.com/kata-containers/kata-containers/src/runtime/pkg/containerd-shim-v2"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
"github.com/kata-containers/kata-containers/src/runtime/pkg/utils/shimclient"
"github.com/urfave/cli"
)
var (
sandboxID string
isIPv6 bool
)
var iptablesSubCmds = []cli.Command{
getIPTablesCommand,
setIPTablesCommand,
}
var kataIPTablesCommand = cli.Command{
Name: "iptables",
Usage: "get or set iptables within the Kata Containers guest",
Subcommands: iptablesSubCmds,
Action: func(context *cli.Context) {
cli.ShowSubcommandHelp(context)
},
}
var getIPTablesCommand = cli.Command{
Name: "get",
Usage: "get iptables from the Kata Containers guest",
Flags: []cli.Flag{
cli.StringFlag{
Name: "sandbox-id",
Usage: "the target sandbox for getting the iptables",
Required: true,
Destination: &sandboxID,
},
cli.BoolFlag{
Name: "v6",
Usage: "indicate we're requesting ipv6 iptables",
Destination: &isIPv6,
},
},
Action: func(c *cli.Context) error {
// verify sandbox exists:
if err := katautils.VerifyContainerID(sandboxID); err != nil {
return err
}
url := containerdshim.IPTablesUrl
if isIPv6 {
url = containerdshim.IP6TablesUrl
}
body, err := shimclient.DoGet(sandboxID, defaultTimeout, url)
if err != nil {
return err
}
fmt.Println(string(body))
return nil
},
}
var setIPTablesCommand = cli.Command{
Name: "set",
Usage: "set iptables in a specifc Kata Containers guest based on file",
Flags: []cli.Flag{
cli.StringFlag{
Name: "sandbox-id",
Usage: "the target sandbox for setting the iptables",
Required: true,
Destination: &sandboxID,
},
cli.BoolFlag{
Name: "v6",
Usage: "indicate we're requesting ipv6 iptables",
Destination: &isIPv6,
},
},
Action: func(c *cli.Context) error {
iptablesFile := c.Args().Get(0)
// verify sandbox exists:
if err := katautils.VerifyContainerID(sandboxID); err != nil {
return err
}
// verify iptables were provided:
if iptablesFile == "" {
return fmt.Errorf("iptables file not provided")
}
if !katautils.FileExists(iptablesFile) {
return fmt.Errorf("iptables file does not exist: %s", iptablesFile)
}
// Read file into buffer, and make request to the appropriate shim
buf, err := ioutil.ReadFile(iptablesFile)
if err != nil {
return err
}
url := containerdshim.IPTablesUrl
if isIPv6 {
url = containerdshim.IP6TablesUrl
}
if err = shimclient.DoPut(sandboxID, defaultTimeout, url, "application/octet-stream", buf); err != nil {
return fmt.Errorf("Error observed when making iptables-set request(%s): %s", iptablesFile, err)
}
return nil
},
}
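A usage sketch of the new subcommand, as wired into `kata-runtime` below (the sandbox ID and file name are illustrative):

```bash
# dump the guest's iptables (add --v6 for ip6tables)
$ kata-runtime iptables get --sandbox-id <sandbox-id> > rules.txt
# push the (possibly edited) rules back into the guest
$ kata-runtime iptables set --sandbox-id <sandbox-id> rules.txt
```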


@@ -7,10 +7,11 @@ package main
import (
"encoding/json"
"fmt"
"net/url"
containerdshim "github.com/kata-containers/kata-containers/src/runtime/pkg/containerd-shim-v2"
"github.com/kata-containers/kata-containers/src/runtime/pkg/direct-volume"
volume "github.com/kata-containers/kata-containers/src/runtime/pkg/direct-volume"
"github.com/kata-containers/kata-containers/src/runtime/pkg/utils/shimclient"
"github.com/urfave/cli"
@@ -54,7 +55,10 @@ var addCommand = cli.Command{
},
},
Action: func(c *cli.Context) error {
return volume.Add(volumePath, mountInfo)
if err := volume.Add(volumePath, mountInfo); err != nil {
return cli.NewExitError(err.Error(), 1)
}
return nil
},
}
@@ -69,7 +73,10 @@ var removeCommand = cli.Command{
},
},
Action: func(c *cli.Context) error {
return volume.Remove(volumePath)
if err := volume.Remove(volumePath); err != nil {
return cli.NewExitError(err.Error(), 1)
}
return nil
},
}
@@ -83,13 +90,14 @@ var statsCommand = cli.Command{
Destination: &volumePath,
},
},
Action: func(c *cli.Context) (string, error) {
Action: func(c *cli.Context) error {
stats, err := Stats(volumePath)
if err != nil {
return "", err
return cli.NewExitError(err.Error(), 1)
}
return string(stats), nil
fmt.Println(string(stats))
return nil
},
}
@@ -109,7 +117,10 @@ var resizeCommand = cli.Command{
},
},
Action: func(c *cli.Context) error {
return Resize(volumePath, size)
if err := Resize(volumePath, size); err != nil {
return cli.NewExitError(err.Error(), 1)
}
return nil
},
}
@@ -119,8 +130,14 @@ func Stats(volumePath string) ([]byte, error) {
if err != nil {
return nil, err
}
urlSafeDevicePath := url.PathEscape(volumePath)
body, err := shimclient.DoGet(sandboxId, defaultTimeout, containerdshim.DirectVolumeStatUrl+"/"+urlSafeDevicePath)
volumeMountInfo, err := volume.VolumeMountInfo(volumePath)
if err != nil {
return nil, err
}
urlSafeDevicePath := url.PathEscape(volumeMountInfo.Device)
body, err := shimclient.DoGet(sandboxId, defaultTimeout,
fmt.Sprintf("%s?%s=%s", containerdshim.DirectVolumeStatUrl, containerdshim.DirectVolumePathKey, urlSafeDevicePath))
if err != nil {
return nil, err
}
@@ -133,13 +150,18 @@ func Resize(volumePath string, size uint64) error {
if err != nil {
return err
}
volumeMountInfo, err := volume.VolumeMountInfo(volumePath)
if err != nil {
return err
}
resizeReq := containerdshim.ResizeRequest{
VolumePath: volumePath,
VolumePath: volumeMountInfo.Device,
Size: size,
}
encoded, err := json.Marshal(resizeReq)
if err != nil {
return err
}
return shimclient.DoPost(sandboxId, defaultTimeout, containerdshim.DirectVolumeResizeUrl, encoded)
return shimclient.DoPost(sandboxId, defaultTimeout, containerdshim.DirectVolumeResizeUrl, "application/json", encoded)
}
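With this change, stats and resize requests identify the volume by its backing device from the stored mount info rather than by the raw volume path. A usage sketch (the `--volume-path` flag name is inferred from the `volumePath` destination above, and the path is illustrative):
```bash
$ kata-runtime direct-volume stats --volume-path /path/to/my/volume
```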


@@ -125,6 +125,7 @@ var runtimeCommands = []cli.Command{
kataMetricsCLICommand,
factoryCLICommand,
kataVolumeCommand,
kataIPTablesCommand,
}
// runtimeBeforeSubcommands is the function to run before command-line


@@ -258,14 +258,12 @@ func TestMainBeforeSubCommands(t *testing.T) {
func TestMainBeforeSubCommandsInvalidLogFile(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "katatest")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
logFile := filepath.Join(tmpdir, "log")
// create the file as the wrong type to force a failure
err = os.MkdirAll(logFile, testDirMode)
err := os.MkdirAll(logFile, testDirMode)
assert.NoError(err)
set := flag.NewFlagSet("", 0)
@@ -281,9 +279,7 @@ func TestMainBeforeSubCommandsInvalidLogFile(t *testing.T) {
func TestMainBeforeSubCommandsInvalidLogFormat(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "katatest")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
logFile := filepath.Join(tmpdir, "log")
@@ -302,7 +298,7 @@ func TestMainBeforeSubCommandsInvalidLogFormat(t *testing.T) {
ctx := createCLIContext(set)
err = beforeSubcommands(ctx)
err := beforeSubcommands(ctx)
assert.Error(err)
assert.NotNil(kataLog.Logger.Out)
}
@@ -310,9 +306,7 @@ func TestMainBeforeSubCommandsInvalidLogFormat(t *testing.T) {
func TestMainBeforeSubCommandsLoadConfigurationFail(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "katatest")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
logFile := filepath.Join(tmpdir, "log")
configFile := filepath.Join(tmpdir, "config")
@@ -345,9 +339,7 @@ func TestMainBeforeSubCommandsLoadConfigurationFail(t *testing.T) {
func TestMainBeforeSubCommandsShowCCConfigPaths(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "katatest")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
set := flag.NewFlagSet("", 0)
set.Bool("show-default-config-paths", true, "")
@@ -409,9 +401,7 @@ func TestMainBeforeSubCommandsShowCCConfigPaths(t *testing.T) {
func TestMainFatal(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "katatest")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
var exitStatus int
savedExitFunc := exitFunc
@@ -633,9 +623,7 @@ func TestMainCreateRuntime(t *testing.T) {
func TestMainVersionPrinter(t *testing.T) {
assert := assert.New(t)
tmpdir, err := os.MkdirTemp("", "katatest")
assert.NoError(err)
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
savedOutputFile := defaultOutputFile


@@ -17,18 +17,14 @@ import (
)
func TestFileExists(t *testing.T) {
dir, err := os.MkdirTemp("", "katatest")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
dir := t.TempDir()
file := filepath.Join(dir, "foo")
assert.False(t, katautils.FileExists(file),
fmt.Sprintf("File %q should not exist", file))
err = createEmptyFile(file)
err := createEmptyFile(file)
if err != nil {
t.Fatal(err)
}
@@ -54,14 +50,10 @@ func TestGetKernelVersion(t *testing.T) {
{validContents, validVersion, false},
}
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
subDir := filepath.Join(tmpdir, "subdir")
err = os.MkdirAll(subDir, testDirMode)
err := os.MkdirAll(subDir, testDirMode)
assert.NoError(t, err)
_, err = getKernelVersion()
@@ -103,11 +95,7 @@ func TestGetDistroDetails(t *testing.T) {
const unknown = "<<unknown>>"
tmpdir, err := os.MkdirTemp("", "")
if err != nil {
panic(err)
}
defer os.RemoveAll(tmpdir)
tmpdir := t.TempDir()
testOSRelease := filepath.Join(tmpdir, "os-release")
testOSReleaseClr := filepath.Join(tmpdir, "os-release-clr")
@@ -131,7 +119,7 @@ VERSION_ID="%s"
`, nonClrExpectedName, nonClrExpectedVersion)
subDir := filepath.Join(tmpdir, "subdir")
err = os.MkdirAll(subDir, testDirMode)
err := os.MkdirAll(subDir, testDirMode)
assert.NoError(t, err)
// override


@@ -125,7 +125,8 @@ virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@
#
# Format example:
# ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"]
#
# Examples:
# Set virtiofsd log level to debug: ["-o", "log_level=debug"] or ["-d"]
# see `virtiofsd -h` for possible options.
virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@
@@ -179,6 +180,78 @@ block_device_driver = "virtio-blk"
# but it will not abort container execution.
#guest_hook_path = "/usr/share/oci/hooks"
#
# These options are related to the network rate limiter at the VMM level, and
# are based on the Cloud Hypervisor I/O throttling. They are disabled by
# default and we strongly advise users to refer to the official Cloud
# Hypervisor documentation for a better understanding of its internals:
# https://github.com/cloud-hypervisor/cloud-hypervisor/blob/main/docs/io_throttling.md
#
# Bandwidth rate limiter options
#
# net_rate_limiter_bw_max_rate controls network I/O bandwidth (size in bits/sec
# for SB/VM).
# The same value is used for inbound and outbound bandwidth.
# Default 0-sized value means unlimited rate.
#net_rate_limiter_bw_max_rate = 0
#
# net_rate_limiter_bw_one_time_burst increases the initial max rate and this
# initial extra credit does *NOT* affect the overall limit and can be used for
# an *initial* burst of data.
# This is *optional* and only takes effect if net_rate_limiter_bw_max_rate is
# set to a non-zero value.
#net_rate_limiter_bw_one_time_burst = 0
#
# Operation rate limiter options
#
# net_rate_limiter_ops_max_rate controls the network I/O operations rate (in
# ops/sec for SB/VM).
# The same value is used for inbound and outbound bandwidth.
# Default 0-sized value means unlimited rate.
#net_rate_limiter_ops_max_rate = 0
#
# net_rate_limiter_ops_one_time_burst increases the initial max rate and this
# initial extra credit does *NOT* affect the overall limit and can be used for
# an *initial* burst of data.
# This is *optional* and only takes effect if net_rate_limiter_ops_max_rate is
# set to a non-zero value.
#net_rate_limiter_ops_one_time_burst = 0
#
# These options are related to the disk rate limiter at the VMM level, and
# are based on the Cloud Hypervisor I/O throttling. They are disabled by
# default and we strongly advise users to refer to the official Cloud
# Hypervisor documentation for a better understanding of its internals:
# https://github.com/cloud-hypervisor/cloud-hypervisor/blob/main/docs/io_throttling.md
#
# Bandwidth rate limiter options
#
# disk_rate_limiter_bw_max_rate controls disk I/O bandwidth (size in bits/sec
# for SB/VM).
# The same value is used for inbound and outbound bandwidth.
# Default 0-sized value means unlimited rate.
#disk_rate_limiter_bw_max_rate = 0
#
# disk_rate_limiter_bw_one_time_burst increases the initial max rate and this
# initial extra credit does *NOT* affect the overall limit and can be used for
# an *initial* burst of data.
# This is *optional* and only takes effect if disk_rate_limiter_bw_max_rate is
# set to a non-zero value.
#disk_rate_limiter_bw_one_time_burst = 0
#
# Operation rate limiter options
#
# disk_rate_limiter_ops_max_rate controls the disk I/O operations rate (in
# ops/sec for SB/VM).
# The same value is used for inbound and outbound bandwidth.
# Default 0-sized value means unlimited rate.
#disk_rate_limiter_ops_max_rate = 0
#
# disk_rate_limiter_ops_one_time_burst increases the initial max rate and this
# initial extra credit does *NOT* affect the overall limit and can be used for
# an *initial* burst of data.
# This is *optional* and only takes effect if disk_rate_limiter_ops_max_rate is
# set to a non-zero value.
#disk_rate_limiter_ops_one_time_burst = 0
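#
# Example (illustrative values, not defaults): cap each sandbox to roughly
# 100 Mbit/s of network bandwidth and 1000 disk operations per second:
#net_rate_limiter_bw_max_rate = 104857600
#disk_rate_limiter_ops_max_rate = 1000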
[agent.@PROJECT_TYPE@]
# If enabled, make the agent display debug-level messages.
# (default: disabled)
@@ -324,3 +397,30 @@ experimental=@DEFAULTEXPFEATURES@
# If enabled, user can run pprof tools with shim v2 process through kata-monitor.
# (default: false)
# enable_pprof = true
# WARNING: None of the options in the following section have been implemented yet.
# This section was added as a placeholder. DO NOT USE IT!
[image]
# Container image service.
#
# Offload the CRI image management service to the Kata agent.
# (default: false)
#service_offload = true
# Container image decryption keys provisioning.
# Applies only if service_offload is true.
# Keys can be provisioned locally (e.g. through a special command or
# a local file) or remotely (usually after the guest is remotely attested).
# The provision setting is a complete URL that lets the Kata agent decide
# which method to use in order to fetch the keys.
#
# Keys can be stored in a local file, in a measured and attested initrd:
#provision=data:///local/key/file
#
# Keys can be fetched through a special command or binary from the
# initrd (guest) image, e.g. a firmware call:
#provision=file:///path/to/bin/fetcher/in/guest
#
# Keys can be remotely provisioned. The Kata agent fetches them from e.g.
# a HTTPS URL:
#provision=https://my-key-broker.foo/tenant/<tenant-id>


@@ -168,6 +168,8 @@ virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@
#
# Format example:
# ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"]
# Examples:
# Set virtiofsd log level to debug: ["-o", "log_level=debug"] or ["-d"]
#
# see `virtiofsd -h` for possible options.
virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@
@@ -387,6 +389,9 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
# be default_memory.
#enable_guest_swap = true
# Use a legacy serial device for the guest console if available and implemented for the architecture. Default false
#use_legacy_serial = true
[factory]
# VM templating support. Once enabled, new VMs are created from template
# using vm cloning. They will share the same initial kernel, initramfs and

src/runtime/go-test.sh Executable file

@@ -0,0 +1,167 @@
#!/bin/bash
#
# Copyright (c) 2017-2018 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
set -e
script_name=${0##*/}
typeset -A long_options
long_options=(
[help]="Show usage"
[package:]="Specify test package to run"
)
# Set up go test flags
go_test_flags="${KATA_GO_TEST_FLAGS}"
if [ -z "$go_test_flags" ]; then
# KATA_GO_TEST_TIMEOUT can be set to any value accepted by
# "go test -timeout X"
go_test_flags="-timeout ${KATA_GO_TEST_TIMEOUT:-30s}"
# -race flag is not supported on s390x
[ "$(go env GOARCH)" != "s390x" ] && go_test_flags+=" -race"
# s390x requires special linker flags
[ "$(go env GOARCH)" = s390x ] && go_test_flags+=" -ldflags '-extldflags -Wl,--s390-pgste'"
fi
# The "master" coverage file that contains the coverage results for
# all packages run under all scenarios.
test_coverage_file="coverage.txt"
# Temporary coverage file created for a "go test" run. The results in
# this file will be added to the master coverage file.
tmp_coverage_file="${test_coverage_file}.tmp"
warn()
{
local msg="$*"
echo >&2 "WARNING: $msg"
}
usage()
{
cat <<EOF
Usage: $script_name [options]
Options:
EOF
local option
local description
local long_option_names="${!long_options[@]}"
# Sort space-separated list by converting to newline separated list
# and back again.
long_option_names=$(echo "$long_option_names"|tr ' ' '\n'|sort|tr '\n' ' ')
# Display long options
for option in ${long_option_names}
do
description=${long_options[$option]}
# Remove any trailing colon which is for getopt(1) alone.
option=$(echo "$option"|sed 's/:$//g')
printf " --%-10.10s # %s\n" "$option" "$description"
done
}
# Run a command as either root or the current user (which might still be root).
#
# If the first argument is "root", run using sudo, else run as the current
# user. All arguments after the first will be treated as the command to run.
run_as_user()
{
local user="$1"
shift
local cmd=$*
if [ "$user" = root ]; then
# use a shell to ensure PATH is correct.
sudo -E PATH="$PATH" sh -c "$cmd"
else
eval "$cmd"
fi
}
# Test a single golang package
test_go_package()
{
local -r pkg="$1"
local -r user="$2"
printf "INFO: Running 'go test' as %s user on package '%s' with flags '%s'\n" \
"$user" "$pkg" "$go_test_flags"
run_as_user "$user" go test "$go_test_flags" -covermode=atomic -coverprofile=$tmp_coverage_file "$pkg"
# Merge test results into the master coverage file.
run_as_user "$user" tail -n +2 "$tmp_coverage_file" >> "$test_coverage_file"
rm -f "$tmp_coverage_file"
}
# Run all tests and generate a test coverage file.
test_coverage()
{
echo "mode: atomic" > "$test_coverage_file"
users="current"
if [ "$(id -u)" -eq 0 ]; then
warn "Already running as root so will not re-run tests as non-root user."
warn "As a result, only a subset of tests will be run"
warn "(run this script as a non-privileged to ensure all tests are run)."
else
# Run the unit-tests *twice* (since some must run as
# root and others must run as non-root), combining the
# resulting test coverage files.
users+=" root"
fi
echo "INFO: Currently running as user '$(id -un)'"
for user in $users; do
test_go_package "$package" "$user"
done
}
main()
{
local long_option_names="${!long_options[@]}"
local args
args=$(getopt \
-n "$script_name" \
-a \
--options="h" \
--longoptions="$long_option_names" \
-- "$@") \
|| { usage >&2; exit 1; }
package="./..."
eval set -- "$args"
while [ $# -gt 1 ]
do
case "$1" in
-h|--help) usage; exit 0 ;;
--package) package="$2"; shift 2;;
--) shift; break ;;
esac
shift
done
test_coverage
}
main "$@"


@@ -50,7 +50,6 @@ func TestCreateSandboxSuccess(t *testing.T) {
}()
tmpdir, bundlePath, ociConfigFile := ktu.SetupOCIConfigFile(t)
-// defer os.RemoveAll(tmpdir)
runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true)
assert.NoError(err)
@@ -99,7 +98,6 @@ func TestCreateSandboxFail(t *testing.T) {
assert := assert.New(t)
tmpdir, bundlePath, ociConfigFile := ktu.SetupOCIConfigFile(t)
-defer os.RemoveAll(tmpdir)
runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true)
assert.NoError(err)
@@ -137,7 +135,6 @@ func TestCreateSandboxConfigFail(t *testing.T) {
assert := assert.New(t)
tmpdir, bundlePath, _ := ktu.SetupOCIConfigFile(t)
-defer os.RemoveAll(tmpdir)
runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true)
assert.NoError(err)
@@ -187,7 +184,6 @@ func TestCreateContainerSuccess(t *testing.T) {
}
tmpdir, bundlePath, ociConfigFile := ktu.SetupOCIConfigFile(t)
-defer os.RemoveAll(tmpdir)
runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true)
assert.NoError(err)
@@ -227,7 +223,6 @@ func TestCreateContainerFail(t *testing.T) {
assert := assert.New(t)
tmpdir, bundlePath, ociConfigFile := ktu.SetupOCIConfigFile(t)
-defer os.RemoveAll(tmpdir)
runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true)
assert.NoError(err)
@@ -278,7 +273,6 @@ func TestCreateContainerConfigFail(t *testing.T) {
}()
tmpdir, bundlePath, ociConfigFile := ktu.SetupOCIConfigFile(t)
-defer os.RemoveAll(tmpdir)
runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true)
assert.NoError(err)
@@ -382,9 +376,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err err
func TestCreateLoadRuntimeConfig(t *testing.T) {
assert := assert.New(t)
-tmpdir, err := os.MkdirTemp("", "")
-assert.NoError(err)
-defer os.RemoveAll(tmpdir)
+tmpdir := t.TempDir()
config, err := createAllRuntimeConfigFiles(tmpdir, "qemu")
assert.NoError(err)
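The pattern in the hunks above recurs throughout this diff: `t.TempDir()` (Go 1.15+) replaces the manual `os.MkdirTemp` plus `defer os.RemoveAll` pair, because the testing package registers the cleanup itself and fails the test if it cannot create the directory. A minimal sketch of the two styles side by side, using a hypothetical test not taken from this diff:

package example

import (
	"os"
	"path/filepath"
	"testing"
)

func TestTempDirPattern(t *testing.T) {
	// Old pattern: manual creation and cleanup. Error-prone if the test
	// forgets the defer or exits before registering it.
	old, err := os.MkdirTemp("", "kata-")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(old)

	// New pattern: creation errors fail the test, and removal is
	// registered automatically for when the test finishes.
	tmpdir := t.TempDir()
	if err := os.WriteFile(filepath.Join(tmpdir, "f"), []byte("x"), 0o600); err != nil {
		t.Fatal(err)
	}
}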


@@ -7,7 +7,6 @@
package containerdshim
import (
"os"
"testing"
taskAPI "github.com/containerd/containerd/runtime/v2/task"
@@ -25,8 +24,8 @@ func TestDeleteContainerSuccessAndFail(t *testing.T) {
MockID: testSandboxID,
}
-rootPath, bundlePath, _ := ktu.SetupOCIConfigFile(t)
-defer os.RemoveAll(rootPath)
+_, bundlePath, _ := ktu.SetupOCIConfigFile(t)
_, err := compatoci.ParseConfigJSON(bundlePath)
assert.NoError(err)


@@ -10,6 +10,7 @@ import (
"io"
"os"
sysexec "os/exec"
+goruntime "runtime"
"sync"
"syscall"
"time"
@@ -31,6 +32,7 @@ import (
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
"github.com/kata-containers/kata-containers/src/runtime/pkg/utils"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/compatoci"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
@@ -234,9 +236,19 @@ func (s *service) StartShim(ctx context.Context, opts cdshim.StartOpts) (_ strin
cmd.ExtraFiles = append(cmd.ExtraFiles, f)
+goruntime.LockOSThread()
+if os.Getenv("SCHED_CORE") != "" {
+if err := utils.Create(utils.ProcessGroup); err != nil {
+return "", errors.Wrap(err, "enable sched core support")
+}
+}
if err := cmd.Start(); err != nil {
return "", err
}
+goruntime.UnlockOSThread()
defer func() {
if retErr != nil {
cmd.Process.Kill()
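Background for the `SCHED_CORE` block above: core scheduling (Linux 5.14+) tags tasks with a cookie, and only tasks sharing a cookie may share an SMT core, so the shim and the hypervisor it spawns are isolated from unrelated tenants. The body of `utils.Create(utils.ProcessGroup)` is not shown in this diff; a plausible reading, sketched below under that assumption with constant values copied from include/uapi/linux/prctl.h, is a `PR_SCHED_CORE` prctl applied to the shim's process group:

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

// Values mirror include/uapi/linux/prctl.h; not the runtime's actual utils package.
const (
	prSchedCore       = 62 // PR_SCHED_CORE
	prSchedCoreCreate = 1  // PR_SCHED_CORE_CREATE
	pidTypePGID       = 2  // PIDTYPE_PGID: apply to the whole process group
)

func main() {
	// pid 0 selects the caller; PIDTYPE_PGID extends the new cookie to
	// every task in its process group. Children inherit the cookie across
	// fork, which is why the shim creates it before cmd.Start().
	if err := unix.Prctl(prSchedCore, prSchedCoreCreate, 0, pidTypePGID, 0); err != nil {
		fmt.Println("enable sched core support:", err)
	}
}

The `LockOSThread`/`UnlockOSThread` pair presumably keeps the prctl and the subsequent `cmd.Start()` on the same OS thread, so the cookie is in place before the child process is forked.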


@@ -41,8 +41,7 @@ func TestServiceCreate(t *testing.T) {
assert := assert.New(t)
-tmpdir, bundleDir, _ := ktu.SetupOCIConfigFile(t)
-defer os.RemoveAll(tmpdir)
+_, bundleDir, _ := ktu.SetupOCIConfigFile(t)
ctx := context.Background()


@@ -29,11 +29,17 @@ import (
"github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go"
"github.com/prometheus/common/expfmt"
"github.com/sirupsen/logrus"
)
const (
+DirectVolumePathKey = "path"
+AgentUrl = "/agent-url"
DirectVolumeStatUrl = "/direct-volume/stats"
DirectVolumeResizeUrl = "/direct-volume/resize"
+IPTablesUrl = "/iptables"
+IP6TablesUrl = "/ip6tables"
+MetricsUrl = "/metrics"
)
var (
@@ -139,7 +145,16 @@ func decodeAgentMetrics(body string) []*dto.MetricFamily {
}
func (s *service) serveVolumeStats(w http.ResponseWriter, r *http.Request) {
-volumePath, err := url.PathUnescape(strings.TrimPrefix(r.URL.Path, DirectVolumeStatUrl))
+val := r.URL.Query().Get(DirectVolumePathKey)
+if val == "" {
+msg := fmt.Sprintf("Required parameter %s not found", DirectVolumePathKey)
+shimMgtLog.Info(msg)
+w.WriteHeader(http.StatusBadRequest)
+w.Write([]byte(msg))
+return
+}
+volumePath, err := url.PathUnescape(val)
if err != nil {
shimMgtLog.WithError(err).Error("failed to unescape the volume stat url path")
w.WriteHeader(http.StatusInternalServerError)
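The net effect of this hunk: the volume path moves out of the URL path, where an embedded `/` confuses routing, into the `path` query parameter named by `DirectVolumePathKey`. A small sketch of how a client builds the new URL:

package main

import (
	"fmt"
	"net/url"
)

func main() {
	// Query-encode the volume path; previously it was appended to the URL
	// path itself, which an unescaped "/" would mangle.
	u := url.URL{Path: "/direct-volume/stats"} // DirectVolumeStatUrl
	q := u.Query()
	q.Set("path", "/a/b/c") // DirectVolumePathKey = "path"
	u.RawQuery = q.Encode()
	fmt.Println(u.String()) // /direct-volume/stats?path=%2Fa%2Fb%2Fc
}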
@@ -184,6 +199,48 @@ func (s *service) serveVolumeResize(w http.ResponseWriter, r *http.Request) {
w.Write([]byte(""))
}
+func (s *service) ip6TablesHandler(w http.ResponseWriter, r *http.Request) {
+s.genericIPTablesHandler(w, r, true)
+}
+func (s *service) ipTablesHandler(w http.ResponseWriter, r *http.Request) {
+s.genericIPTablesHandler(w, r, false)
+}
+func (s *service) genericIPTablesHandler(w http.ResponseWriter, r *http.Request, isIPv6 bool) {
+logger := shimMgtLog.WithFields(logrus.Fields{"handler": "iptables", "ipv6": isIPv6})
+switch r.Method {
+case http.MethodPut:
+body, err := ioutil.ReadAll(r.Body)
+if err != nil {
+logger.WithError(err).Error("failed to read request body")
+w.WriteHeader(http.StatusInternalServerError)
+w.Write([]byte(err.Error()))
+return
+}
+if err = s.sandbox.SetIPTables(context.Background(), isIPv6, body); err != nil {
+logger.WithError(err).Error("failed to set IPTables")
+w.WriteHeader(http.StatusInternalServerError)
+w.Write([]byte(err.Error()))
+}
+w.Write([]byte(""))
+case http.MethodGet:
+buf, err := s.sandbox.GetIPTables(context.Background(), isIPv6)
+if err != nil {
+logger.WithError(err).Error("failed to get IPTables")
+w.WriteHeader(http.StatusInternalServerError)
+w.Write([]byte(err.Error()))
+}
+w.Write(buf)
+default:
+w.WriteHeader(http.StatusNotImplemented)
+return
+}
+}
func (s *service) startManagementServer(ctx context.Context, ociSpec *specs.Spec) {
// the metrics socket lives under the sandbox's bundle path
metricsAddress := SocketAddress(s.id)
@@ -204,10 +261,12 @@ func (s *service) startManagementServer(ctx context.Context, ociSpec *specs.Spec
// bind handler
m := http.NewServeMux()
m.Handle("/metrics", http.HandlerFunc(s.serveMetrics))
m.Handle("/agent-url", http.HandlerFunc(s.agentURL))
m.Handle(MetricsUrl, http.HandlerFunc(s.serveMetrics))
m.Handle(AgentUrl, http.HandlerFunc(s.agentURL))
m.Handle(DirectVolumeStatUrl, http.HandlerFunc(s.serveVolumeStats))
m.Handle(DirectVolumeResizeUrl, http.HandlerFunc(s.serveVolumeResize))
+m.Handle(IPTablesUrl, http.HandlerFunc(s.ipTablesHandler))
+m.Handle(IP6TablesUrl, http.HandlerFunc(s.ip6TablesHandler))
s.mountPprofHandle(m, ociSpec)
// register shim metrics
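A sketch of exercising the new endpoints from the host. The management server listens on a per-sandbox unix socket (`SocketAddress(s.id)` above); the socket path below is an assumption for illustration, and in practice `kata-runtime iptables` and `shimclient` resolve it from the sandbox ID:

package main

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"net"
	"net/http"
)

func main() {
	// Hypothetical socket path; the real one is derived from the sandbox ID.
	const sock = "/run/vc/sbs/<sandbox-id>/shim-monitor.sock"

	// Standard trick for HTTP over a unix socket: dial the socket directly
	// and use a dummy host in the URL.
	client := &http.Client{Transport: &http.Transport{
		DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) {
			return (&net.Dialer{}).DialContext(ctx, "unix", sock)
		},
	}}

	// GET the guest's current rules (use /ip6tables for IPv6).
	resp, err := client.Get("http://shim/iptables")
	if err != nil {
		panic(err)
	}
	rules, _ := io.ReadAll(resp.Body)
	resp.Body.Close()
	fmt.Printf("%s", rules)

	// PUT a rule payload back; the handler forwards the body to the guest
	// agent unmodified.
	req, _ := http.NewRequest(http.MethodPut, "http://shim/iptables", bytes.NewReader(rules))
	if resp, err := client.Do(req); err == nil {
		resp.Body.Close()
	}
}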


@@ -26,9 +26,7 @@ func TestNewTtyIOFifoReopen(t *testing.T) {
assert := assert.New(t)
ctx := context.TODO()
-testDir, err := os.MkdirTemp("", "kata-")
-assert.NoError(err)
-defer os.RemoveAll(testDir)
+testDir := t.TempDir()
fifoPath, err := os.MkdirTemp(testDir, "fifo-path-")
assert.NoError(err)
@@ -104,9 +102,7 @@ func TestIoCopy(t *testing.T) {
testBytes2 := []byte("Test2")
testBytes3 := []byte("Test3")
-testDir, err := os.MkdirTemp("", "kata-")
-assert.NoError(err)
-defer os.RemoveAll(testDir)
+testDir := t.TempDir()
fifoPath, err := os.MkdirTemp(testDir, "fifo-path-")
assert.NoError(err)


@@ -78,7 +78,7 @@ func wait(ctx context.Context, s *service, c *container, execID string) (int32,
shimLog.WithField("sandbox", s.sandbox.ID()).Error("failed to delete sandbox")
}
} else {
-if _, err = s.sandbox.StopContainer(ctx, c.id, false); err != nil {
+if _, err = s.sandbox.StopContainer(ctx, c.id, true); err != nil {
shimLog.WithError(err).WithField("container", c.id).Warn("stop container failed")
}
}


@@ -6,17 +6,36 @@
package volume
import (
b64 "encoding/base64"
"encoding/json"
"errors"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
)
const (
mountInfoFileName = "mountInfo.json"
+FSGroupMetadataKey = "fsGroup"
+FSGroupChangePolicyMetadataKey = "fsGroupChangePolicy"
)
+// FSGroupChangePolicy holds policies that will be used for applying fsGroup to a volume.
+// This type and the allowed values are tracking the PodFSGroupChangePolicy defined in
+// https://github.com/kubernetes/kubernetes/blob/master/staging/src/k8s.io/api/core/v1/types.go
+// It is up to the client using the direct-assigned volume feature (e.g. CSI drivers) to determine
+// the optimal setting for this change policy (i.e. from Pod spec or assuming volume ownership
+// based on the storage offering).
+type FSGroupChangePolicy string
+const (
+// FSGroupChangeAlways indicates that volume's ownership should always be changed.
+FSGroupChangeAlways FSGroupChangePolicy = "Always"
+// FSGroupChangeOnRootMismatch indicates that volume's ownership will be changed
+// only when ownership of root directory does not match with the desired group id.
+FSGroupChangeOnRootMismatch FSGroupChangePolicy = "OnRootMismatch"
+)
var kataDirectVolumeRootPath = "/run/kata-containers/shared/direct-volumes"
@@ -37,19 +56,20 @@ type MountInfo struct {
// Add writes the mount info of a direct volume into a filesystem path known to Kata Container.
func Add(volumePath string, mountInfo string) error {
-volumeDir := filepath.Join(kataDirectVolumeRootPath, volumePath)
+volumeDir := filepath.Join(kataDirectVolumeRootPath, b64.URLEncoding.EncodeToString([]byte(volumePath)))
stat, err := os.Stat(volumeDir)
-if err != nil && !errors.Is(err, os.ErrNotExist) {
-return err
-}
-if stat != nil && !stat.IsDir() {
-return fmt.Errorf("%s should be a directory", volumeDir)
-}
-if errors.Is(err, os.ErrNotExist) {
+if err != nil {
+if !errors.Is(err, os.ErrNotExist) {
+return err
+}
if err := os.MkdirAll(volumeDir, 0700); err != nil {
return err
}
}
+if stat != nil && !stat.IsDir() {
+return fmt.Errorf("%s should be a directory", volumeDir)
+}
var deserialized MountInfo
if err := json.Unmarshal([]byte(mountInfo), &deserialized); err != nil {
return err
@@ -60,14 +80,12 @@ func Add(volumePath string, mountInfo string) error {
// Remove deletes the direct volume path including all the files inside it.
func Remove(volumePath string) error {
-// Find the base of the volume path to delete the whole volume path
-base := strings.SplitN(volumePath, string(os.PathSeparator), 2)[0]
-return os.RemoveAll(filepath.Join(kataDirectVolumeRootPath, base))
+return os.RemoveAll(filepath.Join(kataDirectVolumeRootPath, b64.URLEncoding.EncodeToString([]byte(volumePath))))
}
// VolumeMountInfo retrieves the mount info of a direct volume.
func VolumeMountInfo(volumePath string) (*MountInfo, error) {
-mountInfoFilePath := filepath.Join(kataDirectVolumeRootPath, volumePath, mountInfoFileName)
+mountInfoFilePath := filepath.Join(kataDirectVolumeRootPath, b64.URLEncoding.EncodeToString([]byte(volumePath)), mountInfoFileName)
if _, err := os.Stat(mountInfoFilePath); err != nil {
return nil, err
}
@@ -84,16 +102,17 @@ func VolumeMountInfo(volumePath string) (*MountInfo, error) {
// RecordSandboxId associates a sandbox id with a direct volume.
func RecordSandboxId(sandboxId string, volumePath string) error {
-mountInfoFilePath := filepath.Join(kataDirectVolumeRootPath, volumePath, mountInfoFileName)
+encodedPath := b64.URLEncoding.EncodeToString([]byte(volumePath))
+mountInfoFilePath := filepath.Join(kataDirectVolumeRootPath, encodedPath, mountInfoFileName)
if _, err := os.Stat(mountInfoFilePath); err != nil {
return err
}
-return ioutil.WriteFile(filepath.Join(kataDirectVolumeRootPath, volumePath, sandboxId), []byte(""), 0600)
+return ioutil.WriteFile(filepath.Join(kataDirectVolumeRootPath, encodedPath, sandboxId), []byte(""), 0600)
}
func GetSandboxIdForVolume(volumePath string) (string, error) {
-files, err := ioutil.ReadDir(filepath.Join(kataDirectVolumeRootPath, volumePath))
+files, err := ioutil.ReadDir(filepath.Join(kataDirectVolumeRootPath, b64.URLEncoding.EncodeToString([]byte(volumePath))))
if err != nil {
return "", err
}
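Why every lookup now encodes the path: a raw volumePath such as "/a/b/c" contains separators, so joining it under the root either nests directories (forcing the old strings.SplitN base-path hack in Remove) or risks colliding with sibling paths. URL-safe base64 flattens the whole path into one reversible, filesystem-safe directory name:

package main

import (
	b64 "encoding/base64"
	"fmt"
)

func main() {
	volumePath := "/a/b/c"

	// One flat directory name per volume path, with no "/" inside it.
	enc := b64.URLEncoding.EncodeToString([]byte(volumePath))
	fmt.Println(enc) // L2EvYi9j

	// The encoding is reversible, so the original path can be recovered.
	dec, _ := b64.URLEncoding.DecodeString(enc)
	fmt.Println(string(dec)) // /a/b/c
}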


@@ -6,6 +6,7 @@
package volume
import (
b64 "encoding/base64"
"encoding/json"
"errors"
"os"
@@ -18,16 +19,17 @@ import (
func TestAdd(t *testing.T) {
var err error
-kataDirectVolumeRootPath, err = os.MkdirTemp(os.TempDir(), "add-test")
-assert.Nil(t, err)
-defer os.RemoveAll(kataDirectVolumeRootPath)
+kataDirectVolumeRootPath = t.TempDir()
var volumePath = "/a/b/c"
-var basePath = "a"
actual := MountInfo{
VolumeType: "block",
Device: "/dev/sda",
FsType: "ext4",
-Options: []string{"journal_dev", "noload"},
+Metadata: map[string]string{
+FSGroupMetadataKey: "3000",
+FSGroupChangePolicyMetadataKey: string(FSGroupChangeOnRootMismatch),
+},
+Options: []string{"journal_dev", "noload"},
}
buf, err := json.Marshal(actual)
assert.Nil(t, err)
@@ -41,22 +43,22 @@ func TestAdd(t *testing.T) {
assert.Equal(t, expected.Device, actual.Device)
assert.Equal(t, expected.FsType, actual.FsType)
assert.Equal(t, expected.Options, actual.Options)
+assert.Equal(t, expected.Metadata, actual.Metadata)
+_, err = os.Stat(filepath.Join(kataDirectVolumeRootPath, b64.URLEncoding.EncodeToString([]byte(volumePath))))
assert.Nil(t, err)
// Remove the file
err = Remove(volumePath)
assert.Nil(t, err)
-_, err = os.Stat(filepath.Join(kataDirectVolumeRootPath, basePath))
+_, err = os.Stat(filepath.Join(kataDirectVolumeRootPath, b64.URLEncoding.EncodeToString([]byte(volumePath))))
assert.True(t, errors.Is(err, os.ErrNotExist))
+// Test invalid mount info json
+assert.Error(t, Add(volumePath, "{invalid json}"))
+_, err = os.Stat(filepath.Join(kataDirectVolumeRootPath))
+assert.Nil(t, err)
}
func TestRecordSandboxId(t *testing.T) {
var err error
-kataDirectVolumeRootPath, err = os.MkdirTemp(os.TempDir(), "recordSanboxId-test")
-assert.Nil(t, err)
-defer os.RemoveAll(kataDirectVolumeRootPath)
+kataDirectVolumeRootPath = t.TempDir()
var volumePath = "/a/b/c"
mntInfo := MountInfo{
@@ -82,9 +84,7 @@ func TestRecordSandboxId(t *testing.T) {
func TestRecordSandboxIdNoMountInfoFile(t *testing.T) {
var err error
-kataDirectVolumeRootPath, err = os.MkdirTemp(os.TempDir(), "recordSanboxId-test")
-assert.Nil(t, err)
-defer os.RemoveAll(kataDirectVolumeRootPath)
+kataDirectVolumeRootPath = t.TempDir()
var volumePath = "/a/b/c"
sandboxId := uuid.Generate().String()


@@ -15,6 +15,7 @@ import (
"sync"
"time"
+containerdshim "github.com/kata-containers/kata-containers/src/runtime/pkg/containerd-shim-v2"
mutils "github.com/kata-containers/kata-containers/src/runtime/pkg/utils"
"github.com/kata-containers/kata-containers/src/runtime/pkg/utils/shimclient"
"github.com/prometheus/client_golang/prometheus"
@@ -78,6 +79,21 @@ func (km *KataMonitor) ProcessMetricsRequest(w http.ResponseWriter, r *http.Requ
scrapeDurationsHistogram.Observe(float64(time.Since(start).Nanoseconds() / int64(time.Millisecond)))
}()
+// this is likely the same as `kata-runtime metrics <SANDBOX>`.
+sandboxID, err := getSandboxIDFromReq(r)
+if err == nil && sandboxID != "" {
+metrics, err := GetSandboxMetrics(sandboxID)
+if err != nil {
+w.WriteHeader(http.StatusInternalServerError)
+w.Write([]byte(err.Error()))
+return
+}
+w.Write([]byte(metrics))
+return
+}
+// if no sandbox is provided, get metrics for all sandboxes.
// prepare writer for writing response.
contentType := expfmt.Negotiate(r.Header)
@@ -224,7 +240,7 @@ func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error {
}
func getParsedMetrics(sandboxID string, sandboxMetadata sandboxCRIMetadata) ([]*dto.MetricFamily, error) {
-body, err := shimclient.DoGet(sandboxID, defaultTimeout, "metrics")
+body, err := shimclient.DoGet(sandboxID, defaultTimeout, containerdshim.MetricsUrl)
if err != nil {
return nil, err
}
@@ -234,7 +250,7 @@ func getParsedMetrics(sandboxID string, sandboxMetadata sandboxCRIMetadata) ([]*
// GetSandboxMetrics will get sandbox's metrics from shim
func GetSandboxMetrics(sandboxID string) (string, error) {
-body, err := shimclient.DoGet(sandboxID, defaultTimeout, "metrics")
+body, err := shimclient.DoGet(sandboxID, defaultTimeout, containerdshim.MetricsUrl)
if err != nil {
return "", err
}
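For completeness, a sketch of querying the monitor end to end. Both the listen address and the query parameter name are assumptions for illustration; the parameter consumed by getSandboxIDFromReq is not shown in this diff:

package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Assumed defaults: kata-monitor on 127.0.0.1:8090, and "sandbox" as
	// the query parameter read by getSandboxIDFromReq.
	sandboxID := "<sandbox-id>" // placeholder

	// With a sandbox ID, metrics are fetched from that sandbox's shim and
	// returned directly; without it, all sandboxes are aggregated.
	resp, err := http.Get("http://127.0.0.1:8090/metrics?sandbox=" + sandboxID)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	body, _ := io.ReadAll(resp.Body)
	fmt.Printf("%s", body)
}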

Some files were not shown because too many files have changed in this diff.